From b3fe628a024a5fbab86e146d7fe9e30d1b34f61c Mon Sep 17 00:00:00 2001 From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com> Date: Thu, 16 Apr 2026 11:17:21 +0200 Subject: [PATCH] ted structural search --- docs/WAVE2_TED_STRUCTURED_SEARCH.md | 175 +++++++++++++++ .../search/TedStructuredSearchRepository.java | 211 ++++++++++++++++++ .../service/TedStructuredSearchService.java | 43 ++++ .../web/TedStructuredSearchController.java | 91 ++++++++ .../procon/ted/model/entity/Organization.java | 3 +- .../ted/model/entity/ProcurementDocument.java | 3 +- .../ted/model/entity/ProcurementLot.java | 3 +- src/main/resources/application-new.yml | 2 +- ...NotDependOnTedProcessorPropertiesTest.java | 3 + ...ructuredSearchEndpointIntegrationTest.java | 133 +++++++++++ ...StructuredSearchParityIntegrationTest.java | 158 +++++++++++++ ...actTedStructuredSearchIntegrationTest.java | 82 +++++++ .../TedStructuredSearchTestApplication.java | 54 +++++ 13 files changed, 957 insertions(+), 4 deletions(-) create mode 100644 docs/WAVE2_TED_STRUCTURED_SEARCH.md create mode 100644 src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java create mode 100644 src/main/java/at/procon/dip/domain/ted/service/TedStructuredSearchService.java create mode 100644 src/main/java/at/procon/dip/domain/ted/web/TedStructuredSearchController.java create mode 100644 src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchEndpointIntegrationTest.java create mode 100644 src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchParityIntegrationTest.java create mode 100644 src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java create mode 100644 src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java diff --git a/docs/WAVE2_TED_STRUCTURED_SEARCH.md b/docs/WAVE2_TED_STRUCTURED_SEARCH.md new file mode 100644 index 0000000..9826a51 --- /dev/null +++ b/docs/WAVE2_TED_STRUCTURED_SEARCH.md 
@@ -0,0 +1,175 @@ +# Wave 2 — NEW TED Structured Search + +## Purpose + +Wave 2 adds a NEW-runtime TED search endpoint that keeps the legacy request and response shape of `/v1/documents/search`, but executes the search against `TED.ted_notice_projection` instead of the legacy search path. + +The goal is twofold: + +1. provide NEW-runtime structured TED search functionality +2. make cutover measurable through parity checks against the legacy search implementation + +## Runtime scope + +This functionality is active only in `RuntimeMode.NEW`. + +Controller: +- `at.procon.dip.domain.ted.web.TedStructuredSearchController` + +Service: +- `at.procon.dip.domain.ted.service.TedStructuredSearchService` + +Repository: +- `at.procon.dip.domain.ted.search.TedStructuredSearchRepository` + +## Endpoint + +### GET +`GET /v1/documents/search` + +### POST +`POST /v1/documents/search` + +The POST body uses the existing legacy-compatible DTO: +- `at.procon.ted.model.dto.DocumentDtos.SearchRequest` + +The response uses: +- `at.procon.ted.model.dto.DocumentDtos.SearchResponse` + +## Implemented structured filters + +The Wave 2 implementation supports these filters: + +- `countryCode` +- `countryCodes` +- `noticeType` +- `contractNature` +- `procedureType` +- `cpvPrefix` +- `cpvCodes` +- `nutsCode` +- `nutsCodes` +- `publicationDateFrom` +- `publicationDateTo` +- `submissionDeadlineAfter` +- `euFunded` +- `buyerNameContains` +- `projectTitleContains` + +## Sorting and pagination + +Supported sorting: + +- `publicationDate` +- `submissionDeadline` +- `buyerName` +- `projectTitle` + +Supported directions: + +- `asc` +- `desc` + +Pagination behavior: + +- page defaults to `0` +- size defaults to `DipSearchProperties.defaultPageSize` +- size is capped by `DipSearchProperties.maxPageSize` + +## Data source + +The endpoint reads from: +- `TED.ted_notice_projection` + +This means the quality and completeness of the search results depend on Wave 1 migration and projection backfill completeness. 
+ +## Functional behavior + +The Wave 2 implementation is intentionally **structured-search-first**. + +Although the request DTO still contains: +- `semanticQuery` +- `similarityThreshold` + +these fields are currently accepted only for request compatibility and future extension. The current repository implementation does **not** apply semantic ranking or semantic filtering. + +That is deliberate for Wave 2, because the main objective is: +- structured search on the NEW model +- parity verification against legacy behavior for common structured filters + +## Parity strategy + +Wave 2 adds parity-focused tests that compare NEW structured search behavior against the legacy TED search for a common subset of structured filters. + +Recommended parity focus: + +- country filters +- notice type +- procedure type +- publication date range +- EU-funded filter +- deterministic sort order + +Parity should be evaluated on: + +- total result count +- ordered publication ids / notice ids for stable cases +- key metadata fields in `DocumentSummary` + +## Current limitations + +1. No semantic scoring is applied in the NEW structured TED search path yet. +2. No TED facets/aggregations are included yet. +3. Search is projection-based, so missing or stale `ted_notice_projection` rows can cause parity differences. +4. The Wave 2 scope is TED-specific structured retrieval, not the full generic hybrid search fusion pipeline. 
+ +## Example GET request + +```http +GET /v1/documents/search?countryCode=AT&noticeType=CN_STANDARD&publicationDateFrom=2025-01-01&publicationDateTo=2025-12-31&page=0&size=20&sortBy=publicationDate&sortDirection=desc +``` + +## Example POST request + +```json +{ + "countryCodes": ["AT", "DE"], + "noticeType": "CN_STANDARD", + "contractNature": "SERVICES", + "procedureType": "OPEN", + "cpvPrefix": "79000000", + "cpvCodes": ["79341000"], + "nutsCodes": ["AT130", "DE300"], + "publicationDateFrom": "2025-01-01", + "publicationDateTo": "2025-12-31", + "submissionDeadlineAfter": "2025-06-01T00:00:00Z", + "euFunded": true, + "buyerNameContains": "city", + "projectTitleContains": "digital", + "semanticQuery": "framework agreement for digital transformation services", + "similarityThreshold": 0.7, + "page": 0, + "size": 20, + "sortBy": "publicationDate", + "sortDirection": "desc" +} +``` + +## Postman collection + +Use the companion file: +- `WAVE2_TED_STRUCTURED_SEARCH.postman_collection.json` + +It contains: +- basic GET search +- CPV/NUTS/buyer GET example +- full POST structured request +- a parity-oriented GET request for manual comparison against legacy search + +## Recommended next step after Wave 2 validation + +After parity is accepted, the next logical enhancement is: + +1. add TED facets and richer structural filters +2. merge structured TED narrowing with lexical/semantic ranking +3. 
expose a documented parity validation checklist for cutover approval diff --git a/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java b/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java new file mode 100644 index 0000000..2575901 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java @@ -0,0 +1,211 @@ +package at.procon.dip.domain.ted.search; + +import at.procon.ted.model.dto.DocumentDtos.DocumentSummary; +import at.procon.ted.model.dto.DocumentDtos.SearchRequest; +import at.procon.ted.model.entity.ContractNature; +import at.procon.ted.model.entity.NoticeType; +import at.procon.ted.model.entity.ProcedureType; +import java.sql.Array; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; +import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; +import org.springframework.stereotype.Repository; +import org.springframework.util.CollectionUtils; + +@Repository +@RequiredArgsConstructor +public class TedStructuredSearchRepository { + + private final NamedParameterJdbcTemplate jdbcTemplate; + + public List<DocumentSummary> search(SearchRequest request, int page, int size) { /* returns one LIMIT/OFFSET page of ted_notice_projection rows matching the request filters, mapped to DocumentSummary */ + StringBuilder sql = new StringBuilder(""" + SELECT + COALESCE(p.legacy_procurement_document_id, p.document_id) AS id, + p.publication_id, + p.notice_id, + CAST(p.notice_type AS text) AS notice_type, + p.project_title, + p.buyer_name, + p.buyer_country_code, + p.buyer_city, + CAST(p.contract_nature AS text) AS contract_nature, + CAST(p.procedure_type AS text) AS procedure_type, + p.publication_date, + p.submission_deadline, + p.cpv_codes, + p.total_lots, + p.estimated_value, + p.estimated_value_currency + FROM ted.ted_notice_projection p + WHERE 1=1 + """); + + MapSqlParameterSource 
params = new MapSqlParameterSource(); + appendFilters(sql, params, request); + sql.append(" ORDER BY ").append(resolveSortColumn(request.getSortBy())).append(' ') + .append(resolveSortDirection(request.getSortDirection())) + .append(", p.publication_date DESC NULLS LAST, p.publication_id DESC NULLS LAST, p.document_id ASC"); + sql.append(" LIMIT :limit OFFSET :offset"); + params.addValue("limit", size); + params.addValue("offset", (long) page * size); /* widen to long before multiplying: page has no upper bound, so an int product could overflow into a negative OFFSET */ + + return jdbcTemplate.query(sql.toString(), params, new DocumentSummaryRowMapper()); + } + + public long count(SearchRequest request) { /* total number of projection rows matching the same filters as search(), ignoring paging */ + StringBuilder sql = new StringBuilder(""" + SELECT COUNT(*) + FROM ted.ted_notice_projection p + WHERE 1=1 + """); + MapSqlParameterSource params = new MapSqlParameterSource(); + appendFilters(sql, params, request); + Long value = jdbcTemplate.queryForObject(sql.toString(), params, Long.class); + return value == null ? 0L : value; + } + + private void appendFilters(StringBuilder sql, MapSqlParameterSource params, SearchRequest request) { /* appends one AND-ed predicate plus its named parameter for every filter that is set on the request */ + if (hasText(request.getCountryCode())) { + sql.append(" AND p.buyer_country_code = :countryCode"); + params.addValue("countryCode", request.getCountryCode()); + } + if (!CollectionUtils.isEmpty(request.getCountryCodes())) { + sql.append(" AND p.buyer_country_code IN (:countryCodes)"); + params.addValue("countryCodes", request.getCountryCodes()); + } + if (request.getNoticeType() != null) { + sql.append(" AND CAST(p.notice_type AS text) = :noticeType"); + params.addValue("noticeType", request.getNoticeType().name()); + } + if (request.getContractNature() != null) { + sql.append(" AND CAST(p.contract_nature AS text) = :contractNature"); + params.addValue("contractNature", request.getContractNature().name()); + } + if (request.getProcedureType() != null) { + sql.append(" AND CAST(p.procedure_type AS text) = :procedureType"); + params.addValue("procedureType", request.getProcedureType().name()); + } + if (hasText(request.getCpvPrefix())) { + sql.append(" 
AND EXISTS (SELECT 1 FROM unnest(p.cpv_codes) code WHERE code LIKE :cpvPrefixLike)"); + params.addValue("cpvPrefixLike", request.getCpvPrefix() + "%"); + } + if (!CollectionUtils.isEmpty(request.getCpvCodes())) { + sql.append(" AND EXISTS (SELECT 1 FROM unnest(p.cpv_codes) code WHERE code IN (:cpvCodes))"); + params.addValue("cpvCodes", request.getCpvCodes()); + } + if (hasText(request.getNutsCode())) { + sql.append(" AND (p.buyer_nuts_code = :nutsCode OR EXISTS (SELECT 1 FROM unnest(p.nuts_codes) code WHERE code = :nutsCode))"); + params.addValue("nutsCode", request.getNutsCode()); + } + if (!CollectionUtils.isEmpty(request.getNutsCodes())) { + sql.append(" AND (p.buyer_nuts_code IN (:nutsCodes) OR EXISTS (SELECT 1 FROM unnest(p.nuts_codes) code WHERE code IN (:nutsCodes)))"); + params.addValue("nutsCodes", request.getNutsCodes()); + } + if (request.getPublicationDateFrom() != null) { + sql.append(" AND p.publication_date >= :publicationDateFrom"); + params.addValue("publicationDateFrom", request.getPublicationDateFrom()); + } + if (request.getPublicationDateTo() != null) { + sql.append(" AND p.publication_date <= :publicationDateTo"); + params.addValue("publicationDateTo", request.getPublicationDateTo()); + } + if (request.getSubmissionDeadlineAfter() != null) { + sql.append(" AND p.submission_deadline > :submissionDeadlineAfter"); + params.addValue("submissionDeadlineAfter", request.getSubmissionDeadlineAfter()); + } + if (request.getEuFunded() != null) { + sql.append(" AND p.eu_funded = :euFunded"); + params.addValue("euFunded", request.getEuFunded()); + } + if (hasText(request.getBuyerNameContains())) { + sql.append(" AND LOWER(COALESCE(p.buyer_name, '')) LIKE :buyerNameContains"); + params.addValue("buyerNameContains", like(request.getBuyerNameContains())); + } + if (hasText(request.getProjectTitleContains())) { + sql.append(" AND LOWER(COALESCE(p.project_title, '')) LIKE :projectTitleContains"); + params.addValue("projectTitleContains", 
like(request.getProjectTitleContains())); + } + } + + private String resolveSortColumn(String sortBy) { /* maps the public sortBy name onto a fixed column; anything unknown falls back to publication_date, so no request text reaches the SQL */ + if (sortBy == null || sortBy.isBlank()) { + return "p.publication_date"; + } + return switch (sortBy) { + case "submissionDeadline" -> "p.submission_deadline"; + case "buyerName" -> "p.buyer_name"; + case "projectTitle" -> "p.project_title"; + case "publicationDate" -> "p.publication_date"; + default -> "p.publication_date"; + }; + } + + private String resolveSortDirection(String direction) { + return "asc".equalsIgnoreCase(direction) ? "ASC" : "DESC"; + } + + private boolean hasText(String value) { + return value != null && !value.isBlank(); + } + + private String like(String value) { /* builds the lower-cased %value% pattern compared against LOWER(column) */ + return "%" + value.toLowerCase(java.util.Locale.ROOT) + "%"; /* Locale.ROOT keeps the lower-casing independent of the JVM default locale */ + } + + private static class DocumentSummaryRowMapper implements RowMapper<DocumentSummary> { + @Override + public DocumentSummary mapRow(ResultSet rs, int rowNum) throws SQLException { + return DocumentSummary.builder() + .id(rs.getObject("id", java.util.UUID.class)) + .publicationId(rs.getString("publication_id")) + .noticeId(rs.getString("notice_id")) + .noticeType(parseNoticeType(rs.getString("notice_type"))) + .projectTitle(rs.getString("project_title")) + .buyerName(rs.getString("buyer_name")) + .buyerCountryCode(rs.getString("buyer_country_code")) + .buyerCity(rs.getString("buyer_city")) + .contractNature(parseContractNature(rs.getString("contract_nature"))) + .procedureType(parseProcedureType(rs.getString("procedure_type"))) + .publicationDate(rs.getObject("publication_date", java.time.LocalDate.class)) + .submissionDeadline(rs.getObject("submission_deadline", java.time.OffsetDateTime.class)) + .cpvCodes(readArray(rs, "cpv_codes")) + .totalLots((Integer) rs.getObject("total_lots")) + .estimatedValue(rs.getBigDecimal("estimated_value")) + .estimatedValueCurrency(rs.getString("estimated_value_currency")) + .build(); + } + + private static List<String> readArray(ResultSet rs, String column) throws SQLException { + Array array = rs.getArray(column); + if 
(array == null) { + return List.of(); + } + Object value = array.getArray(); + if (value instanceof String[] strings) { + return Arrays.asList(strings); + } + if (value instanceof Object[] objects) { + return Arrays.stream(objects).map(String::valueOf).toList(); + } + return List.of(); + } + + private static NoticeType parseNoticeType(String value) { + return value == null ? null : NoticeType.valueOf(value); + } + + private static ContractNature parseContractNature(String value) { + return value == null ? null : ContractNature.valueOf(value); + } + + private static ProcedureType parseProcedureType(String value) { + return value == null ? null : ProcedureType.valueOf(value); + } + } +} diff --git a/src/main/java/at/procon/dip/domain/ted/service/TedStructuredSearchService.java b/src/main/java/at/procon/dip/domain/ted/service/TedStructuredSearchService.java new file mode 100644 index 0000000..bf469c1 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/ted/service/TedStructuredSearchService.java @@ -0,0 +1,43 @@ +package at.procon.dip.domain.ted.service; + +import at.procon.dip.domain.ted.search.TedStructuredSearchRepository; +import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; +import at.procon.dip.runtime.config.RuntimeMode; +import at.procon.dip.search.config.DipSearchProperties; +import at.procon.ted.model.dto.DocumentDtos.SearchRequest; +import at.procon.ted.model.dto.DocumentDtos.SearchResponse; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +@Service +@ConditionalOnRuntimeMode(RuntimeMode.NEW) +@RequiredArgsConstructor +@Transactional(readOnly = true) +public class TedStructuredSearchService { + + private final TedStructuredSearchRepository repository; + private final DipSearchProperties searchProperties; + + public SearchResponse search(SearchRequest request) { + int page = request.getPage() != null ? 
Math.max(request.getPage(), 0) : 0; + int size = Math.min( + request.getSize() != null ? Math.max(request.getSize(), 1) : searchProperties.getDefaultPageSize(), + searchProperties.getMaxPageSize() + ); + + var documents = repository.search(request, page, size); + long totalElements = repository.count(request); + int totalPages = totalElements == 0 ? 0 : (int) Math.ceil((double) totalElements / size); + + return SearchResponse.builder() + .documents(documents) + .page(page) + .size(size) + .totalElements(totalElements) + .totalPages(totalPages) + .hasNext(page < totalPages - 1) + .hasPrevious(page > 0) + .build(); + } +} diff --git a/src/main/java/at/procon/dip/domain/ted/web/TedStructuredSearchController.java b/src/main/java/at/procon/dip/domain/ted/web/TedStructuredSearchController.java new file mode 100644 index 0000000..495f4bd --- /dev/null +++ b/src/main/java/at/procon/dip/domain/ted/web/TedStructuredSearchController.java @@ -0,0 +1,91 @@ +package at.procon.dip.domain.ted.web; + +import at.procon.dip.domain.ted.service.TedStructuredSearchService; +import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; +import at.procon.dip.runtime.config.RuntimeMode; +import at.procon.ted.model.dto.DocumentDtos.SearchRequest; +import at.procon.ted.model.dto.DocumentDtos.SearchResponse; +import at.procon.ted.model.entity.ContractNature; +import at.procon.ted.model.entity.NoticeType; +import at.procon.ted.model.entity.ProcedureType; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.format.annotation.DateTimeFormat; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestBody; +import org.springframework.web.bind.annotation.RequestMapping; +import 
org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/v1/documents") +@RequiredArgsConstructor +@Slf4j +@ConditionalOnRuntimeMode(RuntimeMode.NEW) +public class TedStructuredSearchController { + + private final TedStructuredSearchService searchService; + + @GetMapping("/search") + public ResponseEntity<SearchResponse> searchDocuments( + @RequestParam(required = false) String countryCode, + @RequestParam(required = false) List<String> countryCodes, + @RequestParam(required = false) NoticeType noticeType, + @RequestParam(required = false) ContractNature contractNature, + @RequestParam(required = false) ProcedureType procedureType, + @RequestParam(required = false) String cpvPrefix, + @RequestParam(required = false) List<String> cpvCodes, + @RequestParam(required = false) String nutsCode, + @RequestParam(required = false) List<String> nutsCodes, + @RequestParam(required = false) @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate publicationDateFrom, + @RequestParam(required = false) @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate publicationDateTo, + @RequestParam(required = false) @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME) OffsetDateTime submissionDeadlineAfter, + @RequestParam(required = false) Boolean euFunded, + @RequestParam(required = false) String buyerNameContains, + @RequestParam(required = false) String projectTitleContains, + @RequestParam(required = false) String q, + @RequestParam(required = false, defaultValue = "0.7") Double similarityThreshold, + @RequestParam(required = false, defaultValue = "0") Integer page, + @RequestParam(required = false, defaultValue = "20") Integer size, + @RequestParam(required = false, defaultValue = "publicationDate") String sortBy, + @RequestParam(required = false, defaultValue = "desc") String sortDirection) { /* GET variant: copies the query parameters into a SearchRequest (q becomes semanticQuery) and delegates to the search service */ + + SearchRequest request = SearchRequest.builder() + .countryCode(countryCode) + .countryCodes(countryCodes) + 
.noticeType(noticeType) + .contractNature(contractNature) + .procedureType(procedureType) + .cpvPrefix(cpvPrefix) + .cpvCodes(cpvCodes) + .nutsCode(nutsCode) + .nutsCodes(nutsCodes) + .publicationDateFrom(publicationDateFrom) + .publicationDateTo(publicationDateTo) + .submissionDeadlineAfter(submissionDeadlineAfter) + .euFunded(euFunded) + .buyerNameContains(buyerNameContains) + .projectTitleContains(projectTitleContains) + .semanticQuery(q) + .similarityThreshold(similarityThreshold) + .page(page) + .size(size) + .sortBy(sortBy) + .sortDirection(sortDirection) + .build(); + + log.debug("NEW runtime TED structured search request: {}", request); + return ResponseEntity.ok(searchService.search(request)); + } + + @PostMapping("/search") + public ResponseEntity<SearchResponse> searchDocumentsPost(@RequestBody SearchRequest request) { /* POST variant: takes the SearchRequest from the request body and delegates to the same service call as GET */ + log.debug("NEW runtime TED structured search request (POST): {}", request); + return ResponseEntity.ok(searchService.search(request)); + } +} diff --git a/src/main/java/at/procon/ted/model/entity/Organization.java b/src/main/java/at/procon/ted/model/entity/Organization.java index dd175ba..151759f 100644 --- a/src/main/java/at/procon/ted/model/entity/Organization.java +++ b/src/main/java/at/procon/ted/model/entity/Organization.java @@ -1,5 +1,6 @@ package at.procon.ted.model.entity; +import at.procon.dip.architecture.SchemaNames; import jakarta.persistence.*; import lombok.*; @@ -13,7 +14,7 @@ import java.util.UUID; * @author Martin.Schweitzer@procon.co.at and claude.ai */ @Entity -@Table(name = "organization", indexes = { +@Table(schema = SchemaNames.TED, name = "organization", indexes = { @Index(name = "idx_org_document", columnList = "document_id"), @Index(name = "idx_org_country", columnList = "country_code") }, uniqueConstraints = { diff --git a/src/main/java/at/procon/ted/model/entity/ProcurementDocument.java b/src/main/java/at/procon/ted/model/entity/ProcurementDocument.java index f26539d..6069f8a 100644 --- 
a/src/main/java/at/procon/ted/model/entity/ProcurementDocument.java +++ b/src/main/java/at/procon/ted/model/entity/ProcurementDocument.java @@ -1,5 +1,6 @@ package at.procon.ted.model.entity; +import at.procon.dip.architecture.SchemaNames; import jakarta.persistence.*; import lombok.*; import org.hibernate.annotations.JdbcTypeCode; @@ -22,7 +23,7 @@ import java.util.UUID; * @author Martin.Schweitzer@procon.co.at and claude.ai */ @Entity -@Table(name = "procurement_document", indexes = { +@Table(schema = SchemaNames.TED, name = "procurement_document", indexes = { @Index(name = "idx_doc_hash", columnList = "documentHash"), @Index(name = "idx_doc_publication_id", columnList = "publicationId"), @Index(name = "idx_doc_buyer_country", columnList = "buyerCountryCode"), diff --git a/src/main/java/at/procon/ted/model/entity/ProcurementLot.java b/src/main/java/at/procon/ted/model/entity/ProcurementLot.java index b6ff6ae..c139033 100644 --- a/src/main/java/at/procon/ted/model/entity/ProcurementLot.java +++ b/src/main/java/at/procon/ted/model/entity/ProcurementLot.java @@ -1,5 +1,6 @@ package at.procon.ted.model.entity; +import at.procon.dip.architecture.SchemaNames; import jakarta.persistence.*; import lombok.*; import org.hibernate.annotations.JdbcTypeCode; @@ -16,7 +17,7 @@ import java.util.UUID; * @author Martin.Schweitzer@procon.co.at and claude.ai */ @Entity -@Table(name = "procurement_lot", indexes = { +@Table(schema = SchemaNames.TED, name = "procurement_lot", indexes = { @Index(name = "idx_lot_document", columnList = "document_id") }, uniqueConstraints = { @UniqueConstraint(columnNames = {"document_id", "lot_id"}) diff --git a/src/main/resources/application-new.yml b/src/main/resources/application-new.yml index bb50a29..bb1ea12 100644 --- a/src/main/resources/application-new.yml +++ b/src/main/resources/application-new.yml @@ -294,4 +294,4 @@ dip: batch-size: 500 max-documents-per-run: 0 skip-when-primary-representation-missing: true - queue-missing-embeddings: false 
\ No newline at end of file + queue-missing-embeddings: true \ No newline at end of file diff --git a/src/test/java/at/procon/dip/architecture/NewRuntimeMustNotDependOnTedProcessorPropertiesTest.java b/src/test/java/at/procon/dip/architecture/NewRuntimeMustNotDependOnTedProcessorPropertiesTest.java index 98e6577..9414e54 100644 --- a/src/test/java/at/procon/dip/architecture/NewRuntimeMustNotDependOnTedProcessorPropertiesTest.java +++ b/src/test/java/at/procon/dip/architecture/NewRuntimeMustNotDependOnTedProcessorPropertiesTest.java @@ -28,6 +28,9 @@ class NewRuntimeMustNotDependOnTedProcessorPropertiesTest { at.procon.dip.ingestion.service.TedPackageChildImportProcessor.class, at.procon.dip.domain.ted.service.TedNoticeProjectionService.class, at.procon.dip.domain.ted.startup.TedProjectionStartupRunner.class, + at.procon.dip.domain.ted.search.TedStructuredSearchRepository.class, + at.procon.dip.domain.ted.service.TedStructuredSearchService.class, + at.procon.dip.domain.ted.web.TedStructuredSearchController.class, at.procon.dip.search.engine.fulltext.PostgresFullTextSearchEngine.class, at.procon.dip.search.engine.trigram.PostgresTrigramSearchEngine.class, at.procon.dip.search.engine.semantic.PgVectorSemanticSearchEngine.class, diff --git a/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchEndpointIntegrationTest.java b/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchEndpointIntegrationTest.java new file mode 100644 index 0000000..c1b238c --- /dev/null +++ b/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchEndpointIntegrationTest.java @@ -0,0 +1,133 @@ +package at.procon.dip.domain.ted.search.integration; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentStatus; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.entity.Document; +import 
at.procon.dip.domain.ted.entity.TedNoticeProjection; +import at.procon.dip.testsupport.AbstractTedStructuredSearchIntegrationTest; +import at.procon.ted.model.entity.ContractNature; +import at.procon.ted.model.entity.NoticeType; +import at.procon.ted.model.entity.ProcedureType; +import com.fasterxml.jackson.databind.ObjectMapper; +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.UUID; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.MediaType; +import org.springframework.test.web.servlet.MockMvc; + +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get; +import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath; +import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; + +class TedStructuredSearchEndpointIntegrationTest extends AbstractTedStructuredSearchIntegrationTest { + + @Autowired + private MockMvc mockMvc; + + @Autowired + private ObjectMapper objectMapper; + + @Test + void getSearch_should_filter_and_sort_ted_projection_results() throws Exception { + createProjection(UUID.randomUUID(), "00786665-2025", "AUT", NoticeType.CONTRACT_NOTICE, + ContractNature.SUPPLIES, ProcedureType.OPEN, "City of Vienna", "Medical gloves framework", + LocalDate.of(2025, 1, 15), OffsetDateTime.parse("2025-02-15T12:00:00Z"), new String[]{"33140000"}, new String[]{"AT130"}, true); + createProjection(UUID.randomUUID(), "00786666-2025", "DEU", NoticeType.CONTRACT_NOTICE, + ContractNature.SERVICES, ProcedureType.RESTRICTED, "Berlin Utilities", "Heating maintenance", + LocalDate.of(2025, 1, 10), OffsetDateTime.parse("2025-02-10T12:00:00Z"), new String[]{"50720000"}, new String[]{"DE300"}, false); + + mockMvc.perform(get("/v1/documents/search") + 
.param("countryCode", "AUT") + .param("noticeType", "CONTRACT_NOTICE") + .param("buyerNameContains", "vienna") + .param("sortBy", "publicationDate") + .param("sortDirection", "desc")) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.documents.length()").value(1)) + .andExpect(jsonPath("$.documents[0].publicationId").value("00786665-2025")) + .andExpect(jsonPath("$.documents[0].buyerName").value("City of Vienna")); + } + + @Test + void postSearch_should_support_cpv_and_nuts_filters() throws Exception { + createProjection(UUID.randomUUID(), "00786665-2025", "AUT", NoticeType.CONTRACT_NOTICE, + ContractNature.SUPPLIES, ProcedureType.OPEN, "City of Vienna", "Medical gloves framework", + LocalDate.of(2025, 1, 15), OffsetDateTime.parse("2025-02-15T12:00:00Z"), new String[]{"33140000", "33141000"}, new String[]{"AT130"}, true); + createProjection(UUID.randomUUID(), "00786666-2025", "AUT", NoticeType.CONTRACT_NOTICE, + ContractNature.SUPPLIES, ProcedureType.OPEN, "City of Graz", "Office supplies", + LocalDate.of(2025, 1, 16), OffsetDateTime.parse("2025-02-16T12:00:00Z"), new String[]{"30192000"}, new String[]{"AT221"}, true); + + String body = """ + { + "cpvPrefix": "3314", + "nutsCode": "AT130", + "page": 0, + "size": 10 + } + """; + + mockMvc.perform(post("/v1/documents/search") + .contentType(MediaType.APPLICATION_JSON) + .content(body)) + .andExpect(status().isOk()) + .andExpect(jsonPath("$.documents.length()").value(1)) + .andExpect(jsonPath("$.documents[0].publicationId").value("00786665-2025")); + } + + private void createProjection(UUID legacyId, + String publicationId, + String countryCode, + NoticeType noticeType, + ContractNature contractNature, + ProcedureType procedureType, + String buyerName, + String projectTitle, + LocalDate publicationDate, + OffsetDateTime submissionDeadline, + String[] cpvCodes, + String[] nutsCodes, + boolean euFunded) { + Document document = documentRepository.save(Document.builder() + .visibility(DocumentVisibility.PUBLIC) + 
.documentType(DocumentType.TED_NOTICE) + .documentFamily(DocumentFamily.PROCUREMENT) + .status(DocumentStatus.RECEIVED) + .title(projectTitle) + .summary(projectTitle) + .languageCode("en") + .mimeType("application/xml") + .businessKey(publicationId) + .dedupHash(publicationId) + .build()); + + projectionRepository.save(TedNoticeProjection.builder() + .document(document) + .legacyProcurementDocumentId(legacyId) + .publicationId(publicationId) + .noticeId("NOTICE-" + publicationId) + .noticeType(noticeType) + .contractNature(contractNature) + .procedureType(procedureType) + .buyerCountryCode(countryCode) + .buyerName(buyerName) + .buyerCity("Vienna") + .buyerNutsCode(nutsCodes != null && nutsCodes.length > 0 ? nutsCodes[0] : null) + .projectTitle(projectTitle) + .projectDescription(projectTitle + " description") + .publicationDate(publicationDate) + .submissionDeadline(submissionDeadline) + .cpvCodes(cpvCodes) + .nutsCodes(nutsCodes) + .totalLots(1) + .estimatedValue(new BigDecimal("1000.00")) + .estimatedValueCurrency("EUR") + .euFunded(euFunded) + .build()); + } +} diff --git a/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchParityIntegrationTest.java b/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchParityIntegrationTest.java new file mode 100644 index 0000000..8531308 --- /dev/null +++ b/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchParityIntegrationTest.java @@ -0,0 +1,158 @@ +package at.procon.dip.domain.ted.search.integration; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentStatus; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.entity.Document; +import 
at.procon.dip.domain.ted.entity.TedNoticeProjection; +import at.procon.dip.domain.ted.service.TedStructuredSearchService; +import at.procon.dip.testsupport.AbstractTedStructuredSearchIntegrationTest; +import at.procon.ted.config.TedProcessorProperties; +import at.procon.ted.model.dto.DocumentDtos; +import at.procon.ted.model.entity.ContractNature; +import at.procon.ted.model.entity.NoticeType; +import at.procon.ted.model.entity.ProcedureType; +import at.procon.ted.model.entity.ProcurementDocument; +import at.procon.ted.service.SearchService; +import at.procon.ted.service.VectorizationService; +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; +import org.junit.jupiter.api.Test; +import org.springframework.beans.factory.annotation.Autowired; + +/** Parity test: sends one SearchRequest through the autowired TedStructuredSearchService and through a hand-built legacy SearchService, then compares the two responses. */ class TedStructuredSearchParityIntegrationTest extends AbstractTedStructuredSearchIntegrationTest { + + @Autowired + private TedStructuredSearchService newSearchService; + + /** Seeds three notices (two AUT / SUPPLIES notices from City of Vienna and one DEU / SERVICES notice), searches for AUT + CONTRACT_NOTICE + SUPPLIES published in January 2025 with a buyer name containing vienna, and asserts that both services report the same total and the same publication ids in the same order. */ @Test + void new_structured_search_should_match_legacy_search_for_common_filters() { + createLegacyAndProjection("00786665-2025", "AUT", NoticeType.CONTRACT_NOTICE, ContractNature.SUPPLIES, + ProcedureType.OPEN, "City of Vienna", "Medical gloves framework", + LocalDate.of(2025, 1, 15), OffsetDateTime.parse("2025-02-15T12:00:00Z"), true, + new String[]{"33140000"}, new String[]{"AT130"}); + createLegacyAndProjection("00786666-2025", "AUT", NoticeType.CONTRACT_NOTICE, ContractNature.SUPPLIES, + ProcedureType.OPEN, "City of Vienna", "Office furniture framework", + LocalDate.of(2025, 1, 10), OffsetDateTime.parse("2025-02-10T12:00:00Z"), false, + new String[]{"39130000"}, new String[]{"AT130"}); + createLegacyAndProjection("00786667-2025", "DEU", NoticeType.CONTRACT_NOTICE, ContractNature.SERVICES, + ProcedureType.RESTRICTED, "Berlin Utilities", "Heating maintenance", + LocalDate.of(2025, 1, 12),
OffsetDateTime.parse("2025-02-11T12:00:00Z"), true, + new String[]{"50720000"}, new String[]{"DE300"}); + + DocumentDtos.SearchRequest request = DocumentDtos.SearchRequest.builder() + .countryCode("AUT") + .noticeType(NoticeType.CONTRACT_NOTICE) + .contractNature(ContractNature.SUPPLIES) + .publicationDateFrom(LocalDate.of(2025, 1, 1)) + .publicationDateTo(LocalDate.of(2025, 1, 31)) + .buyerNameContains("vienna") + .page(0) + .size(20) + .sortBy("publicationDate") + .sortDirection("desc") + .build(); + + /* the same request object goes through both implementations */ DocumentDtos.SearchResponse newResponse = newSearchService.search(request); + DocumentDtos.SearchResponse legacyResponse = legacySearchService().search(request); + + assertThat(newResponse.getTotalElements()).isEqualTo(legacyResponse.getTotalElements()); + /* order matters here: containsExactlyElementsOf checks the publication ids position by position */ assertThat(newResponse.getDocuments().stream().map(DocumentDtos.DocumentSummary::getPublicationId).collect(Collectors.toList())) + .containsExactlyElementsOf(legacyResponse.getDocuments().stream().map(DocumentDtos.DocumentSummary::getPublicationId).collect(Collectors.toList())); + } + + /** Builds the legacy SearchService by hand (it is not autowired), giving it the shared procurementDocumentRepository, a Mockito mock of VectorizationService whose isAvailable() returns false, and fresh TedProcessorProperties with default page size 20 and max page size 100. */ private SearchService legacySearchService() { + VectorizationService vectorizationService = mock(VectorizationService.class); + when(vectorizationService.isAvailable()).thenReturn(false); + TedProcessorProperties properties = new TedProcessorProperties(); + properties.getSearch().setDefaultPageSize(20); + properties.getSearch().setMaxPageSize(100); + return new SearchService(procurementDocumentRepository, vectorizationService, properties); + } + + /** Test fixture: stores the same notice data three times, as a legacy ProcurementDocument, as a generic Document, and as a TedNoticeProjection that references the Document and carries the id of the legacy row. */ private void createLegacyAndProjection(String publicationId, + String countryCode, + NoticeType noticeType, + ContractNature contractNature, + ProcedureType procedureType, + String buyerName, + String projectTitle, + LocalDate publicationDate, + OffsetDateTime submissionDeadline, + boolean euFunded, + String[] cpvCodes, + String[] nutsCodes) { + ProcurementDocument legacy = procurementDocumentRepository.save(ProcurementDocument.builder() + .documentHash(publicationId + "-hash") +
.publicationId(publicationId) + .noticeId("NOTICE-" + publicationId) + .noticeType(noticeType) + .contractNature(contractNature) + .procedureType(procedureType) + .buyerCountryCode(countryCode) + .buyerName(buyerName) + .buyerCity("Vienna") + .buyerNutsCode(nutsCodes != null && nutsCodes.length > 0 ? nutsCodes[0] : null) + .projectTitle(projectTitle) + .projectDescription(projectTitle + " description") + .publicationDate(publicationDate) + .submissionDeadline(submissionDeadline) + .cpvCodes(cpvCodes) + .nutsCodes(nutsCodes) + .totalLots(1) + .estimatedValue(new BigDecimal("1000.00")) + .estimatedValueCurrency("EUR") + .euFunded(euFunded) + .textContent(projectTitle) + .xmlDocument("") + .sourceFilename(publicationId + ".xml") + .sourcePath("/tmp/" + publicationId + ".xml") + .build()); + + /* generic document row that the projection below references */ Document document = documentRepository.save(Document.builder() + .visibility(DocumentVisibility.PUBLIC) + .documentType(DocumentType.TED_NOTICE) + .documentFamily(DocumentFamily.PROCUREMENT) + .status(DocumentStatus.RECEIVED) + .title(projectTitle) + .summary(projectTitle) + .languageCode("en") + .mimeType("application/xml") + .businessKey(publicationId) + .dedupHash(publicationId) + .build()); + + projectionRepository.save(TedNoticeProjection.builder() + .document(document) + /* ties the projection back to the legacy row saved above */ .legacyProcurementDocumentId(legacy.getId()) + .publicationId(publicationId) + .noticeId(legacy.getNoticeId()) + .noticeType(noticeType) + .contractNature(contractNature) + .procedureType(procedureType) + .buyerCountryCode(countryCode) + .buyerName(buyerName) + .buyerCity("Vienna") + .buyerNutsCode(nutsCodes != null && nutsCodes.length > 0 ?
nutsCodes[0] : null) + .projectTitle(projectTitle) + .projectDescription(projectTitle + " description") + .publicationDate(publicationDate) + .submissionDeadline(submissionDeadline) + .cpvCodes(cpvCodes) + .nutsCodes(nutsCodes) + .totalLots(1) + .estimatedValue(new BigDecimal("1000.00")) + .estimatedValueCurrency("EUR") + .euFunded(euFunded) + .build()); + } +} diff --git a/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java b/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java new file mode 100644 index 0000000..63eacfc --- /dev/null +++ b/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java @@ -0,0 +1,82 @@ +package at.procon.dip.testsupport; + +import at.procon.dip.FixedPortPostgreSQLContainer; +import at.procon.dip.domain.document.repository.DocumentRepository; +import at.procon.dip.domain.ted.repository.TedNoticeProjectionRepository; +import at.procon.ted.repository.ProcurementDocumentRepository; +import javax.sql.DataSource; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestInstance; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.test.context.DynamicPropertyRegistry; +import org.springframework.test.context.DynamicPropertySource; +import org.springframework.test.context.TestPropertySource; +import org.testcontainers.containers.PostgreSQLContainer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; + +/** Base class for the TED structured-search integration tests: boots TedStructuredSearchTestApplication with a MOCK web environment against a PostgreSQL container, exposes the repositories and JdbcTemplate to subclasses, and truncates the test tables before every test. */ @SpringBootTest(classes = TedStructuredSearchTestApplication.class, webEnvironment = SpringBootTest.WebEnvironment.MOCK) +@Testcontainers +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +@TestPropertySource(properties = { + "spring.jpa.hibernate.ddl-auto=create-drop", + "spring.jpa.show-sql=false", +
"spring.jpa.open-in-view=false", + "spring.jpa.properties.hibernate.default_schema=DOC", + "spring.main.lazy-initialization=true", + "dip.runtime.mode=NEW", + "dip.search.default-page-size=20", + "dip.search.max-page-size=100" +}) +public abstract class AbstractTedStructuredSearchIntegrationTest { + + /* host port passed to the container below; JDBC_URL is built from it */ private static final int HOST_PORT = 15434; + private static final String DB_NAME = "dip_ted_structured_search_test"; + private static final String DB_USER = "test"; + private static final String DB_PASSWORD = "test"; + private static final String JDBC_URL = "jdbc:postgresql://localhost:" + HOST_PORT + "/" + DB_NAME; + + /** postgres:16-alpine container created with HOST_PORT and the test database name and credentials; sql/create-doc-search-test-schemas.sql is registered as its init script. NOTE(review): the field uses the raw PostgreSQLContainer type - PostgreSQLContainer<?> would avoid the raw-type warning; confirm it compiles against FixedPortPostgreSQLContainer. */ @Container + static PostgreSQLContainer postgres = new FixedPortPostgreSQLContainer<>("postgres:16-alpine", HOST_PORT) + .withDatabaseName(DB_NAME) + .withUsername(DB_USER) + .withPassword(DB_PASSWORD) + .withInitScript("sql/create-doc-search-test-schemas.sql"); + + /** Starts the container if it is not already running, then registers the datasource URL, user name, password and driver class with the Spring test context. */ @DynamicPropertySource + static void registerProperties(DynamicPropertyRegistry registry) { + if (!postgres.isRunning()) { + postgres.start(); + } + registry.add("spring.datasource.url", () -> JDBC_URL); + registry.add("spring.datasource.username", () -> DB_USER); + registry.add("spring.datasource.password", () -> DB_PASSWORD); + registry.add("spring.datasource.driver-class-name", () -> "org.postgresql.Driver"); + } + + @Autowired + protected JdbcTemplate jdbcTemplate; + + @Autowired + protected DataSource dataSource; + + @Autowired + protected DocumentRepository documentRepository; + + @Autowired + protected TedNoticeProjectionRepository projectionRepository; + + @Autowired + protected ProcurementDocumentRepository procurementDocumentRepository; + + /** Runs before each test and delegates to cleanupDatabase(). */ @BeforeEach + void resetDatabase() { + cleanupDatabase(); + } + + /** Truncates the listed ted.* and doc.* tables in a single statement with RESTART IDENTITY CASCADE. */ protected void cleanupDatabase() { + jdbcTemplate.execute("TRUNCATE TABLE ted.ted_notice_lot, ted.ted_notice_organization, ted.ted_notice_projection, ted.procurement_lot, ted.organization, ted.procurement_document, doc.doc_document, doc.doc_tenant RESTART IDENTITY CASCADE"); + }
+} diff --git a/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java b/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java new file mode 100644 index 0000000..3b47db2 --- /dev/null +++ b/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java @@ -0,0 +1,54 @@ +package at.procon.dip.testsupport; + +import at.procon.dip.config.JacksonConfig; +import at.procon.dip.domain.ted.search.TedStructuredSearchRepository; +import at.procon.dip.domain.ted.service.TedStructuredSearchService; +import at.procon.dip.domain.ted.web.TedStructuredSearchController; +import at.procon.dip.search.config.DipSearchProperties; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.ImportAutoConfiguration; +import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration; +import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration; +import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration; +import org.springframework.boot.autoconfigure.transaction.TransactionAutoConfiguration; +import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; +import org.springframework.context.annotation.Import; +import org.springframework.data.jpa.repository.config.EnableJpaRepositories; +import org.springframework.boot.autoconfigure.domain.EntityScan; + +/** Slim Spring Boot configuration for the structured-search integration tests. No component scan is declared here: the class imports a fixed list of auto-configurations, scans only the listed entity and repository packages, enables DipSearchProperties, and imports JacksonConfig plus the structured-search repository, service and controller. */ @SpringBootConfiguration +@AutoConfigureMockMvc +@ImportAutoConfiguration({ + JacksonAutoConfiguration.class, + HttpMessageConvertersAutoConfiguration.class, + DataSourceAutoConfiguration.class, +
HibernateJpaAutoConfiguration.class, + TransactionAutoConfiguration.class, + JdbcTemplateAutoConfiguration.class, + WebMvcAutoConfiguration.class +}) +@EnableConfigurationProperties(DipSearchProperties.class) +/* entity scan: the dip document, tenant and TED packages plus the at.procon.ted model package */ @EntityScan(basePackages = { + "at.procon.dip.domain.document.entity", + "at.procon.dip.domain.tenant.entity", + "at.procon.dip.domain.ted.entity", + "at.procon.ted.model.entity" +}) +@EnableJpaRepositories(basePackages = { + "at.procon.dip.domain.document.repository", + "at.procon.dip.domain.tenant.repository", + "at.procon.dip.domain.ted.repository", + "at.procon.ted.repository" +}) +/* explicitly imported beans: JacksonConfig and the structured-search repository, service and controller */ @Import({ + JacksonConfig.class, + TedStructuredSearchRepository.class, + TedStructuredSearchService.class, + TedStructuredSearchController.class +}) +public class TedStructuredSearchTestApplication { +}