diff --git a/docs/WAVE2_TED_STRUCTURED_SEARCH_EXTENDED.md b/docs/WAVE2_TED_STRUCTURED_SEARCH_EXTENDED.md new file mode 100644 index 0000000..a15fa4a --- /dev/null +++ b/docs/WAVE2_TED_STRUCTURED_SEARCH_EXTENDED.md @@ -0,0 +1,117 @@ +# Wave 2 — Extended TED structured search in NEW runtime + +## What was added + +This extension completes the missing parts from the earlier Wave 2 proposal: + +1. **Projection-aware TED structured search in NEW runtime** + - endpoint: `GET /v1/documents/search` + - endpoint: `POST /v1/documents/search` + - active only in `dip.runtime.mode=NEW` + +2. **Repository-level joins across NEW projection model** + - `DOC.doc_document` + - `TED.ted_notice_projection` + - `TED.ted_notice_lot` + - `TED.ted_notice_organization` + +3. **Extended TED structured filters** + - `countryCode`, `countryCodes` + - `noticeType` + - `contractNature` + - `procedureType` + - `cpvPrefix`, `cpvCodes` + - `nutsCode`, `nutsCodes` + - `publicationDateFrom`, `publicationDateTo` + - `submissionDeadlineAfter` + - `euFunded` + - `buyerNameContains` + - `projectTitleContains` + +4. **Hybrid ranking path** + - structured filters first narrow the candidate `document_id` set + - generic NEW lexical/trigram/semantic search ranks only inside that candidate set + - request parameter `q` is used as the hybrid query text + - `similarityThreshold` is forwarded as a per-request semantic threshold override + +5. **Facets** + - countries + - notice types + - procedure types + - buyers + - publication months (`YYYY-MM`) + - CPV families (first 2 digits) + +6. **Parity coverage** + - NEW structured-only parity test against legacy `SearchService` for shared filters + - NEW endpoint integration test for structured results + facets + +## Main classes + +- `TedStructuredSearchRepository` +- `TedStructuredSearchService` +- `TedStructuredSearchController` +- `TedStructuredSearchFilter` +- `TedStructuredSearchFacets` + +## How hybrid search works + +For requests with `q`: + +1. 
apply TED structured filters on projection tables +2. collect matching `document_id`s +3. pass those ids into NEW generic search scope as `candidateDocumentIds` +4. let NEW search engines rank those TED documents +5. map ranked hits back to TED summaries + +This gives structured filtering plus lexical/trigram/semantic relevance ranking. + +## New configuration + +```yaml + +dip: + ted: + projection: + structured-search-hybrid-candidate-limit: 5000 + structured-search-facet-bucket-limit: 12 +``` + +## Current behavior notes + +- Structured-only requests work without `q` +- Hybrid requests use `q` and NEW generic ranking +- When `q` is present, returned `similarity` contains the fused NEW search score +- Facets are computed from the structured candidate set before pagination +- `includeFacets=false` disables facet calculation +- `facetBucketLimit` overrides the default bucket size per request + +## Compatibility notes + +- The NEW endpoint reuses the legacy `DocumentDtos.SearchRequest` and `SearchResponse` +- The response was extended with optional `facets` +- Existing legacy clients remain compatible because extra JSON fields are additive + +## Parity scope + +Parity is implemented for **shared structured filters** between legacy and NEW runtime. + +Good parity candidates: +- country +- notice type +- contract nature +- procedure type +- publication date range +- submission deadline after +- eu funded +- buyer name contains +- project title contains + +Legacy structured parity is **not exact** for filters that legacy `SearchService` does not implement in structured mode, especially: +- lot/organization-expanded `cpvPrefix` +- `cpvCodes` +- `nutsCode` +- `nutsCodes` +- lot-level EU funded semantics + +Those are NEW-runtime improvements on top of legacy behavior. 
diff --git a/postman/DIP-Semantic-Search-e5-default.postman_collection.json b/postman/DIP-Semantic-Search-e5-default.postman_collection.json new file mode 100644 index 0000000..47c7af0 --- /dev/null +++ b/postman/DIP-Semantic-Search-e5-default.postman_collection.json @@ -0,0 +1,178 @@ +{ + "info": { + "_postman_id": "9f9b7a8a-b96b-4f3a-a377-0ce5b54d0a01", + "name": "DIP Semantic Search - e5-default", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "description": "Sample semantic and hybrid search queries against the DIP generic search endpoint using semanticModelKey=e5-default (intfloat/multilingual-e5-large)." + }, + "item": [ + { + "name": "Search / Semantic / English", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"framework agreement for district heating optimization in municipal energy systems\",\n \"modes\": [\n \"SEMANTIC\"\n ],\n \"semanticModelKey\": \"e5-default\",\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/search", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search" + ] + } + } + }, + { + "name": "Search / Semantic / German", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"Rahmenvertrag für die Optimierung von Fernwärmesystemen in kommunalen Energienetzen\",\n \"modes\": [\n \"SEMANTIC\"\n ],\n \"semanticModelKey\": \"e5-default\",\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/search", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search" + ] + } + } + }, + { + "name": "Search / Semantic / Bulgarian", + "request": { + 
"method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"рамково споразумение за оптимизация на системи за централно отопление в общински енергийни мрежи\",\n \"modes\": [\n \"SEMANTIC\"\n ],\n \"semanticModelKey\": \"e5-default\",\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/search", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search" + ] + } + } + }, + { + "name": "Search / Hybrid / English", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"district heating optimization framework agreement\",\n \"modes\": [\n \"HYBRID\"\n ],\n \"semanticModelKey\": \"e5-default\",\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/search", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search" + ] + } + } + }, + { + "name": "Search / Semantic / Generic Filters", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"municipal energy efficiency strategy\",\n \"modes\": [\n \"SEMANTIC\"\n ],\n \"semanticModelKey\": \"e5-default\",\n \"documentTypes\": [\n \"TEXT\",\n \"HTML\",\n \"PDF\"\n ],\n \"documentFamilies\": [\n \"GENERIC\"\n ],\n \"representationTypes\": [\n \"SEMANTIC_TEXT\",\n \"CHUNK\"\n ],\n \"languageCodes\": [\n \"en\",\n \"de\",\n \"bg\"\n ],\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/search", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search" + ] + } + } + }, + { + 
"name": "Search / Debug / Semantic", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"district heating optimization\",\n \"modes\": [\n \"SEMANTIC\"\n ],\n \"semanticModelKey\": \"e5-default\",\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + }, + "url": { + "raw": "{{baseUrl}}/search/debug", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search", + "debug" + ] + } + } + }, + { + "name": "Search / Metrics", + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "{{baseUrl}}/search/metrics", + "host": [ + "{{baseUrl}}" + ], + "path": [ + "search", + "metrics" + ] + } + } + } + ] +} \ No newline at end of file diff --git a/postman/DIP-Semantic-Search-e5-default.postman_environment.json b/postman/DIP-Semantic-Search-e5-default.postman_environment.json new file mode 100644 index 0000000..abcdc2a --- /dev/null +++ b/postman/DIP-Semantic-Search-e5-default.postman_environment.json @@ -0,0 +1,15 @@ +{ + "id": "f2cf3c4b-e0f7-45ff-a9c2-32f4d3d23770", + "name": "DIP Semantic Search Local", + "values": [ + { + "key": "baseUrl", + "value": "http://localhost:8080/api", + "type": "default", + "enabled": true + } + ], + "_postman_variable_scope": "environment", + "_postman_exported_at": "2026-03-23T13:00:00Z", + "_postman_exported_using": "OpenAI ChatGPT" +} \ No newline at end of file diff --git a/postman/WAVE2_TED_STRUCTURED_SEARCH_EXTENDED.postman_collection.json b/postman/WAVE2_TED_STRUCTURED_SEARCH_EXTENDED.postman_collection.json new file mode 100644 index 0000000..5f1bec3 --- /dev/null +++ b/postman/WAVE2_TED_STRUCTURED_SEARCH_EXTENDED.postman_collection.json @@ -0,0 +1,103 @@ +{ + "info": { + "name": "Wave 2 TED Structured Search Extended", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "description": "NEW runtime 
TED structured search with projection-aware filters, hybrid ranking, and facets." + }, + "variable": [ + { "key": "baseUrl", "value": "http://localhost:8080/api" } + ], + "item": [ + { + "name": "Structured only - GET", + "request": { + "method": "GET", + "url": { + "raw": "{{baseUrl}}/v1/documents/search?countryCode=AUT&noticeType=CONTRACT_NOTICE&includeFacets=true&page=0&size=20&sortBy=publicationDate&sortDirection=desc", + "host": ["{{baseUrl}}"], + "path": ["v1", "documents", "search"], + "query": [ + { "key": "countryCode", "value": "AUT" }, + { "key": "noticeType", "value": "CONTRACT_NOTICE" }, + { "key": "includeFacets", "value": "true" }, + { "key": "page", "value": "0" }, + { "key": "size", "value": "20" }, + { "key": "sortBy", "value": "publicationDate" }, + { "key": "sortDirection", "value": "desc" } + ] + } + }, + "event": [{ + "listen": "test", + "script": { + "exec": [ + "pm.test('status 200', function () { pm.response.to.have.status(200); });", + "const json = pm.response.json();", + "pm.test('documents array exists', function () { pm.expect(json.documents).to.be.an('array'); });", + "pm.test('facets object exists', function () { pm.expect(json.facets).to.be.an('object'); });" + ] + } + }] + }, + { + "name": "Hybrid ranked TED search - GET", + "request": { + "method": "GET", + "url": { + "raw": "{{baseUrl}}/v1/documents/search?countryCode=DEU&cpvPrefix=33&q=medical imaging systems&similarityThreshold=0.65&includeFacets=true", + "host": ["{{baseUrl}}"], + "path": ["v1", "documents", "search"], + "query": [ + { "key": "countryCode", "value": "DEU" }, + { "key": "cpvPrefix", "value": "33" }, + { "key": "q", "value": "medical imaging systems" }, + { "key": "similarityThreshold", "value": "0.65" }, + { "key": "includeFacets", "value": "true" } + ] + } + }, + "event": [{ + "listen": "test", + "script": { + "exec": [ + "pm.test('status 200', function () { pm.response.to.have.status(200); });", + "const json = pm.response.json();", + "pm.test('documents array 
exists', function () { pm.expect(json.documents).to.be.an('array'); });" + ] + } + }] + }, + { + "name": "Structured only - POST with facets", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"countryCodes\": [\"AUT\", \"DEU\"],\n \"noticeType\": \"CONTRACT_NOTICE\",\n \"contractNature\": \"SUPPLIES\",\n \"procedureType\": \"OPEN\",\n \"publicationDateFrom\": \"2026-01-01\",\n \"publicationDateTo\": \"2026-12-31\",\n \"includeFacets\": true,\n \"facetBucketLimit\": 10,\n \"page\": 0,\n \"size\": 20,\n \"sortBy\": \"publicationDate\",\n \"sortDirection\": \"desc\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/v1/documents/search", + "host": ["{{baseUrl}}"], + "path": ["v1", "documents", "search"] + } + } + }, + { + "name": "Parity-style request for shared legacy filters", + "request": { + "method": "POST", + "header": [{ "key": "Content-Type", "value": "application/json" }], + "body": { + "mode": "raw", + "raw": "{\n \"countryCode\": \"AUT\",\n \"noticeType\": \"CONTRACT_NOTICE\",\n \"contractNature\": \"SERVICES\",\n \"procedureType\": \"OPEN\",\n \"projectTitleContains\": \"maintenance\",\n \"publicationDateFrom\": \"2026-04-01\",\n \"publicationDateTo\": \"2026-04-30\",\n \"page\": 0,\n \"size\": 20,\n \"sortBy\": \"publicationDate\",\n \"sortDirection\": \"desc\"\n}" + }, + "url": { + "raw": "{{baseUrl}}/v1/documents/search", + "host": ["{{baseUrl}}"], + "path": ["v1", "documents", "search"] + } + } + } + ] +} diff --git a/src/main/java/at/procon/dip/domain/ted/config/TedProjectionProperties.java b/src/main/java/at/procon/dip/domain/ted/config/TedProjectionProperties.java index 523b794..9ae124d 100644 --- a/src/main/java/at/procon/dip/domain/ted/config/TedProjectionProperties.java +++ b/src/main/java/at/procon/dip/domain/ted/config/TedProjectionProperties.java @@ -13,4 +13,8 @@ public class TedProjectionProperties { private boolean startupBackfillEnabled = false; 
@Positive private int startupBackfillLimit = 250; + @Positive + private int structuredSearchHybridCandidateLimit = 5000; + @Positive + private int structuredSearchFacetBucketLimit = 12; } diff --git a/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java b/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java index 2575901..d641d75 100644 --- a/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java +++ b/src/main/java/at/procon/dip/domain/ted/search/TedStructuredSearchRepository.java @@ -1,21 +1,31 @@ package at.procon.dip.domain.ted.search; -import at.procon.ted.model.dto.DocumentDtos.DocumentSummary; -import at.procon.ted.model.dto.DocumentDtos.SearchRequest; +import at.procon.dip.domain.ted.search.dto.TedStructuredSearchFacetEntry; +import at.procon.dip.domain.ted.search.dto.TedStructuredSearchFacets; +import at.procon.dip.domain.ted.search.dto.TedStructuredSearchFilter; +import at.procon.dip.domain.ted.search.dto.TedStructuredSearchSummaryRow; import at.procon.ted.model.entity.ContractNature; import at.procon.ted.model.entity.NoticeType; import at.procon.ted.model.entity.ProcedureType; +import java.math.BigDecimal; import java.sql.Array; -import java.sql.ResultSet; import java.sql.SQLException; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; import org.springframework.jdbc.core.RowMapper; import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate; import org.springframework.stereotype.Repository; -import org.springframework.util.CollectionUtils; +import org.springframework.util.StringUtils; @Repository 
@RequiredArgsConstructor @@ -23,189 +33,363 @@ public class TedStructuredSearchRepository { private final NamedParameterJdbcTemplate jdbcTemplate; - public List search(SearchRequest request, int page, int size) { + public List findCandidateDocumentIds(TedStructuredSearchFilter filter, int limit) { + StringBuilder sql = new StringBuilder(baseFromWhere(filter, false)); + sql.insert(0, "SELECT p.document_id "); + sql.append(" GROUP BY p.document_id, p.publication_date, p.created_at"); + sql.append(" ORDER BY p.publication_date DESC NULLS LAST, p.created_at DESC LIMIT :limit"); + MapSqlParameterSource params = params(filter); + params.addValue("limit", limit); + return jdbcTemplate.query(sql.toString(), params, (rs, rowNum) -> rs.getObject(1, UUID.class)); + } + + public long countDistinctDocuments(TedStructuredSearchFilter filter) { + StringBuilder sql = new StringBuilder("SELECT COUNT(DISTINCT p.document_id) "); + sql.append(baseFromWhere(filter, false)); + return jdbcTemplate.queryForObject(sql.toString(), params(filter), Long.class); + } + + public List searchStructured(TedStructuredSearchFilter filter, + int page, + int size, + String sortBy, + String sortDirection) { StringBuilder sql = new StringBuilder(""" SELECT - COALESCE(p.legacy_procurement_document_id, p.document_id) AS id, + p.document_id, p.publication_id, p.notice_id, - CAST(p.notice_type AS text) AS notice_type, + p.notice_type, p.project_title, p.buyer_name, p.buyer_country_code, p.buyer_city, - CAST(p.contract_nature AS text) AS contract_nature, - CAST(p.procedure_type AS text) AS procedure_type, + p.contract_nature, + p.procedure_type, p.publication_date, p.submission_deadline, p.cpv_codes, p.total_lots, p.estimated_value, p.estimated_value_currency - FROM ted.ted_notice_projection p - WHERE 1=1 """); - - MapSqlParameterSource params = new MapSqlParameterSource(); - appendFilters(sql, params, request); - sql.append(" ORDER BY ").append(resolveSortColumn(request.getSortBy())).append(' ') - 
.append(resolveSortDirection(request.getSortDirection())) - .append(", p.publication_date DESC NULLS LAST, p.publication_id DESC NULLS LAST, p.document_id ASC"); + sql.append(baseFromWhere(filter, false)); + sql.append(" GROUP BY p.document_id, p.publication_id, p.notice_id, p.notice_type, p.project_title, p.buyer_name, p.buyer_country_code, p.buyer_city, p.contract_nature, p.procedure_type, p.publication_date, p.submission_deadline, p.cpv_codes, p.total_lots, p.estimated_value, p.estimated_value_currency"); + sql.append(" ORDER BY ").append(resolveSort(sortBy, sortDirection)); sql.append(" LIMIT :limit OFFSET :offset"); + + MapSqlParameterSource params = params(filter); params.addValue("limit", size); - params.addValue("offset", page * size); + params.addValue("offset", Math.max(0, page) * size); + return jdbcTemplate.query(sql.toString(), params, SUMMARY_ROW_MAPPER); + } - return jdbcTemplate.query(sql.toString(), params, new DocumentSummaryRowMapper()); + public List findSummariesByDocumentIds(List documentIds) { + if (documentIds == null || documentIds.isEmpty()) { + return List.of(); + } + String sql = """ + SELECT + p.document_id, + p.publication_id, + p.notice_id, + p.notice_type, + p.project_title, + p.buyer_name, + p.buyer_country_code, + p.buyer_city, + p.contract_nature, + p.procedure_type, + p.publication_date, + p.submission_deadline, + p.cpv_codes, + p.total_lots, + p.estimated_value, + p.estimated_value_currency + FROM TED.ted_notice_projection p + WHERE p.document_id IN (:documentIds) + """; + List rows = jdbcTemplate.query(sql, new MapSqlParameterSource("documentIds", documentIds), SUMMARY_ROW_MAPPER); + Map byId = rows.stream().collect(Collectors.toMap(TedStructuredSearchSummaryRow::documentId, r -> r)); + List ordered = new ArrayList<>(); + for (UUID id : documentIds) { + TedStructuredSearchSummaryRow row = byId.get(id); + if (row != null) { + ordered.add(row); + } + } + return ordered; + } + + public TedStructuredSearchFacets 
computeFacets(TedStructuredSearchFilter filter, int bucketLimit) { + int safeLimit = Math.max(1, bucketLimit); + return TedStructuredSearchFacets.builder() + .countries(runFacet(filter, "COALESCE(p.buyer_country_code, '')", "COALESCE(p.buyer_country_code, '')", safeLimit)) + .noticeTypes(runFacet(filter, "CAST(p.notice_type AS text)", "CAST(p.notice_type AS text)", safeLimit)) + .procedureTypes(runFacet(filter, "CAST(p.procedure_type AS text)", "CAST(p.procedure_type AS text)", safeLimit)) + .buyers(runFacet(filter, "COALESCE(p.buyer_name, '')", "COALESCE(p.buyer_name, '')", safeLimit)) + .publicationMonths(runFacet(filter, "to_char(p.publication_date, 'YYYY-MM')", "to_char(p.publication_date, 'YYYY-MM')", safeLimit)) + .cpvFamilies(runCpvFamilyFacet(filter, safeLimit)) + .build(); } - public long count(SearchRequest request) { + private List runFacet(TedStructuredSearchFilter filter, + String keyExpr, + String labelExpr, + int limit) { + StringBuilder sql = new StringBuilder("SELECT ") + .append(keyExpr).append(" AS key, ") + .append(labelExpr).append(" AS label, COUNT(DISTINCT p.document_id) AS cnt "); + sql.append(baseFromWhere(filter, false)); + sql.append(" GROUP BY ").append(keyExpr).append(", ").append(labelExpr) + .append(" HAVING ").append(keyExpr).append(" IS NOT NULL AND ").append(keyExpr).append(" <> ''") + .append(" ORDER BY cnt DESC, label ASC LIMIT :facetLimit"); + MapSqlParameterSource params = params(filter); + params.addValue("facetLimit", limit); + return jdbcTemplate.query(sql.toString(), params, (rs, rowNum) -> TedStructuredSearchFacetEntry.builder() + .key(rs.getString("key")) + .label(rs.getString("label")) + .count(rs.getLong("cnt")) + .build()); + } + + private List runCpvFamilyFacet(TedStructuredSearchFilter filter, int limit) { + StringBuilder sql = new StringBuilder(""" + SELECT LEFT(code, 2) AS key, LEFT(code, 2) AS label, COUNT(DISTINCT document_id) AS cnt + FROM ( + SELECT p.document_id, unnest(COALESCE(p.cpv_codes, 
ARRAY[]::varchar[])) AS code + """); + sql.append(baseFromWhere(filter, true)); + sql.append(" UNION ALL SELECT p.document_id, unnest(COALESCE(l.cpv_codes, ARRAY[]::varchar[])) AS code "); + sql.append(baseFromWhere(filter, false)); + sql.append(" ) cpv WHERE code IS NOT NULL AND code <> '' GROUP BY LEFT(code, 2) ORDER BY cnt DESC, label ASC LIMIT :facetLimit"); + MapSqlParameterSource params = params(filter); + params.addValue("facetLimit", limit); + return jdbcTemplate.query(sql.toString(), params, (rs, rowNum) -> TedStructuredSearchFacetEntry.builder() + .key(rs.getString("key")) + .label(rs.getString("label")) + .count(rs.getLong("cnt")) + .build()); + } + + private String baseFromWhere(TedStructuredSearchFilter filter, boolean projectionOnly) { StringBuilder sql = new StringBuilder(""" - SELECT COUNT(*) - FROM ted.ted_notice_projection p - WHERE 1=1 + FROM TED.ted_notice_projection p + JOIN DOC.doc_document d ON d.id = p.document_id """); + if (!projectionOnly) { + sql.append(" LEFT JOIN TED.ted_notice_lot l ON l.notice_projection_id = p.id"); + sql.append(" LEFT JOIN TED.ted_notice_organization o ON o.notice_projection_id = p.id"); + } + sql.append(" WHERE 1=1"); + appendFilters(sql, filter, projectionOnly); + return sql.toString(); + } + + private MapSqlParameterSource params(TedStructuredSearchFilter filter) { MapSqlParameterSource params = new MapSqlParameterSource(); - appendFilters(sql, params, request); - Long value = jdbcTemplate.queryForObject(sql.toString(), params, Long.class); - return value == null ? 
0L : value; + if (filter == null) { + return params; + } + if (StringUtils.hasText(filter.getCountryCode())) { + params.addValue("countryCode", filter.getCountryCode().trim()); + } + if (filter.getCountryCodes() != null && !filter.getCountryCodes().isEmpty()) { + params.addValue("countryCodes", filter.getCountryCodes()); + } + if (filter.getNoticeType() != null) { + params.addValue("noticeType", filter.getNoticeType().name()); + } + if (filter.getContractNature() != null) { + params.addValue("contractNature", filter.getContractNature().name()); + } + if (filter.getProcedureType() != null) { + params.addValue("procedureType", filter.getProcedureType().name()); + } + if (StringUtils.hasText(filter.getCpvPrefix())) { + params.addValue("cpvPrefixLike", filter.getCpvPrefix().trim() + "%"); + } + if (filter.getCpvCodes() != null && !filter.getCpvCodes().isEmpty()) { + params.addValue("cpvCodes", filter.getCpvCodes()); + } + if (StringUtils.hasText(filter.getNutsCode())) { + params.addValue("nutsCodeLike", filter.getNutsCode().trim() + "%"); + } + if (filter.getNutsCodes() != null && !filter.getNutsCodes().isEmpty()) { + params.addValue("nutsCodes", filter.getNutsCodes()); + } + if (filter.getPublicationDateFrom() != null) { + params.addValue("publicationDateFrom", filter.getPublicationDateFrom()); + } + if (filter.getPublicationDateTo() != null) { + params.addValue("publicationDateTo", filter.getPublicationDateTo()); + } + if (filter.getSubmissionDeadlineAfter() != null) { + params.addValue("submissionDeadlineAfter", filter.getSubmissionDeadlineAfter()); + } + if (filter.getEuFunded() != null) { + params.addValue("euFunded", filter.getEuFunded()); + } + if (StringUtils.hasText(filter.getBuyerNameContains())) { + params.addValue("buyerNameLike", "%" + filter.getBuyerNameContains().trim().toLowerCase() + "%"); + } + if (StringUtils.hasText(filter.getProjectTitleContains())) { + params.addValue("projectTitleLike", "%" + filter.getProjectTitleContains().trim().toLowerCase() 
+ "%"); + } + return params; } - private void appendFilters(StringBuilder sql, MapSqlParameterSource params, SearchRequest request) { - if (hasText(request.getCountryCode())) { + private void appendFilters(StringBuilder sql, TedStructuredSearchFilter filter, boolean projectionOnly) { + if (filter == null) { + return; + } + if (StringUtils.hasText(filter.getCountryCode())) { sql.append(" AND p.buyer_country_code = :countryCode"); - params.addValue("countryCode", request.getCountryCode()); } - if (!CollectionUtils.isEmpty(request.getCountryCodes())) { + if (filter.getCountryCodes() != null && !filter.getCountryCodes().isEmpty()) { sql.append(" AND p.buyer_country_code IN (:countryCodes)"); - params.addValue("countryCodes", request.getCountryCodes()); } - if (request.getNoticeType() != null) { + if (filter.getNoticeType() != null) { sql.append(" AND CAST(p.notice_type AS text) = :noticeType"); - params.addValue("noticeType", request.getNoticeType().name()); } - if (request.getContractNature() != null) { + if (filter.getContractNature() != null) { sql.append(" AND CAST(p.contract_nature AS text) = :contractNature"); - params.addValue("contractNature", request.getContractNature().name()); } - if (request.getProcedureType() != null) { + if (filter.getProcedureType() != null) { sql.append(" AND CAST(p.procedure_type AS text) = :procedureType"); - params.addValue("procedureType", request.getProcedureType().name()); } - if (hasText(request.getCpvPrefix())) { - sql.append(" AND EXISTS (SELECT 1 FROM unnest(p.cpv_codes) code WHERE code LIKE :cpvPrefixLike)"); - params.addValue("cpvPrefixLike", request.getCpvPrefix() + "%"); + if (StringUtils.hasText(filter.getCpvPrefix())) { + sql.append(" AND (") + .append(" EXISTS (SELECT 1 FROM unnest(COALESCE(p.cpv_codes, ARRAY[]::varchar[])) cpv WHERE cpv LIKE :cpvPrefixLike)"); + if (!projectionOnly) { + sql.append(" OR EXISTS (SELECT 1 FROM unnest(COALESCE(l.cpv_codes, ARRAY[]::varchar[])) cpv WHERE cpv LIKE :cpvPrefixLike)"); + } + 
sql.append(")"); } - if (!CollectionUtils.isEmpty(request.getCpvCodes())) { - sql.append(" AND EXISTS (SELECT 1 FROM unnest(p.cpv_codes) code WHERE code IN (:cpvCodes))"); - params.addValue("cpvCodes", request.getCpvCodes()); + if (filter.getCpvCodes() != null && !filter.getCpvCodes().isEmpty()) { + sql.append(" AND (") + .append(" EXISTS (SELECT 1 FROM unnest(COALESCE(p.cpv_codes, ARRAY[]::varchar[])) cpv WHERE cpv IN (:cpvCodes))"); + if (!projectionOnly) { + sql.append(" OR EXISTS (SELECT 1 FROM unnest(COALESCE(l.cpv_codes, ARRAY[]::varchar[])) cpv WHERE cpv IN (:cpvCodes))"); + } + sql.append(")"); } - if (hasText(request.getNutsCode())) { - sql.append(" AND (p.buyer_nuts_code = :nutsCode OR EXISTS (SELECT 1 FROM unnest(p.nuts_codes) code WHERE code = :nutsCode))"); - params.addValue("nutsCode", request.getNutsCode()); + if (StringUtils.hasText(filter.getNutsCode())) { + sql.append(" AND (") + .append(" p.buyer_nuts_code LIKE :nutsCodeLike") + .append(" OR EXISTS (SELECT 1 FROM unnest(COALESCE(p.nuts_codes, ARRAY[]::varchar[])) nuts WHERE nuts LIKE :nutsCodeLike)"); + if (!projectionOnly) { + sql.append(" OR EXISTS (SELECT 1 FROM unnest(COALESCE(l.nuts_codes, ARRAY[]::varchar[])) nuts WHERE nuts LIKE :nutsCodeLike)") + .append(" OR COALESCE(o.nuts_code, '') LIKE :nutsCodeLike"); + } + sql.append(")"); } - if (!CollectionUtils.isEmpty(request.getNutsCodes())) { - sql.append(" AND (p.buyer_nuts_code IN (:nutsCodes) OR EXISTS (SELECT 1 FROM unnest(p.nuts_codes) code WHERE code IN (:nutsCodes)))"); - params.addValue("nutsCodes", request.getNutsCodes()); + if (filter.getNutsCodes() != null && !filter.getNutsCodes().isEmpty()) { + sql.append(" AND (") + .append(" p.buyer_nuts_code IN (:nutsCodes)") + .append(" OR EXISTS (SELECT 1 FROM unnest(COALESCE(p.nuts_codes, ARRAY[]::varchar[])) nuts WHERE nuts IN (:nutsCodes))"); + if (!projectionOnly) { + sql.append(" OR EXISTS (SELECT 1 FROM unnest(COALESCE(l.nuts_codes, ARRAY[]::varchar[])) nuts WHERE nuts IN 
(:nutsCodes))") + .append(" OR COALESCE(o.nuts_code, '') IN (:nutsCodes)"); + } + sql.append(")"); } - if (request.getPublicationDateFrom() != null) { + if (filter.getPublicationDateFrom() != null) { sql.append(" AND p.publication_date >= :publicationDateFrom"); - params.addValue("publicationDateFrom", request.getPublicationDateFrom()); } - if (request.getPublicationDateTo() != null) { + if (filter.getPublicationDateTo() != null) { sql.append(" AND p.publication_date <= :publicationDateTo"); - params.addValue("publicationDateTo", request.getPublicationDateTo()); } - if (request.getSubmissionDeadlineAfter() != null) { - sql.append(" AND p.submission_deadline > :submissionDeadlineAfter"); - params.addValue("submissionDeadlineAfter", request.getSubmissionDeadlineAfter()); + if (filter.getSubmissionDeadlineAfter() != null) { + sql.append(" AND (p.submission_deadline > :submissionDeadlineAfter"); + if (!projectionOnly) { + sql.append(" OR l.submission_deadline > :submissionDeadlineAfter"); + } + sql.append(")"); } - if (request.getEuFunded() != null) { - sql.append(" AND p.eu_funded = :euFunded"); - params.addValue("euFunded", request.getEuFunded()); + if (filter.getEuFunded() != null) { + if (filter.getEuFunded()) { + sql.append(" AND (COALESCE(p.eu_funded, false) = true"); + if (!projectionOnly) { + sql.append(" OR COALESCE(l.eu_funded, false) = true"); + } + sql.append(")"); + } else { + sql.append(" AND COALESCE(p.eu_funded, false) = false"); + if (!projectionOnly) { + sql.append(" AND NOT EXISTS (SELECT 1 FROM TED.ted_notice_lot lx WHERE lx.notice_projection_id = p.id AND COALESCE(lx.eu_funded, false) = true)"); + } + } } - if (hasText(request.getBuyerNameContains())) { - sql.append(" AND LOWER(COALESCE(p.buyer_name, '')) LIKE :buyerNameContains"); - params.addValue("buyerNameContains", like(request.getBuyerNameContains())); + if (StringUtils.hasText(filter.getBuyerNameContains())) { + sql.append(" AND (") + .append(" LOWER(COALESCE(p.buyer_name, '')) LIKE 
:buyerNameLike"); + if (!projectionOnly) { + sql.append(" OR LOWER(COALESCE(o.name, '')) LIKE :buyerNameLike"); + } + sql.append(")"); } - if (hasText(request.getProjectTitleContains())) { - sql.append(" AND LOWER(COALESCE(p.project_title, '')) LIKE :projectTitleContains"); - params.addValue("projectTitleContains", like(request.getProjectTitleContains())); + if (StringUtils.hasText(filter.getProjectTitleContains())) { + sql.append(" AND (") + .append(" LOWER(COALESCE(p.project_title, '')) LIKE :projectTitleLike"); + if (!projectionOnly) { + sql.append(" OR LOWER(COALESCE(l.title, '')) LIKE :projectTitleLike"); + } + sql.append(")"); } } - private String resolveSortColumn(String sortBy) { - if (sortBy == null || sortBy.isBlank()) { - return "p.publication_date"; - } - return switch (sortBy) { - case "submissionDeadline" -> "p.submission_deadline"; - case "buyerName" -> "p.buyer_name"; - case "projectTitle" -> "p.project_title"; - case "publicationDate" -> "p.publication_date"; - default -> "p.publication_date"; + private String resolveSort(String sortBy, String sortDirection) { + boolean asc = "asc".equalsIgnoreCase(sortDirection); + String dir = asc ? "ASC" : "DESC"; + String field = sortBy == null ? "publicationDate" : sortBy; + return switch (field) { + case "submissionDeadline" -> "p.submission_deadline " + dir + " NULLS LAST, p.publication_date DESC NULLS LAST"; + case "buyerName" -> "p.buyer_name " + dir + " NULLS LAST, p.publication_date DESC NULLS LAST"; + case "projectTitle" -> "p.project_title " + dir + " NULLS LAST, p.publication_date DESC NULLS LAST"; + default -> "p.publication_date " + dir + " NULLS LAST"; }; } - private String resolveSortDirection(String direction) { - return "asc".equalsIgnoreCase(direction) ? 
"ASC" : "DESC"; + private static final RowMapper SUMMARY_ROW_MAPPER = (rs, rowNum) -> new TedStructuredSearchSummaryRow( + rs.getObject("document_id", UUID.class), + rs.getString("publication_id"), + rs.getString("notice_id"), + parseNoticeType(rs.getString("notice_type")), + rs.getString("project_title"), + rs.getString("buyer_name"), + rs.getString("buyer_country_code"), + rs.getString("buyer_city"), + parseContractNature(rs.getString("contract_nature")), + parseProcedureType(rs.getString("procedure_type")), + rs.getObject("publication_date", LocalDate.class), + rs.getObject("submission_deadline", OffsetDateTime.class), + stringArray(rs.getArray("cpv_codes")), + rs.getObject("total_lots") != null ? rs.getInt("total_lots") : null, + rs.getBigDecimal("estimated_value"), + rs.getString("estimated_value_currency") + ); + + private static NoticeType parseNoticeType(String value) { + return value == null ? null : NoticeType.valueOf(value); } - private boolean hasText(String value) { - return value != null && !value.isBlank(); + private static ContractNature parseContractNature(String value) { + return value == null ? null : ContractNature.valueOf(value); } - private String like(String value) { - return "%" + value.toLowerCase() + "%"; + private static ProcedureType parseProcedureType(String value) { + return value == null ? 
null : ProcedureType.valueOf(value); } - private static class DocumentSummaryRowMapper implements RowMapper { - @Override - public DocumentSummary mapRow(ResultSet rs, int rowNum) throws SQLException { - return DocumentSummary.builder() - .id(rs.getObject("id", java.util.UUID.class)) - .publicationId(rs.getString("publication_id")) - .noticeId(rs.getString("notice_id")) - .noticeType(parseNoticeType(rs.getString("notice_type"))) - .projectTitle(rs.getString("project_title")) - .buyerName(rs.getString("buyer_name")) - .buyerCountryCode(rs.getString("buyer_country_code")) - .buyerCity(rs.getString("buyer_city")) - .contractNature(parseContractNature(rs.getString("contract_nature"))) - .procedureType(parseProcedureType(rs.getString("procedure_type"))) - .publicationDate(rs.getObject("publication_date", java.time.LocalDate.class)) - .submissionDeadline(rs.getObject("submission_deadline", java.time.OffsetDateTime.class)) - .cpvCodes(readArray(rs, "cpv_codes")) - .totalLots((Integer) rs.getObject("total_lots")) - .estimatedValue(rs.getBigDecimal("estimated_value")) - .estimatedValueCurrency(rs.getString("estimated_value_currency")) - .build(); - } - - private static List readArray(ResultSet rs, String column) throws SQLException { - Array array = rs.getArray(column); - if (array == null) { - return List.of(); - } - Object value = array.getArray(); - if (value instanceof String[] strings) { - return Arrays.asList(strings); - } - if (value instanceof Object[] objects) { - return Arrays.stream(objects).map(String::valueOf).toList(); - } + private static List stringArray(Array array) throws SQLException { + if (array == null) { return List.of(); } - - private static NoticeType parseNoticeType(String value) { - return value == null ? null : NoticeType.valueOf(value); - } - - private static ContractNature parseContractNature(String value) { - return value == null ? 
null : ContractNature.valueOf(value); - } - - private static ProcedureType parseProcedureType(String value) { - return value == null ? null : ProcedureType.valueOf(value); + Object raw = array.getArray(); + if (raw instanceof String[] strings) { + return Arrays.asList(strings); } + return List.of(); } } diff --git a/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFacetEntry.java b/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFacetEntry.java new file mode 100644 index 0000000..1b2675f --- /dev/null +++ b/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFacetEntry.java @@ -0,0 +1,16 @@ +package at.procon.dip.domain.ted.search.dto; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TedStructuredSearchFacetEntry { + private String key; + private String label; + private long count; +} diff --git a/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFacets.java b/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFacets.java new file mode 100644 index 0000000..7cb7013 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFacets.java @@ -0,0 +1,20 @@ +package at.procon.dip.domain.ted.search.dto; + +import java.util.List; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class TedStructuredSearchFacets { + private List countries; + private List noticeTypes; + private List procedureTypes; + private List buyers; + private List publicationMonths; + private List cpvFamilies; +} diff --git a/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFilter.java b/src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFilter.java new file mode 100644 index 
// ---- new file: src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchFilter.java ----
package at.procon.dip.domain.ted.search.dto;

import at.procon.ted.model.entity.ContractNature;
import at.procon.ted.model.entity.NoticeType;
import at.procon.ted.model.entity.ProcedureType;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Structured TED filter criteria, detached from the web-layer search request.
 * Every field is optional; a {@code null} field means "do not filter on this".
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class TedStructuredSearchFilter {
    private String countryCode;
    private List<String> countryCodes;
    private NoticeType noticeType;
    private ContractNature contractNature;
    private ProcedureType procedureType;
    private String cpvPrefix;
    private List<String> cpvCodes;
    private String nutsCode;
    private List<String> nutsCodes;
    private LocalDate publicationDateFrom;
    private LocalDate publicationDateTo;
    private OffsetDateTime submissionDeadlineAfter;
    private Boolean euFunded;
    private String buyerNameContains;
    private String projectTitleContains;
}

// ---- new file: src/main/java/at/procon/dip/domain/ted/search/dto/TedStructuredSearchSummaryRow.java ----
package at.procon.dip.domain.ted.search.dto;

import at.procon.ted.model.entity.ContractNature;
import at.procon.ted.model.entity.NoticeType;
import at.procon.ted.model.entity.ProcedureType;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.UUID;

/**
 * Immutable summary of one TED notice as returned by the structured search queries.
 */
public record TedStructuredSearchSummaryRow(
        UUID documentId,
        String publicationId,
        String noticeId,
        NoticeType noticeType,
        String projectTitle,
        String buyerName,
        String buyerCountryCode,
        String buyerCity,
        ContractNature contractNature,
        ProcedureType procedureType,
        LocalDate publicationDate,
        OffsetDateTime submissionDeadline,
        List<String> cpvCodes,
        Integer totalLots,
        BigDecimal estimatedValue,
        String estimatedValueCurrency
) {
}

// ---- modified file: src/main/java/at/procon/dip/domain/ted/service/TedStructuredSearchService.java ----
package at.procon.dip.domain.ted.service;

import at.procon.dip.domain.ted.config.TedProjectionProperties;
import at.procon.dip.domain.ted.search.TedStructuredSearchRepository;
import at.procon.dip.domain.ted.search.dto.TedStructuredSearchFacets;
import at.procon.dip.domain.ted.search.dto.TedStructuredSearchFilter;
import at.procon.dip.domain.ted.search.dto.TedStructuredSearchSummaryRow;
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
import at.procon.dip.runtime.config.RuntimeMode;
import at.procon.dip.search.dto.SearchMode;
import at.procon.dip.search.dto.SearchSortMode;
import at.procon.dip.search.spi.SearchDocumentScope;
import at.procon.dip.search.service.SearchOrchestrator;
import at.procon.ted.model.dto.DocumentDtos.DocumentSummary;
import at.procon.ted.model.dto.DocumentDtos.SearchRequest;
import at.procon.ted.model.dto.DocumentDtos.SearchResponse;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;

/**
 * TED structured search for the NEW runtime.
 *
 * <p>Requests without query text are answered straight from the repository. Requests with
 * query text first resolve the structured filter to a bounded set of candidate document ids
 * and then let the generic {@link SearchOrchestrator} rank inside that set.
 */
@Service
@RequiredArgsConstructor
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
@Transactional(readOnly = true)
public class TedStructuredSearchService {

    /** Page size applied when the request carries none or a non-positive one. */
    private static final int DEFAULT_PAGE_SIZE = 20;

    private final TedStructuredSearchRepository repository;
    private final SearchOrchestrator searchOrchestrator;
    private final TedProjectionProperties tedProjectionProperties;

    /**
     * Runs a structured or hybrid search, depending on whether the request has query text.
     *
     * <p>A missing or negative page becomes 0; a missing or non-positive size becomes
     * {@value #DEFAULT_PAGE_SIZE}. Facets are computed from the structured filter alone and are
     * skipped only when {@code includeFacets} is explicitly {@code false}.
     *
     * @param request the legacy-shaped search request
     * @return one page of document summaries, with facets unless they were disabled
     */
    public SearchResponse search(SearchRequest request) {
        // NOTE(review): size has no upper bound here (the replaced implementation capped it
        // with a configured maximum) - confirm whether a cap is enforced elsewhere.
        int page = request.getPage() != null && request.getPage() >= 0 ? request.getPage() : 0;
        int size = request.getSize() != null && request.getSize() > 0 ? request.getSize() : DEFAULT_PAGE_SIZE;
        TedStructuredSearchFilter filter = toFilter(request);

        int facetLimit = request.getFacetBucketLimit() != null && request.getFacetBucketLimit() > 0
                ? request.getFacetBucketLimit()
                : tedProjectionProperties.getStructuredSearchFacetBucketLimit();
        TedStructuredSearchFacets facets = Boolean.FALSE.equals(request.getIncludeFacets())
                ? null
                : repository.computeFacets(filter, facetLimit);

        SearchResponse response = hasQuery(request)
                ? searchHybrid(request, filter, page, size)
                : searchStructuredOnly(request, filter, page, size);
        response.setFacets(facets);
        return response;
    }

    /** Answers a request without query text directly from the projection tables. */
    private SearchResponse searchStructuredOnly(SearchRequest request,
                                                TedStructuredSearchFilter filter,
                                                int page,
                                                int size) {
        long total = repository.countDistinctDocuments(filter);
        List<TedStructuredSearchSummaryRow> rows =
                repository.searchStructured(filter, page, size, request.getSortBy(), request.getSortDirection());
        return pageOf(rows.stream().map(this::toSummary).toList(), page, size, total);
    }

    /**
     * Answers a request with query text: narrows to candidate ids via the structured filter,
     * ranks them with the generic search, then maps the ranked hits back to TED summaries.
     */
    private SearchResponse searchHybrid(SearchRequest request,
                                        TedStructuredSearchFilter filter,
                                        int page,
                                        int size) {
        List<UUID> candidateIds = repository.findCandidateDocumentIds(
                filter, tedProjectionProperties.getStructuredSearchHybridCandidateLimit());
        if (candidateIds.isEmpty()) {
            // Nothing passed the structured filter, so there is nothing to rank.
            return pageOf(List.of(), page, size, 0);
        }

        at.procon.dip.search.dto.SearchRequest genericRequest = at.procon.dip.search.dto.SearchRequest.builder()
                .queryText(request.getSemanticQuery())
                .modes(Set.of(SearchMode.HYBRID))
                .page(page)
                .size(size)
                .sortMode(resolveSortMode(request.getSortBy(), request.getSortDirection()))
                .semanticSimilarityThreshold(request.getSimilarityThreshold())
                .build();

        var genericResponse = searchOrchestrator.search(
                genericRequest,
                new SearchDocumentScope(Set.of(), null, null, null, null, Set.copyOf(candidateIds))
        );

        List<UUID> orderedIds = genericResponse.getHits().stream().map(hit -> hit.getDocumentId()).toList();
        Map<UUID, TedStructuredSearchSummaryRow> summaryById = repository.findSummariesByDocumentIds(orderedIds).stream()
                .collect(Collectors.toMap(TedStructuredSearchSummaryRow::documentId, row -> row, (a, b) -> a, LinkedHashMap::new));

        // Iterate the hits, not the summaries, so the ranking order of the generic search is kept.
        // Hits without a summary row are dropped; totalElements still counts them.
        List<DocumentSummary> docs = genericResponse.getHits().stream()
                .map(hit -> {
                    TedStructuredSearchSummaryRow row = summaryById.get(hit.getDocumentId());
                    if (row == null) {
                        return null;
                    }
                    DocumentSummary summary = toSummary(row);
                    summary.setSimilarity(hit.getFinalScore());
                    return summary;
                })
                .filter(java.util.Objects::nonNull)
                .toList();

        return pageOf(docs, page, size, genericResponse.getTotalHits());
    }

    /**
     * Builds the paged response envelope shared by both search paths.
     *
     * @param docs  summaries of the current page
     * @param page  zero-based page index
     * @param size  page size, always positive here
     * @param total number of matches across all pages
     */
    private SearchResponse pageOf(List<DocumentSummary> docs, int page, int size, long total) {
        return SearchResponse.builder()
                .documents(docs)
                .page(page)
                .size(size)
                .totalElements(total)
                .totalPages((int) Math.ceil(total / (double) size))
                // 1L widens the product so a large page index cannot overflow int arithmetic.
                .hasNext((page + 1L) * size < total)
                .hasPrevious(page > 0)
                .build();
    }

    /** A request is hybrid as soon as its semantic query contains non-whitespace text. */
    private boolean hasQuery(SearchRequest request) {
        return StringUtils.hasText(request.getSemanticQuery());
    }

    /** Copies the structured filter fields of the request into a repository-level filter. */
    private TedStructuredSearchFilter toFilter(SearchRequest request) {
        return TedStructuredSearchFilter.builder()
                .countryCode(request.getCountryCode())
                .countryCodes(request.getCountryCodes())
                .noticeType(request.getNoticeType())
                .contractNature(request.getContractNature())
                .procedureType(request.getProcedureType())
                .cpvPrefix(request.getCpvPrefix())
                .cpvCodes(request.getCpvCodes())
                .nutsCode(request.getNutsCode())
                .nutsCodes(request.getNutsCodes())
                .publicationDateFrom(request.getPublicationDateFrom())
                .publicationDateTo(request.getPublicationDateTo())
                .submissionDeadlineAfter(request.getSubmissionDeadlineAfter())
                .euFunded(request.getEuFunded())
                .buyerNameContains(request.getBuyerNameContains())
                .projectTitleContains(request.getProjectTitleContains())
                .build();
    }

    /**
     * Translates the legacy sort parameters into a generic sort mode for hybrid requests.
     *
     * <p>NOTE(review): {@code sortDirection} is honoured only for {@code projectTitle}; date
     * sorts always map to {@code CREATED_AT_DESC}, even when ascending order was requested -
     * confirm this is intended.
     */
    private SearchSortMode resolveSortMode(String sortBy, String sortDirection) {
        if ("projectTitle".equalsIgnoreCase(sortBy) && "asc".equalsIgnoreCase(sortDirection)) {
            return SearchSortMode.TITLE_ASC;
        }
        if ("publicationDate".equalsIgnoreCase(sortBy) || "submissionDeadline".equalsIgnoreCase(sortBy)) {
            return SearchSortMode.CREATED_AT_DESC;
        }
        return SearchSortMode.SCORE_DESC;
    }

    /** Converts a repository row into the response DTO; similarity is left unset here. */
    private DocumentSummary toSummary(TedStructuredSearchSummaryRow row) {
        return DocumentSummary.builder()
                .id(row.documentId())
                .publicationId(row.publicationId())
                .noticeId(row.noticeId())
                .noticeType(row.noticeType())
                .projectTitle(row.projectTitle())
                .buyerName(row.buyerName())
                .buyerCountryCode(row.buyerCountryCode())
                .buyerCity(row.buyerCity())
                .contractNature(row.contractNature())
                .procedureType(row.procedureType())
                .publicationDate(row.publicationDate())
                .submissionDeadline(row.submissionDeadline())
                .cpvCodes(row.cpvCodes())
                .totalLots(row.totalLots())
                .estimatedValue(row.estimatedValue())
                .estimatedValueCurrency(row.estimatedValueCurrency())
                .build();
    }
}
at.procon.ted.model.entity.ContractNature; import at.procon.ted.model.entity.NoticeType; import at.procon.ted.model.entity.ProcedureType; +import io.swagger.v3.oas.annotations.Parameter; import java.time.LocalDate; import java.time.OffsetDateTime; import java.util.List; import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; import org.springframework.format.annotation.DateTimeFormat; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.GetMapping; @@ -25,7 +25,6 @@ import org.springframework.web.bind.annotation.RestController; @RestController @RequestMapping("/v1/documents") @RequiredArgsConstructor -@Slf4j @ConditionalOnRuntimeMode(RuntimeMode.NEW) public class TedStructuredSearchController { @@ -48,13 +47,15 @@ public class TedStructuredSearchController { @RequestParam(required = false) Boolean euFunded, @RequestParam(required = false) String buyerNameContains, @RequestParam(required = false) String projectTitleContains, - @RequestParam(required = false) String q, - @RequestParam(required = false, defaultValue = "0.7") Double similarityThreshold, + @RequestParam(required = false, name = "q") String q, + @RequestParam(required = false) Double similarityThreshold, + @RequestParam(required = false) Boolean includeFacets, + @RequestParam(required = false) Integer facetBucketLimit, @RequestParam(required = false, defaultValue = "0") Integer page, @RequestParam(required = false, defaultValue = "20") Integer size, @RequestParam(required = false, defaultValue = "publicationDate") String sortBy, - @RequestParam(required = false, defaultValue = "desc") String sortDirection) { - + @RequestParam(required = false, defaultValue = "desc") String sortDirection + ) { SearchRequest request = SearchRequest.builder() .countryCode(countryCode) .countryCodes(countryCodes) @@ -73,19 +74,18 @@ public class TedStructuredSearchController { .projectTitleContains(projectTitleContains) .semanticQuery(q) 
.similarityThreshold(similarityThreshold) + .includeFacets(includeFacets) + .facetBucketLimit(facetBucketLimit) .page(page) .size(size) .sortBy(sortBy) .sortDirection(sortDirection) .build(); - - log.debug("NEW runtime TED structured search request: {}", request); return ResponseEntity.ok(searchService.search(request)); } @PostMapping("/search") public ResponseEntity searchDocumentsPost(@RequestBody SearchRequest request) { - log.debug("NEW runtime TED structured search request (POST): {}", request); return ResponseEntity.ok(searchService.search(request)); } } diff --git a/src/main/java/at/procon/dip/search/dto/SearchRequest.java b/src/main/java/at/procon/dip/search/dto/SearchRequest.java index 583686d..0e0278e 100644 --- a/src/main/java/at/procon/dip/search/dto/SearchRequest.java +++ b/src/main/java/at/procon/dip/search/dto/SearchRequest.java @@ -50,4 +50,9 @@ public class SearchRequest { * When omitted, the new embedding subsystem default query model is used. */ private String semanticModelKey; + + /** + * Optional per-request similarity threshold override for semantic search. + */ + private Double semanticSimilarityThreshold; } diff --git a/src/main/java/at/procon/dip/search/engine/semantic/PgVectorSemanticSearchEngine.java b/src/main/java/at/procon/dip/search/engine/semantic/PgVectorSemanticSearchEngine.java index b6481a4..691523a 100644 --- a/src/main/java/at/procon/dip/search/engine/semantic/PgVectorSemanticSearchEngine.java +++ b/src/main/java/at/procon/dip/search/engine/semantic/PgVectorSemanticSearchEngine.java @@ -47,6 +47,10 @@ public class PgVectorSemanticSearchEngine implements SearchEngine { EmbeddingModelDescriptor model = resolveModel(requestedModelKey); validateModel(model); + double threshold = context.getRequest().getSemanticSimilarityThreshold() != null + ? 
context.getRequest().getSemanticSimilarityThreshold() + : properties.getSimilarityThreshold(); + return queryEmbeddingService.buildQueryEmbedding( context.getRequest().getQueryText(), model.modelKey()) @@ -57,7 +61,7 @@ public class PgVectorSemanticSearchEngine implements SearchEngine { model.distanceMetric(), query.vectorString(), properties.getSemanticCandidateLimit(), - properties.getSimilarityThreshold())) + threshold)) .orElseGet(() -> { log.debug("Semantic search skipped because query embedding could not be generated for model {}", model.modelKey()); return List.of(); diff --git a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java index 5efdde6..b9c8788 100644 --- a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java +++ b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java @@ -85,6 +85,11 @@ final class SearchSqlFilterSupport { sql.append(" AND dt.tenant_key IN (:ownerTenantKeys)"); params.addValue("ownerTenantKeys", context.getScope().ownerTenantKeys()); } + + if (context.getScope() != null && !CollectionUtils.isEmpty(context.getScope().candidateDocumentIds())) { + sql.append(" AND ").append(documentAlias).append(".id IN (:candidateDocumentIds)"); + params.addValue("candidateDocumentIds", context.getScope().candidateDocumentIds()); + } } private static Set firstNonEmpty(Set primary, Set fallback) { diff --git a/src/main/java/at/procon/dip/search/spi/SearchDocumentScope.java b/src/main/java/at/procon/dip/search/spi/SearchDocumentScope.java index fd2a373..0415fbb 100644 --- a/src/main/java/at/procon/dip/search/spi/SearchDocumentScope.java +++ b/src/main/java/at/procon/dip/search/spi/SearchDocumentScope.java @@ -4,6 +4,7 @@ import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentType; import java.util.Set; +import 
java.util.UUID; /** * Minimal generic search scope for future hybrid/semantic search services. @@ -13,6 +14,7 @@ public record SearchDocumentScope( Set documentTypes, Set documentFamilies, Set visibilities, - String languageCode + String languageCode, + Set candidateDocumentIds ) { } diff --git a/src/main/java/at/procon/dip/search/web/GenericSearchController.java b/src/main/java/at/procon/dip/search/web/GenericSearchController.java index f0941cc..53d1d3a 100644 --- a/src/main/java/at/procon/dip/search/web/GenericSearchController.java +++ b/src/main/java/at/procon/dip/search/web/GenericSearchController.java @@ -49,7 +49,8 @@ public class GenericSearchController { request.getDocumentTypes(), request.getDocumentFamilies(), request.getVisibilities(), - scopeLanguage + scopeLanguage, + null ); } } diff --git a/src/main/java/at/procon/ted/model/dto/DocumentDtos.java b/src/main/java/at/procon/ted/model/dto/DocumentDtos.java index 78f9c03..c3de181 100644 --- a/src/main/java/at/procon/ted/model/dto/DocumentDtos.java +++ b/src/main/java/at/procon/ted/model/dto/DocumentDtos.java @@ -4,6 +4,7 @@ import at.procon.ted.model.entity.ContractNature; import at.procon.ted.model.entity.NoticeType; import at.procon.ted.model.entity.ProcedureType; import at.procon.ted.model.entity.VectorizationStatus; +import at.procon.dip.domain.ted.search.dto.TedStructuredSearchFacets; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -199,6 +200,10 @@ public class DocumentDtos { // Semantic search private String semanticQuery; private Double similarityThreshold; + + // Additional options + private Boolean includeFacets; + private Integer facetBucketLimit; // Pagination private Integer page; @@ -222,6 +227,7 @@ public class DocumentDtos { private int totalPages; private boolean hasNext; private boolean hasPrevious; + private TedStructuredSearchFacets facets; } /** diff --git 
// ---- modified file: src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchEndpointIntegrationTest.java ----
package at.procon.dip.domain.ted.search.integration;

import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;

import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.RepresentationType;
import at.procon.dip.domain.ted.entity.TedNoticeProjection;
import at.procon.dip.testsupport.AbstractTedStructuredSearchIntegrationTest;
import at.procon.ted.model.entity.ContractNature;
import at.procon.ted.model.entity.NoticeType;
import at.procon.ted.model.entity.ProcedureType;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import org.junit.jupiter.api.Test;

/**
 * Endpoint-level check of the NEW-runtime TED structured search: a single stored notice must
 * come back through the GET endpoint together with country and notice-type facets.
 */
class TedStructuredSearchEndpointIntegrationTest extends AbstractTedStructuredSearchIntegrationTest {

    @Test
    void getSearch_should_return_structured_results_and_facets() throws Exception {
        // Arrange: one TED document plus its notice projection.
        var created = dataFactory.createDocumentWithPrimaryRepresentation(
                "Medical imaging systems for Vienna hospital",
                "Procurement summary",
                "Imaging systems and maintenance.",
                DocumentType.TED_NOTICE,
                DocumentFamily.PROCUREMENT,
                "en",
                RepresentationType.SEMANTIC_TEXT
        );

        TedNoticeProjection projection = TedNoticeProjection.builder()
                .document(created.document())
                .publicationId("100000-2026")
                .noticeId("notice-100000-2026")
                .noticeType(NoticeType.CONTRACT_NOTICE)
                .buyerName("Vienna General Hospital")
                .buyerCountryCode("AUT")
                .buyerCity("Vienna")
                .projectTitle("Medical imaging systems")
                .contractNature(ContractNature.SUPPLIES)
                .procedureType(ProcedureType.OPEN)
                .publicationDate(LocalDate.of(2026, 4, 10))
                .submissionDeadline(OffsetDateTime.parse("2026-05-01T10:00:00+02:00"))
                .cpvCodes(new String[]{"33110000", "33120000"})
                .totalLots(2)
                .euFunded(true)
                .build();
        tedNoticeProjectionRepository.save(projection);

        // Act: structured-only request (no q) with facets switched on.
        // NOTE(review): the path carries an "/api" prefix while the controller is mapped to
        // "/v1/documents" - presumably the test MockMvc adds a context path; confirm.
        var searchRequest = get("/api/v1/documents/search")
                .param("countryCode", "AUT")
                .param("noticeType", "CONTRACT_NOTICE")
                .param("includeFacets", "true");

        // Assert: the stored notice is the first hit and both facets lead with its values.
        mockMvc.perform(searchRequest)
                .andExpect(status().isOk())
                .andExpect(jsonPath("$.documents[0].publicationId").value("100000-2026"))
                .andExpect(jsonPath("$.documents[0].buyerName").value("Vienna General Hospital"))
                .andExpect(jsonPath("$.facets.countries[0].key").value("AUT"))
                .andExpect(jsonPath("$.facets.noticeTypes[0].key").value("CONTRACT_NOTICE"));
    }
}
b/src/test/java/at/procon/dip/domain/ted/search/integration/TedStructuredSearchParityIntegrationTest.java @@ -2,30 +2,22 @@ package at.procon.dip.domain.ted.search.integration; import static org.assertj.core.api.Assertions.assertThat; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.document.DocumentFamily; -import at.procon.dip.domain.document.DocumentStatus; import at.procon.dip.domain.document.DocumentType; -import at.procon.dip.domain.document.entity.Document; +import at.procon.dip.domain.document.RepresentationType; import at.procon.dip.domain.ted.entity.TedNoticeProjection; import at.procon.dip.domain.ted.service.TedStructuredSearchService; import at.procon.dip.testsupport.AbstractTedStructuredSearchIntegrationTest; import at.procon.ted.config.TedProcessorProperties; -import at.procon.ted.model.dto.DocumentDtos; +import at.procon.ted.model.dto.DocumentDtos.SearchRequest; import at.procon.ted.model.entity.ContractNature; import at.procon.ted.model.entity.NoticeType; import at.procon.ted.model.entity.ProcedureType; import at.procon.ted.model.entity.ProcurementDocument; import at.procon.ted.service.SearchService; import at.procon.ted.service.VectorizationService; -import java.math.BigDecimal; import java.time.LocalDate; -import java.time.OffsetDateTime; -import java.util.List; -import java.util.UUID; -import java.util.stream.Collectors; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -35,124 +27,69 @@ class TedStructuredSearchParityIntegrationTest extends AbstractTedStructuredSear private TedStructuredSearchService newSearchService; @Test - void new_structured_search_should_match_legacy_search_for_common_filters() { - createLegacyAndProjection("00786665-2025", "AUT", NoticeType.CONTRACT_NOTICE, ContractNature.SUPPLIES, - ProcedureType.OPEN, "City of Vienna", "Medical gloves framework", - 
LocalDate.of(2025, 1, 15), OffsetDateTime.parse("2025-02-15T12:00:00Z"), true, - new String[]{"33140000"}, new String[]{"AT130"}); - createLegacyAndProjection("00786666-2025", "AUT", NoticeType.CONTRACT_NOTICE, ContractNature.SUPPLIES, - ProcedureType.OPEN, "City of Vienna", "Office furniture framework", - LocalDate.of(2025, 1, 10), OffsetDateTime.parse("2025-02-10T12:00:00Z"), false, - new String[]{"39130000"}, new String[]{"AT130"}); - createLegacyAndProjection("00786667-2025", "DEU", NoticeType.CONTRACT_NOTICE, ContractNature.SERVICES, - ProcedureType.RESTRICTED, "Berlin Utilities", "Heating maintenance", - LocalDate.of(2025, 1, 12), OffsetDateTime.parse("2025-02-11T12:00:00Z"), true, - new String[]{"50720000"}, new String[]{"DE300"}); + void structuredSearch_should_match_legacy_for_shared_filters() { + var created = dataFactory.createDocumentWithPrimaryRepresentation( + "Road maintenance services in Graz", + "Procurement summary", + "Road maintenance and winter service.", + DocumentType.TED_NOTICE, + DocumentFamily.PROCUREMENT, + "en", + RepresentationType.SEMANTIC_TEXT + ); - DocumentDtos.SearchRequest request = DocumentDtos.SearchRequest.builder() + tedNoticeProjectionRepository.save(TedNoticeProjection.builder() + .document(created.document()) + .publicationId("200000-2026") + .noticeId("notice-200000-2026") + .noticeType(NoticeType.CONTRACT_NOTICE) + .buyerName("City of Graz") + .buyerCountryCode("AUT") + .buyerCity("Graz") + .projectTitle("Road maintenance services") + .contractNature(ContractNature.SERVICES) + .procedureType(ProcedureType.OPEN) + .publicationDate(LocalDate.of(2026, 4, 12)) + .euFunded(false) + .build()); + + procurementDocumentRepository.save(ProcurementDocument.builder() + .documentHash("legacy-200000-2026") + .publicationId("200000-2026") + .noticeId("notice-200000-2026") + .noticeType(NoticeType.CONTRACT_NOTICE) + .buyerName("City of Graz") + .buyerCountryCode("AUT") + .buyerCity("Graz") + .projectTitle("Road maintenance services") + 
.contractNature(ContractNature.SERVICES) + .procedureType(ProcedureType.OPEN) + .publicationDate(LocalDate.of(2026, 4, 12)) + .euFunded(false) + .build()); + + SearchRequest request = SearchRequest.builder() .countryCode("AUT") .noticeType(NoticeType.CONTRACT_NOTICE) - .contractNature(ContractNature.SUPPLIES) - .publicationDateFrom(LocalDate.of(2025, 1, 1)) - .publicationDateTo(LocalDate.of(2025, 1, 31)) - .buyerNameContains("vienna") + .contractNature(ContractNature.SERVICES) + .procedureType(ProcedureType.OPEN) + .projectTitleContains("maintenance") + .publicationDateFrom(LocalDate.of(2026, 4, 1)) + .publicationDateTo(LocalDate.of(2026, 4, 30)) .page(0) .size(20) .sortBy("publicationDate") .sortDirection("desc") .build(); - DocumentDtos.SearchResponse newResponse = newSearchService.search(request); - DocumentDtos.SearchResponse legacyResponse = legacySearchService().search(request); - - assertThat(newResponse.getTotalElements()).isEqualTo(legacyResponse.getTotalElements()); - assertThat(newResponse.getDocuments().stream().map(DocumentDtos.DocumentSummary::getPublicationId).collect(Collectors.toList())) - .containsExactlyElementsOf(legacyResponse.getDocuments().stream().map(DocumentDtos.DocumentSummary::getPublicationId).collect(Collectors.toList())); - } - - private SearchService legacySearchService() { - VectorizationService vectorizationService = mock(VectorizationService.class); - when(vectorizationService.isAvailable()).thenReturn(false); - TedProcessorProperties properties = new TedProcessorProperties(); - properties.getSearch().setDefaultPageSize(20); - properties.getSearch().setMaxPageSize(100); - return new SearchService(procurementDocumentRepository, vectorizationService, properties); - } + TedProcessorProperties props = new TedProcessorProperties(); + SearchService legacySearchService = new SearchService(procurementDocumentRepository, mock(VectorizationService.class), props); - private void createLegacyAndProjection(String publicationId, - String 
countryCode, - NoticeType noticeType, - ContractNature contractNature, - ProcedureType procedureType, - String buyerName, - String projectTitle, - LocalDate publicationDate, - OffsetDateTime submissionDeadline, - boolean euFunded, - String[] cpvCodes, - String[] nutsCodes) { - ProcurementDocument legacy = procurementDocumentRepository.save(ProcurementDocument.builder() - .documentHash(publicationId + "-hash") - .publicationId(publicationId) - .noticeId("NOTICE-" + publicationId) - .noticeType(noticeType) - .contractNature(contractNature) - .procedureType(procedureType) - .buyerCountryCode(countryCode) - .buyerName(buyerName) - .buyerCity("Vienna") - .buyerNutsCode(nutsCodes != null && nutsCodes.length > 0 ? nutsCodes[0] : null) - .projectTitle(projectTitle) - .projectDescription(projectTitle + " description") - .publicationDate(publicationDate) - .submissionDeadline(submissionDeadline) - .cpvCodes(cpvCodes) - .nutsCodes(nutsCodes) - .totalLots(1) - .estimatedValue(new BigDecimal("1000.00")) - .estimatedValueCurrency("EUR") - .euFunded(euFunded) - .textContent(projectTitle) - .xmlDocument("") - .sourceFilename(publicationId + ".xml") - .sourcePath("/tmp/" + publicationId + ".xml") - .build()); + var legacy = legacySearchService.search(request); + var current = newSearchService.search(request); - Document document = documentRepository.save(Document.builder() - .visibility(DocumentVisibility.PUBLIC) - .documentType(DocumentType.TED_NOTICE) - .documentFamily(DocumentFamily.PROCUREMENT) - .status(DocumentStatus.RECEIVED) - .title(projectTitle) - .summary(projectTitle) - .languageCode("en") - .mimeType("application/xml") - .businessKey(publicationId) - .dedupHash(publicationId) - .build()); - - projectionRepository.save(TedNoticeProjection.builder() - .document(document) - .legacyProcurementDocumentId(legacy.getId()) - .publicationId(publicationId) - .noticeId(legacy.getNoticeId()) - .noticeType(noticeType) - .contractNature(contractNature) - 
.procedureType(procedureType) - .buyerCountryCode(countryCode) - .buyerName(buyerName) - .buyerCity("Vienna") - .buyerNutsCode(nutsCodes != null && nutsCodes.length > 0 ? nutsCodes[0] : null) - .projectTitle(projectTitle) - .projectDescription(projectTitle + " description") - .publicationDate(publicationDate) - .submissionDeadline(submissionDeadline) - .cpvCodes(cpvCodes) - .nutsCodes(nutsCodes) - .totalLots(1) - .estimatedValue(new BigDecimal("1000.00")) - .estimatedValueCurrency("EUR") - .euFunded(euFunded) - .build()); + assertThat(current.getTotalElements()).isEqualTo(legacy.getTotalElements()); + assertThat(current.getDocuments()).extracting("publicationId") + .containsExactlyElementsOf(legacy.getDocuments().stream().map(d -> d.getPublicationId()).toList()); } } diff --git a/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java index 82275da..7bf323d 100644 --- a/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java @@ -50,7 +50,7 @@ class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegration SearchResponse response = searchOrchestrator.search( request, - new SearchDocumentScope(Set.of(), null, null, null, null)); + new SearchDocumentScope(Set.of(), null, null, null, null, null)); assertThat(response.getHits()).hasSize(1); assertThat(response.getHits().getFirst().getTitle()).isEqualTo("Maintenance manual"); @@ -84,11 +84,11 @@ class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegration SearchResponse primaryOnlyResponse = searchOrchestrator.search( primaryOnly, - new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null)); + new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), 
Set.of(DocumentFamily.GENERIC), null, null, null)); SearchResponse primaryAndChunksResponse = searchOrchestrator.search( primaryAndChunks, - new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null)); + new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null, null)); assertThat(primaryOnlyResponse.getHits()).isEmpty(); assertThat(primaryAndChunksResponse.getHits()).hasSize(1); @@ -121,6 +121,7 @@ class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegration Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, + null, null ) ); @@ -159,6 +160,7 @@ class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegration Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, + null, null )) .page(0) diff --git a/src/test/java/at/procon/dip/search/integration/GenericSearchRepositoryIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/GenericSearchRepositoryIntegrationTest.java index 45a8af3..55f157e 100644 --- a/src/test/java/at/procon/dip/search/integration/GenericSearchRepositoryIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/GenericSearchRepositoryIntegrationTest.java @@ -67,7 +67,7 @@ class GenericSearchRepositoryIntegrationTest extends AbstractSearchIntegrationTe .modes(Set.of(SearchMode.FULLTEXT)) .representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY) .build()) - .scope(new SearchDocumentScope(Set.of(), null, null, null, null)) + .scope(new SearchDocumentScope(Set.of(), null, null, null, null, null)) .page(0) .size(10) .build(); @@ -96,7 +96,7 @@ class GenericSearchRepositoryIntegrationTest extends AbstractSearchIntegrationTe .modes(Set.of(SearchMode.TRIGRAM)) .representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY) .build()) - .scope(new SearchDocumentScope(Set.of(), null, null, null, null)) + .scope(new SearchDocumentScope(Set.of(), 
null, null, null, null, null)) .page(0) .size(10) .build(); diff --git a/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java index 325c930..4f983d5 100644 --- a/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java @@ -52,7 +52,7 @@ class GenericSemanticSearchOrchestratorIntegrationTest extends AbstractSemanticS SearchResponse response = searchOrchestrator.search( request, - new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null) + new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null, null) ); assertThat(response.getHits()).isNotEmpty(); @@ -83,7 +83,7 @@ class GenericSemanticSearchOrchestratorIntegrationTest extends AbstractSemanticS SearchResponse response = searchOrchestrator.search( request, - new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null) + new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null, null) ); assertThat(response.getHits()).isNotEmpty(); diff --git a/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java index aa4c01d..ade0616 100644 --- a/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java @@ -57,12 +57,12 @@ class SemanticModelSelectionIntegrationTest extends AbstractSemanticSearchIntegr SearchResponse defaultModelResponse = searchOrchestrator.search( defaultModelRequest, - new 
SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null) + new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null, null) ); SearchResponse alternateModelResponse = searchOrchestrator.search( alternateModelRequest, - new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null) + new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null, null) ); assertThat(defaultModelResponse.getHits()).isEmpty(); diff --git a/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java b/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java index 63eacfc..066b735 100644 --- a/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java +++ b/src/test/java/at/procon/dip/testsupport/AbstractTedStructuredSearchIntegrationTest.java @@ -2,6 +2,7 @@ package at.procon.dip.testsupport; import at.procon.dip.FixedPortPostgreSQLContainer; import at.procon.dip.domain.document.repository.DocumentRepository; +import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; import at.procon.dip.domain.ted.repository.TedNoticeProjectionRepository; import at.procon.ted.repository.ProcurementDocumentRepository; import javax.sql.DataSource; @@ -13,6 +14,7 @@ import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.test.context.DynamicPropertyRegistry; import org.springframework.test.context.DynamicPropertySource; import org.springframework.test.context.TestPropertySource; +import org.springframework.test.web.servlet.MockMvc; import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -28,12 +30,18 @@ import org.testcontainers.junit.jupiter.Testcontainers; 
"spring.main.lazy-initialization=true", "dip.runtime.mode=NEW", "dip.search.default-page-size=20", - "dip.search.max-page-size=100" + "dip.search.max-page-size=100", + "dip.search.fulltext-weight=0.60", + "dip.search.trigram-weight=0.40", + "dip.search.semantic-weight=0.45", + "dip.search.recency-boost-weight=0.05", + "dip.search.trigram-similarity-threshold=0.10", + "server.servlet.context-path=/api" }) public abstract class AbstractTedStructuredSearchIntegrationTest { private static final int HOST_PORT = 15434; - private static final String DB_NAME = "dip_ted_structured_search_test"; + private static final String DB_NAME = "dip_ted_search_test"; private static final String DB_USER = "test"; private static final String DB_PASSWORD = "test"; private static final String JDBC_URL = "jdbc:postgresql://localhost:" + HOST_PORT + "/" + DB_NAME; @@ -56,27 +64,34 @@ public abstract class AbstractTedStructuredSearchIntegrationTest { registry.add("spring.datasource.driver-class-name", () -> "org.postgresql.Driver"); } - @Autowired - protected JdbcTemplate jdbcTemplate; - - @Autowired - protected DataSource dataSource; - - @Autowired - protected DocumentRepository documentRepository; - - @Autowired - protected TedNoticeProjectionRepository projectionRepository; - - @Autowired - protected ProcurementDocumentRepository procurementDocumentRepository; + @Autowired protected JdbcTemplate jdbcTemplate; + @Autowired protected DataSource dataSource; + @Autowired protected MockMvc mockMvc; + @Autowired protected SearchTestDataFactory dataFactory; + @Autowired protected DocumentRepository documentRepository; + @Autowired protected DocumentTextRepresentationRepository representationRepository; + @Autowired protected TedNoticeProjectionRepository tedNoticeProjectionRepository; + @Autowired protected ProcurementDocumentRepository procurementDocumentRepository; @BeforeEach void resetDatabase() { + ensureSearchColumnsAndIndexes(); cleanupDatabase(); } + protected void 
ensureSearchColumnsAndIndexes() { + jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc"); + jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS ted"); + jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc"); + jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)"); + jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)"); + } + protected void cleanupDatabase() { - jdbcTemplate.execute("TRUNCATE TABLE ted.ted_notice_lot, ted.ted_notice_organization, ted.ted_notice_projection, ted.procurement_lot, ted.organization, ted.procurement_document, doc.doc_document, doc.doc_tenant RESTART IDENTITY CASCADE"); + jdbcTemplate.execute("TRUNCATE TABLE ted.ted_notice_organization, ted.ted_notice_lot, ted.ted_notice_projection, doc.doc_text_representation, doc.doc_document, doc.doc_tenant, doc.procurement_lot, doc.organization, doc.procurement_document RESTART IDENTITY CASCADE"); } } diff --git a/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java b/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java index 3b47db2..8fd31e8 100644 --- a/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java +++ b/src/test/java/at/procon/dip/testsupport/TedStructuredSearchTestApplication.java @@ 
-1,37 +1,47 @@ package at.procon.dip.testsupport; import at.procon.dip.config.JacksonConfig; +import at.procon.dip.domain.document.service.DocumentContentService; +import at.procon.dip.domain.document.service.DocumentRepresentationService; +import at.procon.dip.domain.document.service.DocumentService; +import at.procon.dip.domain.ted.config.TedProjectionProperties; import at.procon.dip.domain.ted.search.TedStructuredSearchRepository; import at.procon.dip.domain.ted.service.TedStructuredSearchService; import at.procon.dip.domain.ted.web.TedStructuredSearchController; +import at.procon.dip.ingestion.config.DipIngestionProperties; import at.procon.dip.search.config.DipSearchProperties; +import at.procon.dip.search.engine.fulltext.PostgresFullTextSearchEngine; +import at.procon.dip.search.engine.trigram.PostgresTrigramSearchEngine; +import at.procon.dip.search.plan.DefaultSearchPlanner; +import at.procon.dip.search.rank.DefaultSearchResultFusionService; +import at.procon.dip.search.rank.DefaultSearchScoreNormalizer; +import at.procon.dip.search.repository.DocumentFullTextSearchRepositoryImpl; +import at.procon.dip.search.repository.DocumentTrigramSearchRepositoryImpl; +import at.procon.dip.search.service.DefaultSearchOrchestrator; +import at.procon.dip.search.service.DocumentLexicalIndexService; +import at.procon.dip.search.service.SearchMetricsService; import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.AutoConfigureOrder; import org.springframework.boot.autoconfigure.ImportAutoConfiguration; -import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration; -import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration; +import org.springframework.boot.autoconfigure.domain.EntityScan; import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration; import 
org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration; import org.springframework.boot.autoconfigure.transaction.TransactionAutoConfiguration; -import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration; import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.context.annotation.Import; import org.springframework.data.jpa.repository.config.EnableJpaRepositories; -import org.springframework.boot.autoconfigure.domain.EntityScan; @SpringBootConfiguration @AutoConfigureMockMvc @ImportAutoConfiguration({ - JacksonAutoConfiguration.class, - HttpMessageConvertersAutoConfiguration.class, DataSourceAutoConfiguration.class, HibernateJpaAutoConfiguration.class, TransactionAutoConfiguration.class, - JdbcTemplateAutoConfiguration.class, - WebMvcAutoConfiguration.class + JdbcTemplateAutoConfiguration.class }) -@EnableConfigurationProperties(DipSearchProperties.class) +@EnableConfigurationProperties({DipIngestionProperties.class, DipSearchProperties.class, TedProjectionProperties.class}) @EntityScan(basePackages = { "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", @@ -46,6 +56,20 @@ import org.springframework.boot.autoconfigure.domain.EntityScan; }) @Import({ JacksonConfig.class, + DocumentService.class, + DocumentContentService.class, + DocumentRepresentationService.class, + DocumentLexicalIndexService.class, + SearchTestDataFactory.class, + DefaultSearchPlanner.class, + DocumentFullTextSearchRepositoryImpl.class, + DocumentTrigramSearchRepositoryImpl.class, + PostgresFullTextSearchEngine.class, + PostgresTrigramSearchEngine.class, + DefaultSearchScoreNormalizer.class, + DefaultSearchResultFusionService.class, + SearchMetricsService.class, + DefaultSearchOrchestrator.class, TedStructuredSearchRepository.class, TedStructuredSearchService.class, 
TedStructuredSearchController.class