diff --git a/README_SLICE3.txt b/README_SLICE3.txt new file mode 100644 index 0000000..a2f2d05 --- /dev/null +++ b/README_SLICE3.txt @@ -0,0 +1,16 @@ +Slice 3 patch for the generic search platform. + +Contents: +- long-text CHUNK representations for generic and TED documents +- representation selection mode for generic search (PRIMARY_ONLY / PRIMARY_AND_CHUNKS / ALL) +- chunk-aware document collapse and matchedRepresentationCount in fused results +- recency-aware scoring boost +- lightweight search metrics endpoint: GET /api/search/metrics + +Assumptions: +- apply on top of Slice 2 and the Slice 2 fix patch +- no additional DB migration is required in this slice + +Notes: +- Maven compile was not available in the patch generation environment +- this patch intentionally keeps TED and Mail structured search for later slices diff --git a/docs/testing/SEARCH_TEST_PLAN.md b/docs/testing/SEARCH_TEST_PLAN.md new file mode 100644 index 0000000..9591c44 --- /dev/null +++ b/docs/testing/SEARCH_TEST_PLAN.md @@ -0,0 +1,27 @@ +# Generic Search Slice Test Plan + +This patch adds a minimal but useful integration-test baseline for the new generic search slices. + +## What is covered + +- PostgreSQL full-text search over `DOC.doc_text_representation.search_vector` +- PostgreSQL trigram search over document title / summary / representation text +- hybrid orchestration and document-level collapse +- representation selection modes (`PRIMARY_ONLY`, `PRIMARY_AND_CHUNKS`) +- REST endpoint smoke tests for: + - `POST /api/search` + - `POST /api/search/debug` + - `GET /api/search/metrics` + +## Recommended execution order + +1. Apply the search-slice DB migration(s) or ensure the runtime schema already contains the lexical search columns. +2. Run the new integration tests with PostgreSQL Testcontainers. +3. Start the application locally and try the included Postman requests. +4. Only after lexical tests are green, add semantic engine integration tests. 
+ +## Notes + +- The test application intentionally imports only the DOC domain services and lexical search beans. +- Semantic/vector beans are left out to keep the test context small and deterministic. +- The base test class adds the `search_config` and `search_vector` columns if they are not already present. diff --git a/pom.xml b/pom.xml index 54fd084..b9f634d 100644 --- a/pom.xml +++ b/pom.xml @@ -238,6 +238,10 @@ 1.21.4 test + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + diff --git a/postman/DIP-Generic-Search.postman_collection.json b/postman/DIP-Generic-Search.postman_collection.json new file mode 100644 index 0000000..0ce8553 --- /dev/null +++ b/postman/DIP-Generic-Search.postman_collection.json @@ -0,0 +1,92 @@ +{ + "info": { + "name": "DIP Generic Search", + "_postman_id": "2d8f227e-4f38-45c0-9d59-b0642773c993", + "description": "Sample requests for the generic lexical search slices (full-text, trigram, hybrid, debug, metrics).", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" + }, + "variable": [ + {"key": "baseUrl", "value": "http://localhost:8889/api"} + ], + "item": [ + { + "name": "Search - fulltext exact", + "request": { + "method": "POST", + "header": [{"key": "Content-Type", "value": "application/json"}], + "url": "{{baseUrl}}/search", + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"framework agreement\",\n \"modes\": [\"FULLTEXT\"],\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_ONLY\",\n \"page\": 0,\n \"size\": 10\n}" + } + } + }, + { + "name": "Search - trigram fuzzy title", + "request": { + "method": "POST", + "header": [{"key": "Content-Type", "value": "application/json"}], + "url": "{{baseUrl}}/search", + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"Viena school renovtion\",\n \"modes\": [\"TRIGRAM\"],\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_ONLY\",\n \"page\": 0,\n \"size\": 10\n}" + } + } + }, + { 
+ "name": "Search - hybrid lexical", + "request": { + "method": "POST", + "header": [{"key": "Content-Type", "value": "application/json"}], + "url": "{{baseUrl}}/search", + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"Maintenance manual\",\n \"modes\": [\"HYBRID\"],\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_ONLY\",\n \"page\": 0,\n \"size\": 10\n}" + } + } + }, + { + "name": "Search - chunk-aware", + "request": { + "method": "POST", + "header": [{"key": "Content-Type", "value": "application/json"}], + "url": "{{baseUrl}}/search", + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"district heating optimization\",\n \"modes\": [\"FULLTEXT\"],\n \"documentTypes\": [\"TEXT\"],\n \"documentFamilies\": [\"GENERIC\"],\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_AND_CHUNKS\",\n \"page\": 0,\n \"size\": 10\n}" + } + } + }, + { + "name": "Search - createdFrom filter", + "request": { + "method": "POST", + "header": [{"key": "Content-Type", "value": "application/json"}], + "url": "{{baseUrl}}/search", + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"framework agreement\",\n \"modes\": [\"FULLTEXT\"],\n \"createdFrom\": \"2026-01-01T00:00:00Z\",\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_ONLY\",\n \"page\": 0,\n \"size\": 10\n}" + } + } + }, + { + "name": "Search - debug", + "request": { + "method": "POST", + "header": [{"key": "Content-Type", "value": "application/json"}], + "url": "{{baseUrl}}/search/debug", + "body": { + "mode": "raw", + "raw": "{\n \"queryText\": \"maintenence manual\",\n \"modes\": [\"HYBRID\"],\n \"collapseByDocument\": true,\n \"representationSelectionMode\": \"PRIMARY_ONLY\",\n \"page\": 0,\n \"size\": 10\n}" + } + } + }, + { + "name": "Search - metrics", + "request": { + "method": "GET", + "url": "{{baseUrl}}/search/metrics" + } + } + ] +} diff --git 
a/src/main/java/at/procon/dip/domain/document/repository/DocumentTextRepresentationRepository.java b/src/main/java/at/procon/dip/domain/document/repository/DocumentTextRepresentationRepository.java index 8dcbf34..c831cfe 100644 --- a/src/main/java/at/procon/dip/domain/document/repository/DocumentTextRepresentationRepository.java +++ b/src/main/java/at/procon/dip/domain/document/repository/DocumentTextRepresentationRepository.java @@ -15,5 +15,9 @@ public interface DocumentTextRepresentationRepository extends JpaRepository findByPrimaryRepresentationTrue(); + long countByPrimaryRepresentationTrue(); + + long countByRepresentationType(RepresentationType representationType); + Optional findFirstByDocument_IdAndPrimaryRepresentationTrue(UUID documentId); } diff --git a/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java b/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java index 88fe97a..c081f54 100644 --- a/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java +++ b/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java @@ -36,7 +36,7 @@ public class DocumentRepresentationService { .primaryRepresentation(command.primaryRepresentation()) .textBody(command.textBody()) .build(); - DocumentTextRepresentation saved = representationRepository.save(representation); + DocumentTextRepresentation saved = representationRepository.saveAndFlush(representation); lexicalIndexService.indexRepresentation(saved.getId()); return saved; } diff --git a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java index 081b744..7013b39 100644 --- a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java +++ b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java @@ -12,7 +12,6 @@ import 
at.procon.ted.config.TedProcessorProperties; import at.procon.ted.model.entity.Organization; import at.procon.ted.model.entity.ProcurementDocument; import at.procon.ted.model.entity.ProcurementLot; -import at.procon.ted.service.TedPhase2GenericDocumentService; import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -30,7 +29,7 @@ import org.springframework.transaction.annotation.Transactional; public class TedNoticeProjectionService { private final TedProcessorProperties properties; - private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService; + private final TedGenericDocumentRootService tedGenericDocumentRootService; private final DocumentRepository documentRepository; private final TedNoticeProjectionRepository projectionRepository; private final TedNoticeLotRepository lotRepository; @@ -42,9 +41,8 @@ public class TedNoticeProjectionService { return null; } - TedPhase2GenericDocumentService.TedGenericDocumentSyncResult syncResult = - tedPhase2GenericDocumentService.syncTedDocument(legacyDocument); - return registerOrRefreshProjection(legacyDocument, syncResult.documentId()); + UUID genericDocumentId = tedGenericDocumentRootService.ensureGenericTedDocumentRoot(legacyDocument); + return registerOrRefreshProjection(legacyDocument, genericDocumentId); } @Transactional @@ -55,7 +53,7 @@ public class TedNoticeProjectionService { UUID resolvedDocumentId = genericDocumentId; if (resolvedDocumentId == null) { - resolvedDocumentId = tedPhase2GenericDocumentService.ensureGenericTedDocument(legacyDocument); + resolvedDocumentId = tedGenericDocumentRootService.ensureGenericTedDocumentRoot(legacyDocument); } UUID finalResolvedDocumentId = resolvedDocumentId; diff --git a/src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java b/src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java index 0834f37..f0cadae 100644 --- 
a/src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java +++ b/src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java @@ -425,8 +425,8 @@ public class GenericDocumentImportService { draft.languageCode(), null, draft.chunkIndex(), - null, - null, + draft.chunkStartOffset(), + draft.chunkEndOffset(), /* chunk offsets are now forwarded from the draft instead of always being null */ draft.primary(), draft.textBody() )); diff --git a/src/main/java/at/procon/dip/normalization/impl/ChunkedLongTextRepresentationBuilder.java b/src/main/java/at/procon/dip/normalization/impl/ChunkedLongTextRepresentationBuilder.java new file mode 100644 index 0000000..42f78f5 --- /dev/null +++ b/src/main/java/at/procon/dip/normalization/impl/ChunkedLongTextRepresentationBuilder.java @@ -0,0 +1,97 @@ +package at.procon.dip.normalization.impl; + +import at.procon.dip.domain.document.ContentRole; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.RepresentationType; +import at.procon.dip.normalization.spi.RepresentationBuildRequest; +import at.procon.dip.normalization.spi.TextRepresentationBuilder; +import at.procon.dip.normalization.spi.TextRepresentationDraft; +import at.procon.ted.config.TedProcessorProperties; +import java.util.ArrayList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.core.annotation.Order; +import org.springframework.stereotype.Component; +import org.springframework.util.StringUtils; + +@Component +@Order(200) +@RequiredArgsConstructor +public class ChunkedLongTextRepresentationBuilder implements TextRepresentationBuilder { /* Splits long document text into overlapping windows and emits one non-primary CHUNK draft per window; emits nothing when chunking is disabled, the text is blank, or the text is short enough to need no chunking. */ + + public static final String BUILDER_KEY = "long-text-chunker"; /* builder key passed to every draft created by this class */ + + private final TedProcessorProperties properties; /* source of the getSearch() chunking settings read in build */ + + @Override + public boolean supports(DocumentType documentType) { + return true; /* accepts every document type */ + } + + @Override + public List build(RepresentationBuildRequest request) { /* Returns the CHUNK drafts for the request; an empty list means no chunks were produced. */ + if (!properties.getSearch().isChunkingEnabled()) { + return List.of(); /* chunking switched off in the search properties */ + } + + String baseText = /* NORMALIZED_TEXT is preferred; HTML_CLEAN is tried next when it is blank */
request.extractionResult().derivedTextByRole().get(ContentRole.NORMALIZED_TEXT); + if (!StringUtils.hasText(baseText)) { + baseText = request.extractionResult().derivedTextByRole().get(ContentRole.HTML_CLEAN); /* fallback source when no normalized text exists */ + } + if (!StringUtils.hasText(baseText)) { + return List.of(); /* nothing to chunk */ + } + + int target = Math.max(400, properties.getSearch().getChunkTargetChars()); /* window size in chars, floored at 400 */ + int overlap = Math.max(0, Math.min(target / 3, properties.getSearch().getChunkOverlapChars())); /* overlap is never negative and at most one third of target */ + if (baseText.length() <= target + overlap) { /* short text: emit no chunk drafts at all */ + return List.of(); + } + + List drafts = new ArrayList<>(); + int start = 0; + int chunkIndex = 0; + while (start < baseText.length() && chunkIndex < properties.getSearch().getMaxChunksPerDocument()) { /* text beyond the configured chunk cap is left unchunked */ + int end = Math.min(baseText.length(), start + target); + if (end < baseText.length()) { + int boundary = findBoundary(baseText, end, Math.min(baseText.length(), end + 160)); /* search up to 160 chars past the window for a delimiter */ + if (boundary > start + 200) { + end = boundary; + } + } + + String chunk = baseText.substring(start, end).trim(); /* NOTE(review): the text is trimmed but start and end passed below describe the untrimmed window - confirm consumers expect that */ + if (StringUtils.hasText(chunk)) { + drafts.add(new TextRepresentationDraft( + RepresentationType.CHUNK, + BUILDER_KEY, + request.detectionResult().languageCode(), + chunk, + false, /* primary */ + chunkIndex, + start, /* chunkStartOffset */ + end, /* chunkEndOffset, exclusive as in substring above */ + ContentRole.NORMALIZED_TEXT, /* NOTE(review): reported even when baseText came from the HTML_CLEAN fallback - confirm intended */ + Boolean.TRUE /* queueForEmbedding */ + )); + chunkIndex++; /* only non-blank chunks consume an index */ + } + + if (end >= baseText.length()) { + break; + } + start = Math.max(end - overlap, start + 1); /* step back by the overlap but always advance by at least one char */ + } + return drafts; + } + + /* Returns the index just after the first newline, period, exclamation mark, question mark or semicolon found from preferred (inclusive) to max (exclusive); returns preferred when none is found. */ private int findBoundary(String text, int preferred, int max) { + for (int i = preferred; i < max; i++) { + char c = text.charAt(i); + if (c == '\n' || c == '.' || c == '!' || c == '?' 
|| c == ';') { + return i + 1; /* cut just after the delimiter */ + } + } + return preferred; /* no delimiter in range: hard cut at the preferred position */ + } +} diff --git a/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java b/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java index fd82a8a..93bea89 100644 --- a/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java +++ b/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java @@ -41,7 +41,6 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati String semantic = buildSemanticText(title, summary, request.detectionResult().documentType()); List drafts = new ArrayList<>(); - /* drafts.add(new TextRepresentationDraft( RepresentationType.FULLTEXT, BUILDER_KEY, @@ -49,10 +48,11 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati baseText, false, null, + null, + null, ContentRole.NORMALIZED_TEXT, - Boolean.TRUE + Boolean.FALSE )); - */ drafts.add(new TextRepresentationDraft( RepresentationType.SEMANTIC_TEXT, BUILDER_KEY, @@ -60,10 +60,11 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati semantic, true, null, + null, + null, ContentRole.NORMALIZED_TEXT, Boolean.TRUE )); - /* if (StringUtils.hasText(title)) { drafts.add(new TextRepresentationDraft( RepresentationType.TITLE_ABSTRACT, @@ -72,11 +73,24 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati title + "\n\n" + summary, false, null, + null, + null, ContentRole.NORMALIZED_TEXT, Boolean.FALSE )); } - */ + drafts.add(new TextRepresentationDraft( /* NOTE(review): unlike TITLE_ABSTRACT above, no hasText guard is visible for summary here - confirm a blank or null summary is acceptable */ + RepresentationType.SUMMARY, + BUILDER_KEY, + request.detectionResult().languageCode(), + summary, + false, + null, + null, + null, + ContentRole.NORMALIZED_TEXT, + Boolean.FALSE + )); return drafts; } diff --git a/src/main/java/at/procon/dip/normalization/impl/TedStructuredTextRepresentationBuilder.java
b/src/main/java/at/procon/dip/normalization/impl/TedStructuredTextRepresentationBuilder.java index 834082c..4d9289a 100644 --- a/src/main/java/at/procon/dip/normalization/impl/TedStructuredTextRepresentationBuilder.java +++ b/src/main/java/at/procon/dip/normalization/impl/TedStructuredTextRepresentationBuilder.java @@ -61,10 +61,11 @@ public class TedStructuredTextRepresentationBuilder implements TextRepresentatio semanticText, true, null, + null, + null, ContentRole.NORMALIZED_TEXT, Boolean.TRUE )); - /* drafts.add(new TextRepresentationDraft( RepresentationType.FULLTEXT, BUILDER_KEY, @@ -72,8 +73,10 @@ public class TedStructuredTextRepresentationBuilder implements TextRepresentatio normalizedText, false, null, + null, + null, ContentRole.NORMALIZED_TEXT, - Boolean.TRUE + Boolean.FALSE )); if (StringUtils.hasText(title)) { drafts.add(new TextRepresentationDraft( @@ -83,6 +86,8 @@ public class TedStructuredTextRepresentationBuilder implements TextRepresentatio title + "\n\n" + summary, false, null, + null, + null, ContentRole.NORMALIZED_TEXT, Boolean.FALSE )); @@ -94,10 +99,11 @@ public class TedStructuredTextRepresentationBuilder implements TextRepresentatio summary, false, null, + null, + null, ContentRole.NORMALIZED_TEXT, Boolean.FALSE )); - */ return drafts; } diff --git a/src/main/java/at/procon/dip/normalization/spi/TextRepresentationDraft.java b/src/main/java/at/procon/dip/normalization/spi/TextRepresentationDraft.java index 4b7322d..cdb5f06 100644 --- a/src/main/java/at/procon/dip/normalization/spi/TextRepresentationDraft.java +++ b/src/main/java/at/procon/dip/normalization/spi/TextRepresentationDraft.java @@ -13,6 +13,8 @@ public record TextRepresentationDraft( String textBody, boolean primary, Integer chunkIndex, + Integer chunkStartOffset, + Integer chunkEndOffset, ContentRole sourceContentRole, Boolean queueForEmbedding ) { @@ -22,6 +24,7 @@ public record TextRepresentationDraft( String textBody, boolean primary, Integer chunkIndex) { - 
this(representationType, null, languageCode, textBody, primary, chunkIndex, ContentRole.NORMALIZED_TEXT, null); + this(representationType, null, languageCode, textBody, primary, chunkIndex, null, null, ContentRole.NORMALIZED_TEXT, null); } } + diff --git a/src/main/java/at/procon/dip/processing/impl/TedStructuredDocumentProcessor.java b/src/main/java/at/procon/dip/processing/impl/TedStructuredDocumentProcessor.java index a6aeeab..ee05ffc 100644 --- a/src/main/java/at/procon/dip/processing/impl/TedStructuredDocumentProcessor.java +++ b/src/main/java/at/procon/dip/processing/impl/TedStructuredDocumentProcessor.java @@ -14,7 +14,6 @@ import at.procon.dip.processing.spi.DocumentProcessingPolicy; import at.procon.dip.processing.spi.StructuredDocumentProcessor; import at.procon.dip.processing.spi.StructuredProcessingRequest; import at.procon.ted.model.entity.ProcurementDocument; -import at.procon.ted.service.TedPhase2GenericDocumentService; import at.procon.ted.service.XmlParserService; import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; @@ -32,7 +31,6 @@ public class TedStructuredDocumentProcessor implements StructuredDocumentProcess private final XmlParserService xmlParserService; private final DocumentService documentService; - private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService; private final TedNoticeProjectionService tedNoticeProjectionService; @Override @@ -77,7 +75,6 @@ public class TedStructuredDocumentProcessor implements StructuredDocumentProcess } documentService.save(canonical); - tedPhase2GenericDocumentService.syncTedDocument(tedDocument); tedNoticeProjectionService.registerOrRefreshProjection(tedDocument, canonical.getId()); Map payload = new LinkedHashMap<>(); diff --git a/src/main/java/at/procon/dip/search/dto/SearchHit.java b/src/main/java/at/procon/dip/search/dto/SearchHit.java index 263389f..d231ab5 100644 --- a/src/main/java/at/procon/dip/search/dto/SearchHit.java +++ 
b/src/main/java/at/procon/dip/search/dto/SearchHit.java @@ -3,6 +3,7 @@ package at.procon.dip.search.dto; import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.RepresentationType; import java.time.OffsetDateTime; import java.util.UUID; import lombok.AllArgsConstructor; @@ -27,6 +28,13 @@ public class SearchHit { private String languageCode; private String mimeType; + private RepresentationType representationType; /* filled by SearchHitRowMapper from the representation_type column; null when that value is blank or unavailable */ + private boolean primaryRepresentation; /* filled from the is_primary column; false when the column is NULL or cannot be read */ + private Integer chunkIndex; /* chunk_index column; null when NULL or unreadable */ + private Integer chunkStartOffset; /* chunk_start_offset column; null when NULL or unreadable */ + private Integer chunkEndOffset; /* chunk_end_offset column; null when NULL or unreadable */ + private int matchedRepresentationCount; /* set by DefaultSearchResultFusionService: number of distinct representation ids aggregated for the document, or 1 on the non-aggregating merge path */ + private SearchEngineType primaryEngine; private SearchMatchField matchedField; private String snippet; diff --git a/src/main/java/at/procon/dip/search/dto/SearchMetricsResponse.java b/src/main/java/at/procon/dip/search/dto/SearchMetricsResponse.java new file mode 100644 index 0000000..037260d --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchMetricsResponse.java @@ -0,0 +1,22 @@ +package at.procon.dip.search.dto; + +import at.procon.dip.domain.document.RepresentationType; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class SearchMetricsResponse { /* Metrics snapshot DTO; DefaultSearchOrchestrator.metrics() returns it from SearchMetricsService.snapshot(). */ + private long totalSearchRequests; /* presumably counts recordSearch calls made with the debug flag false - confirm in SearchMetricsService */ + private long totalDebugRequests; /* presumably counts recordSearch calls made with the debug flag true - confirm */ + private long totalCollapsedHitsReturned; /* presumably the running sum of the fused hit counts passed to recordSearch - confirm */ + private Map engineExecutions; /* NOTE(review): the type arguments of this map are not visible in this patch text - presumably executions per search engine, confirm */ + private Map representationCounts; /* NOTE(review): type arguments not visible - presumably row counts per RepresentationType, confirm */ + private long primaryRepresentationCount; /* presumably fed by countByPrimaryRepresentationTrue() on the representation repository - confirm */ + private long chunkRepresentationCount; /* presumably fed by countByRepresentationType for CHUNK - confirm */ +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchRepresentationSelectionMode.java b/src/main/java/at/procon/dip/search/dto/SearchRepresentationSelectionMode.java new file mode 100644 index 0000000..f32c183 --- /dev/null +++
b/src/main/java/at/procon/dip/search/dto/SearchRepresentationSelectionMode.java @@ -0,0 +1,11 @@ +package at.procon.dip.search.dto; + +/** + * Controls which document text representations participate in generic search + * when no explicit representationTypes filter is supplied. + */ +public enum SearchRepresentationSelectionMode { + PRIMARY_ONLY, /* SearchSqlFilterSupport restricts to rows with is_primary = true */ + PRIMARY_AND_CHUNKS, /* rows with is_primary = true plus rows whose representation_type is CHUNK */ + ALL /* no implicit representation restriction is appended */ +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchRequest.java b/src/main/java/at/procon/dip/search/dto/SearchRequest.java index 236e583..0b6becb 100644 --- a/src/main/java/at/procon/dip/search/dto/SearchRequest.java +++ b/src/main/java/at/procon/dip/search/dto/SearchRequest.java @@ -40,4 +40,8 @@ public class SearchRequest { @Builder.Default private boolean collapseByDocument = true; + + @Builder.Default + private SearchRepresentationSelectionMode representationSelectionMode = /* builder default; SearchSqlFilterSupport applies the same value when the field is null */ + SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS; } diff --git a/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java b/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java index 38c0198..e61c010 100644 --- a/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java +++ b/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java @@ -2,6 +2,7 @@ package at.procon.dip.search.rank; import at.procon.dip.search.api.SearchExecutionContext; import at.procon.dip.search.api.SearchExecutionPlan; +import at.procon.dip.domain.document.RepresentationType; import at.procon.dip.search.dto.SearchEngineType; import at.procon.dip.search.dto.SearchHit; import at.procon.dip.search.dto.SearchResponse; @@ -57,8 +58,20 @@ public class DefaultSearchResultFusionService implements SearchResultFusionServi normalized.forEach((engine, hits) -> { for (SearchHit hit : hits) { Aggregate aggregate = aggregates.computeIfAbsent(hit.getDocumentId(), id -> new Aggregate()); - aggregate.bestByEngine.put(engine, hit); - if (aggregate.representative == 
null || hit.getNormalizedScore() > aggregate.representative.getNormalizedScore()) { + SearchHit currentBestForEngine = aggregate.bestByEngine.get(engine); + if (currentBestForEngine == null + || hit.getNormalizedScore() > currentBestForEngine.getNormalizedScore() + || (Double.compare(hit.getNormalizedScore(), currentBestForEngine.getNormalizedScore()) == 0 + && representationPriority(hit) < representationPriority(currentBestForEngine))) { + aggregate.bestByEngine.put(engine, hit); + } + if (hit.getRepresentationId() != null) { + aggregate.representationIds.add(hit.getRepresentationId()); + } + if (aggregate.representative == null + || hit.getNormalizedScore() > aggregate.representative.getNormalizedScore() + || (Double.compare(hit.getNormalizedScore(), aggregate.representative.getNormalizedScore()) == 0 + && representationPriority(hit) < representationPriority(aggregate.representative))) { aggregate.representative = hit; } } @@ -69,8 +82,12 @@ public class DefaultSearchResultFusionService implements SearchResultFusionServi SearchHit representative = aggregate.representative; double finalScore = weight(SearchEngineType.POSTGRES_FULLTEXT, aggregate) + weight(SearchEngineType.POSTGRES_TRIGRAM, aggregate) + - weight(SearchEngineType.PGVECTOR_SEMANTIC, aggregate); - fused.add(representative.toBuilder().finalScore(finalScore).build()); + weight(SearchEngineType.PGVECTOR_SEMANTIC, aggregate) + + recencyBoost(representative); + fused.add(representative.toBuilder() + .finalScore(finalScore) + .matchedRepresentationCount(aggregate.representationIds.size()) + .build()); } return fused; } @@ -97,7 +114,10 @@ public class DefaultSearchResultFusionService implements SearchResultFusionServi case POSTGRES_TRIGRAM -> hit.getNormalizedScore() * properties.getSearch().getTrigramWeight(); case PGVECTOR_SEMANTIC -> hit.getNormalizedScore() * properties.getSearch().getSemanticWeight(); }; - merged.add(hit.toBuilder().finalScore(finalScore).build()); + merged.add(hit.toBuilder() + 
.finalScore(finalScore + recencyBoost(hit)) + .matchedRepresentationCount(1) + .build()); } }); return merged; @@ -117,8 +137,42 @@ public class DefaultSearchResultFusionService implements SearchResultFusionServi hits.sort(comparator); } + /* Additive score bonus that decays exponentially with document age: weight multiplied by 2 to the power of minus ageDays over halfLifeDays. Returns 0 when the configured weight is not positive or the hit has no createdAt. */ private double recencyBoost(SearchHit hit) { + if (properties.getSearch().getRecencyBoostWeight() <= 0.0d || hit.getCreatedAt() == null) { + return 0.0d; + } + double halfLifeDays = Math.max(1.0d, properties.getSearch().getRecencyHalfLifeDays()); /* half-life is floored at one day */ + double ageDays = Math.max(0.0d, java.time.Duration.between(hit.getCreatedAt(), java.time.OffsetDateTime.now()).toSeconds() / 86400.0d); /* age is measured against the system clock at call time; a createdAt in the future counts as age 0 */ + double normalized = Math.exp(-Math.log(2.0d) * (ageDays / halfLifeDays)); /* 1.0 for a brand-new document, 0.5 after one half-life */ + return normalized * properties.getSearch().getRecencyBoostWeight(); + } + + /* Tie-break rank for hits whose normalized scores compare equal; a lower value wins. Order: primary representation, SEMANTIC_TEXT, TITLE_ABSTRACT, SUMMARY, CHUNK, anything else; a null hit ranks last. */ private int representationPriority(SearchHit hit) { + if (hit == null) { + return Integer.MAX_VALUE; + } + if (hit.isPrimaryRepresentation()) { + return 0; /* the primary flag outranks the representation type */ + } + RepresentationType type = hit.getRepresentationType(); + if (type == RepresentationType.SEMANTIC_TEXT) { + return 1; + } + if (type == RepresentationType.TITLE_ABSTRACT) { + return 2; + } + if (type == RepresentationType.SUMMARY) { + return 3; + } + if (type == RepresentationType.CHUNK) { + return 4; + } + return 5; /* remaining types and a null type */ + } + private static final class Aggregate { /* per-document accumulator, created per documentId while hits are collapsed */ private final Map bestByEngine = new EnumMap<>(SearchEngineType.class); + private final Set representationIds = new java.util.LinkedHashSet<>(); /* distinct non-null representation ids seen for the document; its size becomes matchedRepresentationCount */ private SearchHit representative; } } diff --git a/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java index ce18494..a2b635e 100644 --- a/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java +++ b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java @@ -22,6 +22,7 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea 
SELECT d.id AS document_id, dtr.id AS representation_id, + CAST(dtr.representation_type AS text) AS representation_type, CAST(d.document_type AS text) AS document_type, CAST(d.document_family AS text) AS document_family, CAST(d.visibility AS text) AS visibility, @@ -31,23 +32,56 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea d.mime_type AS mime_type, d.created_at AS created_at, d.updated_at AS updated_at, - ts_headline('simple', COALESCE(dtr.text_body, ''), websearch_to_tsquery('simple', :queryText), - 'MaxFragments=2, MinWords=5, MaxWords=20') AS snippet, - ts_rank_cd(dtr.search_vector, websearch_to_tsquery('simple', :queryText)) AS score + ts_headline( + CASE + WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig + ELSE dtr.search_config::regconfig + END, + COALESCE(dtr.text_body, ''), + websearch_to_tsquery( + CASE + WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig + ELSE dtr.search_config::regconfig + END, + :queryText + ), + 'MaxFragments=2, MinWords=5, MaxWords=20' + ) AS snippet, + ts_rank_cd( + dtr.search_vector, + websearch_to_tsquery( + CASE + WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig + ELSE dtr.search_config::regconfig + END, + :queryText + ) + ) AS score FROM doc.doc_text_representation dtr JOIN doc.doc_document d ON d.id = dtr.document_id LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id WHERE dtr.search_vector IS NOT NULL - AND dtr.search_vector @@ websearch_to_tsquery('simple', :queryText) + AND dtr.search_vector @@ websearch_to_tsquery( + CASE + WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig + ELSE dtr.search_config::regconfig + END, + :queryText + ) """); MapSqlParameterSource params = new MapSqlParameterSource(); params.addValue("queryText", context.getRequest().getQueryText()); + SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true); + sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT 
:limit"); params.addValue("limit", limit); - return jdbcTemplate.query(sql.toString(), params, - new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)); + return jdbcTemplate.query( + sql.toString(), + params, + new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT) + ); } -} +} \ No newline at end of file diff --git a/src/main/java/at/procon/dip/search/repository/DocumentSemanticSearchRepository.java b/src/main/java/at/procon/dip/search/repository/DocumentSemanticSearchRepository.java index 63c4a1c..e6d2b9f 100644 --- a/src/main/java/at/procon/dip/search/repository/DocumentSemanticSearchRepository.java +++ b/src/main/java/at/procon/dip/search/repository/DocumentSemanticSearchRepository.java @@ -33,6 +33,11 @@ public class DocumentSemanticSearchRepository { d.summary AS summary, COALESCE(dtr.language_code, d.language_code) AS language_code, d.mime_type AS mime_type, + CAST(dtr.representation_type AS text) AS representation_type, + dtr.is_primary AS is_primary, + dtr.chunk_index AS chunk_index, + dtr.chunk_start_offset AS chunk_start_offset, + dtr.chunk_end_offset AS chunk_end_offset, d.created_at AS created_at, d.updated_at AS updated_at, LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet, diff --git a/src/main/java/at/procon/dip/search/repository/SearchHitRowMapper.java b/src/main/java/at/procon/dip/search/repository/SearchHitRowMapper.java index 7121243..9feb403 100644 --- a/src/main/java/at/procon/dip/search/repository/SearchHitRowMapper.java +++ b/src/main/java/at/procon/dip/search/repository/SearchHitRowMapper.java @@ -3,6 +3,7 @@ package at.procon.dip.search.repository; import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.RepresentationType; import at.procon.dip.search.dto.SearchEngineType; import 
at.procon.dip.search.dto.SearchHit; import at.procon.dip.search.dto.SearchMatchField; @@ -33,6 +34,11 @@ final class SearchHitRowMapper implements RowMapper { .summary(safeGetString(rs, "summary")) .languageCode(safeGetString(rs, "language_code")) .mimeType(safeGetString(rs, "mime_type")) + .representationType(parseRepresentationType(safeGetString(rs, "representation_type"))) + .primaryRepresentation(safeGetBoolean(rs, "is_primary")) /* NOTE(review): the full-text SELECT hunk in this patch only adds representation_type; if is_primary and the chunk columns are not selected by a query, these fields silently map to false and null - confirm */ + .chunkIndex(safeGetInteger(rs, "chunk_index")) + .chunkStartOffset(safeGetInteger(rs, "chunk_start_offset")) + .chunkEndOffset(safeGetInteger(rs, "chunk_end_offset")) .primaryEngine(engineType) .matchedField(matchedField == null || matchedField.isBlank() ? defaultField @@ -51,4 +57,25 @@ final class SearchHitRowMapper implements RowMapper { return null; } } + + /* Best-effort read of a nullable integer column: returns null for SQL NULL and also when the read throws SQLException. */ private Integer safeGetInteger(ResultSet rs, String column) { + try { + int value = rs.getInt(column); + return rs.wasNull() ? null : value; /* getInt reports SQL NULL as 0, so wasNull is needed to tell them apart */ + } catch (SQLException ignore) { + return null; /* deliberately swallowed so a failed read does not fail the whole row */ + } + } + + /* Best-effort read of a boolean column: SQL NULL and any SQLException both collapse to false. */ private boolean safeGetBoolean(ResultSet rs, String column) { + try { + return rs.getBoolean(column) && !rs.wasNull(); + } catch (SQLException ignore) { + return false; + } + } + + /* Maps a column value to the enum constant of the same name; null or blank yields null. Enum.valueOf throws IllegalArgumentException for an unknown name and that is not caught here. */ private RepresentationType parseRepresentationType(String value) { + return value == null || value.isBlank() ? 
null : RepresentationType.valueOf(value); + } } diff --git a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java index 9ed4811..5efdde6 100644 --- a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java +++ b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java @@ -5,6 +5,7 @@ import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentType; import at.procon.dip.domain.document.RepresentationType; import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.dto.SearchRepresentationSelectionMode; import java.util.Collection; import java.util.List; import java.util.Set; @@ -56,7 +57,19 @@ final class SearchSqlFilterSupport { sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)"); params.addValue("representationTypes", enumNames(representationTypes)); } else { /* this else branch replaces the former unconditional is_primary = true filter with the request selection mode */ - sql.append(" AND ").append(representationAlias).append(".is_primary = true"); + SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode(); + if (selectionMode == null) { /* null mode: same value as the SearchRequest builder default */ + selectionMode = SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS; + } + switch (selectionMode) { + case PRIMARY_ONLY -> sql.append(" AND ").append(representationAlias).append(".is_primary = true"); + case PRIMARY_AND_CHUNKS -> sql.append(" AND (") + .append(representationAlias).append(".is_primary = true OR CAST(") + .append(representationAlias).append(".representation_type AS text) = 'CHUNK')"); + case ALL -> { + // no implicit representation restriction + } + } } if (context.getRequest().getCreatedFrom() != null) { diff --git a/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java b/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java index b1b33b3..9799168 100644 --- 
a/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java +++ b/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java @@ -28,17 +28,21 @@ public class DefaultSearchOrchestrator implements SearchOrchestrator { private final SearchPlanner planner; private final List engines; private final SearchResultFusionService fusionService; + private final SearchMetricsService metricsService; @Override public SearchResponse search(SearchRequest request, SearchDocumentScope scope) { SearchExecution execution = executeInternal(request, scope); - return fusionService.fuse(execution.context(), execution.plan(), execution.engineResults()); + SearchResponse response = fusionService.fuse(execution.context(), execution.plan(), execution.engineResults()); + metricsService.recordSearch(execution.engineResults(), response.getHits().size(), false); + return response; } @Override public SearchDebugResponse debug(SearchRequest request, SearchDocumentScope scope) { SearchExecution execution = executeInternal(request, scope); SearchResponse fused = fusionService.fuse(execution.context(), execution.plan(), execution.engineResults()); + metricsService.recordSearch(execution.engineResults(), fused.getHits().size(), true); List debugResults = new ArrayList<>(); int topLimit = properties.getSearch().getDebugTopHitsPerEngine(); @@ -56,6 +60,11 @@ public class DefaultSearchOrchestrator implements SearchOrchestrator { .build(); } + @Override + public at.procon.dip.search.dto.SearchMetricsResponse metrics() { + return metricsService.snapshot(); + } + private SearchExecution executeInternal(SearchRequest request, SearchDocumentScope scope) { int page = request.getPage() == null || request.getPage() < 0 ? 
0 : request.getPage(); int requestedSize = request.getSize() == null || request.getSize() <= 0 diff --git a/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java b/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java index 976af52..0d37c12 100644 --- a/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java +++ b/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java @@ -2,6 +2,9 @@ package at.procon.dip.search.service; import java.util.List; import java.util.UUID; + +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.jdbc.core.JdbcTemplate; @@ -18,6 +21,9 @@ public class DocumentLexicalIndexService { private final NamedParameterJdbcTemplate namedParameterJdbcTemplate; private final JdbcTemplate jdbcTemplate; + @PersistenceContext + private EntityManager entityManager; + /** * New Slice 2 name kept for current code. */ @@ -26,9 +32,6 @@ public class DocumentLexicalIndexService { refreshRepresentationLexicalIndex(representationId); } - /** - * Backward-compatible Slice 1 method name. 
- */ @Transactional public void refreshRepresentationLexicalIndex(UUID representationId) { if (!isLexicalSearchSchemaAvailable()) { @@ -36,25 +39,32 @@ public class DocumentLexicalIndexService { return; } + entityManager.flush(); + MapSqlParameterSource params = new MapSqlParameterSource(); params.addValue("representationId", representationId); - namedParameterJdbcTemplate.update(""" - UPDATE doc.doc_text_representation - SET search_config = CASE - WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german' - WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english' - ELSE 'simple' + + int updated = namedParameterJdbcTemplate.update(""" + UPDATE doc.doc_text_representation + SET search_config = CASE + WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german' + WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english' + ELSE 'simple' + END, + search_vector = to_tsvector( + CASE + WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig + WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig + ELSE 'simple'::regconfig END, - search_vector = to_tsvector( - CASE - WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig - WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig - ELSE 'simple'::regconfig - END, - coalesce(text_body, '') - ) - WHERE id = :representationId - """, params); + coalesce(text_body, '') + ) + WHERE id = :representationId + """, params); + + if (updated == 0) { + log.warn("Lexical indexing updated 0 rows for representation {}", representationId); + } } /** diff --git a/src/main/java/at/procon/dip/search/service/SearchMetricsService.java b/src/main/java/at/procon/dip/search/service/SearchMetricsService.java new file mode 100644 index 0000000..86257b1 --- /dev/null +++ b/src/main/java/at/procon/dip/search/service/SearchMetricsService.java @@ -0,0 +1,55 @@ +package at.procon.dip.search.service; + +import at.procon.dip.domain.document.RepresentationType; +import 
at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchMetricsResponse; +import java.util.Arrays; +import java.util.EnumMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; + +@Service +@RequiredArgsConstructor +public class SearchMetricsService { + + private final DocumentTextRepresentationRepository representationRepository; + + private final AtomicLong totalSearchRequests = new AtomicLong(); + private final AtomicLong totalDebugRequests = new AtomicLong(); + private final AtomicLong totalCollapsedHitsReturned = new AtomicLong(); + private final Map engineExecutions = new ConcurrentHashMap<>(); + + public void recordSearch(Map engineResults, int collapsedHits, boolean debug) { + totalSearchRequests.incrementAndGet(); + if (debug) { + totalDebugRequests.incrementAndGet(); + } + totalCollapsedHitsReturned.addAndGet(collapsedHits); + engineResults.keySet().forEach(engine -> engineExecutions + .computeIfAbsent(engine, key -> new AtomicLong()) + .incrementAndGet()); + } + + public SearchMetricsResponse snapshot() { + Map engineCounts = new EnumMap<>(SearchEngineType.class); + engineExecutions.forEach((engine, value) -> engineCounts.put(engine, value.get())); + + Map representationCounts = new EnumMap<>(RepresentationType.class); + Arrays.stream(RepresentationType.values()) + .forEach(type -> representationCounts.put(type, representationRepository.countByRepresentationType(type))); + + return SearchMetricsResponse.builder() + .totalSearchRequests(totalSearchRequests.get()) + .totalDebugRequests(totalDebugRequests.get()) + .totalCollapsedHitsReturned(totalCollapsedHitsReturned.get()) + .engineExecutions(engineCounts) + .representationCounts(representationCounts) + 
.primaryRepresentationCount(representationRepository.countByPrimaryRepresentationTrue()) + .chunkRepresentationCount(representationRepository.countByRepresentationType(RepresentationType.CHUNK)) + .build(); + } +} diff --git a/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java b/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java index b5f8c36..c22e57c 100644 --- a/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java +++ b/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java @@ -1,6 +1,7 @@ package at.procon.dip.search.service; import at.procon.dip.search.dto.SearchDebugResponse; +import at.procon.dip.search.dto.SearchMetricsResponse; import at.procon.dip.search.dto.SearchRequest; import at.procon.dip.search.dto.SearchResponse; import at.procon.dip.search.spi.SearchDocumentScope; @@ -8,4 +9,6 @@ import at.procon.dip.search.spi.SearchDocumentScope; public interface SearchOrchestrator { SearchResponse search(SearchRequest request, SearchDocumentScope scope); SearchDebugResponse debug(SearchRequest request, SearchDocumentScope scope); + + SearchMetricsResponse metrics(); } diff --git a/src/main/java/at/procon/dip/search/web/GenericSearchController.java b/src/main/java/at/procon/dip/search/web/GenericSearchController.java index 91bcb30..752af22 100644 --- a/src/main/java/at/procon/dip/search/web/GenericSearchController.java +++ b/src/main/java/at/procon/dip/search/web/GenericSearchController.java @@ -1,6 +1,7 @@ package at.procon.dip.search.web; import at.procon.dip.search.dto.SearchDebugResponse; +import at.procon.dip.search.dto.SearchMetricsResponse; import at.procon.dip.search.dto.SearchRequest; import at.procon.dip.search.dto.SearchResponse; import at.procon.dip.search.service.SearchOrchestrator; @@ -9,6 +10,7 @@ import jakarta.validation.Valid; import java.util.Set; import lombok.RequiredArgsConstructor; import org.springframework.web.bind.annotation.PostMapping; +import 
org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; @@ -30,6 +32,11 @@ public class GenericSearchController { return searchOrchestrator.debug(request, buildScope(request)); } + @GetMapping("/metrics") + public SearchMetricsResponse metrics() { + return searchOrchestrator.metrics(); + } + private SearchDocumentScope buildScope(SearchRequest request) { String scopeLanguage = (request.getLanguageCodes() == null || request.getLanguageCodes().isEmpty()) ? null diff --git a/src/main/java/at/procon/ted/config/TedProcessorProperties.java b/src/main/java/at/procon/ted/config/TedProcessorProperties.java index 4397bc0..9799f5f 100644 --- a/src/main/java/at/procon/ted/config/TedProcessorProperties.java +++ b/src/main/java/at/procon/ted/config/TedProcessorProperties.java @@ -234,6 +234,41 @@ public class TedProcessorProperties { private double trigramWeight = 0.20; private double semanticWeight = 0.45; + + /** + * Enable chunk representations for long documents. + */ + private boolean chunkingEnabled = true; + + /** + * Target chunk size in characters for CHUNK representations. + */ + @Positive + private int chunkTargetChars = 1800; + + /** + * Overlap between consecutive chunks in characters. + */ + @Min(0) + private int chunkOverlapChars = 200; + + /** + * Maximum CHUNK representations generated per document. + */ + @Positive + private int maxChunksPerDocument = 12; + + /** + * Additional score weight for recency. + */ + private double recencyBoostWeight = 0.05; + + /** + * Half-life in days used for recency decay. + */ + @Positive + private int recencyHalfLifeDays = 30; + /** * Startup backfill limit for missing DOC lexical vectors. 
*/ diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 88a317a..7e0fa09 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -134,6 +134,18 @@ ted: fulltext-weight: 0.35 trigram-weight: 0.20 semantic-weight: 0.45 + # Additional score weight for recency + recency-boost-weight: 0.05 + # Recency half-life in days + recency-half-life-days: 30 + # Enable chunk representations for long documents + chunking-enabled: true + # Target chunk size in characters + chunk-target-chars: 1800 + # Overlap between consecutive chunks + chunk-overlap-chars: 200 + # Maximum number of chunks generated per document + max-chunks-per-document: 12 # Startup backfill limit for missing lexical vectors startup-lexical-backfill-limit: 500 # Number of top hits per engine returned by /search/debug @@ -142,7 +154,7 @@ ted: # TED Daily Package Download configuration download: # Enable/disable automatic package download - enabled: false + enabled: true # User service-based camel route use-service-based: false # Base URL for TED Daily Packages @@ -177,7 +189,7 @@ ted: # IMAP Mail configuration mail: # Enable/disable mail processing - enabled: true + enabled: false # IMAP server hostname host: mail.mymagenta.business # IMAP server port (993 for IMAPS) diff --git a/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java b/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java index 7157cec..d171706 100644 --- a/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java +++ b/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java @@ -100,8 +100,10 @@ import static org.assertj.core.api.Assertions.assertThat; }) class MailBundleProcessingIntegrationTest { + private static final int HOST_PORT = 15433; + @Container - static PostgreSQLContainer postgres = new 
FixedPortPostgreSQLContainer<>("postgres:16-alpine", 15432) + static PostgreSQLContainer postgres = new FixedPortPostgreSQLContainer<>("postgres:16-alpine", HOST_PORT) .withDatabaseName("dip_test") .withUsername("test") .withPassword("test")