From 47894257a4bc6585534193f61daf802aee255522 Mon Sep 17 00:00:00 2001 From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com> Date: Thu, 19 Mar 2026 18:37:48 +0100 Subject: [PATCH] Refactor phases 5 - search - slice 1 --- PATCH_NOTES.md | 16 ++ .../entity/DocumentTextRepresentation.java | 3 + .../DocumentRepresentationService.java | 6 +- ...faultGenericTextRepresentationBuilder.java | 4 + .../search/api/SearchExecutionContext.java | 16 ++ .../dip/search/api/SearchExecutionPlan.java | 16 ++ .../dip/search/dto/SearchEngineType.java | 6 + .../at/procon/dip/search/dto/SearchHit.java | 41 +++++ .../dip/search/dto/SearchMatchField.java | 7 + .../at/procon/dip/search/dto/SearchMode.java | 7 + .../procon/dip/search/dto/SearchRequest.java | 48 ++++++ .../procon/dip/search/dto/SearchResponse.java | 22 +++ .../procon/dip/search/dto/SearchSortMode.java | 7 + .../dip/search/engine/SearchEngine.java | 12 ++ .../PostgresFullTextSearchEngine.java | 71 +++++++++ .../trigram/PostgresTrigramSearchEngine.java | 75 ++++++++++ .../dip/search/plan/DefaultSearchPlanner.java | 33 +++++ .../procon/dip/search/plan/SearchPlanner.java | 8 + .../DefaultSearchResultFusionService.java | 107 +++++++++++++ .../rank/DefaultSearchScoreNormalizer.java | 28 ++++ .../rank/SearchResultFusionService.java | 17 +++ .../search/rank/SearchScoreNormalizer.java | 9 ++ .../AbstractNativeSearchRepository.java | 140 ++++++++++++++++++ .../DocumentFullTextSearchRepository.java | 8 + .../DocumentFullTextSearchRepositoryImpl.java | 72 +++++++++ .../DocumentTrigramSearchRepository.java | 8 + .../DocumentTrigramSearchRepositoryImpl.java | 102 +++++++++++++ .../search/repository/FullTextSearchRow.java | 21 +++ .../search/repository/TrigramSearchRow.java | 22 +++ .../service/DefaultSearchOrchestrator.java | 47 ++++++ .../service/DocumentLexicalIndexService.java | 80 ++++++++++ .../search/service/SearchOrchestrator.java | 9 ++ .../startup/LexicalSearchStartupRunner.java | 21 +++ 
.../search/web/GenericSearchController.java | 35 +++++ .../V9__doc_search_slice1_support.sql | 26 ++++ 35 files changed, 1149 insertions(+), 1 deletion(-) create mode 100644 PATCH_NOTES.md create mode 100644 src/main/java/at/procon/dip/search/api/SearchExecutionContext.java create mode 100644 src/main/java/at/procon/dip/search/api/SearchExecutionPlan.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchEngineType.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchHit.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchMatchField.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchMode.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchRequest.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchResponse.java create mode 100644 src/main/java/at/procon/dip/search/dto/SearchSortMode.java create mode 100644 src/main/java/at/procon/dip/search/engine/SearchEngine.java create mode 100644 src/main/java/at/procon/dip/search/engine/fulltext/PostgresFullTextSearchEngine.java create mode 100644 src/main/java/at/procon/dip/search/engine/trigram/PostgresTrigramSearchEngine.java create mode 100644 src/main/java/at/procon/dip/search/plan/DefaultSearchPlanner.java create mode 100644 src/main/java/at/procon/dip/search/plan/SearchPlanner.java create mode 100644 src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java create mode 100644 src/main/java/at/procon/dip/search/rank/DefaultSearchScoreNormalizer.java create mode 100644 src/main/java/at/procon/dip/search/rank/SearchResultFusionService.java create mode 100644 src/main/java/at/procon/dip/search/rank/SearchScoreNormalizer.java create mode 100644 src/main/java/at/procon/dip/search/repository/AbstractNativeSearchRepository.java create mode 100644 src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepository.java create mode 100644 
src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java create mode 100644 src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepository.java create mode 100644 src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepositoryImpl.java create mode 100644 src/main/java/at/procon/dip/search/repository/FullTextSearchRow.java create mode 100644 src/main/java/at/procon/dip/search/repository/TrigramSearchRow.java create mode 100644 src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java create mode 100644 src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java create mode 100644 src/main/java/at/procon/dip/search/service/SearchOrchestrator.java create mode 100644 src/main/java/at/procon/dip/search/startup/LexicalSearchStartupRunner.java create mode 100644 src/main/java/at/procon/dip/search/web/GenericSearchController.java create mode 100644 src/main/resources/db/migration/V9__doc_search_slice1_support.sql diff --git a/PATCH_NOTES.md b/PATCH_NOTES.md new file mode 100644 index 0000000..e5538a2 --- /dev/null +++ b/PATCH_NOTES.md @@ -0,0 +1,16 @@ +Slice 1 generic search patch + +Included changes: +- Generic search DTOs, planner, orchestrator, engine SPI +- PostgreSQL full-text engine and repository +- PostgreSQL trigram engine and repository +- Score normalization and result fusion +- Generic /search endpoint +- Lexical index maintenance service and startup backfill runner +- DOC lexical search migration (V9) +- Modified DOC representation write path to refresh search vectors + +Important note: +- Full-text search requires V9__doc_search_slice1_support.sql to be applied. +- The lexical index service is guarded and will no-op if the search columns are not yet present. +- Because Flyway is currently disabled in application.yml, apply the migration manually or enable Flyway before using the new search endpoint. 
diff --git a/src/main/java/at/procon/dip/domain/document/entity/DocumentTextRepresentation.java b/src/main/java/at/procon/dip/domain/document/entity/DocumentTextRepresentation.java index cfb4774..3dc2758 100644 --- a/src/main/java/at/procon/dip/domain/document/entity/DocumentTextRepresentation.java +++ b/src/main/java/at/procon/dip/domain/document/entity/DocumentTextRepresentation.java @@ -84,6 +84,9 @@ public class DocumentTextRepresentation { @Column(name = "text_body", columnDefinition = "TEXT", nullable = false) private String textBody; + @Column(name = "search_config", length = 64) + private String searchConfig; + @Builder.Default @Column(name = "created_at", nullable = false, updatable = false) private OffsetDateTime createdAt = OffsetDateTime.now(); diff --git a/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java b/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java index 8111e08..e466387 100644 --- a/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java +++ b/src/main/java/at/procon/dip/domain/document/service/DocumentRepresentationService.java @@ -4,6 +4,7 @@ import at.procon.dip.domain.document.entity.DocumentContent; import at.procon.dip.domain.document.entity.DocumentTextRepresentation; import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; import at.procon.dip.domain.document.service.command.AddDocumentTextRepresentationCommand; +import at.procon.dip.search.service.DocumentLexicalIndexService; import java.util.List; import java.util.UUID; import lombok.RequiredArgsConstructor; @@ -18,6 +19,7 @@ public class DocumentRepresentationService { private final DocumentService documentService; private final DocumentContentService contentService; private final DocumentTextRepresentationRepository representationRepository; + private final DocumentLexicalIndexService lexicalIndexService; public DocumentTextRepresentation 
addRepresentation(AddDocumentTextRepresentationCommand command) { DocumentContent content = command.contentId() == null ? null : contentService.getRequired(command.contentId()); @@ -34,7 +36,9 @@ public class DocumentRepresentationService { .primaryRepresentation(command.primaryRepresentation()) .textBody(command.textBody()) .build(); - return representationRepository.save(representation); + DocumentTextRepresentation saved = representationRepository.save(representation); + lexicalIndexService.refreshRepresentationLexicalIndex(saved.getId()); + return saved; } @Transactional(readOnly = true) diff --git a/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java b/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java index 7b5e1ab..fd82a8a 100644 --- a/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java +++ b/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java @@ -41,6 +41,7 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati String semantic = buildSemanticText(title, summary, request.detectionResult().documentType()); List drafts = new ArrayList<>(); + /* drafts.add(new TextRepresentationDraft( RepresentationType.FULLTEXT, BUILDER_KEY, @@ -51,6 +52,7 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati ContentRole.NORMALIZED_TEXT, Boolean.TRUE )); + */ drafts.add(new TextRepresentationDraft( RepresentationType.SEMANTIC_TEXT, BUILDER_KEY, @@ -61,6 +63,7 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati ContentRole.NORMALIZED_TEXT, Boolean.TRUE )); + /* if (StringUtils.hasText(title)) { drafts.add(new TextRepresentationDraft( RepresentationType.TITLE_ABSTRACT, @@ -73,6 +76,7 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati Boolean.FALSE )); } + */ return drafts; } diff --git 
a/src/main/java/at/procon/dip/search/api/SearchExecutionContext.java b/src/main/java/at/procon/dip/search/api/SearchExecutionContext.java new file mode 100644 index 0000000..5a6eb02 --- /dev/null +++ b/src/main/java/at/procon/dip/search/api/SearchExecutionContext.java @@ -0,0 +1,16 @@ +package at.procon.dip.search.api; + +import at.procon.dip.search.dto.SearchRequest; +import at.procon.dip.search.spi.SearchDocumentScope; +import lombok.Builder; +import lombok.Getter; + +@Getter +@Builder +public class SearchExecutionContext { + + private final SearchRequest request; + private final SearchDocumentScope scope; + private final int page; + private final int size; +} diff --git a/src/main/java/at/procon/dip/search/api/SearchExecutionPlan.java b/src/main/java/at/procon/dip/search/api/SearchExecutionPlan.java new file mode 100644 index 0000000..3475488 --- /dev/null +++ b/src/main/java/at/procon/dip/search/api/SearchExecutionPlan.java @@ -0,0 +1,16 @@ +package at.procon.dip.search.api; + +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchSortMode; +import java.util.List; +import lombok.Builder; +import lombok.Getter; + +@Getter +@Builder +public class SearchExecutionPlan { + + private final List engines; + private final boolean collapseByDocument; + private final SearchSortMode sortMode; +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchEngineType.java b/src/main/java/at/procon/dip/search/dto/SearchEngineType.java new file mode 100644 index 0000000..71ceac3 --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchEngineType.java @@ -0,0 +1,6 @@ +package at.procon.dip.search.dto; + +public enum SearchEngineType { + POSTGRES_FULLTEXT, + POSTGRES_TRIGRAM +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchHit.java b/src/main/java/at/procon/dip/search/dto/SearchHit.java new file mode 100644 index 0000000..5344b39 --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchHit.java @@ -0,0 +1,41 @@ 
+package at.procon.dip.search.dto; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentType; +import java.time.OffsetDateTime; +import java.util.UUID; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder(toBuilder = true) +@NoArgsConstructor +@AllArgsConstructor +public class SearchHit { + + private UUID documentId; + private UUID representationId; + + private DocumentType documentType; + private DocumentFamily documentFamily; + private DocumentVisibility visibility; + + private String title; + private String summary; + private String languageCode; + private String mimeType; + + private SearchEngineType primaryEngine; + private SearchMatchField matchedField; + private String snippet; + + private double rawScore; + private double normalizedScore; + private double finalScore; + + private OffsetDateTime createdAt; + private OffsetDateTime updatedAt; +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchMatchField.java b/src/main/java/at/procon/dip/search/dto/SearchMatchField.java new file mode 100644 index 0000000..df579a1 --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchMatchField.java @@ -0,0 +1,7 @@ +package at.procon.dip.search.dto; + +public enum SearchMatchField { + DOCUMENT_TITLE, + DOCUMENT_SUMMARY, + REPRESENTATION_TEXT +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchMode.java b/src/main/java/at/procon/dip/search/dto/SearchMode.java new file mode 100644 index 0000000..cc659ce --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchMode.java @@ -0,0 +1,7 @@ +package at.procon.dip.search.dto; + +public enum SearchMode { + FULLTEXT, + TRIGRAM, + HYBRID +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchRequest.java b/src/main/java/at/procon/dip/search/dto/SearchRequest.java new file mode 100644 index 0000000..f5c4b9f --- 
/dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchRequest.java @@ -0,0 +1,48 @@ +package at.procon.dip.search.dto; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.RepresentationType; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotBlank; +import java.time.OffsetDateTime; +import java.util.LinkedHashSet; +import java.util.Set; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class SearchRequest { + + @NotBlank + private String queryText; + + @Builder.Default + private Set modes = new LinkedHashSet<>(Set.of(SearchMode.HYBRID)); + + private Set documentTypes; + private Set documentFamilies; + private Set visibilities; + private Set languageCodes; + private Set representationTypes; + private OffsetDateTime createdFrom; + private OffsetDateTime createdTo; + + @Min(0) + private Integer page; + + @Min(1) + private Integer size; + + @Builder.Default + private SearchSortMode sortMode = SearchSortMode.SCORE_DESC; + + @Builder.Default + private boolean collapseByDocument = true; +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchResponse.java b/src/main/java/at/procon/dip/search/dto/SearchResponse.java new file mode 100644 index 0000000..ddf8922 --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchResponse.java @@ -0,0 +1,22 @@ +package at.procon.dip.search.dto; + +import java.util.List; +import java.util.Set; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class SearchResponse { + + private List hits; + private int page; + private int size; + private long totalHits; + private boolean 
truncated; + private Set enginesUsed; +} diff --git a/src/main/java/at/procon/dip/search/dto/SearchSortMode.java b/src/main/java/at/procon/dip/search/dto/SearchSortMode.java new file mode 100644 index 0000000..9f1993b --- /dev/null +++ b/src/main/java/at/procon/dip/search/dto/SearchSortMode.java @@ -0,0 +1,7 @@ +package at.procon.dip.search.dto; + +public enum SearchSortMode { + SCORE_DESC, + CREATED_AT_DESC, + TITLE_ASC +} diff --git a/src/main/java/at/procon/dip/search/engine/SearchEngine.java b/src/main/java/at/procon/dip/search/engine/SearchEngine.java new file mode 100644 index 0000000..ba811e7 --- /dev/null +++ b/src/main/java/at/procon/dip/search/engine/SearchEngine.java @@ -0,0 +1,12 @@ +package at.procon.dip.search.engine; + +import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchHit; +import java.util.List; + +public interface SearchEngine { + SearchEngineType type(); + boolean supports(SearchExecutionContext context); + List execute(SearchExecutionContext context); +} diff --git a/src/main/java/at/procon/dip/search/engine/fulltext/PostgresFullTextSearchEngine.java b/src/main/java/at/procon/dip/search/engine/fulltext/PostgresFullTextSearchEngine.java new file mode 100644 index 0000000..ab406f8 --- /dev/null +++ b/src/main/java/at/procon/dip/search/engine/fulltext/PostgresFullTextSearchEngine.java @@ -0,0 +1,71 @@ +package at.procon.dip.search.engine.fulltext; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchHit; +import at.procon.dip.search.dto.SearchMatchField; +import at.procon.dip.search.engine.SearchEngine; +import at.procon.dip.search.repository.DocumentFullTextSearchRepository; +import 
at.procon.dip.search.repository.FullTextSearchRow; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Component; + +@Component +@RequiredArgsConstructor +public class PostgresFullTextSearchEngine implements SearchEngine { + + private final DocumentFullTextSearchRepository repository; + + @Override + public SearchEngineType type() { + return SearchEngineType.POSTGRES_FULLTEXT; + } + + @Override + public boolean supports(SearchExecutionContext context) { + return context.getRequest().getQueryText() != null && !context.getRequest().getQueryText().isBlank(); + } + + @Override + public List execute(SearchExecutionContext context) { + return repository.search(context).stream() + .map(this::mapRow) + .toList(); + } + + private SearchHit mapRow(FullTextSearchRow row) { + return SearchHit.builder() + .documentId(row.documentId()) + .representationId(row.representationId()) + .documentType(parseDocumentType(row.documentType())) + .documentFamily(parseDocumentFamily(row.documentFamily())) + .visibility(parseVisibility(row.visibility())) + .title(row.title()) + .summary(row.summary()) + .languageCode(row.languageCode()) + .mimeType(row.mimeType()) + .primaryEngine(SearchEngineType.POSTGRES_FULLTEXT) + .matchedField(SearchMatchField.REPRESENTATION_TEXT) + .snippet(row.snippet()) + .rawScore(row.score() == null ? 0.0d : row.score()) + .createdAt(row.createdAt()) + .updatedAt(row.updatedAt()) + .build(); + } + + private DocumentType parseDocumentType(String value) { + return value == null ? null : DocumentType.valueOf(value); + } + + private DocumentFamily parseDocumentFamily(String value) { + return value == null ? null : DocumentFamily.valueOf(value); + } + + private DocumentVisibility parseVisibility(String value) { + return value == null ? 
null : DocumentVisibility.valueOf(value); + } +} diff --git a/src/main/java/at/procon/dip/search/engine/trigram/PostgresTrigramSearchEngine.java b/src/main/java/at/procon/dip/search/engine/trigram/PostgresTrigramSearchEngine.java new file mode 100644 index 0000000..1a3a951 --- /dev/null +++ b/src/main/java/at/procon/dip/search/engine/trigram/PostgresTrigramSearchEngine.java @@ -0,0 +1,75 @@ +package at.procon.dip.search.engine.trigram; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchHit; +import at.procon.dip.search.dto.SearchMatchField; +import at.procon.dip.search.engine.SearchEngine; +import at.procon.dip.search.repository.DocumentTrigramSearchRepository; +import at.procon.dip.search.repository.TrigramSearchRow; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Component; + +@Component +@RequiredArgsConstructor +public class PostgresTrigramSearchEngine implements SearchEngine { + + private final DocumentTrigramSearchRepository repository; + + @Override + public SearchEngineType type() { + return SearchEngineType.POSTGRES_TRIGRAM; + } + + @Override + public boolean supports(SearchExecutionContext context) { + return context.getRequest().getQueryText() != null && !context.getRequest().getQueryText().isBlank(); + } + + @Override + public List execute(SearchExecutionContext context) { + return repository.search(context).stream() + .map(this::mapRow) + .toList(); + } + + private SearchHit mapRow(TrigramSearchRow row) { + return SearchHit.builder() + .documentId(row.documentId()) + .representationId(row.representationId()) + .documentType(parseDocumentType(row.documentType())) + .documentFamily(parseDocumentFamily(row.documentFamily())) + 
.visibility(parseVisibility(row.visibility())) + .title(row.title()) + .summary(row.summary()) + .languageCode(row.languageCode()) + .mimeType(row.mimeType()) + .primaryEngine(SearchEngineType.POSTGRES_TRIGRAM) + .matchedField(parseMatchField(row.matchedField())) + .snippet(row.snippet()) + .rawScore(row.score() == null ? 0.0d : row.score()) + .createdAt(row.createdAt()) + .updatedAt(row.updatedAt()) + .build(); + } + + private SearchMatchField parseMatchField(String value) { + return value == null ? SearchMatchField.REPRESENTATION_TEXT : SearchMatchField.valueOf(value); + } + + private DocumentType parseDocumentType(String value) { + return value == null ? null : DocumentType.valueOf(value); + } + + private DocumentFamily parseDocumentFamily(String value) { + return value == null ? null : DocumentFamily.valueOf(value); + } + + private DocumentVisibility parseVisibility(String value) { + return value == null ? null : DocumentVisibility.valueOf(value); + } +} diff --git a/src/main/java/at/procon/dip/search/plan/DefaultSearchPlanner.java b/src/main/java/at/procon/dip/search/plan/DefaultSearchPlanner.java new file mode 100644 index 0000000..1cfefe1 --- /dev/null +++ b/src/main/java/at/procon/dip/search/plan/DefaultSearchPlanner.java @@ -0,0 +1,33 @@ +package at.procon.dip.search.plan; + +import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.api.SearchExecutionPlan; +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchMode; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import org.springframework.stereotype.Component; + +@Component +public class DefaultSearchPlanner implements SearchPlanner { + + @Override + public SearchExecutionPlan plan(SearchExecutionContext context) { + Set modes = context.getRequest().getModes(); + List engines = new ArrayList<>(); + + if (modes == null || modes.isEmpty() || modes.contains(SearchMode.HYBRID) || modes.contains(SearchMode.FULLTEXT)) { 
+ engines.add(SearchEngineType.POSTGRES_FULLTEXT); + } + if (modes == null || modes.isEmpty() || modes.contains(SearchMode.HYBRID) || modes.contains(SearchMode.TRIGRAM)) { + engines.add(SearchEngineType.POSTGRES_TRIGRAM); + } + + return SearchExecutionPlan.builder() + .engines(engines) + .collapseByDocument(context.getRequest().isCollapseByDocument()) + .sortMode(context.getRequest().getSortMode()) + .build(); + } +} diff --git a/src/main/java/at/procon/dip/search/plan/SearchPlanner.java b/src/main/java/at/procon/dip/search/plan/SearchPlanner.java new file mode 100644 index 0000000..a7de9ef --- /dev/null +++ b/src/main/java/at/procon/dip/search/plan/SearchPlanner.java @@ -0,0 +1,8 @@ +package at.procon.dip.search.plan; + +import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.api.SearchExecutionPlan; + +public interface SearchPlanner { + SearchExecutionPlan plan(SearchExecutionContext context); +} diff --git a/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java b/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java new file mode 100644 index 0000000..3b80557 --- /dev/null +++ b/src/main/java/at/procon/dip/search/rank/DefaultSearchResultFusionService.java @@ -0,0 +1,107 @@ +package at.procon.dip.search.rank; + +import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.api.SearchExecutionPlan; +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchHit; +import at.procon.dip.search.dto.SearchResponse; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import org.springframework.stereotype.Component; + +@Component +public class DefaultSearchResultFusionService implements SearchResultFusionService { + + private static final double FULLTEXT_WEIGHT = 0.60d; + 
private static final double TRIGRAM_WEIGHT = 0.40d; + + private final SearchScoreNormalizer scoreNormalizer; + + public DefaultSearchResultFusionService(SearchScoreNormalizer scoreNormalizer) { + this.scoreNormalizer = scoreNormalizer; + } + + @Override + public SearchResponse fuse(SearchExecutionContext context, + SearchExecutionPlan plan, + Map> engineResults) { + Map> normalizedResults = new LinkedHashMap<>(); + for (Map.Entry> entry : engineResults.entrySet()) { + normalizedResults.put(entry.getKey(), scoreNormalizer.normalize(entry.getKey(), entry.getValue())); + } + + List ranked = plan.isCollapseByDocument() + ? collapseByDocument(normalizedResults) + : flatten(normalizedResults); + + ranked.sort(Comparator + .comparingDouble(SearchHit::getFinalScore).reversed() + .thenComparing(SearchHit::getUpdatedAt, Comparator.nullsLast(Comparator.reverseOrder()))); + + int totalHits = ranked.size(); + int fromIndex = Math.min(context.getPage() * context.getSize(), ranked.size()); + int toIndex = Math.min(fromIndex + context.getSize(), ranked.size()); + List pageHits = ranked.subList(fromIndex, toIndex); + + return SearchResponse.builder() + .hits(new ArrayList<>(pageHits)) + .page(context.getPage()) + .size(context.getSize()) + .totalHits(totalHits) + .truncated(toIndex < totalHits) + .enginesUsed(new LinkedHashSet<>(normalizedResults.keySet())) + .build(); + } + + private List flatten(Map> normalizedResults) { + List merged = new ArrayList<>(); + for (Map.Entry> entry : normalizedResults.entrySet()) { + for (SearchHit hit : entry.getValue()) { + merged.add(hit.toBuilder().finalScore(weight(entry.getKey()) * hit.getNormalizedScore()).build()); + } + } + return merged; + } + + private List collapseByDocument(Map> normalizedResults) { + Map collapsed = new LinkedHashMap<>(); + Map accumulatedScores = new LinkedHashMap<>(); + Set docOrder = new LinkedHashSet<>(); + + for (Map.Entry> entry : normalizedResults.entrySet()) { + double weight = weight(entry.getKey()); + for 
(SearchHit hit : entry.getValue()) { + docOrder.add(hit.getDocumentId()); + double contribution = weight * hit.getNormalizedScore(); + accumulatedScores.merge(hit.getDocumentId(), contribution, Double::sum); + + SearchHit existing = collapsed.get(hit.getDocumentId()); + if (existing == null || hit.getNormalizedScore() > existing.getNormalizedScore()) { + collapsed.put(hit.getDocumentId(), hit); + } + } + } + + List results = new ArrayList<>(docOrder.size()); + for (UUID documentId : docOrder) { + SearchHit base = collapsed.get(documentId); + if (base != null) { + results.add(base.toBuilder().finalScore(accumulatedScores.getOrDefault(documentId, 0.0d)).build()); + } + } + return results; + } + + private double weight(SearchEngineType engineType) { + return switch (engineType) { + case POSTGRES_FULLTEXT -> FULLTEXT_WEIGHT; + case POSTGRES_TRIGRAM -> TRIGRAM_WEIGHT; + }; + } +} diff --git a/src/main/java/at/procon/dip/search/rank/DefaultSearchScoreNormalizer.java b/src/main/java/at/procon/dip/search/rank/DefaultSearchScoreNormalizer.java new file mode 100644 index 0000000..35f05c1 --- /dev/null +++ b/src/main/java/at/procon/dip/search/rank/DefaultSearchScoreNormalizer.java @@ -0,0 +1,28 @@ +package at.procon.dip.search.rank; + +import at.procon.dip.search.dto.SearchEngineType; +import at.procon.dip.search.dto.SearchHit; +import java.util.ArrayList; +import java.util.List; +import org.springframework.stereotype.Component; + +@Component +public class DefaultSearchScoreNormalizer implements SearchScoreNormalizer { + + @Override + public List normalize(SearchEngineType engineType, List hits) { + if (hits == null || hits.isEmpty()) { + return List.of(); + } + double max = hits.stream().mapToDouble(SearchHit::getRawScore).max().orElse(0.0d); + if (max <= 0.0d) { + max = 1.0d; + } + List normalized = new ArrayList<>(hits.size()); + for (SearchHit hit : hits) { + double score = Math.max(0.0d, Math.min(1.0d, hit.getRawScore() / max)); + 
normalized.add(hit.toBuilder().normalizedScore(score).build());
+        }
+        return normalized;
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/rank/SearchResultFusionService.java b/src/main/java/at/procon/dip/search/rank/SearchResultFusionService.java
new file mode 100644
index 0000000..d41c17d
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/rank/SearchResultFusionService.java
@@ -0,0 +1,17 @@
+package at.procon.dip.search.rank;
+
+import at.procon.dip.search.api.SearchExecutionContext;
+import at.procon.dip.search.api.SearchExecutionPlan;
+import at.procon.dip.search.dto.SearchEngineType;
+import at.procon.dip.search.dto.SearchHit;
+import at.procon.dip.search.dto.SearchResponse;
+import java.util.List;
+import java.util.Map;
+
+public interface SearchResultFusionService {
+    SearchResponse fuse(
+            SearchExecutionContext context,
+            SearchExecutionPlan plan,
+            Map<SearchEngineType, List<SearchHit>> engineResults
+    );
+}
diff --git a/src/main/java/at/procon/dip/search/rank/SearchScoreNormalizer.java b/src/main/java/at/procon/dip/search/rank/SearchScoreNormalizer.java
new file mode 100644
index 0000000..e79e45a
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/rank/SearchScoreNormalizer.java
@@ -0,0 +1,9 @@
+package at.procon.dip.search.rank;
+
+import at.procon.dip.search.dto.SearchEngineType;
+import at.procon.dip.search.dto.SearchHit;
+import java.util.List;
+
+public interface SearchScoreNormalizer {
+    List<SearchHit> normalize(SearchEngineType engineType, List<SearchHit> hits);
+}
diff --git a/src/main/java/at/procon/dip/search/repository/AbstractNativeSearchRepository.java b/src/main/java/at/procon/dip/search/repository/AbstractNativeSearchRepository.java
new file mode 100644
index 0000000..9cc3150
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/repository/AbstractNativeSearchRepository.java
@@ -0,0 +1,166 @@
+package at.procon.dip.search.repository;
+
+import at.procon.dip.domain.access.DocumentVisibility;
+import at.procon.dip.search.api.SearchExecutionContext;
+import at.procon.dip.search.dto.SearchRequest;
+import jakarta.persistence.Query;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.util.Collection;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Shared plumbing for the native-SQL search repositories: filter clause assembly,
+ * named-parameter binding and conversion of raw result columns.
+ */
+abstract class AbstractNativeSearchRepository {
+
+    /**
+     * Number of rows one engine should fetch: three times the rows needed to fill all pages
+     * up to and including the requested one, but never fewer than 50.
+     *
+     * <p>Computed in {@code long} and clamped to {@code Integer.MAX_VALUE}; the former
+     * {@code int} product silently overflowed for large page/size values.
+     */
+    protected int engineLimit(SearchExecutionContext context) {
+        long wanted = (context.getPage() + 1L) * context.getSize() * 3L;
+        return (int) Math.min(Integer.MAX_VALUE, Math.max(50L, wanted));
+    }
+
+    /**
+     * Appends the request/scope filters as {@code AND ...} clauses to {@code sql} and registers
+     * the matching named parameters in {@code params}. The statement must already contain a
+     * WHERE clause and use the aliases {@code d}, {@code dtr} and {@code dt}.
+     */
+    protected void appendGenericFilters(StringBuilder sql, Map<String, Object> params, SearchExecutionContext context) {
+        SearchRequest request = context.getRequest();
+
+        appendEnumInClause(sql, params, "d.document_type::text", "documentType", request.getDocumentTypes());
+        appendEnumInClause(sql, params, "d.document_family::text", "documentFamily", request.getDocumentFamilies());
+
+        Collection<DocumentVisibility> visibilities = request.getVisibilities();
+        if ((visibilities == null || visibilities.isEmpty()) && context.getScope() != null) {
+            visibilities = context.getScope().visibilities();
+        }
+        appendEnumInClause(sql, params, "d.visibility::text", "visibility", visibilities);
+
+        Collection<String> ownerTenantKeys = context.getScope() == null ? null : context.getScope().ownerTenantKeys();
+        if (ownerTenantKeys != null && !ownerTenantKeys.isEmpty()) {
+            appendStringInClause(sql, params, "COALESCE(dt.tenant_key, '')", "tenantKey", ownerTenantKeys);
+        }
+
+        Collection<String> languageCodes = request.getLanguageCodes();
+        if ((languageCodes == null || languageCodes.isEmpty()) && context.getScope() != null && context.getScope().languageCode() != null) {
+            languageCodes = java.util.List.of(context.getScope().languageCode());
+        }
+        appendStringInClause(sql, params, "COALESCE(dtr.language_code, d.language_code, '')", "languageCode", languageCodes);
+
+        appendEnumInClause(sql, params, "dtr.representation_type::text", "representationType", request.getRepresentationTypes());
+
+        if (request.getCreatedFrom() != null) {
+            sql.append(" AND d.created_at >= :createdFrom");
+            params.put("createdFrom", request.getCreatedFrom());
+        }
+        if (request.getCreatedTo() != null) {
+            sql.append(" AND d.created_at <= :createdTo");
+            params.put("createdTo", request.getCreatedTo());
+        }
+    }
+
+    protected void bindParameters(Query query, Map<String, Object> params) {
+        for (Map.Entry<String, Object> entry : params.entrySet()) {
+            query.setParameter(entry.getKey(), entry.getValue());
+        }
+    }
+
+    protected Map<String, Object> newParams() {
+        return new LinkedHashMap<>();
+    }
+
+    protected void appendEnumInClause(StringBuilder sql, Map<String, Object> params, String expression, String baseParam, Collection<?> values) {
+        if (values == null || values.isEmpty()) {
+            return;
+        }
+        sql.append(" AND ").append(expression).append(" IN (");
+        int i = 0;
+        for (Object value : values) {
+            String param = baseParam + i++;
+            if (i > 1) {
+                sql.append(", ");
+            }
+            sql.append(':').append(param);
+            // Bind the constant name; an enum's toString() may be overridden for display purposes.
+            params.put(param, value instanceof Enum<?> constant ? constant.name() : value.toString());
+        }
+        sql.append(')');
+    }
+
+    protected void appendStringInClause(StringBuilder sql, Map<String, Object> params, String expression, String baseParam, Collection<String> values) {
+        if (values == null || values.isEmpty()) {
+            return;
+        }
+        sql.append(" AND ").append(expression).append(" IN (");
+        int i = 0;
+        for (String value : values) {
+            String param = baseParam + i++;
+            if (i > 1) {
+                sql.append(", ");
+            }
+            sql.append(':').append(param);
+            params.put(param, value);
+        }
+        sql.append(')');
+    }
+
+    protected UUID asUuid(Object value) {
+        if (value == null) {
+            return null;
+        }
+        if (value instanceof UUID uuid) {
+            return uuid;
+        }
+        return UUID.fromString(value.toString());
+    }
+
+    /**
+     * Converts a timestamp column of a native result row to an {@link OffsetDateTime}.
+     * Values that carry no offset of their own are returned at UTC.
+     */
+    protected OffsetDateTime asOffsetDateTime(Object value) {
+        if (value == null) {
+            return null;
+        }
+        if (value instanceof OffsetDateTime odt) {
+            return odt;
+        }
+        if (value instanceof Instant instant) {
+            return instant.atOffset(ZoneOffset.UTC);
+        }
+        if (value instanceof Timestamp timestamp) {
+            return timestamp.toInstant().atOffset(ZoneOffset.UTC);
+        }
+        if (value instanceof LocalDateTime ldt) {
+            return ldt.atOffset(ZoneOffset.UTC);
+        }
+        throw new IllegalArgumentException("Unsupported timestamp value: " + value.getClass());
+    }
+
+    protected String asString(Object value) {
+        return value == null ? null : value.toString();
+    }
+
+    protected Double asDouble(Object value) {
+        if (value == null) {
+            return null;
+        }
+        if (value instanceof Number number) {
+            return number.doubleValue();
+        }
+        return Double.parseDouble(value.toString());
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepository.java b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepository.java
new file mode 100644
index 0000000..db355ca
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepository.java
@@ -0,0 +1,8 @@
+package at.procon.dip.search.repository;
+
+import at.procon.dip.search.api.SearchExecutionContext;
+import java.util.List;
+
+public interface DocumentFullTextSearchRepository {
+    List<FullTextSearchRow> search(SearchExecutionContext context);
+}
diff --git a/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java
new file mode 100644
index 0000000..aa50443
--- /dev/null
+++
b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java
@@ -0,0 +1,94 @@
+package at.procon.dip.search.repository;
+
+import at.procon.dip.search.api.SearchExecutionContext;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+import jakarta.persistence.Query;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import org.springframework.stereotype.Repository;
+
+/**
+ * PostgreSQL full-text search over {@code DOC.doc_text_representation.search_vector}.
+ */
+@Repository
+public class DocumentFullTextSearchRepositoryImpl extends AbstractNativeSearchRepository implements DocumentFullTextSearchRepository {
+
+    @PersistenceContext
+    private EntityManager entityManager;
+
+    /**
+     * Runs the full-text query for the request in {@code context}.
+     *
+     * <p>The query text is parsed with the text search configuration the row's vector was built
+     * with ({@code search_config}: german, english or simple). Parsing it with {@code 'simple'}
+     * only never matched the stemmed lexemes stored for German/English rows. Each configuration
+     * gets its own constant tsquery in the WHERE clause so the GIN index stays usable.
+     *
+     * @return matching rows ordered by rank, then by last update; empty for a null/blank query
+     */
+    @Override
+    public List<FullTextSearchRow> search(SearchExecutionContext context) {
+        String queryText = context.getRequest().getQueryText();
+        if (queryText == null || queryText.isBlank()) {
+            return new ArrayList<>();
+        }
+
+        StringBuilder sql = new StringBuilder("""
+                SELECT
+                    d.id AS document_id,
+                    dtr.id AS representation_id,
+                    d.title AS title,
+                    d.summary AS summary,
+                    COALESCE(dtr.language_code, d.language_code) AS language_code,
+                    d.mime_type AS mime_type,
+                    d.document_type AS document_type,
+                    d.document_family AS document_family,
+                    d.visibility AS visibility,
+                    d.created_at AS created_at,
+                    d.updated_at AS updated_at,
+                    ts_headline(COALESCE(dtr.search_config, 'simple')::regconfig, COALESCE(dtr.text_body, ''), websearch_to_tsquery(COALESCE(dtr.search_config, 'simple')::regconfig, :queryText)) AS snippet,
+                    ts_rank_cd(dtr.search_vector, websearch_to_tsquery(COALESCE(dtr.search_config, 'simple')::regconfig, :queryText)) AS score
+                FROM DOC.doc_text_representation dtr
+                JOIN DOC.doc_document d ON d.id = dtr.document_id
+                LEFT JOIN DOC.doc_tenant dt ON dt.id = d.owner_tenant_id
+                WHERE (
+                    (dtr.search_config = 'german' AND dtr.search_vector @@ websearch_to_tsquery('german', :queryText))
+                    OR (dtr.search_config = 'english' AND dtr.search_vector @@ websearch_to_tsquery('english', :queryText))
+                    OR (COALESCE(dtr.search_config, 'simple') = 'simple' AND dtr.search_vector @@ websearch_to_tsquery('simple', :queryText))
+                )
+                """);
+
+        Map<String, Object> params = newParams();
+        params.put("queryText", queryText.trim());
+        appendGenericFilters(sql, params, context);
+        sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
+        params.put("limit", engineLimit(context));
+
+        Query query = entityManager.createNativeQuery(sql.toString());
+        bindParameters(query, params);
+
+        List<?> rows = query.getResultList();
+        List<FullTextSearchRow> results = new ArrayList<>(rows.size());
+        for (Object row : rows) {
+            Object[] cols = (Object[]) row;
+            results.add(new FullTextSearchRow(
+                    asUuid(cols[0]),
+                    asUuid(cols[1]),
+                    asString(cols[2]),
+                    asString(cols[3]),
+                    asString(cols[4]),
+                    asString(cols[5]),
+                    asString(cols[6]),
+                    asString(cols[7]),
+                    asString(cols[8]),
+                    asOffsetDateTime(cols[9]),
+                    asOffsetDateTime(cols[10]),
+                    asString(cols[11]),
+                    asDouble(cols[12])
+            ));
+        }
+        return results;
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepository.java b/src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepository.java
new file mode 100644
index 0000000..59082cf
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepository.java
@@ -0,0 +1,8 @@
+package at.procon.dip.search.repository;
+
+import at.procon.dip.search.api.SearchExecutionContext;
+import java.util.List;
+
+public interface DocumentTrigramSearchRepository {
+    List<TrigramSearchRow> search(SearchExecutionContext context);
+}
diff --git a/src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepositoryImpl.java b/src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepositoryImpl.java
new file mode 100644
index 0000000..4f7c47e
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/repository/DocumentTrigramSearchRepositoryImpl.java
@@ -0,0 +1,120 @@
+package at.procon.dip.search.repository;
+
+import at.procon.dip.search.api.SearchExecutionContext;
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+import jakarta.persistence.Query;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import org.springframework.stereotype.Repository;
+
+/**
+ * Fuzzy (pg_trgm) search over document title, document summary and representation text.
+ */
+@Repository
+public class DocumentTrigramSearchRepositoryImpl extends AbstractNativeSearchRepository implements DocumentTrigramSearchRepository {
+
+    @PersistenceContext
+    private EntityManager entityManager;
+
+    /**
+     * Runs the trigram similarity query for the request in {@code context}.
+     *
+     * <p>The {@code %} pre-filter is applied to the bare columns instead of
+     * {@code COALESCE(col, '')}: only a bare column reference can be served by the
+     * {@code gin_trgm_ops} indexes, and a NULL operand yields NULL, which the OR treats as
+     * "no match" exactly like the empty string did for any positive similarity threshold.
+     *
+     * @return matching rows ordered by similarity, then by last update; empty for a null/blank query
+     */
+    @Override
+    public List<TrigramSearchRow> search(SearchExecutionContext context) {
+        String queryText = context.getRequest().getQueryText();
+        if (queryText == null || queryText.isBlank()) {
+            return new ArrayList<>();
+        }
+
+        StringBuilder sql = new StringBuilder("""
+                SELECT
+                    d.id AS document_id,
+                    dtr.id AS representation_id,
+                    d.title AS title,
+                    d.summary AS summary,
+                    COALESCE(dtr.language_code, d.language_code) AS language_code,
+                    d.mime_type AS mime_type,
+                    d.document_type AS document_type,
+                    d.document_family AS document_family,
+                    d.visibility AS visibility,
+                    d.created_at AS created_at,
+                    d.updated_at AS updated_at,
+                    CASE
+                        WHEN similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(d.summary, ''), :queryText)
+                         AND similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
+                            THEN COALESCE(d.title, '')
+                        WHEN similarity(COALESCE(d.summary, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
+                            THEN COALESCE(d.summary, '')
+                        ELSE LEFT(COALESCE(dtr.text_body, ''), 400)
+                    END AS snippet,
+                    GREATEST(
+                        similarity(COALESCE(d.title, ''), :queryText),
+                        similarity(COALESCE(d.summary, ''), :queryText),
+                        similarity(COALESCE(dtr.text_body, ''), :queryText)
+                    ) AS score,
+                    CASE
+                        WHEN similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(d.summary, ''), :queryText)
+                         AND similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
+                            THEN 'DOCUMENT_TITLE'
+                        WHEN similarity(COALESCE(d.summary, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
+                            THEN 'DOCUMENT_SUMMARY'
+                        ELSE 'REPRESENTATION_TEXT'
+                    END AS matched_field
+                FROM DOC.doc_text_representation dtr
+                JOIN DOC.doc_document d ON d.id = dtr.document_id
+                LEFT JOIN DOC.doc_tenant dt ON dt.id = d.owner_tenant_id
+                WHERE (
+                    d.title % :queryText
+                    OR d.summary % :queryText
+                    OR dtr.text_body % :queryText
+                )
+                """);
+
+        Map<String, Object> params = newParams();
+        params.put("queryText", queryText.trim());
+        appendGenericFilters(sql, params, context);
+        sql.append(" AND GREATEST(")
+                .append(" similarity(COALESCE(d.title, ''), :queryText),")
+                .append(" similarity(COALESCE(d.summary, ''), :queryText),")
+                .append(" similarity(COALESCE(dtr.text_body, ''), :queryText)")
+                .append(") >= :minSimilarity");
+        sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
+        params.put("minSimilarity", 0.10d);
+        params.put("limit", engineLimit(context));
+
+        Query query = entityManager.createNativeQuery(sql.toString());
+        bindParameters(query, params);
+
+        List<?> rows = query.getResultList();
+        List<TrigramSearchRow> results = new ArrayList<>(rows.size());
+        for (Object row : rows) {
+            Object[] cols = (Object[]) row;
+            results.add(new TrigramSearchRow(
+                    asUuid(cols[0]),
+                    asUuid(cols[1]),
+                    asString(cols[2]),
+                    asString(cols[3]),
+                    asString(cols[4]),
+                    asString(cols[5]),
+                    asString(cols[6]),
+                    asString(cols[7]),
+                    asString(cols[8]),
+                    asOffsetDateTime(cols[9]),
+                    asOffsetDateTime(cols[10]),
+                    asString(cols[11]),
+                    asDouble(cols[12]),
+                    asString(cols[13])
+            ));
+        }
+        return results;
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/repository/FullTextSearchRow.java b/src/main/java/at/procon/dip/search/repository/FullTextSearchRow.java
new file mode 100644
index 0000000..19166e3
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/repository/FullTextSearchRow.java
@@ -0,0 +1,21 @@
+package at.procon.dip.search.repository;
+
+import java.time.OffsetDateTime;
+import java.util.UUID;
+
+public record FullTextSearchRow(
+        UUID documentId,
+        UUID representationId,
+        String title,
+        String summary,
+        String languageCode,
+        String mimeType,
+        String documentType,
+        String documentFamily,
+        String visibility,
+        OffsetDateTime createdAt,
+        OffsetDateTime updatedAt,
+        String snippet,
+        Double score
+) {
+}
diff --git a/src/main/java/at/procon/dip/search/repository/TrigramSearchRow.java b/src/main/java/at/procon/dip/search/repository/TrigramSearchRow.java
new file mode 100644
index 0000000..85f8836
--- /dev/null
+++
b/src/main/java/at/procon/dip/search/repository/TrigramSearchRow.java
@@ -0,0 +1,22 @@
+package at.procon.dip.search.repository;
+
+import java.time.OffsetDateTime;
+import java.util.UUID;
+
+public record TrigramSearchRow(
+        UUID documentId,
+        UUID representationId,
+        String title,
+        String summary,
+        String languageCode,
+        String mimeType,
+        String documentType,
+        String documentFamily,
+        String visibility,
+        OffsetDateTime createdAt,
+        OffsetDateTime updatedAt,
+        String snippet,
+        Double score,
+        String matchedField
+) {
+}
diff --git a/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java b/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java
new file mode 100644
index 0000000..88dd269
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/service/DefaultSearchOrchestrator.java
@@ -0,0 +1,51 @@
+package at.procon.dip.search.service;
+
+import at.procon.dip.search.api.SearchExecutionContext;
+import at.procon.dip.search.api.SearchExecutionPlan;
+import at.procon.dip.search.dto.SearchEngineType;
+import at.procon.dip.search.dto.SearchHit;
+import at.procon.dip.search.dto.SearchRequest;
+import at.procon.dip.search.dto.SearchResponse;
+import at.procon.dip.search.engine.SearchEngine;
+import at.procon.dip.search.plan.SearchPlanner;
+import at.procon.dip.search.rank.SearchResultFusionService;
+import at.procon.dip.search.spi.SearchDocumentScope;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import lombok.RequiredArgsConstructor;
+import org.springframework.stereotype.Service;
+
+/**
+ * Plans a search, runs every planned engine that supports the request and hands the
+ * per-engine hits to the fusion service. Page defaults to 0 and size to 20 when absent.
+ */
+@Service
+@RequiredArgsConstructor
+public class DefaultSearchOrchestrator implements SearchOrchestrator {
+
+    private final SearchPlanner planner;
+    private final List<SearchEngine> engines;
+    private final SearchResultFusionService fusionService;
+
+    @Override
+    public SearchResponse search(SearchRequest request, SearchDocumentScope scope) {
+        SearchExecutionContext context = SearchExecutionContext.builder()
+                .request(request)
+                .scope(scope)
+                .page(request.getPage() == null ? 0 : request.getPage())
+                .size(request.getSize() == null ? 20 : request.getSize())
+                .build();
+
+        SearchExecutionPlan plan = planner.plan(context);
+
+        Map<SearchEngineType, List<SearchHit>> engineResults = new LinkedHashMap<>();
+        for (SearchEngine engine : engines) {
+            if (plan.getEngines().contains(engine.type()) && engine.supports(context)) {
+                engineResults.put(engine.type(), engine.execute(context));
+            }
+        }
+
+        return fusionService.fuse(context, plan, engineResults);
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java b/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java
new file mode 100644
index 0000000..3cc8ef3
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/service/DocumentLexicalIndexService.java
@@ -0,0 +1,82 @@
+package at.procon.dip.search.service;
+
+import jakarta.persistence.EntityManager;
+import jakarta.persistence.PersistenceContext;
+import jakarta.transaction.Transactional;
+import java.util.UUID;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+
+/**
+ * Maintains the full-text columns {@code search_config} and {@code search_vector} of
+ * {@code DOC.doc_text_representation}.
+ */
+@Service
+@Transactional
+@Slf4j
+public class DocumentLexicalIndexService {
+
+    /**
+     * UPDATE statement without its WHERE clause. Defined once so that the single-row refresh
+     * and the bulk backfill cannot drift apart in how they pick the text search configuration.
+     */
+    private static final String REFRESH_SQL = """
+            UPDATE DOC.doc_text_representation
+            SET search_config = CASE
+                    WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'
+                    WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'
+                    ELSE 'simple'
+                END,
+                search_vector = to_tsvector(
+                    CASE
+                        WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig
+                        WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig
+                        ELSE 'simple'::regconfig
+                    END,
+                    coalesce(text_body, '')
+                )
+            """;
+
+    @PersistenceContext
+    private EntityManager entityManager;
+
+    /**
+     * Recomputes configuration and vector for one representation. Does nothing (debug log only)
+     * while the search columns do not exist.
+     */
+    public void refreshRepresentationLexicalIndex(UUID representationId) {
+        if (!isLexicalSearchSchemaAvailable()) {
+            log.debug("Skipping lexical index refresh for representation {} because search columns are not available yet", representationId);
+            return;
+        }
+        entityManager.createNativeQuery(REFRESH_SQL + "WHERE id = :representationId")
+                .setParameter("representationId", representationId)
+                .executeUpdate();
+    }
+
+    /**
+     * Computes configuration and vector for every representation whose {@code search_vector}
+     * is still NULL. Does nothing (info log only) while the search columns do not exist.
+     */
+    public void refreshAllMissingLexicalIndexes() {
+        if (!isLexicalSearchSchemaAvailable()) {
+            log.info("Lexical search columns are not available yet. Skipping startup backfill for DOC lexical indexes.");
+            return;
+        }
+        entityManager.createNativeQuery(REFRESH_SQL + "WHERE search_vector IS NULL")
+                .executeUpdate();
+    }
+
+    /** Checks via {@code information_schema} that both search columns exist. */
+    private boolean isLexicalSearchSchemaAvailable() {
+        Number count = (Number) entityManager.createNativeQuery("""
+                SELECT COUNT(*)
+                FROM information_schema.columns
+                WHERE table_schema = 'doc'
+                  AND table_name = 'doc_text_representation'
+                  AND column_name IN ('search_config', 'search_vector')
+                """)
+                .getSingleResult();
+        return count != null && count.intValue() >= 2;
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java b/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java
new file mode 100644
index 0000000..0131bbd
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/service/SearchOrchestrator.java
@@ -0,0 +1,9 @@
+package at.procon.dip.search.service;
+
+import at.procon.dip.search.dto.SearchRequest;
+import at.procon.dip.search.dto.SearchResponse;
+import at.procon.dip.search.spi.SearchDocumentScope;
+
+public interface SearchOrchestrator {
+    SearchResponse search(SearchRequest request, SearchDocumentScope scope);
+}
diff --git a/src/main/java/at/procon/dip/search/startup/LexicalSearchStartupRunner.java b/src/main/java/at/procon/dip/search/startup/LexicalSearchStartupRunner.java
new file mode 100644
index 0000000..05e4faa
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/startup/LexicalSearchStartupRunner.java
@@ -0,0 +1,24 @@
+package at.procon.dip.search.startup;
+
+import at.procon.dip.search.service.DocumentLexicalIndexService;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.boot.CommandLineRunner;
+import org.springframework.stereotype.Component;
+
+/**
+ * Triggers the backfill of missing lexical search vectors once at application startup.
+ */
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class LexicalSearchStartupRunner implements CommandLineRunner {
+
+    private final DocumentLexicalIndexService lexicalIndexService;
+
+    @Override
+    public void run(String... args) {
+        log.info("Refreshing missing lexical search vectors for DOC text representations");
+        lexicalIndexService.refreshAllMissingLexicalIndexes();
+    }
+}
diff --git a/src/main/java/at/procon/dip/search/web/GenericSearchController.java b/src/main/java/at/procon/dip/search/web/GenericSearchController.java
new file mode 100644
index 0000000..f819de0
--- /dev/null
+++ b/src/main/java/at/procon/dip/search/web/GenericSearchController.java
@@ -0,0 +1,39 @@
+package at.procon.dip.search.web;
+
+import at.procon.dip.search.dto.SearchRequest;
+import at.procon.dip.search.dto.SearchResponse;
+import at.procon.dip.search.service.SearchOrchestrator;
+import at.procon.dip.search.spi.SearchDocumentScope;
+import jakarta.validation.Valid;
+import java.util.Set;
+import lombok.RequiredArgsConstructor;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+/**
+ * REST entry point ({@code POST /search}) that derives the document scope from the request
+ * itself (first language code only, empty first scope argument) and delegates to the orchestrator.
+ */
+@RestController
+@RequestMapping("/search")
+@RequiredArgsConstructor
+public class GenericSearchController {
+
+    private final SearchOrchestrator searchOrchestrator;
+
+    @PostMapping
+    public SearchResponse search(@Valid @RequestBody SearchRequest request) {
+        SearchDocumentScope scope = new SearchDocumentScope(
+                Set.of(),
+                request.getDocumentTypes(),
+                request.getDocumentFamilies(),
+                request.getVisibilities(),
+                request.getLanguageCodes() == null || request.getLanguageCodes().isEmpty()
+                        ? null
+                        : request.getLanguageCodes().iterator().next()
+        );
+        return searchOrchestrator.search(request, scope);
+    }
+}
diff --git a/src/main/resources/db/migration/V9__doc_search_slice1_support.sql b/src/main/resources/db/migration/V9__doc_search_slice1_support.sql
new file mode 100644
index 0000000..d797ad9
--- /dev/null
+++ b/src/main/resources/db/migration/V9__doc_search_slice1_support.sql
@@ -0,0 +1,26 @@
+-- Slice 1 generic lexical search support.
+-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
+
+CREATE EXTENSION IF NOT EXISTS pg_trgm;
+
+ALTER TABLE DOC.doc_text_representation
+    ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
+
+ALTER TABLE DOC.doc_text_representation
+    ADD COLUMN IF NOT EXISTS search_vector tsvector;
+
+CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
+    ON DOC.doc_text_representation
+    USING GIN (search_vector);
+
+CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
+    ON DOC.doc_document
+    USING GIN (title gin_trgm_ops);
+
+CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
+    ON DOC.doc_document
+    USING GIN (summary gin_trgm_ops);
+
+CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
+    ON DOC.doc_text_representation
+    USING GIN (text_body gin_trgm_ops);