Refactor phases 5 - search - slice 1
parent
90093ab98d
commit
47894257a4
@ -0,0 +1,16 @@
|
|||||||
|
Slice 1 generic search patch
|
||||||
|
|
||||||
|
Included changes:
|
||||||
|
- Generic search DTOs, planner, orchestrator, engine SPI
|
||||||
|
- PostgreSQL full-text engine and repository
|
||||||
|
- PostgreSQL trigram engine and repository
|
||||||
|
- Score normalization and result fusion
|
||||||
|
- Generic /search endpoint
|
||||||
|
- Lexical index maintenance service and startup backfill runner
|
||||||
|
- DOC lexical search migration (V9)
|
||||||
|
- Modified DOC representation write path to refresh search vectors
|
||||||
|
|
||||||
|
Important notes:
|
||||||
|
- Full-text search requires V9__doc_search_slice1_support.sql to be applied.
|
||||||
|
- The lexical index service is guarded and will no-op if the search columns are not yet present.
|
||||||
|
- Because Flyway is currently disabled in application.yml, apply the migration manually or enable Flyway before using the new search endpoint.
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
package at.procon.dip.search.api;
|
||||||
|
|
||||||
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
|
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
public class SearchExecutionContext {
|
||||||
|
|
||||||
|
private final SearchRequest request;
|
||||||
|
private final SearchDocumentScope scope;
|
||||||
|
private final int page;
|
||||||
|
private final int size;
|
||||||
|
}
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
package at.procon.dip.search.api;
|
||||||
|
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchSortMode;
|
||||||
|
import java.util.List;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Builder
|
||||||
|
public class SearchExecutionPlan {
|
||||||
|
|
||||||
|
private final List<SearchEngineType> engines;
|
||||||
|
private final boolean collapseByDocument;
|
||||||
|
private final SearchSortMode sortMode;
|
||||||
|
}
|
||||||
@ -0,0 +1,6 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
/** Identifies the concrete search engines that can take part in a search. */
public enum SearchEngineType {

    /** PostgreSQL full-text search engine. */
    POSTGRES_FULLTEXT,

    /** PostgreSQL trigram similarity engine. */
    POSTGRES_TRIGRAM
}
|
||||||
@ -0,0 +1,41 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.access.DocumentVisibility;
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder(toBuilder = true)
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class SearchHit {
|
||||||
|
|
||||||
|
private UUID documentId;
|
||||||
|
private UUID representationId;
|
||||||
|
|
||||||
|
private DocumentType documentType;
|
||||||
|
private DocumentFamily documentFamily;
|
||||||
|
private DocumentVisibility visibility;
|
||||||
|
|
||||||
|
private String title;
|
||||||
|
private String summary;
|
||||||
|
private String languageCode;
|
||||||
|
private String mimeType;
|
||||||
|
|
||||||
|
private SearchEngineType primaryEngine;
|
||||||
|
private SearchMatchField matchedField;
|
||||||
|
private String snippet;
|
||||||
|
|
||||||
|
private double rawScore;
|
||||||
|
private double normalizedScore;
|
||||||
|
private double finalScore;
|
||||||
|
|
||||||
|
private OffsetDateTime createdAt;
|
||||||
|
private OffsetDateTime updatedAt;
|
||||||
|
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
/** Names the field of a document in which a search match was found. */
public enum SearchMatchField {

    /** The match was found in the document title. */
    DOCUMENT_TITLE,

    /** The match was found in the document summary. */
    DOCUMENT_SUMMARY,

    /** The match was found in the text body of a representation. */
    REPRESENTATION_TEXT
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
/** Search strategies a client can ask for in a search request. */
public enum SearchMode {

    /** Full-text search only. */
    FULLTEXT,

    /** Trigram similarity search only. */
    TRIGRAM,

    /** Combination of the full-text and the trigram search. */
    HYBRID
}
|
||||||
@ -0,0 +1,48 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.access.DocumentVisibility;
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
|
import at.procon.dip.domain.document.RepresentationType;
|
||||||
|
import jakarta.validation.constraints.Min;
|
||||||
|
import jakarta.validation.constraints.NotBlank;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class SearchRequest {
|
||||||
|
|
||||||
|
@NotBlank
|
||||||
|
private String queryText;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
private Set<SearchMode> modes = new LinkedHashSet<>(Set.of(SearchMode.HYBRID));
|
||||||
|
|
||||||
|
private Set<DocumentType> documentTypes;
|
||||||
|
private Set<DocumentFamily> documentFamilies;
|
||||||
|
private Set<DocumentVisibility> visibilities;
|
||||||
|
private Set<String> languageCodes;
|
||||||
|
private Set<RepresentationType> representationTypes;
|
||||||
|
private OffsetDateTime createdFrom;
|
||||||
|
private OffsetDateTime createdTo;
|
||||||
|
|
||||||
|
@Min(0)
|
||||||
|
private Integer page;
|
||||||
|
|
||||||
|
@Min(1)
|
||||||
|
private Integer size;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
private SearchSortMode sortMode = SearchSortMode.SCORE_DESC;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
private boolean collapseByDocument = true;
|
||||||
|
}
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@Builder
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class SearchResponse {
|
||||||
|
|
||||||
|
private List<SearchHit> hits;
|
||||||
|
private int page;
|
||||||
|
private int size;
|
||||||
|
private long totalHits;
|
||||||
|
private boolean truncated;
|
||||||
|
private Set<SearchEngineType> enginesUsed;
|
||||||
|
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.search.dto;
|
||||||
|
|
||||||
|
/** Orderings a client can request for search results. */
public enum SearchSortMode {

    /** Order by score, descending. */
    SCORE_DESC,

    /** Order by creation timestamp, descending. */
    CREATED_AT_DESC,

    /** Order by title, ascending. */
    TITLE_ASC
}
|
||||||
@ -0,0 +1,12 @@
|
|||||||
|
package at.procon.dip.search.engine;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface SearchEngine {
|
||||||
|
SearchEngineType type();
|
||||||
|
boolean supports(SearchExecutionContext context);
|
||||||
|
List<SearchHit> execute(SearchExecutionContext context);
|
||||||
|
}
|
||||||
@ -0,0 +1,71 @@
|
|||||||
|
package at.procon.dip.search.engine.fulltext;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.access.DocumentVisibility;
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import at.procon.dip.search.dto.SearchMatchField;
|
||||||
|
import at.procon.dip.search.engine.SearchEngine;
|
||||||
|
import at.procon.dip.search.repository.DocumentFullTextSearchRepository;
|
||||||
|
import at.procon.dip.search.repository.FullTextSearchRow;
|
||||||
|
import java.util.List;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class PostgresFullTextSearchEngine implements SearchEngine {
|
||||||
|
|
||||||
|
private final DocumentFullTextSearchRepository repository;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchEngineType type() {
|
||||||
|
return SearchEngineType.POSTGRES_FULLTEXT;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean supports(SearchExecutionContext context) {
|
||||||
|
return context.getRequest().getQueryText() != null && !context.getRequest().getQueryText().isBlank();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SearchHit> execute(SearchExecutionContext context) {
|
||||||
|
return repository.search(context).stream()
|
||||||
|
.map(this::mapRow)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchHit mapRow(FullTextSearchRow row) {
|
||||||
|
return SearchHit.builder()
|
||||||
|
.documentId(row.documentId())
|
||||||
|
.representationId(row.representationId())
|
||||||
|
.documentType(parseDocumentType(row.documentType()))
|
||||||
|
.documentFamily(parseDocumentFamily(row.documentFamily()))
|
||||||
|
.visibility(parseVisibility(row.visibility()))
|
||||||
|
.title(row.title())
|
||||||
|
.summary(row.summary())
|
||||||
|
.languageCode(row.languageCode())
|
||||||
|
.mimeType(row.mimeType())
|
||||||
|
.primaryEngine(SearchEngineType.POSTGRES_FULLTEXT)
|
||||||
|
.matchedField(SearchMatchField.REPRESENTATION_TEXT)
|
||||||
|
.snippet(row.snippet())
|
||||||
|
.rawScore(row.score() == null ? 0.0d : row.score())
|
||||||
|
.createdAt(row.createdAt())
|
||||||
|
.updatedAt(row.updatedAt())
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentType parseDocumentType(String value) {
|
||||||
|
return value == null ? null : DocumentType.valueOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentFamily parseDocumentFamily(String value) {
|
||||||
|
return value == null ? null : DocumentFamily.valueOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentVisibility parseVisibility(String value) {
|
||||||
|
return value == null ? null : DocumentVisibility.valueOf(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,75 @@
|
|||||||
|
package at.procon.dip.search.engine.trigram;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.access.DocumentVisibility;
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import at.procon.dip.search.dto.SearchMatchField;
|
||||||
|
import at.procon.dip.search.engine.SearchEngine;
|
||||||
|
import at.procon.dip.search.repository.DocumentTrigramSearchRepository;
|
||||||
|
import at.procon.dip.search.repository.TrigramSearchRow;
|
||||||
|
import java.util.List;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class PostgresTrigramSearchEngine implements SearchEngine {
|
||||||
|
|
||||||
|
private final DocumentTrigramSearchRepository repository;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchEngineType type() {
|
||||||
|
return SearchEngineType.POSTGRES_TRIGRAM;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean supports(SearchExecutionContext context) {
|
||||||
|
return context.getRequest().getQueryText() != null && !context.getRequest().getQueryText().isBlank();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SearchHit> execute(SearchExecutionContext context) {
|
||||||
|
return repository.search(context).stream()
|
||||||
|
.map(this::mapRow)
|
||||||
|
.toList();
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchHit mapRow(TrigramSearchRow row) {
|
||||||
|
return SearchHit.builder()
|
||||||
|
.documentId(row.documentId())
|
||||||
|
.representationId(row.representationId())
|
||||||
|
.documentType(parseDocumentType(row.documentType()))
|
||||||
|
.documentFamily(parseDocumentFamily(row.documentFamily()))
|
||||||
|
.visibility(parseVisibility(row.visibility()))
|
||||||
|
.title(row.title())
|
||||||
|
.summary(row.summary())
|
||||||
|
.languageCode(row.languageCode())
|
||||||
|
.mimeType(row.mimeType())
|
||||||
|
.primaryEngine(SearchEngineType.POSTGRES_TRIGRAM)
|
||||||
|
.matchedField(parseMatchField(row.matchedField()))
|
||||||
|
.snippet(row.snippet())
|
||||||
|
.rawScore(row.score() == null ? 0.0d : row.score())
|
||||||
|
.createdAt(row.createdAt())
|
||||||
|
.updatedAt(row.updatedAt())
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchMatchField parseMatchField(String value) {
|
||||||
|
return value == null ? SearchMatchField.REPRESENTATION_TEXT : SearchMatchField.valueOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentType parseDocumentType(String value) {
|
||||||
|
return value == null ? null : DocumentType.valueOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentFamily parseDocumentFamily(String value) {
|
||||||
|
return value == null ? null : DocumentFamily.valueOf(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentVisibility parseVisibility(String value) {
|
||||||
|
return value == null ? null : DocumentVisibility.valueOf(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,33 @@
|
|||||||
|
package at.procon.dip.search.plan;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionPlan;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchMode;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class DefaultSearchPlanner implements SearchPlanner {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchExecutionPlan plan(SearchExecutionContext context) {
|
||||||
|
Set<SearchMode> modes = context.getRequest().getModes();
|
||||||
|
List<SearchEngineType> engines = new ArrayList<>();
|
||||||
|
|
||||||
|
if (modes == null || modes.isEmpty() || modes.contains(SearchMode.HYBRID) || modes.contains(SearchMode.FULLTEXT)) {
|
||||||
|
engines.add(SearchEngineType.POSTGRES_FULLTEXT);
|
||||||
|
}
|
||||||
|
if (modes == null || modes.isEmpty() || modes.contains(SearchMode.HYBRID) || modes.contains(SearchMode.TRIGRAM)) {
|
||||||
|
engines.add(SearchEngineType.POSTGRES_TRIGRAM);
|
||||||
|
}
|
||||||
|
|
||||||
|
return SearchExecutionPlan.builder()
|
||||||
|
.engines(engines)
|
||||||
|
.collapseByDocument(context.getRequest().isCollapseByDocument())
|
||||||
|
.sortMode(context.getRequest().getSortMode())
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package at.procon.dip.search.plan;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionPlan;
|
||||||
|
|
||||||
|
public interface SearchPlanner {
|
||||||
|
SearchExecutionPlan plan(SearchExecutionContext context);
|
||||||
|
}
|
||||||
@ -0,0 +1,107 @@
|
|||||||
|
package at.procon.dip.search.rank;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionPlan;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import at.procon.dip.search.dto.SearchResponse;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class DefaultSearchResultFusionService implements SearchResultFusionService {
|
||||||
|
|
||||||
|
private static final double FULLTEXT_WEIGHT = 0.60d;
|
||||||
|
private static final double TRIGRAM_WEIGHT = 0.40d;
|
||||||
|
|
||||||
|
private final SearchScoreNormalizer scoreNormalizer;
|
||||||
|
|
||||||
|
public DefaultSearchResultFusionService(SearchScoreNormalizer scoreNormalizer) {
|
||||||
|
this.scoreNormalizer = scoreNormalizer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchResponse fuse(SearchExecutionContext context,
|
||||||
|
SearchExecutionPlan plan,
|
||||||
|
Map<SearchEngineType, List<SearchHit>> engineResults) {
|
||||||
|
Map<SearchEngineType, List<SearchHit>> normalizedResults = new LinkedHashMap<>();
|
||||||
|
for (Map.Entry<SearchEngineType, List<SearchHit>> entry : engineResults.entrySet()) {
|
||||||
|
normalizedResults.put(entry.getKey(), scoreNormalizer.normalize(entry.getKey(), entry.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
List<SearchHit> ranked = plan.isCollapseByDocument()
|
||||||
|
? collapseByDocument(normalizedResults)
|
||||||
|
: flatten(normalizedResults);
|
||||||
|
|
||||||
|
ranked.sort(Comparator
|
||||||
|
.comparingDouble(SearchHit::getFinalScore).reversed()
|
||||||
|
.thenComparing(SearchHit::getUpdatedAt, Comparator.nullsLast(Comparator.reverseOrder())));
|
||||||
|
|
||||||
|
int totalHits = ranked.size();
|
||||||
|
int fromIndex = Math.min(context.getPage() * context.getSize(), ranked.size());
|
||||||
|
int toIndex = Math.min(fromIndex + context.getSize(), ranked.size());
|
||||||
|
List<SearchHit> pageHits = ranked.subList(fromIndex, toIndex);
|
||||||
|
|
||||||
|
return SearchResponse.builder()
|
||||||
|
.hits(new ArrayList<>(pageHits))
|
||||||
|
.page(context.getPage())
|
||||||
|
.size(context.getSize())
|
||||||
|
.totalHits(totalHits)
|
||||||
|
.truncated(toIndex < totalHits)
|
||||||
|
.enginesUsed(new LinkedHashSet<>(normalizedResults.keySet()))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<SearchHit> flatten(Map<SearchEngineType, List<SearchHit>> normalizedResults) {
|
||||||
|
List<SearchHit> merged = new ArrayList<>();
|
||||||
|
for (Map.Entry<SearchEngineType, List<SearchHit>> entry : normalizedResults.entrySet()) {
|
||||||
|
for (SearchHit hit : entry.getValue()) {
|
||||||
|
merged.add(hit.toBuilder().finalScore(weight(entry.getKey()) * hit.getNormalizedScore()).build());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<SearchHit> collapseByDocument(Map<SearchEngineType, List<SearchHit>> normalizedResults) {
|
||||||
|
Map<UUID, SearchHit> collapsed = new LinkedHashMap<>();
|
||||||
|
Map<UUID, Double> accumulatedScores = new LinkedHashMap<>();
|
||||||
|
Set<UUID> docOrder = new LinkedHashSet<>();
|
||||||
|
|
||||||
|
for (Map.Entry<SearchEngineType, List<SearchHit>> entry : normalizedResults.entrySet()) {
|
||||||
|
double weight = weight(entry.getKey());
|
||||||
|
for (SearchHit hit : entry.getValue()) {
|
||||||
|
docOrder.add(hit.getDocumentId());
|
||||||
|
double contribution = weight * hit.getNormalizedScore();
|
||||||
|
accumulatedScores.merge(hit.getDocumentId(), contribution, Double::sum);
|
||||||
|
|
||||||
|
SearchHit existing = collapsed.get(hit.getDocumentId());
|
||||||
|
if (existing == null || hit.getNormalizedScore() > existing.getNormalizedScore()) {
|
||||||
|
collapsed.put(hit.getDocumentId(), hit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<SearchHit> results = new ArrayList<>(docOrder.size());
|
||||||
|
for (UUID documentId : docOrder) {
|
||||||
|
SearchHit base = collapsed.get(documentId);
|
||||||
|
if (base != null) {
|
||||||
|
results.add(base.toBuilder().finalScore(accumulatedScores.getOrDefault(documentId, 0.0d)).build());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double weight(SearchEngineType engineType) {
|
||||||
|
return switch (engineType) {
|
||||||
|
case POSTGRES_FULLTEXT -> FULLTEXT_WEIGHT;
|
||||||
|
case POSTGRES_TRIGRAM -> TRIGRAM_WEIGHT;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,28 @@
|
|||||||
|
package at.procon.dip.search.rank;
|
||||||
|
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class DefaultSearchScoreNormalizer implements SearchScoreNormalizer {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SearchHit> normalize(SearchEngineType engineType, List<SearchHit> hits) {
|
||||||
|
if (hits == null || hits.isEmpty()) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
double max = hits.stream().mapToDouble(SearchHit::getRawScore).max().orElse(0.0d);
|
||||||
|
if (max <= 0.0d) {
|
||||||
|
max = 1.0d;
|
||||||
|
}
|
||||||
|
List<SearchHit> normalized = new ArrayList<>(hits.size());
|
||||||
|
for (SearchHit hit : hits) {
|
||||||
|
double score = Math.max(0.0d, Math.min(1.0d, hit.getRawScore() / max));
|
||||||
|
normalized.add(hit.toBuilder().normalizedScore(score).build());
|
||||||
|
}
|
||||||
|
return normalized;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,17 @@
|
|||||||
|
package at.procon.dip.search.rank;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionPlan;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import at.procon.dip.search.dto.SearchResponse;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public interface SearchResultFusionService {
|
||||||
|
SearchResponse fuse(
|
||||||
|
SearchExecutionContext context,
|
||||||
|
SearchExecutionPlan plan,
|
||||||
|
Map<SearchEngineType, List<SearchHit>> engineResults
|
||||||
|
);
|
||||||
|
}
|
||||||
@ -0,0 +1,9 @@
|
|||||||
|
package at.procon.dip.search.rank;
|
||||||
|
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface SearchScoreNormalizer {
|
||||||
|
List<SearchHit> normalize(SearchEngineType engineType, List<SearchHit> hits);
|
||||||
|
}
|
||||||
@ -0,0 +1,140 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.access.DocumentVisibility;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
|
import jakarta.persistence.Query;
|
||||||
|
import java.sql.Timestamp;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.time.ZoneOffset;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
abstract class AbstractNativeSearchRepository {
|
||||||
|
|
||||||
|
protected int engineLimit(SearchExecutionContext context) {
|
||||||
|
return Math.max(50, (context.getPage() + 1) * context.getSize() * 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void appendGenericFilters(StringBuilder sql, Map<String, Object> params, SearchExecutionContext context) {
|
||||||
|
SearchRequest request = context.getRequest();
|
||||||
|
|
||||||
|
appendEnumInClause(sql, params, "d.document_type::text", "documentType", request.getDocumentTypes());
|
||||||
|
appendEnumInClause(sql, params, "d.document_family::text", "documentFamily", request.getDocumentFamilies());
|
||||||
|
|
||||||
|
Collection<DocumentVisibility> visibilities = request.getVisibilities();
|
||||||
|
if ((visibilities == null || visibilities.isEmpty()) && context.getScope() != null) {
|
||||||
|
visibilities = context.getScope().visibilities();
|
||||||
|
}
|
||||||
|
appendEnumInClause(sql, params, "d.visibility::text", "visibility", visibilities);
|
||||||
|
|
||||||
|
Collection<String> ownerTenantKeys = context.getScope() == null ? null : context.getScope().ownerTenantKeys();
|
||||||
|
if (ownerTenantKeys != null && !ownerTenantKeys.isEmpty()) {
|
||||||
|
appendStringInClause(sql, params, "COALESCE(dt.tenant_key, '')", "tenantKey", ownerTenantKeys);
|
||||||
|
}
|
||||||
|
|
||||||
|
Collection<String> languageCodes = request.getLanguageCodes();
|
||||||
|
if ((languageCodes == null || languageCodes.isEmpty()) && context.getScope() != null && context.getScope().languageCode() != null) {
|
||||||
|
languageCodes = java.util.List.of(context.getScope().languageCode());
|
||||||
|
}
|
||||||
|
appendStringInClause(sql, params, "COALESCE(dtr.language_code, d.language_code, '')", "languageCode", languageCodes);
|
||||||
|
|
||||||
|
appendEnumInClause(sql, params, "dtr.representation_type::text", "representationType", request.getRepresentationTypes());
|
||||||
|
|
||||||
|
if (request.getCreatedFrom() != null) {
|
||||||
|
sql.append(" AND d.created_at >= :createdFrom");
|
||||||
|
params.put("createdFrom", request.getCreatedFrom());
|
||||||
|
}
|
||||||
|
if (request.getCreatedTo() != null) {
|
||||||
|
sql.append(" AND d.created_at <= :createdTo");
|
||||||
|
params.put("createdTo", request.getCreatedTo());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void bindParameters(Query query, Map<String, Object> params) {
|
||||||
|
for (Map.Entry<String, Object> entry : params.entrySet()) {
|
||||||
|
query.setParameter(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Map<String, Object> newParams() {
|
||||||
|
return new LinkedHashMap<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void appendEnumInClause(StringBuilder sql, Map<String, Object> params, String expression, String baseParam, Collection<?> values) {
|
||||||
|
if (values == null || values.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sql.append(" AND ").append(expression).append(" IN (");
|
||||||
|
int i = 0;
|
||||||
|
for (Object value : values) {
|
||||||
|
String param = baseParam + i++;
|
||||||
|
if (i > 1) {
|
||||||
|
sql.append(", ");
|
||||||
|
}
|
||||||
|
sql.append(':').append(param);
|
||||||
|
params.put(param, value.toString());
|
||||||
|
}
|
||||||
|
sql.append(')');
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void appendStringInClause(StringBuilder sql, Map<String, Object> params, String expression, String baseParam, Collection<String> values) {
|
||||||
|
if (values == null || values.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
sql.append(" AND ").append(expression).append(" IN (");
|
||||||
|
int i = 0;
|
||||||
|
for (String value : values) {
|
||||||
|
String param = baseParam + i++;
|
||||||
|
if (i > 1) {
|
||||||
|
sql.append(", ");
|
||||||
|
}
|
||||||
|
sql.append(':').append(param);
|
||||||
|
params.put(param, value);
|
||||||
|
}
|
||||||
|
sql.append(')');
|
||||||
|
}
|
||||||
|
|
||||||
|
protected UUID asUuid(Object value) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (value instanceof UUID uuid) {
|
||||||
|
return uuid;
|
||||||
|
}
|
||||||
|
return UUID.fromString(value.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected OffsetDateTime asOffsetDateTime(Object value) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (value instanceof OffsetDateTime odt) {
|
||||||
|
return odt;
|
||||||
|
}
|
||||||
|
if (value instanceof Timestamp timestamp) {
|
||||||
|
return timestamp.toInstant().atOffset(ZoneOffset.UTC);
|
||||||
|
}
|
||||||
|
if (value instanceof LocalDateTime ldt) {
|
||||||
|
return ldt.atOffset(ZoneOffset.UTC);
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Unsupported timestamp value: " + value.getClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected String asString(Object value) {
|
||||||
|
return value == null ? null : value.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Double asDouble(Object value) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (value instanceof Number number) {
|
||||||
|
return number.doubleValue();
|
||||||
|
}
|
||||||
|
return Double.parseDouble(value.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface DocumentFullTextSearchRepository {
|
||||||
|
List<FullTextSearchRow> search(SearchExecutionContext context);
|
||||||
|
}
|
||||||
@ -0,0 +1,72 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import jakarta.persistence.EntityManager;
|
||||||
|
import jakarta.persistence.PersistenceContext;
|
||||||
|
import jakarta.persistence.Query;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public class DocumentFullTextSearchRepositoryImpl extends AbstractNativeSearchRepository implements DocumentFullTextSearchRepository {
|
||||||
|
|
||||||
|
@PersistenceContext
|
||||||
|
private EntityManager entityManager;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<FullTextSearchRow> search(SearchExecutionContext context) {
|
||||||
|
StringBuilder sql = new StringBuilder("""
|
||||||
|
SELECT
|
||||||
|
d.id AS document_id,
|
||||||
|
dtr.id AS representation_id,
|
||||||
|
d.title AS title,
|
||||||
|
d.summary AS summary,
|
||||||
|
COALESCE(dtr.language_code, d.language_code) AS language_code,
|
||||||
|
d.mime_type AS mime_type,
|
||||||
|
d.document_type AS document_type,
|
||||||
|
d.document_family AS document_family,
|
||||||
|
d.visibility AS visibility,
|
||||||
|
d.created_at AS created_at,
|
||||||
|
d.updated_at AS updated_at,
|
||||||
|
ts_headline('simple', COALESCE(dtr.text_body, ''), websearch_to_tsquery('simple', :queryText)) AS snippet,
|
||||||
|
ts_rank_cd(dtr.search_vector, websearch_to_tsquery('simple', :queryText)) AS score
|
||||||
|
FROM DOC.doc_text_representation dtr
|
||||||
|
JOIN DOC.doc_document d ON d.id = dtr.document_id
|
||||||
|
LEFT JOIN DOC.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
|
WHERE dtr.search_vector @@ websearch_to_tsquery('simple', :queryText)
|
||||||
|
""");
|
||||||
|
|
||||||
|
Map<String, Object> params = newParams();
|
||||||
|
params.put("queryText", context.getRequest().getQueryText().trim());
|
||||||
|
appendGenericFilters(sql, params, context);
|
||||||
|
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
||||||
|
params.put("limit", engineLimit(context));
|
||||||
|
|
||||||
|
Query query = entityManager.createNativeQuery(sql.toString());
|
||||||
|
bindParameters(query, params);
|
||||||
|
|
||||||
|
List<?> rows = query.getResultList();
|
||||||
|
List<FullTextSearchRow> results = new ArrayList<>(rows.size());
|
||||||
|
for (Object row : rows) {
|
||||||
|
Object[] cols = (Object[]) row;
|
||||||
|
results.add(new FullTextSearchRow(
|
||||||
|
asUuid(cols[0]),
|
||||||
|
asUuid(cols[1]),
|
||||||
|
asString(cols[2]),
|
||||||
|
asString(cols[3]),
|
||||||
|
asString(cols[4]),
|
||||||
|
asString(cols[5]),
|
||||||
|
asString(cols[6]),
|
||||||
|
asString(cols[7]),
|
||||||
|
asString(cols[8]),
|
||||||
|
asOffsetDateTime(cols[9]),
|
||||||
|
asOffsetDateTime(cols[10]),
|
||||||
|
asString(cols[11]),
|
||||||
|
asDouble(cols[12])
|
||||||
|
));
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public interface DocumentTrigramSearchRepository {
|
||||||
|
List<TrigramSearchRow> search(SearchExecutionContext context);
|
||||||
|
}
|
||||||
@ -0,0 +1,102 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import jakarta.persistence.EntityManager;
|
||||||
|
import jakarta.persistence.PersistenceContext;
|
||||||
|
import jakarta.persistence.Query;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public class DocumentTrigramSearchRepositoryImpl extends AbstractNativeSearchRepository implements DocumentTrigramSearchRepository {
|
||||||
|
|
||||||
|
@PersistenceContext
|
||||||
|
private EntityManager entityManager;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<TrigramSearchRow> search(SearchExecutionContext context) {
|
||||||
|
StringBuilder sql = new StringBuilder("""
|
||||||
|
SELECT
|
||||||
|
d.id AS document_id,
|
||||||
|
dtr.id AS representation_id,
|
||||||
|
d.title AS title,
|
||||||
|
d.summary AS summary,
|
||||||
|
COALESCE(dtr.language_code, d.language_code) AS language_code,
|
||||||
|
d.mime_type AS mime_type,
|
||||||
|
d.document_type AS document_type,
|
||||||
|
d.document_family AS document_family,
|
||||||
|
d.visibility AS visibility,
|
||||||
|
d.created_at AS created_at,
|
||||||
|
d.updated_at AS updated_at,
|
||||||
|
CASE
|
||||||
|
WHEN similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(d.summary, ''), :queryText)
|
||||||
|
AND similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
|
||||||
|
THEN COALESCE(d.title, '')
|
||||||
|
WHEN similarity(COALESCE(d.summary, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
|
||||||
|
THEN COALESCE(d.summary, '')
|
||||||
|
ELSE LEFT(COALESCE(dtr.text_body, ''), 400)
|
||||||
|
END AS snippet,
|
||||||
|
GREATEST(
|
||||||
|
similarity(COALESCE(d.title, ''), :queryText),
|
||||||
|
similarity(COALESCE(d.summary, ''), :queryText),
|
||||||
|
similarity(COALESCE(dtr.text_body, ''), :queryText)
|
||||||
|
) AS score,
|
||||||
|
CASE
|
||||||
|
WHEN similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(d.summary, ''), :queryText)
|
||||||
|
AND similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
|
||||||
|
THEN 'DOCUMENT_TITLE'
|
||||||
|
WHEN similarity(COALESCE(d.summary, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText)
|
||||||
|
THEN 'DOCUMENT_SUMMARY'
|
||||||
|
ELSE 'REPRESENTATION_TEXT'
|
||||||
|
END AS matched_field
|
||||||
|
FROM DOC.doc_text_representation dtr
|
||||||
|
JOIN DOC.doc_document d ON d.id = dtr.document_id
|
||||||
|
LEFT JOIN DOC.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
|
WHERE (
|
||||||
|
COALESCE(d.title, '') % :queryText
|
||||||
|
OR COALESCE(d.summary, '') % :queryText
|
||||||
|
OR COALESCE(dtr.text_body, '') % :queryText
|
||||||
|
)
|
||||||
|
""");
|
||||||
|
|
||||||
|
Map<String, Object> params = newParams();
|
||||||
|
params.put("queryText", context.getRequest().getQueryText().trim());
|
||||||
|
appendGenericFilters(sql, params, context);
|
||||||
|
sql.append(" AND GREATEST(")
|
||||||
|
.append(" similarity(COALESCE(d.title, ''), :queryText),")
|
||||||
|
.append(" similarity(COALESCE(d.summary, ''), :queryText),")
|
||||||
|
.append(" similarity(COALESCE(dtr.text_body, ''), :queryText)")
|
||||||
|
.append(") >= :minSimilarity");
|
||||||
|
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
||||||
|
params.put("minSimilarity", 0.10d);
|
||||||
|
params.put("limit", engineLimit(context));
|
||||||
|
|
||||||
|
Query query = entityManager.createNativeQuery(sql.toString());
|
||||||
|
bindParameters(query, params);
|
||||||
|
|
||||||
|
List<?> rows = query.getResultList();
|
||||||
|
List<TrigramSearchRow> results = new ArrayList<>(rows.size());
|
||||||
|
for (Object row : rows) {
|
||||||
|
Object[] cols = (Object[]) row;
|
||||||
|
results.add(new TrigramSearchRow(
|
||||||
|
asUuid(cols[0]),
|
||||||
|
asUuid(cols[1]),
|
||||||
|
asString(cols[2]),
|
||||||
|
asString(cols[3]),
|
||||||
|
asString(cols[4]),
|
||||||
|
asString(cols[5]),
|
||||||
|
asString(cols[6]),
|
||||||
|
asString(cols[7]),
|
||||||
|
asString(cols[8]),
|
||||||
|
asOffsetDateTime(cols[9]),
|
||||||
|
asOffsetDateTime(cols[10]),
|
||||||
|
asString(cols[11]),
|
||||||
|
asDouble(cols[12]),
|
||||||
|
asString(cols[13])
|
||||||
|
));
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,21 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
 * One result tuple of the full-text search query, in SELECT column order.
 *
 * @param documentId       id of the matched document
 * @param representationId id of the matched text representation
 * @param title            document title
 * @param summary          document summary
 * @param languageCode     language code of the hit
 * @param mimeType         document MIME type
 * @param documentType     document type
 * @param documentFamily   document family
 * @param visibility       document visibility
 * @param createdAt        document creation timestamp
 * @param updatedAt        document last-update timestamp
 * @param snippet          text excerpt for the hit
 * @param score            engine score of the hit
 */
public record FullTextSearchRow(
        UUID documentId,
        UUID representationId,
        String title,
        String summary,
        String languageCode,
        String mimeType,
        String documentType,
        String documentFamily,
        String visibility,
        OffsetDateTime createdAt,
        OffsetDateTime updatedAt,
        String snippet,
        Double score) {
}
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
package at.procon.dip.search.repository;
|
||||||
|
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
/**
 * One result tuple of the trigram search query, in SELECT column order.
 *
 * @param documentId       id of the matched document
 * @param representationId id of the matched text representation
 * @param title            document title
 * @param summary          document summary
 * @param languageCode     language code of the hit
 * @param mimeType         document MIME type
 * @param documentType     document type
 * @param documentFamily   document family
 * @param visibility       document visibility
 * @param createdAt        document creation timestamp
 * @param updatedAt        document last-update timestamp
 * @param snippet          text excerpt for the hit
 * @param score            engine score of the hit
 * @param matchedField     name of the field that produced the score
 */
public record TrigramSearchRow(
        UUID documentId,
        UUID representationId,
        String title,
        String summary,
        String languageCode,
        String mimeType,
        String documentType,
        String documentFamily,
        String visibility,
        OffsetDateTime createdAt,
        OffsetDateTime updatedAt,
        String snippet,
        Double score,
        String matchedField) {
}
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
package at.procon.dip.search.service;
|
||||||
|
|
||||||
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
|
import at.procon.dip.search.api.SearchExecutionPlan;
|
||||||
|
import at.procon.dip.search.dto.SearchEngineType;
|
||||||
|
import at.procon.dip.search.dto.SearchHit;
|
||||||
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
|
import at.procon.dip.search.dto.SearchResponse;
|
||||||
|
import at.procon.dip.search.engine.SearchEngine;
|
||||||
|
import at.procon.dip.search.plan.SearchPlanner;
|
||||||
|
import at.procon.dip.search.rank.SearchResultFusionService;
|
||||||
|
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class DefaultSearchOrchestrator implements SearchOrchestrator {
|
||||||
|
|
||||||
|
private final SearchPlanner planner;
|
||||||
|
private final List<SearchEngine> engines;
|
||||||
|
private final SearchResultFusionService fusionService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchResponse search(SearchRequest request, SearchDocumentScope scope) {
|
||||||
|
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||||
|
.request(request)
|
||||||
|
.scope(scope)
|
||||||
|
.page(request.getPage() == null ? 0 : request.getPage())
|
||||||
|
.size(request.getSize() == null ? 20 : request.getSize())
|
||||||
|
.build();
|
||||||
|
|
||||||
|
SearchExecutionPlan plan = planner.plan(context);
|
||||||
|
|
||||||
|
Map<SearchEngineType, List<SearchHit>> engineResults = new LinkedHashMap<>();
|
||||||
|
for (SearchEngine engine : engines) {
|
||||||
|
if (plan.getEngines().contains(engine.type()) && engine.supports(context)) {
|
||||||
|
engineResults.put(engine.type(), engine.execute(context));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return fusionService.fuse(context, plan, engineResults);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,80 @@
|
|||||||
|
package at.procon.dip.search.service;
|
||||||
|
|
||||||
|
import jakarta.persistence.EntityManager;
|
||||||
|
import jakarta.persistence.PersistenceContext;
|
||||||
|
import jakarta.transaction.Transactional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@Transactional
|
||||||
|
@Slf4j
|
||||||
|
public class DocumentLexicalIndexService {
|
||||||
|
|
||||||
|
@PersistenceContext
|
||||||
|
private EntityManager entityManager;
|
||||||
|
|
||||||
|
public void refreshRepresentationLexicalIndex(UUID representationId) {
|
||||||
|
if (!isLexicalSearchSchemaAvailable()) {
|
||||||
|
log.debug("Skipping lexical index refresh for representation {} because search columns are not available yet", representationId);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
entityManager.createNativeQuery("""
|
||||||
|
UPDATE DOC.doc_text_representation
|
||||||
|
SET search_config = CASE
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'
|
||||||
|
ELSE 'simple'
|
||||||
|
END,
|
||||||
|
search_vector = to_tsvector(
|
||||||
|
CASE
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig
|
||||||
|
ELSE 'simple'::regconfig
|
||||||
|
END,
|
||||||
|
coalesce(text_body, '')
|
||||||
|
)
|
||||||
|
WHERE id = :representationId
|
||||||
|
""")
|
||||||
|
.setParameter("representationId", representationId)
|
||||||
|
.executeUpdate();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void refreshAllMissingLexicalIndexes() {
|
||||||
|
if (!isLexicalSearchSchemaAvailable()) {
|
||||||
|
log.info("Lexical search columns are not available yet. Skipping startup backfill for DOC lexical indexes.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
entityManager.createNativeQuery("""
|
||||||
|
UPDATE DOC.doc_text_representation
|
||||||
|
SET search_config = CASE
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'
|
||||||
|
ELSE 'simple'
|
||||||
|
END,
|
||||||
|
search_vector = to_tsvector(
|
||||||
|
CASE
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig
|
||||||
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig
|
||||||
|
ELSE 'simple'::regconfig
|
||||||
|
END,
|
||||||
|
coalesce(text_body, '')
|
||||||
|
)
|
||||||
|
WHERE search_vector IS NULL
|
||||||
|
""")
|
||||||
|
.executeUpdate();
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isLexicalSearchSchemaAvailable() {
|
||||||
|
Number count = (Number) entityManager.createNativeQuery("""
|
||||||
|
SELECT COUNT(*)
|
||||||
|
FROM information_schema.columns
|
||||||
|
WHERE table_schema = 'doc'
|
||||||
|
AND table_name = 'doc_text_representation'
|
||||||
|
AND column_name IN ('search_config', 'search_vector')
|
||||||
|
""")
|
||||||
|
.getSingleResult();
|
||||||
|
return count != null && count.intValue() >= 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,9 @@
|
|||||||
|
package at.procon.dip.search.service;
|
||||||
|
|
||||||
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
|
import at.procon.dip.search.dto.SearchResponse;
|
||||||
|
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||||
|
|
||||||
|
public interface SearchOrchestrator {
|
||||||
|
SearchResponse search(SearchRequest request, SearchDocumentScope scope);
|
||||||
|
}
|
||||||
@ -0,0 +1,21 @@
|
|||||||
|
package at.procon.dip.search.startup;
|
||||||
|
|
||||||
|
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.CommandLineRunner;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class LexicalSearchStartupRunner implements CommandLineRunner {
|
||||||
|
|
||||||
|
private final DocumentLexicalIndexService lexicalIndexService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run(String... args) {
|
||||||
|
log.info("Refreshing missing lexical search vectors for DOC text representations");
|
||||||
|
lexicalIndexService.refreshAllMissingLexicalIndexes();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,35 @@
|
|||||||
|
package at.procon.dip.search.web;
|
||||||
|
|
||||||
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
|
import at.procon.dip.search.dto.SearchResponse;
|
||||||
|
import at.procon.dip.search.service.SearchOrchestrator;
|
||||||
|
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import java.util.Set;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.web.bind.annotation.PostMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestBody;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/search")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class GenericSearchController {
|
||||||
|
|
||||||
|
private final SearchOrchestrator searchOrchestrator;
|
||||||
|
|
||||||
|
@PostMapping
|
||||||
|
public SearchResponse search(@Valid @RequestBody SearchRequest request) {
|
||||||
|
SearchDocumentScope scope = new SearchDocumentScope(
|
||||||
|
Set.of(),
|
||||||
|
request.getDocumentTypes(),
|
||||||
|
request.getDocumentFamilies(),
|
||||||
|
request.getVisibilities(),
|
||||||
|
request.getLanguageCodes() == null || request.getLanguageCodes().isEmpty()
|
||||||
|
? null
|
||||||
|
: request.getLanguageCodes().iterator().next()
|
||||||
|
);
|
||||||
|
return searchOrchestrator.search(request, scope);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,26 @@
|
|||||||
|
-- Slice 1 generic lexical search support.
-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
-- Every statement is guarded with IF NOT EXISTS, so the script can be re-run safely.

-- Trigram operators, similarity() and the gin_trgm_ops operator class.
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Name of the text search configuration used to build search_vector for the row.
ALTER TABLE DOC.doc_text_representation
    ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);

-- Precomputed full-text search vector of the representation text.
ALTER TABLE DOC.doc_text_representation
    ADD COLUMN IF NOT EXISTS search_vector tsvector;

-- Full-text lookups on the precomputed vector.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
    ON DOC.doc_text_representation
    USING GIN (search_vector);

-- Trigram indexes on the three text fields searched by the trigram engine.
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
    ON DOC.doc_document
    USING GIN (title gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
    ON DOC.doc_document
    USING GIN (summary gin_trgm_ops);

CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
    ON DOC.doc_text_representation
    USING GIN (text_body gin_trgm_ops);
|
||||||
Loading…
Reference in New Issue