Improve generic search query performance

This commit is contained in:
trifonovt 2026-05-18 13:21:34 +02:00
parent f9df7c8d22
commit 5c3133d19d
6 changed files with 244 additions and 42 deletions

120
.gitignore vendored Normal file
View File

@ -0,0 +1,120 @@
# TED Procurement Processor - Git Ignore
# Author: Martin.Schweitzer@procon.co.at and claude.ai
# Compiled class files
*.class
# Maven
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.mvn/wrapper/maven-wrapper.jar
# Gradle
.gradle
build/
# IDE - IntelliJ IDEA
.idea/
*.iws
*.iml
*.ipr
out/
# IDE - Eclipse
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
# IDE - NetBeans
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
# IDE - VS Code
.vscode/
# OS Files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
*.log
logs/
# Application
application-local.yml
application-dev.yml
application-prod.yml
# Docker
.docker/
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
.venv/
ENV/
env.bak/
venv.bak/
.eggs/
*.egg-info/
dist/
*.egg
# Model cache
models/
.cache/
# Test data
test-data/
*.xml.bak
# Temporary files
*.tmp
*.temp
*.swp
*~
# Secrets
*.pem
*.key
secrets/
.env
.env.local
.env.*.local
# Database
*.db
*.sqlite
*.sqlite3
# Processed files (Camel)
.processed/
.error/
*.bak
.claude

View File

@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
@Override @Override
public List<SearchHit> search(SearchExecutionContext context, int limit) { public List<SearchHit> search(SearchExecutionContext context, int limit) {
String effectiveConfigExpr = effectiveConfigExpression("dtr");
String tsQueryExpr = tsQueryExpression(effectiveConfigExpr);
StringBuilder sql = new StringBuilder(""" StringBuilder sql = new StringBuilder("""
SELECT SELECT
d.id AS document_id, d.id AS document_id,
dtr.id AS representation_id, dtr.id AS representation_id,
CAST(dtr.representation_type AS text) AS representation_type, CAST(dtr.representation_type AS text) AS representation_type,
dtr.is_primary AS is_primary,
dtr.chunk_index AS chunk_index,
dtr.chunk_start_offset AS chunk_start_offset,
dtr.chunk_end_offset AS chunk_end_offset,
CAST(d.document_type AS text) AS document_type, CAST(d.document_type AS text) AS document_type,
CAST(d.document_family AS text) AS document_family, CAST(d.document_family AS text) AS document_family,
CAST(d.visibility AS text) AS visibility, CAST(d.visibility AS text) AS visibility,
@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
d.created_at AS created_at, d.created_at AS created_at,
d.updated_at AS updated_at, d.updated_at AS updated_at,
ts_headline( ts_headline(
CASE """
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig ).append(effectiveConfigExpr).append("""
ELSE dtr.search_config::regconfig ,
END,
COALESCE(dtr.text_body, ''), COALESCE(dtr.text_body, ''),
websearch_to_tsquery( """).append(tsQueryExpr).append("""
CASE ,
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
ELSE dtr.search_config::regconfig
END,
:queryText
),
'MaxFragments=2, MinWords=5, MaxWords=20' 'MaxFragments=2, MinWords=5, MaxWords=20'
) AS snippet, ) AS snippet,
ranked.score AS score
FROM (
SELECT
d.id AS document_id,
dtr.id AS representation_id,
ts_rank_cd( ts_rank_cd(
dtr.search_vector, dtr.search_vector,
websearch_to_tsquery( """).append(tsQueryExpr).append("""
CASE ) AS score,
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig d.updated_at AS updated_at
ELSE dtr.search_config::regconfig
END,
:queryText
)
) AS score
FROM doc.doc_text_representation dtr FROM doc.doc_text_representation dtr
JOIN doc.doc_document d ON d.id = dtr.document_id JOIN doc.doc_document d ON d.id = dtr.document_id
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
WHERE dtr.search_vector IS NOT NULL WHERE dtr.search_vector IS NOT NULL
AND dtr.search_vector @@ websearch_to_tsquery( AND dtr.search_vector @@ """).append(tsQueryExpr).append("""
CASE
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
ELSE dtr.search_config::regconfig
END,
:queryText
)
"""); """);
MapSqlParameterSource params = new MapSqlParameterSource(); MapSqlParameterSource params = new MapSqlParameterSource();
@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true); SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit"); sql.append("""
ORDER BY score DESC, d.updated_at DESC
LIMIT :limit
) ranked
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
JOIN doc.doc_document d ON d.id = ranked.document_id
ORDER BY ranked.score DESC, d.updated_at DESC
""");
params.addValue("limit", limit); params.addValue("limit", limit);
return jdbcTemplate.query( return jdbcTemplate.query(
@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT) new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
); );
} }
/**
 * Builds the SQL {@code CASE} expression that picks the text search configuration for a
 * text representation row.
 *
 * <p>The expression yields {@code 'simple'::regconfig} when {@code <alias>.search_config} is
 * {@code NULL} or an empty string, and otherwise casts the stored value to {@code regconfig}.
 *
 * @param representationAlias SQL alias of the text representation table; it is inserted into
 *                            the SQL text as-is, so it must be a trusted identifier
 * @return the expression without leading or trailing whitespace
 */
private static String effectiveConfigExpression(String representationAlias) {
    String template = """
            CASE
            WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig
            ELSE %s.search_config::regconfig
            END
            """;
    // The alias is referenced twice: once in the emptiness check and once in the cast.
    String rendered = String.format(template, representationAlias, representationAlias);
    return rendered.trim();
}
/**
 * Wraps a text search configuration expression in a {@code websearch_to_tsquery(...)} call
 * bound to the {@code :queryText} named parameter.
 *
 * @param configExpression SQL expression producing the {@code regconfig} to use; it is
 *                         inserted into the SQL text as-is
 * @return the {@code websearch_to_tsquery} call without leading or trailing whitespace
 */
private static String tsQueryExpression(String configExpression) {
    String template = """
            websearch_to_tsquery(
            %s,
            :queryText
            )
            """;
    String rendered = String.format(template, configExpression);
    return rendered.trim();
}
} }

View File

@ -34,20 +34,17 @@ final class SearchSqlFilterSupport {
boolean tenantJoinPresent) { boolean tenantJoinPresent) {
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes()); Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
if (!CollectionUtils.isEmpty(documentTypes)) { if (!CollectionUtils.isEmpty(documentTypes)) {
sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)"); appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes");
params.addValue("documentTypes", enumNames(documentTypes));
} }
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies()); Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
if (!CollectionUtils.isEmpty(documentFamilies)) { if (!CollectionUtils.isEmpty(documentFamilies)) {
sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)"); appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies");
params.addValue("documentFamilies", enumNames(documentFamilies));
} }
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities()); Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
if (!CollectionUtils.isEmpty(visibilities)) { if (!CollectionUtils.isEmpty(visibilities)) {
sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)"); appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities");
params.addValue("visibilities", enumNames(visibilities));
} }
Set<String> languageCodes = context.getRequest().getLanguageCodes(); Set<String> languageCodes = context.getRequest().getLanguageCodes();
@ -62,8 +59,7 @@ final class SearchSqlFilterSupport {
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes(); Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
if (!CollectionUtils.isEmpty(representationTypes)) { if (!CollectionUtils.isEmpty(representationTypes)) {
sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)"); appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes");
params.addValue("representationTypes", enumNames(representationTypes));
} else { } else {
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode(); SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
if (selectionMode == null) { if (selectionMode == null) {
@ -242,6 +238,15 @@ final class SearchSqlFilterSupport {
return !CollectionUtils.isEmpty(primary) ? primary : fallback; return !CollectionUtils.isEmpty(primary) ? primary : fallback;
} }
/**
 * Appends an {@code AND CAST(<column> AS text) IN (:<parameter>)} predicate to the query and
 * binds the enum constant names as the value of that named parameter.
 *
 * @param sql              query under construction; the predicate is appended to it
 * @param params           named-parameter source that receives the list of enum names
 * @param columnExpression qualified column (for example {@code alias.column}) to cast to text;
 *                         inserted into the SQL text as-is
 * @param values           enum constants whose {@code name()} values are bound
 * @param parameterName    name of the bound parameter, without the leading colon
 */
private static void appendTextEnumFilter(StringBuilder sql,
                                         MapSqlParameterSource params,
                                         String columnExpression,
                                         Collection<? extends Enum<?>> values,
                                         String parameterName) {
    String predicate = " AND CAST(" + columnExpression + " AS text) IN (:" + parameterName + ")";
    sql.append(predicate);
    params.addValue(parameterName, enumNames(values));
}
private static List<String> enumNames(Collection<? extends Enum<?>> values) { private static List<String> enumNames(Collection<? extends Enum<?>> values) {
return values.stream().map(Enum::name).collect(Collectors.toList()); return values.stream().map(Enum::name).collect(Collectors.toList());
} }

View File

@ -0,0 +1,10 @@
-- Search performance support indexes for filtered DOC fulltext lookups.
-- Each statement uses IF NOT EXISTS, so it does not fail when an index with the same name is already present.
-- NOTE(review): the generic search filters compare CAST(<column> AS text); confirm the planner
-- can use these plain-column indexes for that predicate form on the target installations.

-- doc_document: document_type and document_family lead, then updated_at (descending) and id.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated
ON DOC.doc_document(document_type, document_family, updated_at DESC, id);
-- doc_document: same key as above with visibility inserted before updated_at.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated
ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id);
-- doc_text_representation: keyed by owning document first, then is_primary and representation_type.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type
ON DOC.doc_text_representation(document_id, is_primary, representation_type);

View File

@ -0,0 +1,17 @@
-- Support cast-to-text search filters on installations where DOC type columns are varchar.
-- These indexes align with the query shape used by generic search filters.
-- Each statement uses IF NOT EXISTS, so it does not fail when an index with the same name is already present.
-- NOTE(review): index expressions must be immutable; if any of these columns is a PostgreSQL
-- enum type on some installation, verify that CAST(<column> AS text) is accepted there.

-- doc_document: text-cast document_type and document_family lead, then updated_at (descending) and id.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated
ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id);
-- doc_document: same key as above with text-cast visibility inserted before updated_at.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated
ON DOC.doc_document (
(CAST(document_type AS text)),
(CAST(document_family AS text)),
(CAST(visibility AS text)),
updated_at DESC,
id
);
-- doc_text_representation: is_primary and text-cast representation_type lead, document_id comes last.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document
ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id);

View File

@ -2,10 +2,13 @@ package at.procon.dip.search.repository;
import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThat;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.DocumentAttributeValueType; import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.search.api.SearchExecutionContext; import at.procon.dip.search.api.SearchExecutionContext;
import at.procon.dip.search.dto.DocumentAttributeFilterOperator; import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
import at.procon.dip.search.dto.DocumentAttributeFilterRequest; import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
import at.procon.dip.search.dto.SearchRequest; import at.procon.dip.search.dto.SearchRequest;
import at.procon.dip.search.spi.SearchDocumentScope; import at.procon.dip.search.spi.SearchDocumentScope;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest {
assertThat(params.getValue("attributeName1")).isEqualTo("version"); assertThat(params.getValue("attributeName1")).isEqualTo("version");
assertThat(params.getValue("attributeValue1")).isEqualTo(3L); assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
} }
@Test
void shouldUseTypedEnumComparisonsForIndexedFilters() {
    // NOTE(review): despite the method name, the assertions below pin the CAST(... AS text)
    // predicate form, not a typed enum comparison - confirm which one is intended.
    SearchRequest searchRequest = SearchRequest.builder()
            .queryText("vertrieb")
            .documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY))
            .documentFamilies(java.util.Set.of(DocumentFamily.TIME))
            .representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
            .build();
    SearchDocumentScope scope = new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null);
    SearchExecutionContext executionContext = SearchExecutionContext.builder()
            .request(searchRequest)
            .scope(scope)
            .page(0)
            .size(20)
            .build();

    MapSqlParameterSource boundParams = new MapSqlParameterSource();
    StringBuilder query = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");

    // Last argument is false: the base query above does not join the tenant table.
    SearchSqlFilterSupport.appendCommonFilters(query, boundParams, executionContext, "d", "dtr", false);

    // Document type and family are filtered through named parameters; the selection mode is
    // expected to contribute a literal comparison against 'CHUNK'.
    assertThat(query.toString())
            .contains("CAST(d.document_type AS text) IN (:documentTypes)")
            .contains("CAST(d.document_family AS text) IN (:documentFamilies)")
            .contains("CAST(dtr.representation_type AS text) = 'CHUNK'");

    // Enum constants are bound by name, as lists of strings.
    assertThat(boundParams.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY"));
    assertThat(boundParams.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME"));
}
} }