Improve generic search query performance
This commit is contained in:
parent
f9df7c8d22
commit
5c3133d19d
|
|
@ -0,0 +1,120 @@
|
|||
# TED Procurement Processor - Git Ignore
|
||||
# Author: Martin.Schweitzer@procon.co.at and claude.ai
|
||||
|
||||
# Compiled class files
|
||||
*.class
|
||||
|
||||
# Maven
|
||||
target/
|
||||
pom.xml.tag
|
||||
pom.xml.releaseBackup
|
||||
pom.xml.versionsBackup
|
||||
pom.xml.next
|
||||
release.properties
|
||||
dependency-reduced-pom.xml
|
||||
buildNumber.properties
|
||||
.mvn/timing.properties
|
||||
.mvn/wrapper/maven-wrapper.jar
|
||||
|
||||
# Gradle
|
||||
.gradle
|
||||
build/
|
||||
|
||||
# IDE - IntelliJ IDEA
|
||||
.idea/
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
|
||||
# IDE - Eclipse
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
bin/
|
||||
|
||||
# IDE - NetBeans
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
|
||||
# IDE - VS Code
|
||||
.vscode/
|
||||
|
||||
# OS Files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Application
|
||||
application-local.yml
|
||||
application-dev.yml
|
||||
application-prod.yml
|
||||
|
||||
# Docker
|
||||
.docker/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
env/
|
||||
venv/
|
||||
.venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
.eggs/
|
||||
*.egg-info/
|
||||
dist/
|
||||
*.egg
|
||||
|
||||
# Model cache
|
||||
models/
|
||||
.cache/
|
||||
|
||||
# Test data
|
||||
test-data/
|
||||
*.xml.bak
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
*.swp
|
||||
*~
|
||||
|
||||
# Secrets
|
||||
*.pem
|
||||
*.key
|
||||
secrets/
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# Processed files (Camel)
|
||||
.processed/
|
||||
.error/
|
||||
|
||||
*.bak
|
||||
.claude
|
||||
|
|
@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
|
||||
@Override
|
||||
public List<SearchHit> search(SearchExecutionContext context, int limit) {
|
||||
String effectiveConfigExpr = effectiveConfigExpression("dtr");
|
||||
String tsQueryExpr = tsQueryExpression(effectiveConfigExpr);
|
||||
|
||||
StringBuilder sql = new StringBuilder("""
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
CAST(dtr.representation_type AS text) AS representation_type,
|
||||
dtr.is_primary AS is_primary,
|
||||
dtr.chunk_index AS chunk_index,
|
||||
dtr.chunk_start_offset AS chunk_start_offset,
|
||||
dtr.chunk_end_offset AS chunk_end_offset,
|
||||
CAST(d.document_type AS text) AS document_type,
|
||||
CAST(d.document_family AS text) AS document_family,
|
||||
CAST(d.visibility AS text) AS visibility,
|
||||
|
|
@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
d.created_at AS created_at,
|
||||
d.updated_at AS updated_at,
|
||||
ts_headline(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
"""
|
||||
).append(effectiveConfigExpr).append("""
|
||||
,
|
||||
COALESCE(dtr.text_body, ''),
|
||||
websearch_to_tsquery(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
:queryText
|
||||
),
|
||||
""").append(tsQueryExpr).append("""
|
||||
,
|
||||
'MaxFragments=2, MinWords=5, MaxWords=20'
|
||||
) AS snippet,
|
||||
ts_rank_cd(
|
||||
dtr.search_vector,
|
||||
websearch_to_tsquery(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
:queryText
|
||||
)
|
||||
) AS score
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE dtr.search_vector IS NOT NULL
|
||||
AND dtr.search_vector @@ websearch_to_tsquery(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
:queryText
|
||||
)
|
||||
ranked.score AS score
|
||||
FROM (
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
ts_rank_cd(
|
||||
dtr.search_vector,
|
||||
""").append(tsQueryExpr).append("""
|
||||
) AS score,
|
||||
d.updated_at AS updated_at
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE dtr.search_vector IS NOT NULL
|
||||
AND dtr.search_vector @@ """).append(tsQueryExpr).append("""
|
||||
""");
|
||||
|
||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||
|
|
@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||
|
||||
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
||||
sql.append("""
|
||||
ORDER BY score DESC, d.updated_at DESC
|
||||
LIMIT :limit
|
||||
) ranked
|
||||
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
|
||||
JOIN doc.doc_document d ON d.id = ranked.document_id
|
||||
ORDER BY ranked.score DESC, d.updated_at DESC
|
||||
""");
|
||||
params.addValue("limit", limit);
|
||||
|
||||
return jdbcTemplate.query(
|
||||
|
|
@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Builds the SQL {@code CASE} expression that picks the text search configuration for a row.
 *
 * <p>The expression yields {@code 'simple'::regconfig} when {@code search_config} is NULL or
 * the empty string (that is what {@code NULLIF(..., '') IS NULL} tests), and otherwise casts
 * the stored {@code search_config} value to {@code regconfig}.
 *
 * <p>The alias is substituted into the SQL text as-is (no quoting or escaping), so it must be
 * a trusted identifier supplied by the code, never user input.
 *
 * @param representationAlias table alias of the text representation row, inserted twice
 * @return the formatted expression with leading and trailing whitespace trimmed
 */
private static String effectiveConfigExpression(String representationAlias) {
|
||||
return """
|
||||
CASE
|
||||
WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE %s.search_config::regconfig
|
||||
END
|
||||
""".formatted(representationAlias, representationAlias).trim();
|
||||
}
|
||||
|
||||
/**
 * Wraps a text search configuration expression in a {@code websearch_to_tsquery(...)} call.
 *
 * <p>The second argument is the named placeholder {@code :queryText}; this method only emits
 * the placeholder, the value is expected to be bound by whoever executes the final SQL.
 * The configuration expression is substituted into the SQL text as-is, so it must be
 * trusted SQL produced by the code.
 *
 * @param configExpression SQL expression evaluating to the search configuration
 * @return the formatted call with leading and trailing whitespace trimmed
 */
private static String tsQueryExpression(String configExpression) {
|
||||
return """
|
||||
websearch_to_tsquery(
|
||||
%s,
|
||||
:queryText
|
||||
)
|
||||
""".formatted(configExpression).trim();
|
||||
}
|
||||
}
|
||||
|
|
@ -34,20 +34,17 @@ final class SearchSqlFilterSupport {
|
|||
boolean tenantJoinPresent) {
|
||||
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
|
||||
if (!CollectionUtils.isEmpty(documentTypes)) {
|
||||
sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)");
|
||||
params.addValue("documentTypes", enumNames(documentTypes));
|
||||
appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes");
|
||||
}
|
||||
|
||||
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
|
||||
if (!CollectionUtils.isEmpty(documentFamilies)) {
|
||||
sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)");
|
||||
params.addValue("documentFamilies", enumNames(documentFamilies));
|
||||
appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies");
|
||||
}
|
||||
|
||||
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
|
||||
if (!CollectionUtils.isEmpty(visibilities)) {
|
||||
sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)");
|
||||
params.addValue("visibilities", enumNames(visibilities));
|
||||
appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities");
|
||||
}
|
||||
|
||||
Set<String> languageCodes = context.getRequest().getLanguageCodes();
|
||||
|
|
@ -62,8 +59,7 @@ final class SearchSqlFilterSupport {
|
|||
|
||||
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
|
||||
if (!CollectionUtils.isEmpty(representationTypes)) {
|
||||
sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)");
|
||||
params.addValue("representationTypes", enumNames(representationTypes));
|
||||
appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes");
|
||||
} else {
|
||||
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
|
||||
if (selectionMode == null) {
|
||||
|
|
@ -242,6 +238,15 @@ final class SearchSqlFilterSupport {
|
|||
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
|
||||
}
|
||||
|
||||
private static void appendTextEnumFilter(StringBuilder sql,
|
||||
MapSqlParameterSource params,
|
||||
String columnExpression,
|
||||
Collection<? extends Enum<?>> values,
|
||||
String parameterName) {
|
||||
sql.append(" AND CAST(").append(columnExpression).append(" AS text) IN (:").append(parameterName).append(")");
|
||||
params.addValue(parameterName, enumNames(values));
|
||||
}
|
||||
|
||||
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
||||
return values.stream().map(Enum::name).collect(Collectors.toList());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,10 @@
|
|||
-- Search performance support indexes for filtered DOC fulltext lookups.
-- All statements use IF NOT EXISTS, so re-running the script does not fail on existing indexes.
-- NOTE(review): the generic search filters compare CAST(<column> AS text) rather than the bare
-- column; confirm the planner can actually use these plain-column indexes for that query shape.
|
||||
|
||||
-- Document filter by type + family, with updated_at (newest first) and id as trailing columns.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated
|
||||
ON DOC.doc_document(document_type, document_family, updated_at DESC, id);
|
||||
|
||||
-- Same as above with visibility added as a third filter column.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated
|
||||
ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id);
|
||||
|
||||
-- Text representations looked up by owning document, then primary flag and representation type.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type
|
||||
ON DOC.doc_text_representation(document_id, is_primary, representation_type);
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
-- Support cast-to-text search filters on installations where DOC type columns are varchar.
|
||||
-- These indexes align with the query shape used by generic search filters.
-- An expression index is only considered when the query uses the same expression, hence every
-- filtered column is indexed as CAST(<column> AS text) instead of the bare column.
|
||||
|
||||
-- Type + family (as text), with updated_at (newest first) and id as trailing columns.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated
|
||||
ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id);
|
||||
|
||||
-- Type + family + visibility (all as text), with updated_at (newest first) and id as trailing columns.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated
|
||||
ON DOC.doc_document (
|
||||
(CAST(document_type AS text)),
|
||||
(CAST(document_family AS text)),
|
||||
(CAST(visibility AS text)),
|
||||
updated_at DESC,
|
||||
id
|
||||
);
|
||||
|
||||
-- Representations filtered by primary flag and representation type (as text), then document_id.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document
|
||||
ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id);
|
||||
|
|
@ -2,10 +2,13 @@ package at.procon.dip.search.repository;
|
|||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import at.procon.dip.search.api.SearchExecutionContext;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
||||
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||
import at.procon.dip.search.dto.SearchRequest;
|
||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
|
@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest {
|
|||
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
||||
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldUseTypedEnumComparisonsForIndexedFilters() {
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("vertrieb")
|
||||
.documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY))
|
||||
.documentFamilies(java.util.Set.of(DocumentFamily.TIME))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
|
||||
.build();
|
||||
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||
.request(request)
|
||||
.scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null))
|
||||
.page(0)
|
||||
.size(20)
|
||||
.build();
|
||||
|
||||
StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
|
||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false);
|
||||
|
||||
String rendered = sql.toString();
|
||||
assertThat(rendered).contains("CAST(d.document_type AS text) IN (:documentTypes)");
|
||||
assertThat(rendered).contains("CAST(d.document_family AS text) IN (:documentFamilies)");
|
||||
assertThat(rendered).contains("CAST(dtr.representation_type AS text) = 'CHUNK'");
|
||||
assertThat(params.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY"));
|
||||
assertThat(params.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue