Improve generic search query performance
This commit is contained in:
parent
f9df7c8d22
commit
5c3133d19d
|
|
@ -0,0 +1,120 @@
|
||||||
|
# TED Procurement Processor - Git Ignore
|
||||||
|
# Author: Martin.Schweitzer@procon.co.at and claude.ai
|
||||||
|
|
||||||
|
# Compiled class files
|
||||||
|
*.class
|
||||||
|
|
||||||
|
# Maven
|
||||||
|
target/
|
||||||
|
pom.xml.tag
|
||||||
|
pom.xml.releaseBackup
|
||||||
|
pom.xml.versionsBackup
|
||||||
|
pom.xml.next
|
||||||
|
release.properties
|
||||||
|
dependency-reduced-pom.xml
|
||||||
|
buildNumber.properties
|
||||||
|
.mvn/timing.properties
|
||||||
|
.mvn/wrapper/maven-wrapper.jar
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
.gradle
|
||||||
|
build/
|
||||||
|
|
||||||
|
# IDE - IntelliJ IDEA
|
||||||
|
.idea/
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
out/
|
||||||
|
|
||||||
|
# IDE - Eclipse
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
bin/
|
||||||
|
|
||||||
|
# IDE - NetBeans
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
|
||||||
|
# IDE - VS Code
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# OS Files
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# Application
|
||||||
|
application-local.yml
|
||||||
|
application-dev.yml
|
||||||
|
application-prod.yml
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
.docker/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
.venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
.eggs/
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# Model cache
|
||||||
|
models/
|
||||||
|
.cache/
|
||||||
|
|
||||||
|
# Test data
|
||||||
|
test-data/
|
||||||
|
*.xml.bak
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.tmp
|
||||||
|
*.temp
|
||||||
|
*.swp
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Secrets
|
||||||
|
*.pem
|
||||||
|
*.key
|
||||||
|
secrets/
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
|
||||||
|
# Database
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
|
||||||
|
# Processed files (Camel)
|
||||||
|
.processed/
|
||||||
|
.error/
|
||||||
|
|
||||||
|
*.bak
|
||||||
|
.claude
|
||||||
|
|
@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<SearchHit> search(SearchExecutionContext context, int limit) {
|
public List<SearchHit> search(SearchExecutionContext context, int limit) {
|
||||||
|
String effectiveConfigExpr = effectiveConfigExpression("dtr");
|
||||||
|
String tsQueryExpr = tsQueryExpression(effectiveConfigExpr);
|
||||||
|
|
||||||
StringBuilder sql = new StringBuilder("""
|
StringBuilder sql = new StringBuilder("""
|
||||||
SELECT
|
SELECT
|
||||||
d.id AS document_id,
|
d.id AS document_id,
|
||||||
dtr.id AS representation_id,
|
dtr.id AS representation_id,
|
||||||
CAST(dtr.representation_type AS text) AS representation_type,
|
CAST(dtr.representation_type AS text) AS representation_type,
|
||||||
|
dtr.is_primary AS is_primary,
|
||||||
|
dtr.chunk_index AS chunk_index,
|
||||||
|
dtr.chunk_start_offset AS chunk_start_offset,
|
||||||
|
dtr.chunk_end_offset AS chunk_end_offset,
|
||||||
CAST(d.document_type AS text) AS document_type,
|
CAST(d.document_type AS text) AS document_type,
|
||||||
CAST(d.document_family AS text) AS document_family,
|
CAST(d.document_family AS text) AS document_family,
|
||||||
CAST(d.visibility AS text) AS visibility,
|
CAST(d.visibility AS text) AS visibility,
|
||||||
|
|
@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
d.created_at AS created_at,
|
d.created_at AS created_at,
|
||||||
d.updated_at AS updated_at,
|
d.updated_at AS updated_at,
|
||||||
ts_headline(
|
ts_headline(
|
||||||
CASE
|
"""
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
).append(effectiveConfigExpr).append("""
|
||||||
ELSE dtr.search_config::regconfig
|
,
|
||||||
END,
|
|
||||||
COALESCE(dtr.text_body, ''),
|
COALESCE(dtr.text_body, ''),
|
||||||
websearch_to_tsquery(
|
""").append(tsQueryExpr).append("""
|
||||||
CASE
|
,
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
|
||||||
ELSE dtr.search_config::regconfig
|
|
||||||
END,
|
|
||||||
:queryText
|
|
||||||
),
|
|
||||||
'MaxFragments=2, MinWords=5, MaxWords=20'
|
'MaxFragments=2, MinWords=5, MaxWords=20'
|
||||||
) AS snippet,
|
) AS snippet,
|
||||||
ts_rank_cd(
|
ranked.score AS score
|
||||||
dtr.search_vector,
|
FROM (
|
||||||
websearch_to_tsquery(
|
SELECT
|
||||||
CASE
|
d.id AS document_id,
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
dtr.id AS representation_id,
|
||||||
ELSE dtr.search_config::regconfig
|
ts_rank_cd(
|
||||||
END,
|
dtr.search_vector,
|
||||||
:queryText
|
""").append(tsQueryExpr).append("""
|
||||||
)
|
) AS score,
|
||||||
) AS score
|
d.updated_at AS updated_at
|
||||||
FROM doc.doc_text_representation dtr
|
FROM doc.doc_text_representation dtr
|
||||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
WHERE dtr.search_vector IS NOT NULL
|
WHERE dtr.search_vector IS NOT NULL
|
||||||
AND dtr.search_vector @@ websearch_to_tsquery(
|
AND dtr.search_vector @@ """).append(tsQueryExpr).append("""
|
||||||
CASE
|
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
|
||||||
ELSE dtr.search_config::regconfig
|
|
||||||
END,
|
|
||||||
:queryText
|
|
||||||
)
|
|
||||||
""");
|
""");
|
||||||
|
|
||||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||||
|
|
@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
|
|
||||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||||
|
|
||||||
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
sql.append("""
|
||||||
|
ORDER BY score DESC, d.updated_at DESC
|
||||||
|
LIMIT :limit
|
||||||
|
) ranked
|
||||||
|
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
|
||||||
|
JOIN doc.doc_document d ON d.id = ranked.document_id
|
||||||
|
ORDER BY ranked.score DESC, d.updated_at DESC
|
||||||
|
""");
|
||||||
params.addValue("limit", limit);
|
params.addValue("limit", limit);
|
||||||
|
|
||||||
return jdbcTemplate.query(
|
return jdbcTemplate.query(
|
||||||
|
|
@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
|
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the SQL expression that resolves the text-search configuration for one
 * representation row.
 *
 * <p>The generated {@code CASE} yields {@code 'simple'::regconfig} when
 * {@code <alias>.search_config} is NULL or the empty string, and otherwise casts the
 * column value to {@code regconfig}. Leading and trailing whitespace of the rendered
 * fragment is removed before it is returned.
 *
 * <p>The alias is spliced into the SQL text as-is (it is not a bind parameter), so it
 * must be a trusted identifier. NOTE(review): the only call visible here passes the
 * literal {@code "dtr"}.
 *
 * @param representationAlias table alias of the text-representation row in the query
 * @return the {@code CASE ... END} fragment
 */
private static String effectiveConfigExpression(String representationAlias) {
|
||||||
|
return """
|
||||||
|
CASE
|
||||||
|
WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||||
|
ELSE %s.search_config::regconfig
|
||||||
|
END
|
||||||
|
""".formatted(representationAlias, representationAlias).trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Wraps a text-search configuration expression in a {@code websearch_to_tsquery(...)}
 * call whose second argument is the named parameter {@code :queryText}.
 *
 * <p>The configuration expression is inserted into the SQL text verbatim, and the
 * rendered fragment is trimmed of leading and trailing whitespace. The statement that
 * embeds the fragment must bind {@code queryText}. NOTE(review): that binding is not
 * visible in this part of the file; confirm it at the call site.
 *
 * @param configExpression SQL expression evaluating to the search configuration
 * @return the {@code websearch_to_tsquery(<config>, :queryText)} fragment
 */
private static String tsQueryExpression(String configExpression) {
|
||||||
|
return """
|
||||||
|
websearch_to_tsquery(
|
||||||
|
%s,
|
||||||
|
:queryText
|
||||||
|
)
|
||||||
|
""".formatted(configExpression).trim();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -34,20 +34,17 @@ final class SearchSqlFilterSupport {
|
||||||
boolean tenantJoinPresent) {
|
boolean tenantJoinPresent) {
|
||||||
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
|
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
|
||||||
if (!CollectionUtils.isEmpty(documentTypes)) {
|
if (!CollectionUtils.isEmpty(documentTypes)) {
|
||||||
sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)");
|
appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes");
|
||||||
params.addValue("documentTypes", enumNames(documentTypes));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
|
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
|
||||||
if (!CollectionUtils.isEmpty(documentFamilies)) {
|
if (!CollectionUtils.isEmpty(documentFamilies)) {
|
||||||
sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)");
|
appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies");
|
||||||
params.addValue("documentFamilies", enumNames(documentFamilies));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
|
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
|
||||||
if (!CollectionUtils.isEmpty(visibilities)) {
|
if (!CollectionUtils.isEmpty(visibilities)) {
|
||||||
sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)");
|
appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities");
|
||||||
params.addValue("visibilities", enumNames(visibilities));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<String> languageCodes = context.getRequest().getLanguageCodes();
|
Set<String> languageCodes = context.getRequest().getLanguageCodes();
|
||||||
|
|
@ -62,8 +59,7 @@ final class SearchSqlFilterSupport {
|
||||||
|
|
||||||
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
|
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
|
||||||
if (!CollectionUtils.isEmpty(representationTypes)) {
|
if (!CollectionUtils.isEmpty(representationTypes)) {
|
||||||
sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)");
|
appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes");
|
||||||
params.addValue("representationTypes", enumNames(representationTypes));
|
|
||||||
} else {
|
} else {
|
||||||
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
|
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
|
||||||
if (selectionMode == null) {
|
if (selectionMode == null) {
|
||||||
|
|
@ -242,6 +238,15 @@ final class SearchSqlFilterSupport {
|
||||||
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
|
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Appends an {@code AND CAST(<column> AS text) IN (:<parameter>)} predicate to the SQL
 * being built and binds the matching parameter.
 *
 * <p>The bound value is the list of enum constant names produced by
 * {@code enumNames(values)}. Both {@code columnExpression} and {@code parameterName}
 * are concatenated into the SQL text unescaped, so they must come from code, never
 * from request input.
 *
 * @param sql              statement under construction; the predicate is appended to it
 * @param params           parameter source that receives the bound name list
 * @param columnExpression qualified column (for example {@code alias.column}) to compare
 * @param values           enum constants to match
 * @param parameterName    named-parameter identifier, without the leading colon
 */
private static void appendTextEnumFilter(StringBuilder sql,
|
||||||
|
MapSqlParameterSource params,
|
||||||
|
String columnExpression,
|
||||||
|
Collection<? extends Enum<?>> values,
|
||||||
|
String parameterName) {
|
||||||
|
sql.append(" AND CAST(").append(columnExpression).append(" AS text) IN (:").append(parameterName).append(")");
|
||||||
|
params.addValue(parameterName, enumNames(values));
|
||||||
|
}
|
||||||
|
|
||||||
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
||||||
return values.stream().map(Enum::name).collect(Collectors.toList());
|
return values.stream().map(Enum::name).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
-- Search performance support indexes for filtered DOC fulltext lookups.
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated
|
||||||
|
ON DOC.doc_document(document_type, document_family, updated_at DESC, id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated
|
||||||
|
ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type
|
||||||
|
ON DOC.doc_text_representation(document_id, is_primary, representation_type);
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
-- Support cast-to-text search filters on installations where DOC type columns are varchar.
|
||||||
|
-- These indexes align with the query shape used by generic search filters.
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated
|
||||||
|
ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated
|
||||||
|
ON DOC.doc_document (
|
||||||
|
(CAST(document_type AS text)),
|
||||||
|
(CAST(document_family AS text)),
|
||||||
|
(CAST(visibility AS text)),
|
||||||
|
updated_at DESC,
|
||||||
|
id
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document
|
||||||
|
ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id);
|
||||||
|
|
@ -2,10 +2,13 @@ package at.procon.dip.search.repository;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||||
import at.procon.dip.search.api.SearchExecutionContext;
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
||||||
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
||||||
|
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||||
import at.procon.dip.search.dto.SearchRequest;
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest {
|
||||||
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
||||||
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Verifies the predicates and bound parameters that
 * {@code SearchSqlFilterSupport.appendCommonFilters} emits for a request restricted by
 * document type, document family and the {@code PRIMARY_AND_CHUNKS} selection mode.
 *
 * <p>NOTE(review): the method name speaks of "typed enum comparisons", yet every
 * assertion below expects a {@code CAST(... AS text)} predicate. Either the name or the
 * expectations look stale; confirm which one reflects the intended query shape.
 */
@Test
|
||||||
|
void shouldUseTypedEnumComparisonsForIndexedFilters() {
|
||||||
|
SearchRequest request = SearchRequest.builder()
|
||||||
|
.queryText("vertrieb")
|
||||||
|
.documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY))
|
||||||
|
.documentFamilies(java.util.Set.of(DocumentFamily.TIME))
|
||||||
|
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
|
||||||
|
.build();
|
||||||
|
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||||
|
.request(request)
|
||||||
|
.scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null))
|
||||||
|
.page(0)
|
||||||
|
.size(20)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
|
||||||
|
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||||
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false);
|
||||||
|
|
||||||
|
String rendered = sql.toString();
|
||||||
|
// Type and family filters are expected as cast-to-text IN predicates with named parameters.
|
||||||
assertThat(rendered).contains("CAST(d.document_type AS text) IN (:documentTypes)");
|
||||||
|
assertThat(rendered).contains("CAST(d.document_family AS text) IN (:documentFamilies)");
|
||||||
|
// Presumably emitted by the PRIMARY_AND_CHUNKS branch of appendCommonFilters, which is not
|
||||||
// visible in this part of the file -- verify against that method.
|
||||||
assertThat(rendered).contains("CAST(dtr.representation_type AS text) = 'CHUNK'");
|
||||||
|
// Bound values are the enum constant names, not the enum instances.
|
||||||
assertThat(params.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY"));
|
||||||
|
assertThat(params.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue