diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..de79da0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,120 @@ +# TED Procurement Processor - Git Ignore +# Author: Martin.Schweitzer@procon.co.at and claude.ai + +# Compiled class files +*.class + +# Maven +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +.mvn/wrapper/maven-wrapper.jar + +# Gradle +.gradle +build/ + +# IDE - IntelliJ IDEA +.idea/ +*.iws +*.iml +*.ipr +out/ + +# IDE - Eclipse +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ + +# IDE - NetBeans +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +# IDE - VS Code +.vscode/ + +# OS Files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +*.log +logs/ + +# Application +application-local.yml +application-dev.yml +application-prod.yml + +# Docker +.docker/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +.venv/ +ENV/ +env.bak/ +venv.bak/ +.eggs/ +*.egg-info/ +dist/ +*.egg + +# Model cache +models/ +.cache/ + +# Test data +test-data/ +*.xml.bak + +# Temporary files +*.tmp +*.temp +*.swp +*~ + +# Secrets +*.pem +*.key +secrets/ +.env +.env.local +.env.*.local + +# Database +*.db +*.sqlite +*.sqlite3 + +# Processed files (Camel) +.processed/ +.error/ + +*.bak +.claude \ No newline at end of file diff --git a/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java index a2b635e..0980de7 100644 --- a/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java +++ b/src/main/java/at/procon/dip/search/repository/DocumentFullTextSearchRepositoryImpl.java @@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea @Override public List search(SearchExecutionContext context, int limit) { + String effectiveConfigExpr = effectiveConfigExpression("dtr"); + String tsQueryExpr = tsQueryExpression(effectiveConfigExpr); + StringBuilder sql = new StringBuilder(""" SELECT d.id AS document_id, dtr.id AS representation_id, CAST(dtr.representation_type AS text) AS representation_type, + dtr.is_primary AS is_primary, + dtr.chunk_index AS chunk_index, + dtr.chunk_start_offset AS chunk_start_offset, + dtr.chunk_end_offset AS chunk_end_offset, CAST(d.document_type AS text) AS document_type, CAST(d.document_family AS text) AS document_family, CAST(d.visibility AS text) AS visibility, @@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea d.created_at AS created_at, d.updated_at AS updated_at, ts_headline( - CASE - WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig - ELSE dtr.search_config::regconfig - END, + """ + ).append(effectiveConfigExpr).append(""" + , COALESCE(dtr.text_body, ''), - websearch_to_tsquery( - CASE - WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig - ELSE dtr.search_config::regconfig - END, - :queryText - ), + """).append(tsQueryExpr).append(""" + , 'MaxFragments=2, MinWords=5, MaxWords=20' ) AS snippet, - ts_rank_cd( - dtr.search_vector, - websearch_to_tsquery( - CASE - WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig - ELSE dtr.search_config::regconfig - END, - :queryText - ) - ) AS score - FROM doc.doc_text_representation dtr - JOIN doc.doc_document d ON d.id = dtr.document_id - LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id - WHERE dtr.search_vector IS NOT NULL - AND dtr.search_vector @@ websearch_to_tsquery( - CASE - WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig - ELSE dtr.search_config::regconfig - END, - :queryText - ) + ranked.score AS score + FROM ( + SELECT + d.id AS document_id, + dtr.id AS representation_id, + ts_rank_cd( + dtr.search_vector, + """).append(tsQueryExpr).append(""" + ) AS score, + d.updated_at AS updated_at + FROM doc.doc_text_representation dtr + JOIN doc.doc_document d ON d.id = dtr.document_id + LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id + WHERE dtr.search_vector IS NOT NULL + AND dtr.search_vector @@ """).append(tsQueryExpr).append(""" """); MapSqlParameterSource params = new MapSqlParameterSource(); @@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true); - sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit"); + sql.append(""" + ORDER BY score DESC, d.updated_at DESC + LIMIT :limit + ) ranked + JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id + JOIN doc.doc_document d ON d.id = ranked.document_id + ORDER BY ranked.score DESC, d.updated_at DESC + """); params.addValue("limit", limit); return jdbcTemplate.query( @@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT) ); } -} \ No newline at end of file + + private static String effectiveConfigExpression(String representationAlias) { + return """ + CASE + WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig + ELSE %s.search_config::regconfig + END + """.formatted(representationAlias, representationAlias).trim(); + } + + private static String tsQueryExpression(String configExpression) { + return """ + websearch_to_tsquery( + %s, + :queryText + ) + """.formatted(configExpression).trim(); + } +} diff --git a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java index 9a7862f..e5d8a75 100644 --- a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java +++ b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java @@ -34,20 +34,17 @@ final class SearchSqlFilterSupport { boolean tenantJoinPresent) { Set documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes()); if (!CollectionUtils.isEmpty(documentTypes)) { - sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)"); - params.addValue("documentTypes", enumNames(documentTypes)); + appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes"); } Set documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies()); if (!CollectionUtils.isEmpty(documentFamilies)) { - sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)"); - params.addValue("documentFamilies", enumNames(documentFamilies)); + appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies"); } Set visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities()); if (!CollectionUtils.isEmpty(visibilities)) { - sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)"); - params.addValue("visibilities", enumNames(visibilities)); + appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities"); } Set languageCodes = context.getRequest().getLanguageCodes(); @@ -62,8 +59,7 @@ final class SearchSqlFilterSupport { Set representationTypes = context.getRequest().getRepresentationTypes(); if (!CollectionUtils.isEmpty(representationTypes)) { - sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)"); - params.addValue("representationTypes", enumNames(representationTypes)); + appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes"); } else { SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode(); if (selectionMode == null) { @@ -242,6 +238,15 @@ final class SearchSqlFilterSupport { return !CollectionUtils.isEmpty(primary) ? primary : fallback; } + private static void appendTextEnumFilter(StringBuilder sql, + MapSqlParameterSource params, + String columnExpression, + Collection> values, + String parameterName) { + sql.append(" AND CAST(").append(columnExpression).append(" AS text) IN (:").append(parameterName).append(")"); + params.addValue(parameterName, enumNames(values)); + } + private static List enumNames(Collection> values) { return values.stream().map(Enum::name).collect(Collectors.toList()); } diff --git a/src/main/resources/db/migration/V44__doc_search_performance_indexes.sql b/src/main/resources/db/migration/V44__doc_search_performance_indexes.sql new file mode 100644 index 0000000..5c796c3 --- /dev/null +++ b/src/main/resources/db/migration/V44__doc_search_performance_indexes.sql @@ -0,0 +1,10 @@ +-- Search performance support indexes for filtered DOC fulltext lookups. + +CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated + ON DOC.doc_document(document_type, document_family, updated_at DESC, id); + +CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated + ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id); + +CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type + ON DOC.doc_text_representation(document_id, is_primary, representation_type); diff --git a/src/main/resources/db/migration/V45__doc_search_text_filter_expression_indexes.sql b/src/main/resources/db/migration/V45__doc_search_text_filter_expression_indexes.sql new file mode 100644 index 0000000..6166786 --- /dev/null +++ b/src/main/resources/db/migration/V45__doc_search_text_filter_expression_indexes.sql @@ -0,0 +1,17 @@ +-- Support cast-to-text search filters on installations where DOC type columns are varchar. +-- These indexes align with the query shape used by generic search filters. + +CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated + ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id); + +CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated + ON DOC.doc_document ( + (CAST(document_type AS text)), + (CAST(document_family AS text)), + (CAST(visibility AS text)), + updated_at DESC, + id + ); + +CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document + ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id); diff --git a/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java b/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java index ad5cfcc..a32a4cf 100644 --- a/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java +++ b/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java @@ -2,10 +2,13 @@ package at.procon.dip.search.repository; import static org.assertj.core.api.Assertions.assertThat; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentType; import at.procon.dip.domain.document.DocumentAttributeValueType; import at.procon.dip.search.api.SearchExecutionContext; import at.procon.dip.search.dto.DocumentAttributeFilterOperator; import at.procon.dip.search.dto.DocumentAttributeFilterRequest; +import at.procon.dip.search.dto.SearchRepresentationSelectionMode; import at.procon.dip.search.dto.SearchRequest; import at.procon.dip.search.spi.SearchDocumentScope; import org.junit.jupiter.api.Test; @@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest { assertThat(params.getValue("attributeName1")).isEqualTo("version"); assertThat(params.getValue("attributeValue1")).isEqualTo(3L); } + + @Test + void shouldUseTypedEnumComparisonsForIndexedFilters() { + SearchRequest request = SearchRequest.builder() + .queryText("vertrieb") + .documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY)) + .documentFamilies(java.util.Set.of(DocumentFamily.TIME)) + .representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS) + .build(); + SearchExecutionContext context = SearchExecutionContext.builder() + .request(request) + .scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null)) + .page(0) + .size(20) + .build(); + + StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1"); + MapSqlParameterSource params = new MapSqlParameterSource(); + SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false); + + String rendered = sql.toString(); + assertThat(rendered).contains("CAST(d.document_type AS text) IN (:documentTypes)"); + assertThat(rendered).contains("CAST(d.document_family AS text) IN (:documentFamilies)"); + assertThat(rendered).contains("CAST(dtr.representation_type AS text) = 'CHUNK'"); + assertThat(params.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY")); + assertThat(params.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME")); + } }