From 847cb40f8a194054af96c8deedc8a67f0de1daef Mon Sep 17 00:00:00 2001 From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com> Date: Mon, 23 Mar 2026 15:09:14 +0100 Subject: [PATCH] embedding nv3.1 - test --- ...eAndExtensionDocumentTypeDetectorTest.java | 2 +- .../SpreadsheetDocumentExtractorTest.java | 8 ++- ...ailDocumentIngestionAdapterBundleTest.java | 2 +- .../MailBundleProcessingIntegrationTest.java | 2 + ...ericSearchOrchestratorIntegrationTest.java | 2 +- ...SemanticSearchEndpointIntegrationTest.java | 13 ++++- ...nticSearchOrchestratorIntegrationTest.java | 4 ++ ...SemanticModelSelectionIntegrationTest.java | 4 ++ .../AbstractSearchIntegrationTest.java | 14 ++--- ...AbstractSemanticSearchIntegrationTest.java | 34 +++++++---- .../SearchSemanticTestApplication.java | 57 +++++++++++++++++-- .../sql/create-doc-search-test-schemas.sql | 2 +- 12 files changed, 108 insertions(+), 36 deletions(-) diff --git a/src/test/java/at/procon/dip/extraction/impl/BasicMimeAndExtensionDocumentTypeDetectorTest.java b/src/test/java/at/procon/dip/extraction/impl/BasicMimeAndExtensionDocumentTypeDetectorTest.java index e0c43cc..ea0c754 100644 --- a/src/test/java/at/procon/dip/extraction/impl/BasicMimeAndExtensionDocumentTypeDetectorTest.java +++ b/src/test/java/at/procon/dip/extraction/impl/BasicMimeAndExtensionDocumentTypeDetectorTest.java @@ -38,7 +38,7 @@ class BasicMimeAndExtensionDocumentTypeDetectorTest { assertThat(result.documentType()).isEqualTo(DocumentType.GENERIC_BINARY); assertThat(result.mimeType()).isEqualTo("application/vnd.ms-excel"); assertThat(result.attributes()).containsEntry("detectedExtension", "xls"); - assertThat(result.attributes()).containsEntry("effectiveMediaType", "application/vnd.ms-excel"); + assertThat(result.attributes()).containsEntry("normalizedMediaType", "application/vnd.ms-excel"); } @Test diff --git a/src/test/java/at/procon/dip/extraction/impl/SpreadsheetDocumentExtractorTest.java b/src/test/java/at/procon/dip/extraction/impl/SpreadsheetDocumentExtractorTest.java index 894f372..d2162df 100644 --- a/src/test/java/at/procon/dip/extraction/impl/SpreadsheetDocumentExtractorTest.java +++ b/src/test/java/at/procon/dip/extraction/impl/SpreadsheetDocumentExtractorTest.java @@ -47,8 +47,8 @@ class SpreadsheetDocumentExtractorTest { String text = result.derivedTextByRole().get(ContentRole.NORMALIZED_TEXT); assertNotNull(text); assertTrue(text.contains("Sheet: Sheet1")); - assertTrue(text.contains("Name | Amount")); - assertTrue(text.contains("Alice | 42")); + assertTrue(text.contains("Name\tAmount")); + assertTrue(text.contains("Alice\t42")); } @Test @@ -74,7 +74,9 @@ class SpreadsheetDocumentExtractorTest { ExtractionResult result = extractor.extract(new ExtractionRequest(source, detection, csv, data)); String text = result.derivedTextByRole().get(ContentRole.NORMALIZED_TEXT); - assertEquals("Name | Amount\nAlice | 42\nBob | 77", text); + assertEquals("Name,Amount\n" + + "Alice,42\n" + + "Bob,77", text); } private byte[] createLegacyXls() throws Exception { diff --git a/src/test/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapterBundleTest.java b/src/test/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapterBundleTest.java index bf4f413..6d15c6f 100644 --- a/src/test/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapterBundleTest.java +++ b/src/test/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapterBundleTest.java @@ -63,7 +63,7 @@ class MailDocumentIngestionAdapterBundleTest { properties.getGenericIngestion().setMailAdapterEnabled(true); properties.getGenericIngestion().setExpandMailZipAttachments(false); properties.getGenericIngestion().setMailImportBatchId("test-mail-bundle"); - when(zipExtractionService.canHandle(any(), any())).thenReturn(false); + lenient().when(zipExtractionService.canHandle(any(), any())).thenReturn(false); adapter = new MailDocumentIngestionAdapter(properties, importService, new MailMessageExtractionService(), relationService, zipExtractionService); } diff --git a/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java b/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java index d171706..7c30b86 100644 --- a/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java +++ b/src/test/java/at/procon/dip/ingestion/integration/MailBundleProcessingIntegrationTest.java @@ -35,6 +35,7 @@ import at.procon.dip.ingestion.spi.SourceDescriptor; import at.procon.dip.normalization.impl.DefaultGenericTextRepresentationBuilder; import at.procon.dip.normalization.service.TextRepresentationBuildService; import at.procon.dip.processing.service.StructuredDocumentProcessingService; +import at.procon.dip.search.service.DocumentLexicalIndexService; import at.procon.ted.config.TedProcessorProperties; import at.procon.ted.service.attachment.PdfExtractionService; import at.procon.ted.service.attachment.ZipExtractionService; @@ -369,6 +370,7 @@ class MailBundleProcessingIntegrationTest { DocumentExtractionService.class, GenericDocumentImportService.class, StructuredDocumentProcessingService.class, + DocumentLexicalIndexService.class }) static class TestApplication { diff --git a/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java index 86b463e..82275da 100644 --- a/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/GenericSearchOrchestratorIntegrationTest.java @@ -18,7 +18,7 @@ import org.springframework.test.annotation.DirtiesContext; import static org.assertj.core.api.Assertions.assertThat; -@DirtiesContext(classMode = DirtiesContext.ClassMode.BEFORE_CLASS) +//@DirtiesContext(classMode = DirtiesContext.ClassMode.BEFORE_CLASS) class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegrationTest { @Autowired diff --git a/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchEndpointIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchEndpointIntegrationTest.java index db8644a..8db322d 100644 --- a/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchEndpointIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchEndpointIntegrationTest.java @@ -8,11 +8,16 @@ import at.procon.dip.search.dto.SearchRepresentationSelectionMode; import at.procon.dip.search.dto.SearchRequest; import at.procon.dip.testsupport.AbstractSemanticSearchIntegrationTest; import at.procon.dip.testsupport.SemanticSearchTestDataFactory; +import at.procon.dip.testsupport.config.SearchTestConfig; import at.procon.dip.testsupport.config.SearchTestJacksonConfig; import com.fasterxml.jackson.databind.ObjectMapper; import java.util.Set; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.autoconfigure.ImportAutoConfiguration; +import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration; +import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration; +import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration; import org.springframework.context.annotation.Import; import org.springframework.http.MediaType; import org.springframework.test.context.TestPropertySource; @@ -24,9 +29,11 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers. import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status; @AutoConfigureMockMvc -@Import(SearchTestJacksonConfig.class) -@TestPropertySource(properties = { - "spring.mvc.converters.preferred-json-mapper=jackson" +@Import(SearchTestConfig.class) +@ImportAutoConfiguration({ + JacksonAutoConfiguration.class, + HttpMessageConvertersAutoConfiguration.class, + WebMvcAutoConfiguration.class }) class GenericSemanticSearchEndpointIntegrationTest extends AbstractSemanticSearchIntegrationTest { diff --git a/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java index d60182a..325c930 100644 --- a/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/GenericSemanticSearchOrchestratorIntegrationTest.java @@ -13,11 +13,15 @@ import at.procon.dip.search.spi.SearchDocumentScope; import at.procon.dip.testsupport.AbstractSemanticSearchIntegrationTest; import at.procon.dip.testsupport.SemanticSearchTestDataFactory; import java.util.Set; + +import at.procon.dip.testsupport.config.SearchTestConfig; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; import static org.assertj.core.api.Assertions.assertThat; +@Import(SearchTestConfig.class) class GenericSemanticSearchOrchestratorIntegrationTest extends AbstractSemanticSearchIntegrationTest { @Autowired diff --git a/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java b/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java index f9deee4..aa4c01d 100644 --- a/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java +++ b/src/test/java/at/procon/dip/search/integration/SemanticModelSelectionIntegrationTest.java @@ -12,11 +12,15 @@ import at.procon.dip.search.spi.SearchDocumentScope; import at.procon.dip.testsupport.AbstractSemanticSearchIntegrationTest; import at.procon.dip.testsupport.SemanticSearchTestDataFactory; import java.util.Set; + +import at.procon.dip.testsupport.config.SearchTestConfig; import org.junit.jupiter.api.Test; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Import; import static org.assertj.core.api.Assertions.assertThat; +@Import(SearchTestConfig.class) class SemanticModelSelectionIntegrationTest extends AbstractSemanticSearchIntegrationTest { @Autowired diff --git a/src/test/java/at/procon/dip/testsupport/AbstractSearchIntegrationTest.java b/src/test/java/at/procon/dip/testsupport/AbstractSearchIntegrationTest.java index ad97fe5..da3e04f 100644 --- a/src/test/java/at/procon/dip/testsupport/AbstractSearchIntegrationTest.java +++ b/src/test/java/at/procon/dip/testsupport/AbstractSearchIntegrationTest.java @@ -6,16 +6,10 @@ import at.procon.dip.domain.document.repository.DocumentTextRepresentationReposi import at.procon.dip.domain.tenant.repository.DocumentTenantRepository; import javax.sql.DataSource; -import at.procon.dip.testsupport.config.SearchTestConfig; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.TestInstance; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.boot.autoconfigure.ImportAutoConfiguration; -import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration; -import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration; -import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration; import org.springframework.boot.test.context.SpringBootTest; -import org.springframework.context.annotation.Import; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.test.context.DynamicPropertyRegistry; import org.springframework.test.context.DynamicPropertySource; @@ -92,14 +86,14 @@ public abstract class AbstractSearchIntegrationTest { } protected void ensureSearchColumnsAndIndexes() { - jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm"); jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc"); + jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc"); jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)"); jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector"); jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)"); - jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title gin_trgm_ops)"); - jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary gin_trgm_ops)"); - jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body gin_trgm_ops)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)"); } protected void cleanupDatabase() { diff --git a/src/test/java/at/procon/dip/testsupport/AbstractSemanticSearchIntegrationTest.java b/src/test/java/at/procon/dip/testsupport/AbstractSemanticSearchIntegrationTest.java index 93e593b..cb15ed6 100644 --- a/src/test/java/at/procon/dip/testsupport/AbstractSemanticSearchIntegrationTest.java +++ b/src/test/java/at/procon/dip/testsupport/AbstractSemanticSearchIntegrationTest.java @@ -1,5 +1,6 @@ package at.procon.dip.testsupport; +import at.procon.dip.FixedPortPostgreSQLContainer; import at.procon.dip.domain.document.repository.DocumentEmbeddingModelRepository; import at.procon.dip.domain.document.repository.DocumentEmbeddingRepository; import at.procon.dip.domain.document.repository.DocumentRepository; @@ -18,6 +19,7 @@ import org.springframework.test.context.TestPropertySource; import org.testcontainers.containers.PostgreSQLContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.DockerImageName; @SpringBootTest(classes = SearchSemanticTestApplication.class, webEnvironment = SpringBootTest.WebEnvironment.MOCK) @Testcontainers @@ -62,23 +64,33 @@ import org.testcontainers.junit.jupiter.Testcontainers; }) public abstract class AbstractSemanticSearchIntegrationTest { + private static final int HOST_PORT = 15433; + private static final String DB_NAME = "dip_semantic_search_test"; + private static final String DB_USER = "test"; + private static final String DB_PASSWORD = "test"; + private static final String JDBC_URL = "jdbc:postgresql://localhost:" + HOST_PORT + "/" + DB_NAME; + @Container - static PostgreSQLContainer postgres = new PostgreSQLContainer<>("pgvector/pgvector:pg16") - .withDatabaseName("dip_semantic_search_test") - .withUsername("test") - .withPassword("test") + static PostgreSQLContainer postgres = new FixedPortPostgreSQLContainer<>( + DockerImageName.parse("pgvector/pgvector:pg16-trixie") + .asCompatibleSubstituteFor("postgres").toString() + , HOST_PORT) + .withDatabaseName(DB_NAME) + .withUsername(DB_USER) + .withPassword(DB_PASSWORD) + .withCommand("postgres", "-c", "fsync=off") .withInitScript("sql/create-doc-search-test-schemas.sql"); - static { - postgres.start(); - } @DynamicPropertySource static void registerProperties(DynamicPropertyRegistry registry) { - registry.add("spring.datasource.url", postgres::getJdbcUrl); - registry.add("spring.datasource.username", postgres::getUsername); - registry.add("spring.datasource.password", postgres::getPassword); - registry.add("spring.datasource.driver-class-name", postgres::getDriverClassName); + if (!postgres.isRunning()) { + postgres.start(); + } + registry.add("spring.datasource.url", () -> JDBC_URL); + registry.add("spring.datasource.username", () -> DB_USER); + registry.add("spring.datasource.password", () -> DB_PASSWORD); + registry.add("spring.datasource.driver-class-name", () -> "org.postgresql.Driver"); } @Autowired diff --git a/src/test/java/at/procon/dip/testsupport/SearchSemanticTestApplication.java b/src/test/java/at/procon/dip/testsupport/SearchSemanticTestApplication.java index 62f8bf4..c1c7105 100644 --- a/src/test/java/at/procon/dip/testsupport/SearchSemanticTestApplication.java +++ b/src/test/java/at/procon/dip/testsupport/SearchSemanticTestApplication.java @@ -1,22 +1,52 @@ package at.procon.dip.testsupport; +import at.procon.dip.domain.document.repository.DocumentContentRepository; +import at.procon.dip.domain.document.repository.DocumentEmbeddingModelRepository; +import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; +import at.procon.dip.domain.document.service.DocumentContentService; +import at.procon.dip.domain.document.service.DocumentRepresentationService; +import at.procon.dip.domain.document.service.DocumentService; import at.procon.dip.embedding.config.EmbeddingProperties; +import at.procon.dip.search.engine.fulltext.PostgresFullTextSearchEngine; +import at.procon.dip.search.engine.trigram.PostgresTrigramSearchEngine; +import at.procon.dip.search.plan.DefaultSearchPlanner; +import at.procon.dip.search.rank.DefaultSearchResultFusionService; +import at.procon.dip.search.rank.DefaultSearchScoreNormalizer; +import at.procon.dip.search.repository.DocumentFullTextSearchRepositoryImpl; +import at.procon.dip.search.repository.DocumentTrigramSearchRepositoryImpl; +import at.procon.dip.domain.document.repository.DocumentEmbeddingRepository; +import at.procon.dip.domain.document.repository.DocumentRepository; +import at.procon.dip.search.service.DefaultSearchOrchestrator; +import at.procon.dip.search.service.DocumentLexicalIndexService; +import at.procon.dip.search.service.SearchMetricsService; +import at.procon.dip.search.web.GenericSearchController; import at.procon.ted.config.TedProcessorProperties; import org.springframework.boot.SpringBootConfiguration; import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.ImportAutoConfiguration; import org.springframework.boot.autoconfigure.domain.EntityScan; +import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration; +import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration; +import org.springframework.boot.autoconfigure.transaction.TransactionAutoConfiguration; import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Import; import org.springframework.data.jpa.repository.config.EnableJpaRepositories; + /** * Narrow semantic-search test application that loads the new generic search subsystem * plus the new parallel embedding subsystem. */ @SpringBootConfiguration -@EnableAutoConfiguration(excludeName = { - "org.apache.camel.spring.boot.CamelAutoConfiguration", - "org.springframework.boot.autoconfigure.task.TaskSchedulingAutoConfiguration" +@AutoConfigureMockMvc +@ImportAutoConfiguration({ + DataSourceAutoConfiguration.class, + HibernateJpaAutoConfiguration.class, + TransactionAutoConfiguration.class, + JdbcTemplateAutoConfiguration.class }) @EnableConfigurationProperties({TedProcessorProperties.class, EmbeddingProperties.class}) @EntityScan(basePackages = { @@ -32,8 +62,25 @@ import org.springframework.data.jpa.repository.config.EnableJpaRepositories; @ComponentScan(basePackages = { "at.procon.dip.domain.document.service", "at.procon.dip.embedding", - "at.procon.dip.search", - "at.procon.dip.testsupport" + "at.procon.dip.search" +}) +@Import({ + DocumentService.class, + DocumentContentService.class, + DocumentRepresentationService.class, + DocumentLexicalIndexService.class, + SearchTestDataFactory.class, + SemanticSearchTestDataFactory.class, + DefaultSearchPlanner.class, + DocumentFullTextSearchRepositoryImpl.class, + DocumentTrigramSearchRepositoryImpl.class, + PostgresFullTextSearchEngine.class, + PostgresTrigramSearchEngine.class, + DefaultSearchScoreNormalizer.class, + DefaultSearchResultFusionService.class, + SearchMetricsService.class, + DefaultSearchOrchestrator.class, + GenericSearchController.class }) public class SearchSemanticTestApplication { } diff --git a/src/test/resources/sql/create-doc-search-test-schemas.sql b/src/test/resources/sql/create-doc-search-test-schemas.sql index dcff347..d2f36a7 100644 --- a/src/test/resources/sql/create-doc-search-test-schemas.sql +++ b/src/test/resources/sql/create-doc-search-test-schemas.sql @@ -1,3 +1,3 @@ CREATE SCHEMA IF NOT EXISTS DOC; CREATE SCHEMA IF NOT EXISTS TED; -CREATE EXTENSION IF NOT EXISTS pg_trgm; +CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;