embedding nv3.1 - test
This commit is contained in:
parent
ca502cb369
commit
847cb40f8a
|
|
@ -38,7 +38,7 @@ class BasicMimeAndExtensionDocumentTypeDetectorTest {
|
|||
assertThat(result.documentType()).isEqualTo(DocumentType.GENERIC_BINARY);
|
||||
assertThat(result.mimeType()).isEqualTo("application/vnd.ms-excel");
|
||||
assertThat(result.attributes()).containsEntry("detectedExtension", "xls");
|
||||
assertThat(result.attributes()).containsEntry("effectiveMediaType", "application/vnd.ms-excel");
|
||||
assertThat(result.attributes()).containsEntry("normalizedMediaType", "application/vnd.ms-excel");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ class SpreadsheetDocumentExtractorTest {
|
|||
String text = result.derivedTextByRole().get(ContentRole.NORMALIZED_TEXT);
|
||||
assertNotNull(text);
|
||||
assertTrue(text.contains("Sheet: Sheet1"));
|
||||
assertTrue(text.contains("Name | Amount"));
|
||||
assertTrue(text.contains("Alice | 42"));
|
||||
assertTrue(text.contains("Name\tAmount"));
|
||||
assertTrue(text.contains("Alice\t42"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -74,7 +74,9 @@ class SpreadsheetDocumentExtractorTest {
|
|||
ExtractionResult result = extractor.extract(new ExtractionRequest(source, detection, csv, data));
|
||||
|
||||
String text = result.derivedTextByRole().get(ContentRole.NORMALIZED_TEXT);
|
||||
assertEquals("Name | Amount\nAlice | 42\nBob | 77", text);
|
||||
assertEquals("Name,Amount\n" +
|
||||
"Alice,42\n" +
|
||||
"Bob,77", text);
|
||||
}
|
||||
|
||||
private byte[] createLegacyXls() throws Exception {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class MailDocumentIngestionAdapterBundleTest {
|
|||
properties.getGenericIngestion().setMailAdapterEnabled(true);
|
||||
properties.getGenericIngestion().setExpandMailZipAttachments(false);
|
||||
properties.getGenericIngestion().setMailImportBatchId("test-mail-bundle");
|
||||
when(zipExtractionService.canHandle(any(), any())).thenReturn(false);
|
||||
lenient().when(zipExtractionService.canHandle(any(), any())).thenReturn(false);
|
||||
adapter = new MailDocumentIngestionAdapter(properties, importService, new MailMessageExtractionService(), relationService, zipExtractionService);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import at.procon.dip.ingestion.spi.SourceDescriptor;
|
|||
import at.procon.dip.normalization.impl.DefaultGenericTextRepresentationBuilder;
|
||||
import at.procon.dip.normalization.service.TextRepresentationBuildService;
|
||||
import at.procon.dip.processing.service.StructuredDocumentProcessingService;
|
||||
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||
import at.procon.ted.config.TedProcessorProperties;
|
||||
import at.procon.ted.service.attachment.PdfExtractionService;
|
||||
import at.procon.ted.service.attachment.ZipExtractionService;
|
||||
|
|
@ -369,6 +370,7 @@ class MailBundleProcessingIntegrationTest {
|
|||
DocumentExtractionService.class,
|
||||
GenericDocumentImportService.class,
|
||||
StructuredDocumentProcessingService.class,
|
||||
DocumentLexicalIndexService.class
|
||||
|
||||
})
|
||||
static class TestApplication {
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import org.springframework.test.annotation.DirtiesContext;
|
|||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.BEFORE_CLASS)
|
||||
//@DirtiesContext(classMode = DirtiesContext.ClassMode.BEFORE_CLASS)
|
||||
class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
|
|
|
|||
|
|
@ -8,11 +8,16 @@ import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
|||
import at.procon.dip.search.dto.SearchRequest;
|
||||
import at.procon.dip.testsupport.AbstractSemanticSearchIntegrationTest;
|
||||
import at.procon.dip.testsupport.SemanticSearchTestDataFactory;
|
||||
import at.procon.dip.testsupport.config.SearchTestConfig;
|
||||
import at.procon.dip.testsupport.config.SearchTestJacksonConfig;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.util.Set;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.test.context.TestPropertySource;
|
||||
|
|
@ -24,9 +29,11 @@ import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.
|
|||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||
|
||||
@AutoConfigureMockMvc
|
||||
@Import(SearchTestJacksonConfig.class)
|
||||
@TestPropertySource(properties = {
|
||||
"spring.mvc.converters.preferred-json-mapper=jackson"
|
||||
@Import(SearchTestConfig.class)
|
||||
@ImportAutoConfiguration({
|
||||
JacksonAutoConfiguration.class,
|
||||
HttpMessageConvertersAutoConfiguration.class,
|
||||
WebMvcAutoConfiguration.class
|
||||
})
|
||||
class GenericSemanticSearchEndpointIntegrationTest extends AbstractSemanticSearchIntegrationTest {
|
||||
|
||||
|
|
|
|||
|
|
@ -13,11 +13,15 @@ import at.procon.dip.search.spi.SearchDocumentScope;
|
|||
import at.procon.dip.testsupport.AbstractSemanticSearchIntegrationTest;
|
||||
import at.procon.dip.testsupport.SemanticSearchTestDataFactory;
|
||||
import java.util.Set;
|
||||
|
||||
import at.procon.dip.testsupport.config.SearchTestConfig;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Import(SearchTestConfig.class)
|
||||
class GenericSemanticSearchOrchestratorIntegrationTest extends AbstractSemanticSearchIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
|
|
|
|||
|
|
@ -12,11 +12,15 @@ import at.procon.dip.search.spi.SearchDocumentScope;
|
|||
import at.procon.dip.testsupport.AbstractSemanticSearchIntegrationTest;
|
||||
import at.procon.dip.testsupport.SemanticSearchTestDataFactory;
|
||||
import java.util.Set;
|
||||
|
||||
import at.procon.dip.testsupport.config.SearchTestConfig;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.context.annotation.Import;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@Import(SearchTestConfig.class)
|
||||
class SemanticModelSelectionIntegrationTest extends AbstractSemanticSearchIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
|
|
|
|||
|
|
@ -6,16 +6,10 @@ import at.procon.dip.domain.document.repository.DocumentTextRepresentationReposi
|
|||
import at.procon.dip.domain.tenant.repository.DocumentTenantRepository;
|
||||
import javax.sql.DataSource;
|
||||
|
||||
import at.procon.dip.testsupport.config.SearchTestConfig;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.TestInstance;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.test.context.DynamicPropertyRegistry;
|
||||
import org.springframework.test.context.DynamicPropertySource;
|
||||
|
|
@ -92,14 +86,14 @@ public abstract class AbstractSearchIntegrationTest {
|
|||
}
|
||||
|
||||
protected void ensureSearchColumnsAndIndexes() {
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm");
|
||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
||||
}
|
||||
|
||||
protected void cleanupDatabase() {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package at.procon.dip.testsupport;
|
||||
|
||||
import at.procon.dip.FixedPortPostgreSQLContainer;
|
||||
import at.procon.dip.domain.document.repository.DocumentEmbeddingModelRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentEmbeddingRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||
|
|
@ -18,6 +19,7 @@ import org.springframework.test.context.TestPropertySource;
|
|||
import org.testcontainers.containers.PostgreSQLContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
import org.testcontainers.utility.DockerImageName;
|
||||
|
||||
@SpringBootTest(classes = SearchSemanticTestApplication.class, webEnvironment = SpringBootTest.WebEnvironment.MOCK)
|
||||
@Testcontainers
|
||||
|
|
@ -62,23 +64,33 @@ import org.testcontainers.junit.jupiter.Testcontainers;
|
|||
})
|
||||
public abstract class AbstractSemanticSearchIntegrationTest {
|
||||
|
||||
private static final int HOST_PORT = 15433;
|
||||
private static final String DB_NAME = "dip_semantic_search_test";
|
||||
private static final String DB_USER = "test";
|
||||
private static final String DB_PASSWORD = "test";
|
||||
private static final String JDBC_URL = "jdbc:postgresql://localhost:" + HOST_PORT + "/" + DB_NAME;
|
||||
|
||||
@Container
|
||||
static PostgreSQLContainer<?> postgres = new PostgreSQLContainer<>("pgvector/pgvector:pg16")
|
||||
.withDatabaseName("dip_semantic_search_test")
|
||||
.withUsername("test")
|
||||
.withPassword("test")
|
||||
static PostgreSQLContainer<?> postgres = new FixedPortPostgreSQLContainer<>(
|
||||
DockerImageName.parse("pgvector/pgvector:pg16-trixie")
|
||||
.asCompatibleSubstituteFor("postgres").toString()
|
||||
, HOST_PORT)
|
||||
.withDatabaseName(DB_NAME)
|
||||
.withUsername(DB_USER)
|
||||
.withPassword(DB_PASSWORD)
|
||||
.withCommand("postgres", "-c", "fsync=off")
|
||||
.withInitScript("sql/create-doc-search-test-schemas.sql");
|
||||
|
||||
static {
|
||||
postgres.start();
|
||||
}
|
||||
|
||||
@DynamicPropertySource
|
||||
static void registerProperties(DynamicPropertyRegistry registry) {
|
||||
registry.add("spring.datasource.url", postgres::getJdbcUrl);
|
||||
registry.add("spring.datasource.username", postgres::getUsername);
|
||||
registry.add("spring.datasource.password", postgres::getPassword);
|
||||
registry.add("spring.datasource.driver-class-name", postgres::getDriverClassName);
|
||||
if (!postgres.isRunning()) {
|
||||
postgres.start();
|
||||
}
|
||||
registry.add("spring.datasource.url", () -> JDBC_URL);
|
||||
registry.add("spring.datasource.username", () -> DB_USER);
|
||||
registry.add("spring.datasource.password", () -> DB_PASSWORD);
|
||||
registry.add("spring.datasource.driver-class-name", () -> "org.postgresql.Driver");
|
||||
}
|
||||
|
||||
@Autowired
|
||||
|
|
|
|||
|
|
@ -1,22 +1,52 @@
|
|||
package at.procon.dip.testsupport;
|
||||
|
||||
import at.procon.dip.domain.document.repository.DocumentContentRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentEmbeddingModelRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository;
|
||||
import at.procon.dip.domain.document.service.DocumentContentService;
|
||||
import at.procon.dip.domain.document.service.DocumentRepresentationService;
|
||||
import at.procon.dip.domain.document.service.DocumentService;
|
||||
import at.procon.dip.embedding.config.EmbeddingProperties;
|
||||
import at.procon.dip.search.engine.fulltext.PostgresFullTextSearchEngine;
|
||||
import at.procon.dip.search.engine.trigram.PostgresTrigramSearchEngine;
|
||||
import at.procon.dip.search.plan.DefaultSearchPlanner;
|
||||
import at.procon.dip.search.rank.DefaultSearchResultFusionService;
|
||||
import at.procon.dip.search.rank.DefaultSearchScoreNormalizer;
|
||||
import at.procon.dip.search.repository.DocumentFullTextSearchRepositoryImpl;
|
||||
import at.procon.dip.search.repository.DocumentTrigramSearchRepositoryImpl;
|
||||
import at.procon.dip.domain.document.repository.DocumentEmbeddingRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||
import at.procon.dip.search.service.DefaultSearchOrchestrator;
|
||||
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||
import at.procon.dip.search.service.SearchMetricsService;
|
||||
import at.procon.dip.search.web.GenericSearchController;
|
||||
import at.procon.ted.config.TedProcessorProperties;
|
||||
import org.springframework.boot.SpringBootConfiguration;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.domain.EntityScan;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.transaction.TransactionAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
|
||||
import org.springframework.context.annotation.ComponentScan;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
|
||||
|
||||
|
||||
/**
|
||||
* Narrow semantic-search test application that loads the new generic search subsystem
|
||||
* plus the new parallel embedding subsystem.
|
||||
*/
|
||||
@SpringBootConfiguration
|
||||
@EnableAutoConfiguration(excludeName = {
|
||||
"org.apache.camel.spring.boot.CamelAutoConfiguration",
|
||||
"org.springframework.boot.autoconfigure.task.TaskSchedulingAutoConfiguration"
|
||||
@AutoConfigureMockMvc
|
||||
@ImportAutoConfiguration({
|
||||
DataSourceAutoConfiguration.class,
|
||||
HibernateJpaAutoConfiguration.class,
|
||||
TransactionAutoConfiguration.class,
|
||||
JdbcTemplateAutoConfiguration.class
|
||||
})
|
||||
@EnableConfigurationProperties({TedProcessorProperties.class, EmbeddingProperties.class})
|
||||
@EntityScan(basePackages = {
|
||||
|
|
@ -32,8 +62,25 @@ import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
|
|||
@ComponentScan(basePackages = {
|
||||
"at.procon.dip.domain.document.service",
|
||||
"at.procon.dip.embedding",
|
||||
"at.procon.dip.search",
|
||||
"at.procon.dip.testsupport"
|
||||
"at.procon.dip.search"
|
||||
})
|
||||
@Import({
|
||||
DocumentService.class,
|
||||
DocumentContentService.class,
|
||||
DocumentRepresentationService.class,
|
||||
DocumentLexicalIndexService.class,
|
||||
SearchTestDataFactory.class,
|
||||
SemanticSearchTestDataFactory.class,
|
||||
DefaultSearchPlanner.class,
|
||||
DocumentFullTextSearchRepositoryImpl.class,
|
||||
DocumentTrigramSearchRepositoryImpl.class,
|
||||
PostgresFullTextSearchEngine.class,
|
||||
PostgresTrigramSearchEngine.class,
|
||||
DefaultSearchScoreNormalizer.class,
|
||||
DefaultSearchResultFusionService.class,
|
||||
SearchMetricsService.class,
|
||||
DefaultSearchOrchestrator.class,
|
||||
GenericSearchController.class
|
||||
})
|
||||
public class SearchSemanticTestApplication {
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
CREATE SCHEMA IF NOT EXISTS DOC;
|
||||
CREATE SCHEMA IF NOT EXISTS TED;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
|
||||
|
|
|
|||
Loading…
Reference in New Issue