embedding nv1 + search tests
parent
2687d4ba17
commit
d7369c796c
@ -0,0 +1,39 @@
|
||||
package at.procon.dip.embedding.provider.mock;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import at.procon.dip.domain.document.DistanceMetric;
|
||||
import at.procon.dip.embedding.model.EmbeddingModelDescriptor;
|
||||
import at.procon.dip.embedding.model.EmbeddingRequest;
|
||||
import at.procon.dip.embedding.model.EmbeddingUseCase;
|
||||
import at.procon.dip.embedding.model.ResolvedEmbeddingProviderConfig;
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class MockEmbeddingProviderTest {
|
||||
|
||||
private final MockEmbeddingProvider provider = new MockEmbeddingProvider();
|
||||
|
||||
@Test
|
||||
void should_produce_deterministic_vectors() {
|
||||
EmbeddingModelDescriptor model = new EmbeddingModelDescriptor(
|
||||
"mock-search", "mock-default", "mock-search", 8, DistanceMetric.COSINE, true, true, null, true);
|
||||
ResolvedEmbeddingProviderConfig config = ResolvedEmbeddingProviderConfig.builder()
|
||||
.key("mock-default")
|
||||
.providerType("mock")
|
||||
.dimensions(8)
|
||||
.build();
|
||||
EmbeddingRequest request = EmbeddingRequest.builder()
|
||||
.modelKey("mock-search")
|
||||
.useCase(EmbeddingUseCase.DOCUMENT)
|
||||
.texts(List.of("district heating optimization"))
|
||||
.build();
|
||||
|
||||
var first = provider.embedDocuments(config, model, request);
|
||||
var second = provider.embedDocuments(config, model, request);
|
||||
|
||||
assertThat(first.vectors()).hasSize(1);
|
||||
assertThat(second.vectors()).hasSize(1);
|
||||
assertThat(first.vectors().getFirst()).containsExactly(second.vectors().getFirst());
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,29 @@
|
||||
package at.procon.dip.embedding.registry;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import at.procon.dip.domain.document.DistanceMetric;
|
||||
import at.procon.dip.embedding.config.EmbeddingProperties;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class EmbeddingModelRegistryTest {
|
||||
|
||||
@Test
|
||||
void should_resolve_active_model_from_properties() {
|
||||
EmbeddingProperties properties = new EmbeddingProperties();
|
||||
properties.setDefaultDocumentModel("mock-search");
|
||||
EmbeddingProperties.ModelProperties model = new EmbeddingProperties.ModelProperties();
|
||||
model.setProviderConfigKey("mock-default");
|
||||
model.setProviderModelKey("mock-search");
|
||||
model.setDimensions(16);
|
||||
model.setDistanceMetric(DistanceMetric.COSINE);
|
||||
model.setSupportsQueryEmbeddingMode(true);
|
||||
model.setActive(true);
|
||||
properties.getModels().put("mock-search", model);
|
||||
|
||||
EmbeddingModelRegistry registry = new EmbeddingModelRegistry(properties);
|
||||
|
||||
assertThat(registry.getRequiredDefaultDocumentModelKey()).isEqualTo("mock-search");
|
||||
assertThat(registry.getRequired("mock-search").providerConfigKey()).isEqualTo("mock-default");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,38 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import at.procon.dip.domain.document.DistanceMetric;
|
||||
import at.procon.dip.embedding.model.EmbeddingModelDescriptor;
|
||||
import at.procon.dip.embedding.model.EmbeddingProviderResult;
|
||||
import at.procon.dip.embedding.model.EmbeddingUseCase;
|
||||
import at.procon.dip.embedding.registry.EmbeddingModelRegistry;
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class DefaultQueryEmbeddingServiceTest {
|
||||
|
||||
@Test
|
||||
void should_use_default_query_model() {
|
||||
EmbeddingExecutionService executionService = mock(EmbeddingExecutionService.class);
|
||||
EmbeddingModelRegistry modelRegistry = mock(EmbeddingModelRegistry.class);
|
||||
|
||||
when(modelRegistry.getRequiredDefaultQueryModelKey()).thenReturn("mock-search");
|
||||
when(executionService.embedTexts("mock-search", EmbeddingUseCase.QUERY, List.of("framework agreement")))
|
||||
.thenReturn(new EmbeddingProviderResult(
|
||||
new EmbeddingModelDescriptor("mock-search", "mock-default", "mock-search", 4,
|
||||
DistanceMetric.COSINE, true, true, null, true),
|
||||
List.of(new float[]{1f, 2f, 3f, 4f}),
|
||||
List.of(),
|
||||
"req-1",
|
||||
2
|
||||
));
|
||||
|
||||
DefaultQueryEmbeddingService service = new DefaultQueryEmbeddingService(executionService, modelRegistry);
|
||||
float[] vector = service.embedQuery("framework agreement");
|
||||
|
||||
assertThat(vector).containsExactly(1f, 2f, 3f, 4f);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,138 @@
|
||||
package at.procon.dip.search.integration;
|
||||
|
||||
import at.procon.dip.config.JacksonConfig;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.search.dto.SearchMode;
|
||||
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||
import at.procon.dip.search.dto.SearchRequest;
|
||||
import at.procon.dip.testsupport.AbstractSearchIntegrationTest;
|
||||
import at.procon.dip.testsupport.SearchTestDataFactory;
|
||||
import at.procon.dip.testsupport.config.SearchTestConfig;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.fasterxml.jackson.databind.SerializationFeature;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration;
|
||||
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.test.web.servlet.MockMvc;
|
||||
|
||||
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.get;
|
||||
import static org.springframework.test.web.servlet.request.MockMvcRequestBuilders.post;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.jsonPath;
|
||||
import static org.springframework.test.web.servlet.result.MockMvcResultMatchers.status;
|
||||
|
||||
|
||||
|
||||
@Import(SearchTestConfig.class)
|
||||
@ImportAutoConfiguration({
|
||||
JacksonAutoConfiguration.class,
|
||||
HttpMessageConvertersAutoConfiguration.class,
|
||||
WebMvcAutoConfiguration.class
|
||||
})
|
||||
class GenericSearchEndpointIntegrationTest extends AbstractSearchIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
private SearchTestDataFactory dataFactory;
|
||||
|
||||
@Autowired
|
||||
private MockMvc mockMvc;
|
||||
|
||||
@Autowired
|
||||
private ObjectMapper objectMapper;
|
||||
|
||||
@Test
|
||||
void searchEndpoint_should_return_hits_for_fulltext_request() throws Exception {
|
||||
dataFactory.createDocumentWithPrimaryRepresentation(
|
||||
"Vienna school renovation framework",
|
||||
"School roof framework agreement",
|
||||
"Framework agreement for school roof renovation in Vienna.",
|
||||
DocumentType.TED_NOTICE,
|
||||
DocumentFamily.PROCUREMENT,
|
||||
"en",
|
||||
RepresentationType.SEMANTIC_TEXT);
|
||||
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("framework agreement")
|
||||
.modes(Set.of(SearchMode.FULLTEXT))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY)
|
||||
.build();
|
||||
|
||||
mockMvc.perform(post("/search")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.accept(MediaType.APPLICATION_JSON)
|
||||
.characterEncoding("UTF-8")
|
||||
.content(objectMapper.writeValueAsString(request)))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.hits[0].title").value("Vienna school renovation framework"))
|
||||
.andExpect(jsonPath("$.enginesUsed[0]").value("POSTGRES_FULLTEXT"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void debugEndpoint_should_return_plan_and_engine_results() throws Exception {
|
||||
dataFactory.createDocumentWithPrimaryRepresentation(
|
||||
"Maintenance manual",
|
||||
"Factory maintenance manual",
|
||||
"Maintenance manual for calibration and preventive checks.",
|
||||
DocumentType.PDF,
|
||||
DocumentFamily.KNOWLEDGE,
|
||||
"en",
|
||||
RepresentationType.FULLTEXT);
|
||||
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("maintenence manual")
|
||||
.modes(Set.of(SearchMode.HYBRID))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY)
|
||||
.build();
|
||||
|
||||
mockMvc.perform(post("/search/debug")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.accept(MediaType.APPLICATION_JSON)
|
||||
.characterEncoding("UTF-8")
|
||||
.content(objectMapper.writeValueAsString(request)))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.plan.engines").isArray())
|
||||
.andExpect(jsonPath("$.engineResults").isArray())
|
||||
.andExpect(jsonPath("$.fusedResponse.hits[0].title").value("Maintenance manual"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void metricsEndpoint_should_return_search_metrics_snapshot() throws Exception {
|
||||
dataFactory.createDocumentWithPrimaryAndChunks(
|
||||
"Energy optimization strategy",
|
||||
"Strategy overview",
|
||||
"This primary representation only contains a high level overview.",
|
||||
"en",
|
||||
List.of("District heating optimization strategy for municipal energy systems is described here."));
|
||||
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("district heating optimization")
|
||||
.modes(Set.of(SearchMode.FULLTEXT))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
|
||||
.build();
|
||||
|
||||
mockMvc.perform(post("/search")
|
||||
.contentType(MediaType.APPLICATION_JSON)
|
||||
.accept(MediaType.APPLICATION_JSON)
|
||||
.characterEncoding("UTF-8")
|
||||
.content(objectMapper.writeValueAsString(request)))
|
||||
.andExpect(status().isOk());
|
||||
|
||||
mockMvc.perform(get("/search/metrics"))
|
||||
.andExpect(status().isOk())
|
||||
.andExpect(jsonPath("$.totalSearchRequests").isNumber())
|
||||
.andExpect(jsonPath("$.representationCounts").exists());
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,174 @@
|
||||
package at.procon.dip.search.integration;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.search.api.SearchExecutionContext;
|
||||
import at.procon.dip.search.dto.*;
|
||||
import at.procon.dip.search.engine.trigram.PostgresTrigramSearchEngine;
|
||||
import at.procon.dip.search.service.SearchOrchestrator;
|
||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||
import at.procon.dip.testsupport.AbstractSearchIntegrationTest;
|
||||
import at.procon.dip.testsupport.SearchTestDataFactory;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.test.annotation.DirtiesContext;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
@DirtiesContext(classMode = DirtiesContext.ClassMode.BEFORE_CLASS)
|
||||
class GenericSearchOrchestratorIntegrationTest extends AbstractSearchIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
private SearchTestDataFactory dataFactory;
|
||||
|
||||
@Autowired
|
||||
private SearchOrchestrator searchOrchestrator;
|
||||
|
||||
@Autowired
|
||||
private PostgresTrigramSearchEngine trigramSearchEngine;
|
||||
|
||||
@Test
|
||||
void hybridSearch_should_collapse_document_hits_when_fulltext_and_trigram_match_same_document() {
|
||||
dataFactory.createDocumentWithPrimaryRepresentation(
|
||||
"Maintenance manual",
|
||||
"Factory maintenance manual",
|
||||
"Maintenance manual for calibration and preventive checks.",
|
||||
DocumentType.PDF,
|
||||
DocumentFamily.KNOWLEDGE,
|
||||
"en",
|
||||
RepresentationType.FULLTEXT);
|
||||
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("Maintenance manual")
|
||||
.modes(Set.of(SearchMode.HYBRID))
|
||||
.collapseByDocument(true)
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY)
|
||||
.build();
|
||||
|
||||
SearchResponse response = searchOrchestrator.search(
|
||||
request,
|
||||
new SearchDocumentScope(Set.of(), null, null, null, null));
|
||||
|
||||
assertThat(response.getHits()).hasSize(1);
|
||||
assertThat(response.getHits().getFirst().getTitle()).isEqualTo("Maintenance manual");
|
||||
assertThat(response.getEnginesUsed()).isNotEmpty();
|
||||
assertThat(response.getHits().getFirst().getFinalScore()).isGreaterThan(0.0d);
|
||||
}
|
||||
|
||||
@Test
|
||||
void representationSelectionMode_should_control_chunk_visibility() {
|
||||
dataFactory.createDocumentWithPrimaryAndChunks(
|
||||
"Energy optimization strategy",
|
||||
"Strategy overview",
|
||||
"This primary representation only contains a high level overview.",
|
||||
"en",
|
||||
List.of(
|
||||
"Chunk one is introductory and does not contain the target phrase.",
|
||||
"District heating optimization strategy for municipal energy systems is described here."
|
||||
));
|
||||
|
||||
SearchRequest primaryOnly = SearchRequest.builder()
|
||||
.queryText("district heating optimization")
|
||||
.modes(Set.of(SearchMode.FULLTEXT))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY)
|
||||
.build();
|
||||
|
||||
SearchRequest primaryAndChunks = SearchRequest.builder()
|
||||
.queryText("district heating optimization")
|
||||
.modes(Set.of(SearchMode.FULLTEXT))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
|
||||
.build();
|
||||
|
||||
SearchResponse primaryOnlyResponse = searchOrchestrator.search(
|
||||
primaryOnly,
|
||||
new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null));
|
||||
|
||||
SearchResponse primaryAndChunksResponse = searchOrchestrator.search(
|
||||
primaryAndChunks,
|
||||
new SearchDocumentScope(Set.of(), Set.of(DocumentType.TEXT), Set.of(DocumentFamily.GENERIC), null, null));
|
||||
|
||||
assertThat(primaryOnlyResponse.getHits()).isEmpty();
|
||||
assertThat(primaryAndChunksResponse.getHits()).hasSize(1);
|
||||
assertThat(primaryAndChunksResponse.getHits().getFirst().getTitle()).isEqualTo("Energy optimization strategy");
|
||||
assertThat(primaryAndChunksResponse.getHits().getFirst().getMatchedRepresentationCount()).isGreaterThanOrEqualTo(1);
|
||||
assertThat(primaryAndChunksResponse.getHits().getFirst().getRepresentationType()).isEqualTo(RepresentationType.CHUNK);
|
||||
}
|
||||
|
||||
@Test
|
||||
void trigramMode_should_find_document_by_fuzzy_title() {
|
||||
dataFactory.createDocumentWithPrimaryAndChunks(
|
||||
"Energy optimization strategy",
|
||||
"Planning note",
|
||||
"This primary representation contains only generic background information.",
|
||||
"en",
|
||||
List.of(
|
||||
"This chunk talks about municipal utilities and operations.",
|
||||
"This chunk contains unrelated technical background."
|
||||
));
|
||||
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("Enegry optimiztion stratgy")
|
||||
.modes(Set.of(SearchMode.TRIGRAM))
|
||||
.build();
|
||||
|
||||
SearchResponse response = searchOrchestrator.search(
|
||||
request,
|
||||
new SearchDocumentScope(
|
||||
Set.of(),
|
||||
Set.of(DocumentType.TEXT),
|
||||
Set.of(DocumentFamily.GENERIC),
|
||||
null,
|
||||
null
|
||||
)
|
||||
);
|
||||
|
||||
assertThat(response.getHits()).isNotEmpty();
|
||||
assertThat(response.getHits()).hasSize(1);
|
||||
|
||||
SearchHit first = response.getHits().getFirst();
|
||||
assertThat(first.getTitle()).isEqualTo("Energy optimization strategy");
|
||||
assertThat(first.getPrimaryEngine()).isEqualTo(SearchEngineType.POSTGRES_TRIGRAM);
|
||||
assertThat(first.getMatchedField()).isEqualTo(SearchMatchField.DOCUMENT_TITLE);
|
||||
assertThat(first.getFinalScore()).isGreaterThan(0.0);
|
||||
}
|
||||
|
||||
@Test
|
||||
void trigramRepository_should_find_document_by_fuzzy_title() {
|
||||
dataFactory.createDocumentWithPrimaryAndChunks(
|
||||
"Energy optimization strategy",
|
||||
"Planning note",
|
||||
"This primary representation contains only generic background information.",
|
||||
"en",
|
||||
List.of(
|
||||
"This chunk talks about municipal utilities and operations.",
|
||||
"This chunk contains unrelated technical background."
|
||||
));
|
||||
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("Enegry optimiztion stratgy")
|
||||
.modes(Set.of(SearchMode.TRIGRAM))
|
||||
.build();
|
||||
|
||||
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||
.request(request)
|
||||
.scope(new SearchDocumentScope(
|
||||
Set.of(),
|
||||
Set.of(DocumentType.TEXT),
|
||||
Set.of(DocumentFamily.GENERIC),
|
||||
null,
|
||||
null
|
||||
))
|
||||
.page(0)
|
||||
.size(10)
|
||||
.build();
|
||||
|
||||
List<SearchHit> hits = trigramSearchEngine.execute(context);
|
||||
|
||||
assertThat(hits).isNotEmpty();
|
||||
assertThat(hits.getFirst().getTitle()).isEqualTo("Energy optimization strategy");
|
||||
assertThat(hits.getFirst().getPrimaryEngine()).isEqualTo(SearchEngineType.POSTGRES_TRIGRAM);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,108 @@
|
||||
package at.procon.dip.search.integration;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.search.api.SearchExecutionContext;
|
||||
import at.procon.dip.search.dto.SearchHit;
|
||||
import at.procon.dip.search.dto.SearchMode;
|
||||
import at.procon.dip.search.dto.SearchRequest;
|
||||
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||
import at.procon.dip.search.repository.DocumentFullTextSearchRepository;
|
||||
import at.procon.dip.search.repository.DocumentTrigramSearchRepository;
|
||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||
import at.procon.dip.testsupport.AbstractSearchIntegrationTest;
|
||||
import at.procon.dip.testsupport.SearchTestDataFactory;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
class GenericSearchRepositoryIntegrationTest extends AbstractSearchIntegrationTest {
|
||||
|
||||
@Autowired
|
||||
private SearchTestDataFactory dataFactory;
|
||||
|
||||
@Autowired
|
||||
private DocumentFullTextSearchRepository fullTextRepository;
|
||||
|
||||
@Autowired
|
||||
private DocumentTrigramSearchRepository trigramRepository;
|
||||
|
||||
@Test
|
||||
void fullTextRepository_should_find_exact_keyword_in_primary_representation() {
|
||||
dataFactory.createDocumentWithPrimaryRepresentation(
|
||||
"Vienna school renovation framework",
|
||||
"School roof framework agreement",
|
||||
"Framework agreement for school roof renovation in Vienna.",
|
||||
DocumentType.TED_NOTICE,
|
||||
DocumentFamily.PROCUREMENT,
|
||||
"en",
|
||||
RepresentationType.SEMANTIC_TEXT);
|
||||
|
||||
dataFactory.createDocumentWithPrimaryRepresentation(
|
||||
"Pump maintenance manual",
|
||||
"Maintenance procedures",
|
||||
"Calibration procedure for pumps and gauges.",
|
||||
DocumentType.PDF,
|
||||
DocumentFamily.KNOWLEDGE,
|
||||
"en",
|
||||
RepresentationType.FULLTEXT);
|
||||
|
||||
assertThat(jdbcTemplate.queryForObject(
|
||||
"select count(*) from doc.doc_text_representation",
|
||||
Integer.class
|
||||
)).isGreaterThan(0);
|
||||
|
||||
assertThat(jdbcTemplate.queryForObject(
|
||||
"select count(*) from doc.doc_text_representation where search_vector is not null",
|
||||
Integer.class
|
||||
)).isGreaterThan(0);
|
||||
|
||||
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||
.request(SearchRequest.builder()
|
||||
.queryText("framework agreement")
|
||||
.modes(Set.of(SearchMode.FULLTEXT))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY)
|
||||
.build())
|
||||
.scope(new SearchDocumentScope(Set.of(), null, null, null, null))
|
||||
.page(0)
|
||||
.size(10)
|
||||
.build();
|
||||
|
||||
List<SearchHit> hits = fullTextRepository.search(context, 10);
|
||||
assertThat(hits).isNotEmpty();
|
||||
assertThat(hits).extracting(SearchHit::getTitle)
|
||||
.contains("Vienna school renovation framework")
|
||||
.doesNotContain("Pump maintenance manual");
|
||||
}
|
||||
|
||||
@Test
|
||||
void trigramRepository_should_match_fuzzy_title() {
|
||||
dataFactory.createDocumentWithPrimaryRepresentation(
|
||||
"Vienna school renovation framework",
|
||||
"School roof framework agreement",
|
||||
"Framework agreement for school roof renovation in Vienna.",
|
||||
DocumentType.TED_NOTICE,
|
||||
DocumentFamily.PROCUREMENT,
|
||||
"en",
|
||||
RepresentationType.SEMANTIC_TEXT);
|
||||
|
||||
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||
.request(SearchRequest.builder()
|
||||
.queryText("Viena school renovtion")
|
||||
.modes(Set.of(SearchMode.TRIGRAM))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_ONLY)
|
||||
.build())
|
||||
.scope(new SearchDocumentScope(Set.of(), null, null, null, null))
|
||||
.page(0)
|
||||
.size(10)
|
||||
.build();
|
||||
|
||||
List<SearchHit> hits = trigramRepository.search(context, 10, 0.10d);
|
||||
assertThat(hits).isNotEmpty();
|
||||
assertThat(hits.getFirst().getTitle()).isEqualTo("Vienna school renovation framework");
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,119 @@
|
||||
package at.procon.dip.testsupport;
|
||||
|
||||
import at.procon.dip.FixedPortPostgreSQLContainer;
|
||||
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository;
|
||||
import at.procon.dip.domain.tenant.repository.DocumentTenantRepository;
|
||||
import javax.sql.DataSource;
|
||||
|
||||
import at.procon.dip.testsupport.config.SearchTestConfig;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.TestInstance;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.test.context.DynamicPropertyRegistry;
|
||||
import org.springframework.test.context.DynamicPropertySource;
|
||||
import org.springframework.test.context.TestPropertySource;
|
||||
import org.testcontainers.containers.PostgreSQLContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
@SpringBootTest(classes = SearchTestApplication.class, webEnvironment = SpringBootTest.WebEnvironment.MOCK)
|
||||
@Testcontainers
|
||||
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
|
||||
@TestPropertySource(properties = {
|
||||
"spring.jpa.hibernate.ddl-auto=create-drop",
|
||||
"spring.jpa.show-sql=false",
|
||||
"spring.jpa.open-in-view=false",
|
||||
"spring.jpa.properties.hibernate.default_schema=DOC",
|
||||
"spring.main.lazy-initialization=true",
|
||||
"ted.vectorization.enabled=false",
|
||||
"ted.search.default-page-size=20",
|
||||
"ted.search.max-page-size=100",
|
||||
"ted.search.fulltext-weight=0.60",
|
||||
"ted.search.trigram-weight=0.40",
|
||||
"ted.search.semantic-weight=0.45",
|
||||
"ted.search.recency-boost-weight=0.05",
|
||||
"ted.search.trigram-threshold=0.10",
|
||||
"server.servlet.context-path=/api"
|
||||
})
|
||||
public abstract class AbstractSearchIntegrationTest {
|
||||
|
||||
private static final int HOST_PORT = 15433;
|
||||
private static final String DB_NAME = "dip_search_test";
|
||||
private static final String DB_USER = "test";
|
||||
private static final String DB_PASSWORD = "test";
|
||||
private static final String JDBC_URL = "jdbc:postgresql://localhost:" + HOST_PORT + "/" + DB_NAME;
|
||||
|
||||
@Container
|
||||
static PostgreSQLContainer<?> postgres = new FixedPortPostgreSQLContainer<>("postgres:16-alpine", HOST_PORT)
|
||||
.withDatabaseName(DB_NAME)
|
||||
.withUsername(DB_USER)
|
||||
.withPassword(DB_PASSWORD)
|
||||
.withInitScript("sql/create-doc-search-test-schemas.sql");
|
||||
|
||||
|
||||
@DynamicPropertySource
|
||||
static void registerProperties(DynamicPropertyRegistry registry) {
|
||||
if (!postgres.isRunning()) {
|
||||
postgres.start();
|
||||
}
|
||||
registry.add("spring.datasource.url", () -> JDBC_URL);
|
||||
registry.add("spring.datasource.username", () -> DB_USER);
|
||||
registry.add("spring.datasource.password", () -> DB_PASSWORD);
|
||||
registry.add("spring.datasource.driver-class-name", () -> "org.postgresql.Driver");
|
||||
}
|
||||
|
||||
@Autowired
|
||||
protected JdbcTemplate jdbcTemplate;
|
||||
|
||||
@Autowired
|
||||
protected DataSource dataSource;
|
||||
|
||||
@Autowired
|
||||
protected DocumentRepository documentRepository;
|
||||
|
||||
@Autowired
|
||||
protected DocumentTextRepresentationRepository representationRepository;
|
||||
|
||||
@Autowired
|
||||
protected DocumentTenantRepository tenantRepository;
|
||||
|
||||
@BeforeEach
|
||||
void resetSearchTestDatabase() {
|
||||
ensureSearchColumnsAndIndexes();
|
||||
cleanupDatabase();
|
||||
}
|
||||
|
||||
protected void ensureSearchColumnsAndIndexes() {
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm");
|
||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body gin_trgm_ops)");
|
||||
}
|
||||
|
||||
protected void cleanupDatabase() {
|
||||
jdbcTemplate.execute("TRUNCATE TABLE doc.doc_text_representation, doc.doc_document, doc.doc_tenant RESTART IDENTITY CASCADE");
|
||||
}
|
||||
|
||||
protected void setDocumentCreatedAt(java.util.UUID documentId, java.time.OffsetDateTime createdAt) {
|
||||
jdbcTemplate.update("UPDATE doc.doc_document SET created_at = ?, updated_at = ? WHERE id = ?", createdAt, createdAt, documentId);
|
||||
}
|
||||
|
||||
protected boolean columnExists(String schema, String table, String column) {
|
||||
return Boolean.TRUE.equals(jdbcTemplate.queryForObject(
|
||||
"SELECT COUNT(*) > 0 FROM information_schema.columns WHERE table_schema = ? AND table_name = ? AND column_name = ?",
|
||||
Boolean.class,
|
||||
schema.toLowerCase(), table.toLowerCase(), column.toLowerCase()));
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,80 @@
|
||||
package at.procon.dip.testsupport;
|
||||
|
||||
import at.procon.dip.config.JacksonConfig;
|
||||
import at.procon.dip.domain.document.service.DocumentContentService;
|
||||
|
||||
import at.procon.dip.domain.document.service.DocumentRepresentationService;
|
||||
import at.procon.dip.domain.document.service.DocumentService;
|
||||
import at.procon.dip.search.engine.fulltext.PostgresFullTextSearchEngine;
|
||||
import at.procon.dip.search.engine.trigram.PostgresTrigramSearchEngine;
|
||||
import at.procon.dip.search.plan.DefaultSearchPlanner;
|
||||
import at.procon.dip.search.rank.DefaultSearchResultFusionService;
|
||||
import at.procon.dip.search.rank.DefaultSearchScoreNormalizer;
|
||||
import at.procon.dip.search.repository.DocumentFullTextSearchRepositoryImpl;
|
||||
import at.procon.dip.search.repository.DocumentTrigramSearchRepositoryImpl;
|
||||
import at.procon.dip.search.service.DefaultSearchOrchestrator;
|
||||
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||
import at.procon.dip.search.service.SearchMetricsService;
|
||||
import at.procon.dip.search.web.GenericSearchController;
|
||||
import at.procon.ted.config.TedProcessorProperties;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.springframework.boot.SpringBootConfiguration;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.domain.EntityScan;
|
||||
import org.springframework.boot.autoconfigure.http.HttpMessageConvertersAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jackson.JacksonAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.transaction.TransactionAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.web.servlet.WebMvcAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc;
|
||||
import org.springframework.context.annotation.Import;
|
||||
import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
|
||||
|
||||
/**
|
||||
* Narrow test application for generic lexical search slices.
|
||||
*
|
||||
* Important: this class does not component-scan the whole application. Every
|
||||
* test support bean that should exist in the test context must therefore be
|
||||
* imported explicitly.
|
||||
*/
|
||||
@SpringBootConfiguration
|
||||
@AutoConfigureMockMvc
|
||||
@ImportAutoConfiguration({
|
||||
DataSourceAutoConfiguration.class,
|
||||
HibernateJpaAutoConfiguration.class,
|
||||
TransactionAutoConfiguration.class,
|
||||
JdbcTemplateAutoConfiguration.class
|
||||
})
|
||||
@EnableConfigurationProperties(TedProcessorProperties.class)
|
||||
@EntityScan(basePackages = {
|
||||
"at.procon.dip.domain.document.entity",
|
||||
"at.procon.dip.domain.tenant.entity"
|
||||
})
|
||||
@EnableJpaRepositories(basePackages = {
|
||||
"at.procon.dip.domain.document.repository",
|
||||
"at.procon.dip.domain.tenant.repository"
|
||||
})
|
||||
@Import({
|
||||
DocumentService.class,
|
||||
DocumentContentService.class,
|
||||
DocumentRepresentationService.class,
|
||||
DocumentLexicalIndexService.class,
|
||||
SearchTestDataFactory.class,
|
||||
DefaultSearchPlanner.class,
|
||||
DocumentFullTextSearchRepositoryImpl.class,
|
||||
DocumentTrigramSearchRepositoryImpl.class,
|
||||
PostgresFullTextSearchEngine.class,
|
||||
PostgresTrigramSearchEngine.class,
|
||||
DefaultSearchScoreNormalizer.class,
|
||||
DefaultSearchResultFusionService.class,
|
||||
SearchMetricsService.class,
|
||||
DefaultSearchOrchestrator.class,
|
||||
GenericSearchController.class,
|
||||
DocumentLexicalIndexService.class
|
||||
})
|
||||
public class SearchTestApplication {
|
||||
}
|
||||
@ -0,0 +1,133 @@
|
||||
package at.procon.dip.testsupport;
|
||||
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.domain.document.entity.DocumentTextRepresentation;
|
||||
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||
import at.procon.dip.domain.document.service.DocumentRepresentationService;
|
||||
import at.procon.dip.domain.document.service.DocumentService;
|
||||
import at.procon.dip.domain.document.service.command.AddDocumentTextRepresentationCommand;
|
||||
import at.procon.dip.domain.document.service.command.CreateDocumentCommand;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
@Transactional
|
||||
public class SearchTestDataFactory {
|
||||
|
||||
private final DocumentService documentService;
|
||||
private final DocumentRepresentationService representationService;
|
||||
private final DocumentLexicalIndexService lexicalIndexService;
|
||||
|
||||
public CreatedDocument createDocumentWithPrimaryRepresentation(
|
||||
String title,
|
||||
String summary,
|
||||
String body,
|
||||
DocumentType documentType,
|
||||
DocumentFamily documentFamily,
|
||||
String languageCode,
|
||||
RepresentationType primaryType) {
|
||||
|
||||
Document document = documentService.create(new CreateDocumentCommand(
|
||||
null,
|
||||
DocumentVisibility.PUBLIC,
|
||||
documentType,
|
||||
documentFamily,
|
||||
DocumentStatus.RECEIVED,
|
||||
title,
|
||||
summary,
|
||||
languageCode,
|
||||
"text/plain",
|
||||
null,
|
||||
Integer.toHexString((title + body).hashCode())
|
||||
));
|
||||
|
||||
DocumentTextRepresentation primary = addRepresentation(document, primaryType, languageCode, true, null, null, null, body);
|
||||
return new CreatedDocument(document, primary, List.of(primary));
|
||||
}
|
||||
|
||||
public CreatedDocument createDocumentWithPrimaryAndChunks(
|
||||
String title,
|
||||
String summary,
|
||||
String primaryBody,
|
||||
String languageCode,
|
||||
List<String> chunkBodies) {
|
||||
|
||||
Document document = documentService.create(new CreateDocumentCommand(
|
||||
null,
|
||||
DocumentVisibility.PUBLIC,
|
||||
DocumentType.TEXT,
|
||||
DocumentFamily.GENERIC,
|
||||
DocumentStatus.RECEIVED,
|
||||
title,
|
||||
summary,
|
||||
languageCode,
|
||||
"text/plain",
|
||||
null,
|
||||
Integer.toHexString((title + primaryBody + chunkBodies).hashCode())
|
||||
));
|
||||
|
||||
List<DocumentTextRepresentation> all = new ArrayList<>();
|
||||
DocumentTextRepresentation primary = addRepresentation(document, RepresentationType.SEMANTIC_TEXT, languageCode, true, null, null, null, primaryBody);
|
||||
all.add(primary);
|
||||
|
||||
int offset = 0;
|
||||
for (int i = 0; i < chunkBodies.size(); i++) {
|
||||
String chunk = chunkBodies.get(i);
|
||||
DocumentTextRepresentation saved = addRepresentation(
|
||||
document,
|
||||
RepresentationType.CHUNK,
|
||||
languageCode,
|
||||
false,
|
||||
i,
|
||||
offset,
|
||||
offset + chunk.length(),
|
||||
chunk);
|
||||
all.add(saved);
|
||||
offset += chunk.length();
|
||||
}
|
||||
|
||||
return new CreatedDocument(document, primary, all);
|
||||
}
|
||||
|
||||
private DocumentTextRepresentation addRepresentation(
|
||||
Document document,
|
||||
RepresentationType type,
|
||||
String languageCode,
|
||||
boolean primary,
|
||||
Integer chunkIndex,
|
||||
Integer chunkStartOffset,
|
||||
Integer chunkEndOffset,
|
||||
String text) {
|
||||
DocumentTextRepresentation representation = representationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
||||
document.getId(),
|
||||
null,
|
||||
type,
|
||||
"search-test-factory",
|
||||
languageCode,
|
||||
null,
|
||||
chunkIndex,
|
||||
chunkStartOffset,
|
||||
chunkEndOffset,
|
||||
primary,
|
||||
text
|
||||
));
|
||||
lexicalIndexService.refreshRepresentationLexicalIndex(representation.getId());
|
||||
return representation;
|
||||
}
|
||||
|
||||
public record CreatedDocument(
|
||||
Document document,
|
||||
DocumentTextRepresentation primaryRepresentation,
|
||||
List<DocumentTextRepresentation> representations
|
||||
) {
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,20 @@
|
||||
package at.procon.dip.testsupport.config;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.SerializationFeature;
|
||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||
import org.springframework.boot.autoconfigure.jackson.Jackson2ObjectMapperBuilderCustomizer;
|
||||
import org.springframework.boot.test.context.TestConfiguration;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
|
||||
@TestConfiguration
|
||||
public class SearchTestConfig {
|
||||
|
||||
@Bean
|
||||
public ObjectMapper objectMapper() {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
mapper.registerModule(new JavaTimeModule());
|
||||
mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
|
||||
return mapper;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue