Compare commits
6 Commits
f9df7c8d22
...
d1d81fd478
| Author | SHA1 | Date |
|---|---|---|
|
|
d1d81fd478 | |
|
|
9da416dbe4 | |
|
|
142b0a5809 | |
|
|
253845e9ea | |
|
|
430885b5af | |
|
|
5c3133d19d |
|
|
@ -0,0 +1,120 @@
|
|||
# TED Procurement Processor - Git Ignore
|
||||
# Author: Martin.Schweitzer@procon.co.at and claude.ai
|
||||
|
||||
# Compiled class files
|
||||
*.class
|
||||
|
||||
# Maven
|
||||
target/
|
||||
pom.xml.tag
|
||||
pom.xml.releaseBackup
|
||||
pom.xml.versionsBackup
|
||||
pom.xml.next
|
||||
release.properties
|
||||
dependency-reduced-pom.xml
|
||||
buildNumber.properties
|
||||
.mvn/timing.properties
|
||||
.mvn/wrapper/maven-wrapper.jar
|
||||
|
||||
# Gradle
|
||||
.gradle
|
||||
build/
|
||||
|
||||
# IDE - IntelliJ IDEA
|
||||
.idea/
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
|
||||
# IDE - Eclipse
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
bin/
|
||||
|
||||
# IDE - NetBeans
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
|
||||
# IDE - VS Code
|
||||
.vscode/
|
||||
|
||||
# OS Files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
logs/
|
||||
|
||||
# Application
|
||||
application-local.yml
|
||||
application-dev.yml
|
||||
application-prod.yml
|
||||
|
||||
# Docker
|
||||
.docker/
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
env/
|
||||
venv/
|
||||
.venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
.eggs/
|
||||
*.egg-info/
|
||||
dist/
|
||||
*.egg
|
||||
|
||||
# Model cache
|
||||
models/
|
||||
.cache/
|
||||
|
||||
# Test data
|
||||
test-data/
|
||||
*.xml.bak
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
*.temp
|
||||
*.swp
|
||||
*~
|
||||
|
||||
# Secrets
|
||||
*.pem
|
||||
*.key
|
||||
secrets/
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
|
||||
# Processed files (Camel)
|
||||
.processed/
|
||||
.error/
|
||||
|
||||
*.bak
|
||||
.claude
|
||||
|
|
@ -16,8 +16,8 @@ import org.springframework.scheduling.annotation.EnableAsync;
|
|||
*/
|
||||
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
|
||||
@EnableAsync
|
||||
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", /*"at.procon.dip.domain.time.entity",*/ "at.procon.dip.clustering.entity"})
|
||||
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", /*"at.procon.dip.domain.time.repository",*/ "at.procon.dip.clustering.repository"})
|
||||
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", "at.procon.dip.domain.time.entity",/**/ "at.procon.dip.clustering.entity"})
|
||||
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", "at.procon.dip.domain.time.repository",/**/ "at.procon.dip.clustering.repository"})
|
||||
public class DocumentIntelligencePlatformApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ public class TimeDomainProperties {
|
|||
private String selectiveMaterializationPersonDbk;
|
||||
private Integer selectiveMaterializationPersonNumber;
|
||||
private boolean selectiveMaterializationBuildProjection = true;
|
||||
private int materializationChunkSize = 200;
|
||||
private String representationLanguageCode = "de";
|
||||
private String scopeKey = "leitstand-default";
|
||||
private JdbcProperties jdbc = new JdbcProperties();
|
||||
|
|
|
|||
|
|
@ -80,10 +80,10 @@ public class TimeEntrySearchProjection {
|
|||
@Column(name = "time_recording_mcl_id", length = 255)
|
||||
private String timeRecordingMclId;
|
||||
|
||||
@Column(name = "time_recording_desc", length = 255)
|
||||
@Column(name = "time_recording_desc", columnDefinition = "TEXT")
|
||||
private String timeRecordingDesc;
|
||||
|
||||
@Column(name = "time_recording_remark", length = 255)
|
||||
@Column(name = "time_recording_remark", columnDefinition = "TEXT")
|
||||
private String timeRecordingRemark;
|
||||
|
||||
@Column(name = "time_recording_url", length = 1000)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
package at.procon.dip.domain.time.repository.leitstand;
|
||||
|
||||
import at.procon.dip.domain.time.entity.leitstand.LeitstandTimeRecordingAssignment;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
|
||||
/**
 * Spring Data JPA repository for {@link LeitstandTimeRecordingAssignment} rows, keyed by a
 * {@code String} identifier. Both finders are declared without an explicit query, so Spring Data
 * derives the query from the method name.
 */
public interface LeitstandTimeRecordingAssignmentRepository extends JpaRepository<LeitstandTimeRecordingAssignment, String> {
|
||||
|
||||
/**
 * Finds the assignments of a single time recording.
 *
 * @param timeRecordingDbk identifier of the time recording whose assignments are wanted
 * @return matching assignments, ordered by {@code dbk} ascending
 */
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkOrderByDbkAsc(String timeRecordingDbk);
|
||||
|
||||
/**
 * Batch variant of {@link #findByTimeRecordingDbkOrderByDbkAsc(String)}: finds the assignments of
 * several time recordings in one call.
 *
 * @param timeRecordingDbks identifiers of the time recordings whose assignments are wanted
 * @return matching assignments, ordered by {@code timeRecordingDbk} ascending, then {@code dbk} ascending
 */
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkInOrderByTimeRecordingDbkAscDbkAsc(Collection<String> timeRecordingDbks);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ public interface LeitstandTimeRecordingRepository extends JpaRepository<Leitstan
|
|||
|
||||
Optional<LeitstandTimeRecording> findByTimeEntry_Id(UUID timeEntryId);
|
||||
|
||||
List<LeitstandTimeRecording> findAllByOrderByRecordedFromAscDbkAsc();
|
||||
|
||||
List<LeitstandTimeRecording> findByTimeEntryIsNotNull();
|
||||
|
||||
List<LeitstandTimeRecording> findByPersonDbkOrderByRecordedFromAscDbkAsc(String personDbk);
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ import org.springframework.transaction.annotation.Propagation;
|
|||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Service
|
||||
//@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||
@ConditionalOnProperty(prefix = "dip.time.leitstand", name = "enabled", havingValue = "true")
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
|
|
@ -144,14 +144,26 @@ public class LeitstandTimeImportService {
|
|||
log.info("No Leitstand time recordings found for personDbk={}", personDbk);
|
||||
return 0;
|
||||
}
|
||||
//upsertCanonicalTimeEntriesForImportedRecordings(recordings);
|
||||
upsertCanonicalTimeEntriesForImportedRecordings(recordings);
|
||||
if (rebuildProjection && properties.getLeitstand().isBuildSearchProjection()) {
|
||||
projectionService.refreshForPersonDbk(personDbk);
|
||||
}
|
||||
return recordings.size();
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public int materializeCanonicalTimeEntriesForAll(boolean rebuildProjection) {
|
||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findAllByOrderByRecordedFromAscDbkAsc();
|
||||
if (recordings.isEmpty()) {
|
||||
log.info("No Leitstand time recordings found for full materialization");
|
||||
return 0;
|
||||
}
|
||||
upsertCanonicalTimeEntriesForImportedRecordings(recordings);
|
||||
if (rebuildProjection && properties.getLeitstand().isBuildSearchProjection()) {
|
||||
projectionService.refreshAll();
|
||||
}
|
||||
return recordings.size();
|
||||
}
|
||||
|
||||
public int materializeCanonicalTimeEntriesForPersonNumber(Integer personNumber, boolean rebuildProjection) {
|
||||
if (personNumber == null) {
|
||||
throw new IllegalArgumentException("personNumber must not be null");
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import lombok.RequiredArgsConstructor;
|
|||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Propagation;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Service
|
||||
|
|
@ -44,126 +45,159 @@ public class LeitstandTimeProjectionService {
|
|||
private final TimeEntrySearchProjectionRepository projectionRepository;
|
||||
private final TimeEntryRepresentationMaterializationService representationMaterializationService;
|
||||
|
||||
@Transactional
|
||||
public void refreshForLeitstandRecordingDbks(Collection<String> recordingDbks) {
|
||||
if (recordingDbks == null || recordingDbks.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findAllById(recordingDbks).stream()
|
||||
.filter(recording -> recording.getTimeEntry() != null)
|
||||
.sorted(Comparator.comparing(LeitstandTimeRecording::getRecordedFrom, Comparator.nullsLast(Comparator.naturalOrder()))
|
||||
.thenComparing(LeitstandTimeRecording::getDbk))
|
||||
.toList();
|
||||
if (recordings.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
upsertProjections(recordings);
|
||||
refreshChunked(recordings);
|
||||
}
|
||||
|
||||
|
||||
@Transactional
|
||||
public int refreshForPersonDbk(String personDbk) {
|
||||
if (personDbk == null || personDbk.isBlank()) {
|
||||
return 0;
|
||||
}
|
||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository
|
||||
.findByPersonDbkAndTimeEntryIsNotNullOrderByRecordedFromAscDbkAsc(personDbk);
|
||||
upsertProjections(recordings);
|
||||
refreshChunked(recordings);
|
||||
return recordings.size();
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public int refreshAll() {
|
||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findByTimeEntryIsNotNull();
|
||||
upsertProjections(recordings);
|
||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findByTimeEntryIsNotNull().stream()
|
||||
.sorted(Comparator.comparing(LeitstandTimeRecording::getRecordedFrom, Comparator.nullsLast(Comparator.naturalOrder()))
|
||||
.thenComparing(LeitstandTimeRecording::getDbk))
|
||||
.toList();
|
||||
refreshChunked(recordings);
|
||||
return recordings.size();
|
||||
}
|
||||
|
||||
private void upsertProjections(List<LeitstandTimeRecording> recordings) {
|
||||
for (LeitstandTimeRecording recording : recordings) {
|
||||
TimeEntrySearchProjection projection = buildProjection(recording);
|
||||
TimeEntrySearchProjection saved = projectionRepository.save(projection);
|
||||
if (properties.getLeitstand().isBuildRepresentations()) {
|
||||
representationMaterializationService.upsertRepresentations(saved);
|
||||
}
|
||||
private void refreshChunked(List<LeitstandTimeRecording> recordings) {
|
||||
if (recordings == null || recordings.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
int chunkSize = Math.max(1, properties.getLeitstand().getMaterializationChunkSize());
|
||||
for (int start = 0; start < recordings.size(); start += chunkSize) {
|
||||
List<LeitstandTimeRecording> chunk = recordings.subList(start, Math.min(start + chunkSize, recordings.size()));
|
||||
refreshChunk(chunk);
|
||||
}
|
||||
}
|
||||
|
||||
private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording) {
|
||||
TimeEntry timeEntry = timeEntryRepository.findById(recording.getTimeEntry().getId())
|
||||
.orElseThrow(() -> new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId()));
|
||||
Document document = timeEntry.getDocument();
|
||||
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||
protected void refreshChunk(List<LeitstandTimeRecording> recordings) {
|
||||
if (recordings == null || recordings.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
ProjectionBuildContext ctx = preloadContext(recordings);
|
||||
List<TimeEntrySearchProjection> projections = new ArrayList<>(recordings.size());
|
||||
for (LeitstandTimeRecording recording : recordings) {
|
||||
projections.add(buildProjection(recording, ctx));
|
||||
}
|
||||
List<TimeEntrySearchProjection> saved = projectionRepository.saveAll(projections);
|
||||
projectionRepository.flush();
|
||||
if (properties.getLeitstand().isBuildRepresentations()) {
|
||||
representationMaterializationService.upsertRepresentations(saved);
|
||||
}
|
||||
}
|
||||
|
||||
LeitstandPerson person = recording.getPersonDbk() == null ? null : personRepository.findById(recording.getPersonDbk()).orElse(null);
|
||||
LeitstandActivityType activityType = recording.getActivityTypeId() == null ? null : activityTypeRepository.findById(recording.getActivityTypeId()).orElse(null);
|
||||
private ProjectionBuildContext preloadContext(List<LeitstandTimeRecording> recordings) {
|
||||
List<String> recordingDbks = recordings.stream().map(LeitstandTimeRecording::getDbk).toList();
|
||||
List<LeitstandTimeRecordingAssignment> assignments = timeRecordingAssignmentRepository
|
||||
.findByTimeRecordingDbkInOrderByTimeRecordingDbkAscDbkAsc(recordingDbks);
|
||||
Map<String, List<LeitstandTimeRecordingAssignment>> assignmentsByRecordingDbk = assignments.stream()
|
||||
.collect(Collectors.groupingBy(LeitstandTimeRecordingAssignment::getTimeRecordingDbk, LinkedHashMap::new, Collectors.toList()));
|
||||
|
||||
List<LeitstandTimeRecordingAssignment> assignments = timeRecordingAssignmentRepository.findByTimeRecordingDbkOrderByDbkAsc(recording.getDbk());
|
||||
List<LeitstandPersonTaskAssignment> personTaskAssignments = personTaskAssignmentRepository.findAllById(assignments.stream()
|
||||
List<String> personTaskAssignmentIds = assignments.stream()
|
||||
.map(LeitstandTimeRecordingAssignment::getPersonTaskAssignmentDbk)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList());
|
||||
Map<String, LeitstandPersonTaskAssignment> ptaByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk);
|
||||
.toList();
|
||||
List<LeitstandPersonTaskAssignment> personTaskAssignments = personTaskAssignmentRepository.findAllById(personTaskAssignmentIds);
|
||||
Map<String, LeitstandPersonTaskAssignment> personTaskAssignmentsByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk);
|
||||
|
||||
Map<String, LeitstandTask> tasksByDbk = indexBy(taskRepository.findAllById(personTaskAssignments.stream()
|
||||
.map(LeitstandPersonTaskAssignment::getTaskDbk)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList()), LeitstandTask::getDbk);
|
||||
List<String> taskIds = personTaskAssignments.stream().map(LeitstandPersonTaskAssignment::getTaskDbk).filter(Objects::nonNull).distinct().toList();
|
||||
Map<String, LeitstandTask> tasksByDbk = indexBy(taskRepository.findAllById(taskIds), LeitstandTask::getDbk);
|
||||
|
||||
Map<String, LeitstandCostUnit> costUnitsByDbk = indexBy(costUnitRepository.findAllById(personTaskAssignments.stream()
|
||||
.map(LeitstandPersonTaskAssignment::getCostUnitDbk)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList()), LeitstandCostUnit::getDbk);
|
||||
List<String> costUnitIds = personTaskAssignments.stream().map(LeitstandPersonTaskAssignment::getCostUnitDbk).filter(Objects::nonNull).distinct().toList();
|
||||
Map<String, LeitstandCostUnit> costUnitsByDbk = indexBy(costUnitRepository.findAllById(costUnitIds), LeitstandCostUnit::getDbk);
|
||||
|
||||
Map<String, LeitstandContract> contractsByDbk = indexBy(contractRepository.findAllById(costUnitsByDbk.values().stream()
|
||||
.map(LeitstandCostUnit::getContractDbk)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList()), LeitstandContract::getDbk);
|
||||
List<String> contractIds = costUnitsByDbk.values().stream().map(LeitstandCostUnit::getContractDbk).filter(Objects::nonNull).distinct().toList();
|
||||
Map<String, LeitstandContract> contractsByDbk = indexBy(contractRepository.findAllById(contractIds), LeitstandContract::getDbk);
|
||||
|
||||
Map<String, LeitstandContractPosition> contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(costUnitsByDbk.values().stream()
|
||||
.map(LeitstandCostUnit::getContractPositionDbk)
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList()), LeitstandContractPosition::getDbk);
|
||||
List<String> contractPositionIds = costUnitsByDbk.values().stream().map(LeitstandCostUnit::getContractPositionDbk).filter(Objects::nonNull).distinct().toList();
|
||||
Map<String, LeitstandContractPosition> contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(contractPositionIds), LeitstandContractPosition::getDbk);
|
||||
|
||||
Set<String> organizationDbks = new LinkedHashSet<>();
|
||||
costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add);
|
||||
contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add);
|
||||
if (person != null && person.getOrganizationDbk() != null) {
|
||||
organizationDbks.add(person.getOrganizationDbk());
|
||||
Set<String> organizationIds = new LinkedHashSet<>();
|
||||
costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
|
||||
contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
|
||||
recordings.stream().map(LeitstandTimeRecording::getPersonDbk).filter(Objects::nonNull).forEach(id -> {});
|
||||
List<String> personIds = recordings.stream().map(LeitstandTimeRecording::getPersonDbk).filter(Objects::nonNull).distinct().toList();
|
||||
Map<String, LeitstandPerson> personsByDbk = indexBy(personRepository.findAllById(personIds), LeitstandPerson::getDbk);
|
||||
personsByDbk.values().stream().map(LeitstandPerson::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
|
||||
Map<String, LeitstandOrganization> organizationsByDbk = indexBy(organizationRepository.findAllById(organizationIds), LeitstandOrganization::getDbk);
|
||||
|
||||
List<Integer> activityTypeIds = recordings.stream().map(LeitstandTimeRecording::getActivityTypeId).filter(Objects::nonNull).distinct().toList();
|
||||
Map<Integer, LeitstandActivityType> activityTypesById = indexBy(activityTypeRepository.findAllById(activityTypeIds), LeitstandActivityType::getId);
|
||||
|
||||
List<UUID> timeEntryIds = recordings.stream().map(LeitstandTimeRecording::getTimeEntry).filter(Objects::nonNull).map(TimeEntry::getId).filter(Objects::nonNull).distinct().toList();
|
||||
Map<UUID, TimeEntry> timeEntriesById = timeEntryRepository.findAllById(timeEntryIds).stream().collect(Collectors.toMap(TimeEntry::getId, Function.identity()));
|
||||
Map<UUID, TimeEntrySearchProjection> existingProjectionsByTimeEntryId = projectionRepository.findByTimeEntry_IdIn(timeEntryIds).stream().collect(Collectors.toMap(p -> p.getTimeEntry().getId(), Function.identity()));
|
||||
|
||||
return new ProjectionBuildContext(assignmentsByRecordingDbk, personTaskAssignmentsByDbk, tasksByDbk, costUnitsByDbk,
|
||||
contractsByDbk, contractPositionsByDbk, organizationsByDbk, personsByDbk, activityTypesById,
|
||||
timeEntriesById, existingProjectionsByTimeEntryId);
|
||||
}
|
||||
|
||||
private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording, ProjectionBuildContext ctx) {
|
||||
TimeEntry timeEntry = ctx.timeEntriesById.get(recording.getTimeEntry().getId());
|
||||
if (timeEntry == null) {
|
||||
throw new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId());
|
||||
}
|
||||
Map<String, LeitstandOrganization> organizationsByDbk = indexBy(organizationRepository.findAllById(organizationDbks), LeitstandOrganization::getDbk);
|
||||
Document document = timeEntry.getDocument();
|
||||
|
||||
LeitstandPerson person = recording.getPersonDbk() == null ? null : ctx.personsByDbk.get(recording.getPersonDbk());
|
||||
LeitstandActivityType activityType = recording.getActivityTypeId() == null ? null : ctx.activityTypesById.get(recording.getActivityTypeId());
|
||||
|
||||
List<LeitstandTimeRecordingAssignment> assignments = ctx.assignmentsByRecordingDbk.getOrDefault(recording.getDbk(), List.of());
|
||||
List<LeitstandPersonTaskAssignment> personTaskAssignments = assignments.stream()
|
||||
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList();
|
||||
|
||||
List<LeitstandTask> orderedTasks = assignments.stream()
|
||||
.map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.map(pta -> tasksByDbk.get(pta.getTaskDbk()))
|
||||
.map(pta -> ctx.tasksByDbk.get(pta.getTaskDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList();
|
||||
List<LeitstandCostUnit> orderedCostUnits = assignments.stream()
|
||||
.map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.map(pta -> costUnitsByDbk.get(pta.getCostUnitDbk()))
|
||||
.map(pta -> ctx.costUnitsByDbk.get(pta.getCostUnitDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList();
|
||||
List<LeitstandContract> orderedContracts = orderedCostUnits.stream()
|
||||
.map(cu -> contractsByDbk.get(cu.getContractDbk()))
|
||||
.map(cu -> ctx.contractsByDbk.get(cu.getContractDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList();
|
||||
List<LeitstandContractPosition> orderedContractPositions = orderedCostUnits.stream()
|
||||
.map(cu -> contractPositionsByDbk.get(cu.getContractPositionDbk()))
|
||||
.map(cu -> ctx.contractPositionsByDbk.get(cu.getContractPositionDbk()))
|
||||
.filter(Objects::nonNull)
|
||||
.distinct()
|
||||
.toList();
|
||||
List<LeitstandOrganization> orderedOrganizations = new ArrayList<>();
|
||||
orderedCostUnits.stream().map(cu -> organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
||||
orderedContracts.stream().map(c -> organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
||||
orderedCostUnits.stream().map(cu -> ctx.organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
||||
orderedContracts.stream().map(c -> ctx.organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
||||
if (person != null && person.getOrganizationDbk() != null) {
|
||||
LeitstandOrganization personOrg = organizationsByDbk.get(person.getOrganizationDbk());
|
||||
LeitstandOrganization personOrg = ctx.organizationsByDbk.get(person.getOrganizationDbk());
|
||||
if (personOrg != null && !orderedOrganizations.contains(personOrg)) orderedOrganizations.add(personOrg);
|
||||
}
|
||||
|
||||
|
|
@ -176,8 +210,7 @@ public class LeitstandTimeProjectionService {
|
|||
String summary = buildSummary(recording, primaryTask, primaryCostUnit, primaryOrganization, person);
|
||||
String semanticText = buildSemanticText(timeEntry, recording, person, activityType, orderedTasks, orderedCostUnits, orderedContracts, orderedContractPositions, orderedOrganizations);
|
||||
|
||||
TimeEntrySearchProjection projection = projectionRepository.findByTimeEntry_Id(timeEntry.getId())
|
||||
.orElseGet(() -> TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build());
|
||||
TimeEntrySearchProjection projection = ctx.existingProjectionsByTimeEntryId.getOrDefault(timeEntry.getId(), TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build());
|
||||
projection.setDocument(document);
|
||||
projection.setTimeEntry(timeEntry);
|
||||
projection.setSourceSystem(TimeSourceSystem.LEITSTAND);
|
||||
|
|
@ -229,6 +262,19 @@ public class LeitstandTimeProjectionService {
|
|||
return projection;
|
||||
}
|
||||
|
||||
/**
 * Lookup tables for one chunk of recordings, assembled by {@code preloadContext} and read by
 * {@code buildProjection(recording, ctx)} so that related rows are resolved from memory.
 *
 * @param assignmentsByRecordingDbk        time-recording assignments grouped by their time-recording dbk
 * @param personTaskAssignmentsByDbk       person-task assignments referenced by those assignments, by dbk
 * @param tasksByDbk                       tasks referenced by the person-task assignments, by dbk
 * @param costUnitsByDbk                   cost units referenced by the person-task assignments, by dbk
 * @param contractsByDbk                   contracts referenced by the cost units, by dbk
 * @param contractPositionsByDbk           contract positions referenced by the cost units, by dbk
 * @param organizationsByDbk               organizations referenced by cost units, contracts and persons, by dbk
 * @param personsByDbk                     persons referenced by the chunk's recordings, by dbk
 * @param activityTypesById                activity types referenced by the chunk's recordings, by id
 * @param timeEntriesById                  TIME entries linked to the chunk's recordings, by id
 * @param existingProjectionsByTimeEntryId already persisted projections, keyed by their TIME entry id
 */
private record ProjectionBuildContext(
|
||||
Map<String, List<LeitstandTimeRecordingAssignment>> assignmentsByRecordingDbk,
|
||||
Map<String, LeitstandPersonTaskAssignment> personTaskAssignmentsByDbk,
|
||||
Map<String, LeitstandTask> tasksByDbk,
|
||||
Map<String, LeitstandCostUnit> costUnitsByDbk,
|
||||
Map<String, LeitstandContract> contractsByDbk,
|
||||
Map<String, LeitstandContractPosition> contractPositionsByDbk,
|
||||
Map<String, LeitstandOrganization> organizationsByDbk,
|
||||
Map<String, LeitstandPerson> personsByDbk,
|
||||
Map<Integer, LeitstandActivityType> activityTypesById,
|
||||
Map<UUID, TimeEntry> timeEntriesById,
|
||||
Map<UUID, TimeEntrySearchProjection> existingProjectionsByTimeEntryId) {
|
||||
}
|
||||
private String buildSummary(LeitstandTimeRecording recording,
|
||||
LeitstandTask primaryTask,
|
||||
LeitstandCostUnit primaryCostUnit,
|
||||
|
|
@ -283,7 +329,7 @@ public class LeitstandTimeProjectionService {
|
|||
return sb.toString().trim();
|
||||
}
|
||||
|
||||
private <T> Map<String, T> indexBy(Collection<T> rows, Function<T, String> id) {
|
||||
private <K, T> Map<K, T> indexBy(Collection<T> rows, Function<T, K> id) {
|
||||
return rows.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toMap(id, Function.identity(), (a, b) -> a, LinkedHashMap::new));
|
||||
|
|
|
|||
|
|
@ -13,10 +13,16 @@ import at.procon.dip.embedding.config.EmbeddingProperties;
|
|||
import at.procon.dip.embedding.registry.EmbeddingModelRegistry;
|
||||
import at.procon.dip.embedding.service.RepresentationEmbeddingOrchestrator;
|
||||
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Propagation;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Service
|
||||
|
|
@ -35,69 +41,141 @@ public class TimeEntryRepresentationMaterializationService {
|
|||
private final EmbeddingProperties embeddingProperties;
|
||||
private final EmbeddingModelRegistry modelRegistry;
|
||||
|
||||
//@Transactional
|
||||
public void upsertRepresentations(TimeEntrySearchProjection projection) {
|
||||
if (projection.getSemanticText() == null || projection.getSemanticText().isBlank()) {
|
||||
log.debug("Skipping TIME representation for document {} because semantic text is blank", projection.getDocument().getId());
|
||||
if (projection == null) {
|
||||
return;
|
||||
}
|
||||
upsertRepresentations(List.of(projection));
|
||||
}
|
||||
|
||||
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||
public void upsertRepresentations(List<TimeEntrySearchProjection> projections) {
|
||||
if (projections == null || projections.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Document document = projection.getDocument();
|
||||
document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
|
||||
document.setSummary(projection.getSummaryText());
|
||||
document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
|
||||
if (document.getMimeType() == null || document.getMimeType().isBlank()) {
|
||||
document.setMimeType("application/x-time-entry");
|
||||
List<TimeEntrySearchProjection> eligible = projections.stream()
|
||||
.filter(projection -> documentId(projection) != null)
|
||||
.filter(projection -> projection.getSemanticText() != null && !projection.getSemanticText().isBlank())
|
||||
.toList();
|
||||
if (eligible.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
document = documentRepository.save(document);
|
||||
|
||||
Optional<DocumentTextRepresentation> existing = representationRepository
|
||||
.findByDocument_IdAndRepresentationType(document.getId(), RepresentationType.SEMANTIC_TEXT)
|
||||
.stream()
|
||||
.filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation())
|
||||
.findFirst();
|
||||
List<UUID> documentIds = eligible.stream()
|
||||
.map(this::documentId)
|
||||
.distinct()
|
||||
.toList();
|
||||
Map<UUID, Document> documentsById = documentRepository.findAllById(documentIds).stream()
|
||||
.collect(java.util.stream.Collectors.toMap(Document::getId, java.util.function.Function.identity(), (a, b) -> a, LinkedHashMap::new));
|
||||
List<Document> documentsToSave = new ArrayList<>();
|
||||
for (TimeEntrySearchProjection projection : eligible) {
|
||||
UUID documentId = documentId(projection);
|
||||
Document document = documentsById.get(documentId);
|
||||
if (document == null || documentsToSave.contains(document)) {
|
||||
continue;
|
||||
}
|
||||
document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
|
||||
document.setSummary(projection.getSummaryText());
|
||||
document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
|
||||
if (document.getMimeType() == null || document.getMimeType().isBlank()) {
|
||||
document.setMimeType("application/x-time-entry");
|
||||
}
|
||||
documentsToSave.add(document);
|
||||
}
|
||||
if (!documentsToSave.isEmpty()) {
|
||||
documentRepository.saveAll(documentsToSave);
|
||||
documentRepository.flush();
|
||||
}
|
||||
|
||||
boolean changed = existing.isEmpty()
|
||||
|| !projection.getSemanticText().equals(existing.get().getTextBody())
|
||||
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|
||||
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
|
||||
List<DocumentTextRepresentation> changedExisting = new ArrayList<>();
|
||||
List<TimeEntrySearchProjection> newRepresentationProjections = new ArrayList<>();
|
||||
List<UUID> changedRepresentationIds = new ArrayList<>();
|
||||
List<DocumentTextRepresentation> newlyCreatedRepresentations = new ArrayList<>();
|
||||
|
||||
Document finalDocument = document;
|
||||
DocumentTextRepresentation semantic = existing
|
||||
.map(found -> changed ? updateRepresentation(found, projection) : found)
|
||||
.orElseGet(() -> documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
||||
finalDocument.getId(),
|
||||
null,
|
||||
RepresentationType.SEMANTIC_TEXT,
|
||||
BUILDER_KEY,
|
||||
projection.getLanguageCode(),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
true,
|
||||
projection.getSemanticText(),
|
||||
false
|
||||
)));
|
||||
for (TimeEntrySearchProjection projection : eligible) {
|
||||
Document document = documentsById.get(documentId(projection));
|
||||
if (document == null) {
|
||||
continue;
|
||||
}
|
||||
Optional<DocumentTextRepresentation> existing = representationRepository
|
||||
.findByDocument_IdAndRepresentationType(document.getId(), RepresentationType.SEMANTIC_TEXT)
|
||||
.stream()
|
||||
.filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation())
|
||||
.findFirst();
|
||||
|
||||
if (changed
|
||||
&& embeddingProperties.isEnabled()
|
||||
boolean changed = existing.isEmpty()
|
||||
|| !projection.getSemanticText().equals(existing.get().getTextBody())
|
||||
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|
||||
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
|
||||
|
||||
if (!changed) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (existing.isPresent()) {
|
||||
DocumentTextRepresentation found = existing.get();
|
||||
found.setBuilderKey(BUILDER_KEY);
|
||||
found.setLanguageCode(projection.getLanguageCode());
|
||||
found.setPrimaryRepresentation(true);
|
||||
found.setTextBody(projection.getSemanticText());
|
||||
found.setCharCount(projection.getSemanticText().length());
|
||||
changedExisting.add(found);
|
||||
} else {
|
||||
newRepresentationProjections.add(projection);
|
||||
}
|
||||
}
|
||||
|
||||
if (!changedExisting.isEmpty()) {
|
||||
representationRepository.saveAll(changedExisting);
|
||||
representationRepository.flush();
|
||||
changedExisting.stream().map(DocumentTextRepresentation::getId).forEach(changedRepresentationIds::add);
|
||||
}
|
||||
|
||||
for (TimeEntrySearchProjection projection : newRepresentationProjections) {
|
||||
Document document = documentsById.get(documentId(projection));
|
||||
if (document == null) {
|
||||
continue;
|
||||
}
|
||||
DocumentTextRepresentation created = documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
||||
document.getId(),
|
||||
null,
|
||||
RepresentationType.SEMANTIC_TEXT,
|
||||
BUILDER_KEY,
|
||||
projection.getLanguageCode(),
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
true,
|
||||
projection.getSemanticText(),
|
||||
false
|
||||
));
|
||||
newlyCreatedRepresentations.add(created);
|
||||
changedRepresentationIds.add(created.getId());
|
||||
}
|
||||
|
||||
for (UUID representationId : changedRepresentationIds) {
|
||||
lexicalIndexService.indexRepresentation(representationId);
|
||||
}
|
||||
|
||||
if (embeddingProperties.isEnabled()
|
||||
&& timeDomainProperties.getLeitstand().isQueueEmbeddings()
|
||||
&& embeddingProperties.getDefaultDocumentModel() != null && !embeddingProperties.getDefaultDocumentModel().isBlank()) {
|
||||
&& embeddingProperties.getDefaultDocumentModel() != null
|
||||
&& !embeddingProperties.getDefaultDocumentModel().isBlank()) {
|
||||
String modelKey = modelRegistry.getRequiredDefaultDocumentModelKey();
|
||||
embeddingOrchestrator.enqueueRepresentation(document.getId(), semantic.getId(), modelKey);
|
||||
for (DocumentTextRepresentation representation : changedExisting) {
|
||||
embeddingOrchestrator.enqueueRepresentation(representation.getDocument().getId(), representation.getId(), modelKey);
|
||||
}
|
||||
for (DocumentTextRepresentation representation : newlyCreatedRepresentations) {
|
||||
embeddingOrchestrator.enqueueRepresentation(representation.getDocument().getId(), representation.getId(), modelKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private DocumentTextRepresentation updateRepresentation(DocumentTextRepresentation existing, TimeEntrySearchProjection projection) {
|
||||
existing.setBuilderKey(BUILDER_KEY);
|
||||
existing.setLanguageCode(projection.getLanguageCode());
|
||||
existing.setPrimaryRepresentation(true);
|
||||
existing.setTextBody(projection.getSemanticText());
|
||||
existing.setCharCount(projection.getSemanticText().length());
|
||||
DocumentTextRepresentation saved = representationRepository.saveAndFlush(existing);
|
||||
lexicalIndexService.indexRepresentation(saved.getId());
|
||||
return saved;
|
||||
/**
 * Null-safe lookup of the id of the document attached to a projection.
 *
 * @param projection the search projection to inspect; may be {@code null}
 * @return the id of the projection's document, or {@code null} when either the
 *         projection or its document is {@code null}
 */
private UUID documentId(TimeEntrySearchProjection projection) {
|
||||
Document document = projection == null ? null : projection.getDocument();
|
||||
return document == null ? null : document.getId();
|
||||
}
|
||||
|
||||
private boolean equalsNullable(String left, String right) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ public class LeitstandTimeSelectiveMaterializationStartupRunner implements Appli
|
|||
log.info("Completed selective Leitstand TIME materialization for personNumber={}. Processed {} recordings", cfg.getSelectiveMaterializationPersonNumber(), count);
|
||||
return;
|
||||
}
|
||||
throw new IllegalStateException("dip.time.leitstand.startup-selective-materialization-enabled=true requires either selective-materialization-person-dbk or selective-materialization-person-number");
|
||||
log.info("Starting Leitstand TIME materialization for all imported recordings (rebuildProjection={})", rebuildProjection);
|
||||
int count = importService.materializeCanonicalTimeEntriesForAll(rebuildProjection);
|
||||
log.info("Completed Leitstand TIME materialization for all imported recordings. Processed {} recordings", count);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
|
||||
@Override
|
||||
public List<SearchHit> search(SearchExecutionContext context, int limit) {
|
||||
String effectiveConfigExpr = effectiveConfigExpression("dtr");
|
||||
String tsQueryExpr = tsQueryExpression(effectiveConfigExpr);
|
||||
|
||||
StringBuilder sql = new StringBuilder("""
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
CAST(dtr.representation_type AS text) AS representation_type,
|
||||
dtr.is_primary AS is_primary,
|
||||
dtr.chunk_index AS chunk_index,
|
||||
dtr.chunk_start_offset AS chunk_start_offset,
|
||||
dtr.chunk_end_offset AS chunk_end_offset,
|
||||
CAST(d.document_type AS text) AS document_type,
|
||||
CAST(d.document_family AS text) AS document_family,
|
||||
CAST(d.visibility AS text) AS visibility,
|
||||
|
|
@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
d.created_at AS created_at,
|
||||
d.updated_at AS updated_at,
|
||||
ts_headline(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
"""
|
||||
).append(effectiveConfigExpr).append("""
|
||||
,
|
||||
COALESCE(dtr.text_body, ''),
|
||||
websearch_to_tsquery(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
:queryText
|
||||
),
|
||||
""").append(tsQueryExpr).append("""
|
||||
,
|
||||
'MaxFragments=2, MinWords=5, MaxWords=20'
|
||||
) AS snippet,
|
||||
ts_rank_cd(
|
||||
dtr.search_vector,
|
||||
websearch_to_tsquery(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
:queryText
|
||||
)
|
||||
) AS score
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE dtr.search_vector IS NOT NULL
|
||||
AND dtr.search_vector @@ websearch_to_tsquery(
|
||||
CASE
|
||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE dtr.search_config::regconfig
|
||||
END,
|
||||
:queryText
|
||||
)
|
||||
ranked.score AS score
|
||||
FROM (
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
ts_rank_cd(
|
||||
dtr.search_vector,
|
||||
""").append(tsQueryExpr).append("""
|
||||
) AS score,
|
||||
d.updated_at AS updated_at
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE dtr.search_vector IS NOT NULL
|
||||
AND dtr.search_vector @@ """).append(tsQueryExpr).append("""
|
||||
""");
|
||||
|
||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||
|
|
@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||
|
||||
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
||||
sql.append("""
|
||||
ORDER BY score DESC, d.updated_at DESC
|
||||
LIMIT :limit
|
||||
) ranked
|
||||
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
|
||||
JOIN doc.doc_document d ON d.id = ranked.document_id
|
||||
ORDER BY ranked.score DESC, d.updated_at DESC
|
||||
""");
|
||||
params.addValue("limit", limit);
|
||||
|
||||
return jdbcTemplate.query(
|
||||
|
|
@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
|||
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the SQL {@code CASE} expression that selects the text-search configuration
 * for a representation row: {@code 'simple'::regconfig} when the row's
 * {@code search_config} is NULL or the empty string, otherwise the row's own
 * {@code search_config} cast to {@code regconfig}.
 *
 * <p>The alias is substituted into the SQL text via {@code formatted(...)}, not bound
 * as a parameter, so it must be a trusted identifier chosen by the calling code.
 *
 * @param representationAlias table alias of the text-representation row in the enclosing query
 * @return the expression with leading and trailing whitespace trimmed
 */
private static String effectiveConfigExpression(String representationAlias) {
|
||||
return """
|
||||
CASE
|
||||
WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||
ELSE %s.search_config::regconfig
|
||||
END
|
||||
""".formatted(representationAlias, representationAlias).trim();
|
||||
}
|
||||
|
||||
/**
 * Builds the SQL {@code websearch_to_tsquery(<config>, :queryText)} expression.
 *
 * <p>The configuration expression is substituted into the SQL text via
 * {@code formatted(...)}; only the query text stays a named parameter
 * ({@code :queryText}) that the caller has to bind.
 *
 * @param configExpression SQL expression yielding the text-search configuration
 * @return the expression with leading and trailing whitespace trimmed
 */
private static String tsQueryExpression(String configExpression) {
|
||||
return """
|
||||
websearch_to_tsquery(
|
||||
%s,
|
||||
:queryText
|
||||
)
|
||||
""".formatted(configExpression).trim();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,42 +18,112 @@ public class DocumentTrigramSearchRepositoryImpl implements DocumentTrigramSearc
|
|||
|
||||
@Override
|
||||
public List<SearchHit> search(SearchExecutionContext context, int limit, double threshold) {
|
||||
String scoreExpr = "GREATEST(" +
|
||||
"doc.similarity(COALESCE(d.title, ''), :queryText), " +
|
||||
"doc.similarity(COALESCE(d.summary, ''), :queryText), " +
|
||||
"doc.similarity(COALESCE(dtr.text_body, ''), :queryText))";
|
||||
|
||||
StringBuilder sql = new StringBuilder("SELECT " +
|
||||
"d.id AS document_id, " +
|
||||
"dtr.id AS representation_id, " +
|
||||
"CAST(d.document_type AS text) AS document_type, " +
|
||||
"CAST(d.document_family AS text) AS document_family, " +
|
||||
"CAST(d.visibility AS text) AS visibility, " +
|
||||
"d.title AS title, " +
|
||||
"d.summary AS summary, " +
|
||||
"COALESCE(dtr.language_code, d.language_code) AS language_code, " +
|
||||
"d.mime_type AS mime_type, " +
|
||||
"d.created_at AS created_at, " +
|
||||
"d.updated_at AS updated_at, " +
|
||||
"LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet, " +
|
||||
scoreExpr + " AS score, " +
|
||||
"CASE " +
|
||||
"WHEN doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(d.summary, ''), :queryText) " +
|
||||
" AND doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_TITLE' " +
|
||||
"WHEN doc.similarity(COALESCE(d.summary, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_SUMMARY' " +
|
||||
"ELSE 'REPRESENTATION_TEXT' END AS matched_field " +
|
||||
"FROM doc.doc_text_representation dtr " +
|
||||
"JOIN doc.doc_document d ON d.id = dtr.document_id " +
|
||||
"LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id " +
|
||||
"WHERE " + scoreExpr + " >= :threshold");
|
||||
StringBuilder sql = new StringBuilder("""
|
||||
WITH title_candidates AS (
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
'DOCUMENT_TITLE' AS matched_field,
|
||||
public.similarity(d.title, :queryText) AS score,
|
||||
d.updated_at AS updated_at
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE d.title IS NOT NULL
|
||||
AND d.title OPERATOR(public.%) :queryText
|
||||
""");
|
||||
|
||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||
params.addValue("queryText", context.getRequest().getQueryText());
|
||||
params.addValue("threshold", threshold);
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
||||
params.addValue("branchLimit", limit);
|
||||
params.addValue("limit", limit);
|
||||
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||
sql.append("""
|
||||
ORDER BY score DESC, d.updated_at DESC
|
||||
LIMIT :branchLimit
|
||||
),
|
||||
summary_candidates AS (
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
'DOCUMENT_SUMMARY' AS matched_field,
|
||||
public.similarity(d.summary, :queryText) AS score,
|
||||
d.updated_at AS updated_at
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE d.summary IS NOT NULL
|
||||
AND d.summary OPERATOR(public.%) :queryText
|
||||
""");
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||
sql.append("""
|
||||
ORDER BY score DESC, d.updated_at DESC
|
||||
LIMIT :branchLimit
|
||||
),
|
||||
text_candidates AS (
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
'REPRESENTATION_TEXT' AS matched_field,
|
||||
public.similarity(dtr.text_body, :queryText) AS score,
|
||||
d.updated_at AS updated_at
|
||||
FROM doc.doc_text_representation dtr
|
||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||
WHERE dtr.text_body IS NOT NULL
|
||||
AND dtr.text_body OPERATOR(public.%) :queryText
|
||||
""");
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||
sql.append("""
|
||||
ORDER BY score DESC, d.updated_at DESC
|
||||
LIMIT :branchLimit
|
||||
),
|
||||
ranked AS (
|
||||
SELECT DISTINCT ON (representation_id)
|
||||
document_id,
|
||||
representation_id,
|
||||
matched_field,
|
||||
score,
|
||||
updated_at
|
||||
FROM (
|
||||
SELECT * FROM title_candidates
|
||||
UNION ALL
|
||||
SELECT * FROM summary_candidates
|
||||
UNION ALL
|
||||
SELECT * FROM text_candidates
|
||||
) all_candidates
|
||||
WHERE score >= :threshold
|
||||
ORDER BY representation_id, score DESC, updated_at DESC
|
||||
)
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
dtr.id AS representation_id,
|
||||
CAST(dtr.representation_type AS text) AS representation_type,
|
||||
dtr.is_primary AS is_primary,
|
||||
dtr.chunk_index AS chunk_index,
|
||||
dtr.chunk_start_offset AS chunk_start_offset,
|
||||
dtr.chunk_end_offset AS chunk_end_offset,
|
||||
CAST(d.document_type AS text) AS document_type,
|
||||
CAST(d.document_family AS text) AS document_family,
|
||||
CAST(d.visibility AS text) AS visibility,
|
||||
d.title AS title,
|
||||
d.summary AS summary,
|
||||
COALESCE(dtr.language_code, d.language_code) AS language_code,
|
||||
d.mime_type AS mime_type,
|
||||
d.created_at AS created_at,
|
||||
d.updated_at AS updated_at,
|
||||
LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet,
|
||||
ranked.score AS score,
|
||||
ranked.matched_field AS matched_field
|
||||
FROM ranked
|
||||
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
|
||||
JOIN doc.doc_document d ON d.id = ranked.document_id
|
||||
ORDER BY ranked.score DESC, d.updated_at DESC
|
||||
LIMIT :limit
|
||||
""");
|
||||
|
||||
return jdbcTemplate.query(sql.toString(), params,
|
||||
new SearchHitRowMapper(SearchEngineType.POSTGRES_TRIGRAM, SearchMatchField.REPRESENTATION_TEXT));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,20 +34,17 @@ final class SearchSqlFilterSupport {
|
|||
boolean tenantJoinPresent) {
|
||||
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
|
||||
if (!CollectionUtils.isEmpty(documentTypes)) {
|
||||
sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)");
|
||||
params.addValue("documentTypes", enumNames(documentTypes));
|
||||
appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes");
|
||||
}
|
||||
|
||||
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
|
||||
if (!CollectionUtils.isEmpty(documentFamilies)) {
|
||||
sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)");
|
||||
params.addValue("documentFamilies", enumNames(documentFamilies));
|
||||
appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies");
|
||||
}
|
||||
|
||||
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
|
||||
if (!CollectionUtils.isEmpty(visibilities)) {
|
||||
sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)");
|
||||
params.addValue("visibilities", enumNames(visibilities));
|
||||
appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities");
|
||||
}
|
||||
|
||||
Set<String> languageCodes = context.getRequest().getLanguageCodes();
|
||||
|
|
@ -62,8 +59,7 @@ final class SearchSqlFilterSupport {
|
|||
|
||||
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
|
||||
if (!CollectionUtils.isEmpty(representationTypes)) {
|
||||
sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)");
|
||||
params.addValue("representationTypes", enumNames(representationTypes));
|
||||
appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes");
|
||||
} else {
|
||||
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
|
||||
if (selectionMode == null) {
|
||||
|
|
@ -242,6 +238,15 @@ final class SearchSqlFilterSupport {
|
|||
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
|
||||
}
|
||||
|
||||
/**
 * Appends an {@code AND CAST(<column> AS text) IN (:<parameterName>)} predicate to the
 * query and binds the named parameter to the names of the given enum constants.
 *
 * <p>{@code columnExpression} and {@code parameterName} are concatenated into the SQL
 * text, so both must come from trusted code. This method does not check for an empty
 * collection itself.
 *
 * @param sql              query text being built; the predicate is appended to it
 * @param params           parameter source that receives the bound list of enum names
 * @param columnExpression column (including alias) to compare, e.g. {@code d.document_type}
 * @param values           enum constants to match
 * @param parameterName    name of the bind parameter, without the leading colon
 */
private static void appendTextEnumFilter(StringBuilder sql,
|
||||
MapSqlParameterSource params,
|
||||
String columnExpression,
|
||||
Collection<? extends Enum<?>> values,
|
||||
String parameterName) {
|
||||
sql.append(" AND CAST(").append(columnExpression).append(" AS text) IN (:").append(parameterName).append(")");
|
||||
params.addValue(parameterName, enumNames(values));
|
||||
}
|
||||
|
||||
/**
 * Converts enum constants to their declared names ({@link Enum#name()}).
 *
 * @param values enum constants to convert
 * @return a list holding one name per element of {@code values}
 */
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
||||
return values.stream().map(Enum::name).collect(Collectors.toList());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
package at.procon.ted.startup;
|
||||
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
|
@ -19,6 +22,7 @@ import org.springframework.stereotype.Component;
|
|||
@Component
|
||||
@Order(1) // Run before other startup runners
|
||||
@RequiredArgsConstructor
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
|
||||
@Slf4j
|
||||
public class OrganizationSchemaFixRunner implements ApplicationRunner {
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ dip:
|
|||
max-chunks-per-document: 12
|
||||
# Startup backfill limit for missing lexical vectors
|
||||
startup-lexical-backfill-limit: 500
|
||||
scheduled-lexical-backfill-enabled: true
|
||||
scheduled-lexical-backfill-enabled: false
|
||||
scheduled-lexical-backfill-delay-ms: 30000
|
||||
scheduled-lexical-backfill-batch-size: 200
|
||||
# Number of top hits per engine returned by /search/debug
|
||||
|
|
@ -333,8 +333,8 @@ dip:
|
|||
leitstand:
|
||||
enabled: false
|
||||
startup-sync-enabled: false
|
||||
startup-selective-materialization-enabled: true
|
||||
selective-materialization-person-dbk: 100920031023144811001000
|
||||
startup-selective-materialization-enabled: false
|
||||
selective-materialization-person-dbk: #100920031023144811001000
|
||||
selective-materialization-person-number:
|
||||
selective-materialization-build-projection: true
|
||||
create-canonical-time-entries: true
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ spring:
|
|||
order_updates: true
|
||||
|
||||
flyway:
|
||||
enabled: true
|
||||
enabled: false
|
||||
locations: classpath:db/migration
|
||||
baseline-on-migrate: true
|
||||
create-schemas: true
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
-- Slice 1 + Slice 2 generic search support for DOC documents.
|
||||
-- Adds lexical-search support columns/indexes and pg_trgm extension.
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
|
||||
|
||||
ALTER TABLE DOC.doc_text_representation
|
||||
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
|
||||
|
|
@ -15,12 +15,12 @@ CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
|
|||
|
||||
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
|
||||
ON DOC.doc_document
|
||||
USING GIN (title DOC.gin_trgm_ops);
|
||||
USING GIN (title public.gin_trgm_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
|
||||
ON DOC.doc_document
|
||||
USING GIN (summary DOC.gin_trgm_ops);
|
||||
USING GIN (summary public.gin_trgm_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
|
||||
ON DOC.doc_text_representation
|
||||
USING GIN (text_body DOC.gin_trgm_ops);
|
||||
USING GIN (text_body public.gin_trgm_ops);
|
||||
|
|
|
|||
|
|
@ -5,6 +5,9 @@ ALTER TABLE DOC.doc_embedding
|
|||
ADD CONSTRAINT ck_doc_embedding_dimensions_positive
|
||||
CHECK (embedding_dimensions IS NULL OR embedding_dimensions > 0);
|
||||
|
||||
ALTER TABLE DOC.doc_embedding
|
||||
ADD COLUMN IF NOT EXISTS embedding_vector public.vector;
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
-- This makes migration, audit, and repair flows package-aware without having to derive the
|
||||
-- package membership from source paths at query time.
|
||||
|
||||
SET search_path TO TED, DOC, public;
|
||||
SET search_path TO ted, doc, public;
|
||||
|
||||
ALTER TABLE IF EXISTS TED.ted_notice_projection
|
||||
ADD COLUMN IF NOT EXISTS package_identifier VARCHAR(20);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
-- Slice 1 generic lexical search support.
|
||||
-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
|
||||
|
||||
ALTER TABLE doc.doc_text_representation
|
||||
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
|
||||
|
|
@ -15,12 +15,12 @@ CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
|
|||
|
||||
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
|
||||
ON doc.doc_document
|
||||
USING GIN (title doc.gin_trgm_ops);
|
||||
USING GIN (title public.gin_trgm_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
|
||||
ON doc.doc_document
|
||||
USING GIN (summary doc.gin_trgm_ops);
|
||||
USING GIN (summary public.gin_trgm_ops);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
|
||||
ON doc.doc_text_representation
|
||||
USING GIN (text_body doc.gin_trgm_ops);
|
||||
USING GIN (text_body public.gin_trgm_ops);
|
||||
|
|
|
|||
|
|
@ -306,7 +306,7 @@ CREATE INDEX idx_doc_procedure_type ON procurement_document(procedure_type);
|
|||
CREATE INDEX idx_doc_cpv_codes ON procurement_document USING GIN(cpv_codes);
|
||||
|
||||
-- Full-text search on textual content
|
||||
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content gin_trgm_ops);
|
||||
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content public.gin_trgm_ops);
|
||||
|
||||
-- Vector similarity search using IVFFlat index (efficient for approximate nearest neighbor)
|
||||
-- Lists parameter: sqrt(number_of_vectors) for optimal performance
|
||||
|
|
|
|||
|
|
@ -44,34 +44,6 @@ BEGIN
|
|||
END
|
||||
$$;
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_enum e
|
||||
JOIN pg_type t ON t.oid = e.enumtypid
|
||||
JOIN pg_namespace n ON n.oid = t.typnamespace
|
||||
WHERE n.nspname = 'doc' AND t.typname = 'doc_document_type' AND e.enumlabel = 'TIME_ENTRY'
|
||||
) THEN
|
||||
ALTER TYPE doc.doc_document_type ADD VALUE 'TIME_ENTRY';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_enum e
|
||||
JOIN pg_type t ON t.oid = e.enumtypid
|
||||
JOIN pg_namespace n ON n.oid = t.typnamespace
|
||||
WHERE n.nspname = 'doc' AND t.typname = 'doc_document_family' AND e.enumlabel = 'TIME'
|
||||
) THEN
|
||||
ALTER TYPE doc.doc_document_family ADD VALUE 'TIME';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS "time".time_entry (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
document_id UUID NOT NULL UNIQUE REFERENCES doc.doc_document(id) ON DELETE CASCADE,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,7 @@
|
|||
-- Align TIME projection source text fields with real Leitstand payload lengths.
|
||||
|
||||
-- Change the recording description column to the unbounded TEXT type.
ALTER TABLE "time".time_entry_search_projection
|
||||
ALTER COLUMN time_recording_desc TYPE TEXT;
|
||||
|
||||
-- Change the recording remark column to the unbounded TEXT type.
ALTER TABLE "time".time_entry_search_projection
|
||||
ALTER COLUMN time_recording_remark TYPE TEXT;
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
-- Repair DOC document enum/check alignment for TIME documents on databases
|
||||
-- that still carry the pre-TIME family/type constraints.
|
||||
|
||||
-- Step 1: if the enum type doc.doc_document_type exists, add the labels
-- TED_PACKAGE, TED_NOTICE_LOT and TIME_ENTRY. ADD VALUE IF NOT EXISTS makes each
-- addition a no-op when the label is already present; when the type does not
-- exist, nothing is changed.
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_type t
|
||||
JOIN pg_namespace n ON n.oid = t.typnamespace
|
||||
WHERE n.nspname = 'doc'
|
||||
AND t.typname = 'doc_document_type'
|
||||
) THEN
|
||||
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_PACKAGE';
|
||||
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_NOTICE_LOT';
|
||||
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TIME_ENTRY';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
-- Step 2: if the enum type doc.doc_document_family exists, add the label TIME.
-- ADD VALUE IF NOT EXISTS makes this a no-op when the label is already present;
-- when the type does not exist, nothing is changed.
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1
|
||||
FROM pg_type t
|
||||
JOIN pg_namespace n ON n.oid = t.typnamespace
|
||||
WHERE n.nspname = 'doc'
|
||||
AND t.typname = 'doc_document_family'
|
||||
) THEN
|
||||
ALTER TYPE DOC.doc_document_family ADD VALUE IF NOT EXISTS 'TIME';
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
||||
-- Step 3: if the table doc.doc_document exists, drop and re-create the CHECK
-- constraints on document_type and document_family so that their allowed-value
-- lists include TED_PACKAGE, TED_NOTICE_LOT, TIME_ENTRY and TIME respectively.
-- NOTE(review): where these columns are enum-typed, the lists below reference
-- labels that the earlier DO blocks of this script may have just added. PostgreSQL
-- restricts using an enum value added inside a still-open transaction block, so
-- confirm how this script is executed (single transaction vs. per-statement commit)
-- on such installations.
DO $$
|
||||
BEGIN
|
||||
IF EXISTS (
|
||||
SELECT 1
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = 'doc'
|
||||
AND table_name = 'doc_document'
|
||||
) THEN
|
||||
ALTER TABLE DOC.doc_document DROP CONSTRAINT IF EXISTS doc_document_document_type_check;
|
||||
ALTER TABLE DOC.doc_document
|
||||
ADD CONSTRAINT doc_document_document_type_check
|
||||
CHECK (
|
||||
document_type IN (
|
||||
'TED_PACKAGE',
|
||||
'TED_NOTICE',
|
||||
'TED_NOTICE_LOT',
|
||||
'TIME_ENTRY',
|
||||
'EMAIL',
|
||||
'MIME_MESSAGE',
|
||||
'PDF',
|
||||
'DOCX',
|
||||
'HTML',
|
||||
'XML_GENERIC',
|
||||
'TEXT',
|
||||
'MARKDOWN',
|
||||
'ZIP_ARCHIVE',
|
||||
'GENERIC_BINARY',
|
||||
'UNKNOWN'
|
||||
)
|
||||
);
|
||||
|
||||
ALTER TABLE DOC.doc_document DROP CONSTRAINT IF EXISTS doc_document_document_family_check;
|
||||
ALTER TABLE DOC.doc_document
|
||||
ADD CONSTRAINT doc_document_document_family_check
|
||||
CHECK (
|
||||
document_family IN (
|
||||
'PROCUREMENT',
|
||||
'TIME',
|
||||
'MAIL',
|
||||
'ATTACHMENT',
|
||||
'KNOWLEDGE',
|
||||
'GENERIC'
|
||||
)
|
||||
);
|
||||
END IF;
|
||||
END
|
||||
$$;
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
-- Search performance support indexes for filtered DOC fulltext lookups.
|
||||
|
||||
-- Documents by (document_type, document_family), then updated_at descending, then id.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated
|
||||
ON DOC.doc_document(document_type, document_family, updated_at DESC, id);
|
||||
|
||||
-- Same key as above with visibility inserted after document_family.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated
|
||||
ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id);
|
||||
|
||||
-- Text representations by (document_id, is_primary, representation_type).
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type
|
||||
ON DOC.doc_text_representation(document_id, is_primary, representation_type);
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
-- Support cast-to-text search filters on installations where DOC type columns are varchar.
|
||||
-- These indexes align with the query shape used by generic search filters.
|
||||
|
||||
-- Expression index on CAST(document_type AS text), CAST(document_family AS text),
-- then updated_at descending and id.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated
|
||||
ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id);
|
||||
|
||||
-- Same key as above with CAST(visibility AS text) inserted after the family expression.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated
|
||||
ON DOC.doc_document (
|
||||
(CAST(document_type AS text)),
|
||||
(CAST(document_family AS text)),
|
||||
(CAST(visibility AS text)),
|
||||
updated_at DESC,
|
||||
id
|
||||
);
|
||||
|
||||
-- Text representations by is_primary, CAST(representation_type AS text), document_id.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document
|
||||
ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id);
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
package at.procon.dip.migration;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import org.flywaydb.core.Flyway;
|
||||
import org.flywaydb.core.api.MigrationVersion;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.testcontainers.containers.PostgreSQLContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
/**
 * Migration test that runs the Flyway scripts under
 * {@code src/main/resources/db/migration} against a PostgreSQL Testcontainers
 * instance whose {@code doc} schema was hand-built in a "legacy" shape: enum types
 * and CHECK constraints on {@code doc.doc_document} that do not list the
 * {@code TIME_ENTRY} type or the {@code TIME} family.
 */
@Testcontainers
|
||||
class DocDocumentTimeEnumConstraintRepairMigrationTest {
|
||||
|
||||
/** PostgreSQL 16 container used by this test class. */
@Container
|
||||
static PostgreSQLContainer<?> postgres = new PostgreSQLContainer<>("postgres:16-alpine")
|
||||
.withDatabaseName("dip_migration_test")
|
||||
.withUsername("test")
|
||||
.withPassword("test");
|
||||
|
||||
/**
 * Creates the legacy schema, runs Flyway, then checks that a
 * {@code TIME_ENTRY}/{@code TIME} document can be inserted and that the
 * family CHECK constraint definition mentions {@code TIME}.
 */
@Test
|
||||
void repairMigrationExpandsLegacyDocDocumentChecksForTimeDocuments() throws SQLException {
|
||||
createLegacyDocDocumentState();
|
||||
|
||||
// Flyway is baselined at version 42 — presumably so that only later migrations run
// against the hand-built schema; confirm against the migration numbering.
Flyway.configure()
|
||||
.dataSource(postgres.getJdbcUrl(), postgres.getUsername(), postgres.getPassword())
|
||||
.locations("filesystem:src/main/resources/db/migration")
|
||||
.schemas("doc")
|
||||
.defaultSchema("doc")
|
||||
.baselineOnMigrate(true)
|
||||
.baselineVersion(MigrationVersion.fromVersion("42"))
|
||||
.load()
|
||||
.migrate();
|
||||
|
||||
// The legacy enum types and CHECK constraints created above do not list these
// values, so this insert exercises the post-migration state.
try (Connection connection = openConnection();
|
||||
Statement statement = connection.createStatement()) {
|
||||
statement.executeUpdate("""
|
||||
INSERT INTO doc.doc_document (id, document_type, document_family)
|
||||
VALUES ('709e388b-19d9-4c21-8d06-82b295b33505', 'TIME_ENTRY', 'TIME')
|
||||
""");
|
||||
}
|
||||
|
||||
// Read back the definition of the family CHECK constraint and look for TIME in it.
try (Connection connection = openConnection();
|
||||
var preparedStatement = connection.prepareStatement("""
|
||||
SELECT pg_get_constraintdef(oid)
|
||||
FROM pg_constraint
|
||||
WHERE conrelid = 'doc.doc_document'::regclass
|
||||
AND conname = ?
|
||||
""")) {
|
||||
preparedStatement.setString(1, "doc_document_document_family_check");
|
||||
try (var resultSet = preparedStatement.executeQuery()) {
|
||||
assertThat(resultSet.next()).isTrue();
|
||||
assertThat(resultSet.getString(1)).contains("TIME");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Creates the {@code doc} schema, the two enum types and the
 * {@code doc.doc_document} table with CHECK constraints, all without the
 * {@code TED_PACKAGE}, {@code TED_NOTICE_LOT}, {@code TIME_ENTRY} and
 * {@code TIME} values.
 */
private void createLegacyDocDocumentState() throws SQLException {
|
||||
try (Connection connection = openConnection();
|
||||
Statement statement = connection.createStatement()) {
|
||||
statement.execute("CREATE SCHEMA doc");
|
||||
statement.execute("""
|
||||
CREATE TYPE doc.doc_document_type AS ENUM (
|
||||
'TED_NOTICE',
|
||||
'EMAIL',
|
||||
'MIME_MESSAGE',
|
||||
'PDF',
|
||||
'DOCX',
|
||||
'HTML',
|
||||
'XML_GENERIC',
|
||||
'TEXT',
|
||||
'MARKDOWN',
|
||||
'ZIP_ARCHIVE',
|
||||
'GENERIC_BINARY',
|
||||
'UNKNOWN'
|
||||
)
|
||||
""");
|
||||
statement.execute("""
|
||||
CREATE TYPE doc.doc_document_family AS ENUM (
|
||||
'PROCUREMENT',
|
||||
'MAIL',
|
||||
'ATTACHMENT',
|
||||
'KNOWLEDGE',
|
||||
'GENERIC'
|
||||
)
|
||||
""");
|
||||
statement.execute("""
|
||||
CREATE TABLE doc.doc_document (
|
||||
id UUID PRIMARY KEY,
|
||||
document_type doc.doc_document_type NOT NULL,
|
||||
document_family doc.doc_document_family NOT NULL,
|
||||
CONSTRAINT doc_document_document_type_check
|
||||
CHECK (
|
||||
document_type IN (
|
||||
'TED_NOTICE',
|
||||
'EMAIL',
|
||||
'MIME_MESSAGE',
|
||||
'PDF',
|
||||
'DOCX',
|
||||
'HTML',
|
||||
'XML_GENERIC',
|
||||
'TEXT',
|
||||
'MARKDOWN',
|
||||
'ZIP_ARCHIVE',
|
||||
'GENERIC_BINARY',
|
||||
'UNKNOWN'
|
||||
)
|
||||
),
|
||||
CONSTRAINT doc_document_document_family_check
|
||||
CHECK (
|
||||
document_family IN (
|
||||
'PROCUREMENT',
|
||||
'MAIL',
|
||||
'ATTACHMENT',
|
||||
'KNOWLEDGE',
|
||||
'GENERIC'
|
||||
)
|
||||
)
|
||||
)
|
||||
""");
|
||||
}
|
||||
}
|
||||
|
||||
/** Opens a new JDBC connection to the test container; the caller closes it. */
private Connection openConnection() throws SQLException {
|
||||
return DriverManager.getConnection(
|
||||
postgres.getJdbcUrl(),
|
||||
postgres.getUsername(),
|
||||
postgres.getPassword()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -2,10 +2,13 @@ package at.procon.dip.search.repository;
|
|||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import at.procon.dip.search.api.SearchExecutionContext;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
||||
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||
import at.procon.dip.search.dto.SearchRequest;
|
||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
|
@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest {
|
|||
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
||||
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
||||
}
|
||||
|
||||
/**
 * Pins the SQL shape produced by {@code SearchSqlFilterSupport.appendCommonFilters}
 * for a request filtered by document type and family with
 * {@code PRIMARY_AND_CHUNKS} representation selection: the type and family filters
 * must render as {@code CAST(... AS text) IN (:param)} with the enum names bound as
 * a list, and the rendered SQL must contain a
 * {@code CAST(dtr.representation_type AS text) = 'CHUNK'} comparison.
 *
 * <p>NOTE(review): the method name speaks of "typed enum comparisons" while the
 * assertions pin the cast-to-text form — consider renaming to match.
 */
@Test
|
||||
void shouldUseTypedEnumComparisonsForIndexedFilters() {
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("vertrieb")
|
||||
.documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY))
|
||||
.documentFamilies(java.util.Set.of(DocumentFamily.TIME))
|
||||
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
|
||||
.build();
|
||||
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||
.request(request)
|
||||
.scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null))
|
||||
.page(0)
|
||||
.size(20)
|
||||
.build();
|
||||
|
||||
// Base query already ends in "WHERE 1=1" so the appended " AND ..." filters are valid SQL;
// the last argument (tenantJoinPresent) is false because no tenant table is joined here.
StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
|
||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false);
|
||||
|
||||
String rendered = sql.toString();
|
||||
assertThat(rendered).contains("CAST(d.document_type AS text) IN (:documentTypes)");
|
||||
assertThat(rendered).contains("CAST(d.document_family AS text) IN (:documentFamilies)");
|
||||
assertThat(rendered).contains("CAST(dtr.representation_type AS text) = 'CHUNK'");
|
||||
assertThat(params.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY"));
|
||||
assertThat(params.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,13 +87,13 @@ public abstract class AbstractSearchIntegrationTest {
|
|||
|
||||
protected void ensureSearchColumnsAndIndexes() {
|
||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
|
||||
}
|
||||
|
||||
protected void cleanupDatabase() {
|
||||
|
|
|
|||
|
|
@ -121,14 +121,14 @@ public abstract class AbstractSemanticSearchIntegrationTest {
|
|||
}
|
||||
|
||||
protected void ensureSearchColumnsAndIndexes() {
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA doc");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_embedding ADD COLUMN IF NOT EXISTS embedding_vector public.vector");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -82,13 +82,13 @@ public abstract class AbstractTedStructuredSearchIntegrationTest {
|
|||
protected void ensureSearchColumnsAndIndexes() {
|
||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS ted");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
|
||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
|
||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
|
||||
}
|
||||
|
||||
protected void cleanupDatabase() {
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
-- Bootstrap script: creates the application schemas and the trigram extension.
CREATE SCHEMA IF NOT EXISTS DOC;
CREATE SCHEMA IF NOT EXISTS TED;
-- pg_trgm is requested once, in public. A second CREATE EXTENSION IF NOT EXISTS for a
-- different schema would be skipped and leave the extension wherever it was first installed.
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
|
||||
|
|
|
|||
Loading…
Reference in New Issue