Compare commits

...

6 Commits

Author SHA1 Message Date
trifonovt d1d81fd478 Tighten startup behavior defaults 2026-05-18 16:44:14 +02:00
trifonovt 9da416dbe4 Align search migrations and test schemas 2026-05-18 16:44:14 +02:00
trifonovt 142b0a5809 Repair TIME projection and DOC enum migrations 2026-05-18 16:44:14 +02:00
trifonovt 253845e9ea Optimize Leitstand TIME materialization workflow 2026-05-18 16:44:14 +02:00
trifonovt 430885b5af Optimize trigram search candidate selection 2026-05-18 16:44:14 +02:00
trifonovt 5c3133d19d Improve generic search query performance 2026-05-18 16:44:09 +02:00
32 changed files with 867 additions and 250 deletions

120
.gitignore vendored Normal file
View File

@ -0,0 +1,120 @@
# TED Procurement Processor - Git Ignore
# Author: Martin.Schweitzer@procon.co.at and claude.ai
# Compiled class files
*.class
# Maven
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.mvn/wrapper/maven-wrapper.jar
# Gradle
.gradle
build/
# IDE - IntelliJ IDEA
.idea/
*.iws
*.iml
*.ipr
out/
# IDE - Eclipse
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
# IDE - NetBeans
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
# IDE - VS Code
.vscode/
# OS Files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
*.log
logs/
# Application
application-local.yml
application-dev.yml
application-prod.yml
# Docker
.docker/
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
.venv/
ENV/
env.bak/
venv.bak/
.eggs/
*.egg-info/
dist/
*.egg
# Model cache
models/
.cache/
# Test data
test-data/
*.xml.bak
# Temporary files
*.tmp
*.temp
*.swp
*~
# Secrets
*.pem
*.key
secrets/
.env
.env.local
.env.*.local
# Database
*.db
*.sqlite
*.sqlite3
# Processed files (Camel)
.processed/
.error/
*.bak
.claude

View File

@ -16,8 +16,8 @@ import org.springframework.scheduling.annotation.EnableAsync;
*/
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
@EnableAsync
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", /*"at.procon.dip.domain.time.entity",*/ "at.procon.dip.clustering.entity"})
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", /*"at.procon.dip.domain.time.repository",*/ "at.procon.dip.clustering.repository"})
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", "at.procon.dip.domain.time.entity",/**/ "at.procon.dip.clustering.entity"})
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", "at.procon.dip.domain.time.repository",/**/ "at.procon.dip.clustering.repository"})
public class DocumentIntelligencePlatformApplication {
public static void main(String[] args) {

View File

@ -33,6 +33,7 @@ public class TimeDomainProperties {
private String selectiveMaterializationPersonDbk;
private Integer selectiveMaterializationPersonNumber;
private boolean selectiveMaterializationBuildProjection = true;
private int materializationChunkSize = 200;
private String representationLanguageCode = "de";
private String scopeKey = "leitstand-default";
private JdbcProperties jdbc = new JdbcProperties();

View File

@ -80,10 +80,10 @@ public class TimeEntrySearchProjection {
@Column(name = "time_recording_mcl_id", length = 255)
private String timeRecordingMclId;
@Column(name = "time_recording_desc", length = 255)
@Column(name = "time_recording_desc", columnDefinition = "TEXT")
private String timeRecordingDesc;
@Column(name = "time_recording_remark", length = 255)
@Column(name = "time_recording_remark", columnDefinition = "TEXT")
private String timeRecordingRemark;
@Column(name = "time_recording_url", length = 1000)

View File

@ -1,10 +1,13 @@
package at.procon.dip.domain.time.repository.leitstand;
import at.procon.dip.domain.time.entity.leitstand.LeitstandTimeRecordingAssignment;
import java.util.Collection;
import java.util.List;
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Spring Data JPA repository for {@link LeitstandTimeRecordingAssignment} rows, keyed by a
 * {@code String} id.
 *
 * <p>Both finder methods carry no explicit query here; they rely on Spring Data deriving the
 * query from the method name.
 */
public interface LeitstandTimeRecordingAssignmentRepository extends JpaRepository<LeitstandTimeRecordingAssignment, String> {
/**
 * Looks up the assignments of a single time recording.
 *
 * @param timeRecordingDbk value matched against the {@code timeRecordingDbk} property
 * @return matching assignments, ordered by {@code dbk} ascending as encoded in the method name
 */
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkOrderByDbkAsc(String timeRecordingDbk);
/**
 * Batch variant of the single-recording lookup: fetches the assignments of several time
 * recordings in one call.
 *
 * @param timeRecordingDbks values matched against the {@code timeRecordingDbk} property
 * @return matching assignments, ordered by {@code timeRecordingDbk} and then {@code dbk},
 *         both ascending, as encoded in the method name
 */
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkInOrderByTimeRecordingDbkAscDbkAsc(Collection<String> timeRecordingDbks);
}

View File

@ -10,6 +10,8 @@ public interface LeitstandTimeRecordingRepository extends JpaRepository<Leitstan
Optional<LeitstandTimeRecording> findByTimeEntry_Id(UUID timeEntryId);
List<LeitstandTimeRecording> findAllByOrderByRecordedFromAscDbkAsc();
List<LeitstandTimeRecording> findByTimeEntryIsNotNull();
List<LeitstandTimeRecording> findByPersonDbkOrderByRecordedFromAscDbkAsc(String personDbk);

View File

@ -38,7 +38,7 @@ import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
@Service
//@ConditionalOnRuntimeMode(RuntimeMode.NEW)
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
@ConditionalOnProperty(prefix = "dip.time.leitstand", name = "enabled", havingValue = "true")
@RequiredArgsConstructor
@Slf4j
@ -144,14 +144,26 @@ public class LeitstandTimeImportService {
log.info("No Leitstand time recordings found for personDbk={}", personDbk);
return 0;
}
//upsertCanonicalTimeEntriesForImportedRecordings(recordings);
upsertCanonicalTimeEntriesForImportedRecordings(recordings);
if (rebuildProjection && properties.getLeitstand().isBuildSearchProjection()) {
projectionService.refreshForPersonDbk(personDbk);
}
return recordings.size();
}
/**
 * Materializes canonical TIME entries for every imported Leitstand time recording.
 *
 * <p>All recordings are fetched through
 * {@code findAllByOrderByRecordedFromAscDbkAsc()} and handed to
 * {@code upsertCanonicalTimeEntriesForImportedRecordings}. The search projection is
 * refreshed for all entries afterwards, but only when the caller asks for it and the
 * Leitstand configuration has search-projection building switched on.
 *
 * @param rebuildProjection whether the search projection should be refreshed after the upsert
 * @return the number of recordings that were processed; {@code 0} when none exist
 */
@Transactional
public int materializeCanonicalTimeEntriesForAll(boolean rebuildProjection) {
    final List<LeitstandTimeRecording> allRecordings = timeRecordingRepository.findAllByOrderByRecordedFromAscDbkAsc();
    final int recordingCount = allRecordings.size();
    if (recordingCount == 0) {
        log.info("No Leitstand time recordings found for full materialization");
        return 0;
    }

    upsertCanonicalTimeEntriesForImportedRecordings(allRecordings);

    // The configuration flag is only consulted when the caller requested a rebuild.
    final boolean refreshProjection = rebuildProjection && properties.getLeitstand().isBuildSearchProjection();
    if (refreshProjection) {
        projectionService.refreshAll();
    }
    return recordingCount;
}
public int materializeCanonicalTimeEntriesForPersonNumber(Integer personNumber, boolean rebuildProjection) {
if (personNumber == null) {
throw new IllegalArgumentException("personNumber must not be null");

View File

@ -20,6 +20,7 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
@Service
@ -44,126 +45,159 @@ public class LeitstandTimeProjectionService {
private final TimeEntrySearchProjectionRepository projectionRepository;
private final TimeEntryRepresentationMaterializationService representationMaterializationService;
@Transactional
public void refreshForLeitstandRecordingDbks(Collection<String> recordingDbks) {
if (recordingDbks == null || recordingDbks.isEmpty()) {
return;
}
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findAllById(recordingDbks).stream()
.filter(recording -> recording.getTimeEntry() != null)
.sorted(Comparator.comparing(LeitstandTimeRecording::getRecordedFrom, Comparator.nullsLast(Comparator.naturalOrder()))
.thenComparing(LeitstandTimeRecording::getDbk))
.toList();
if (recordings.isEmpty()) {
return;
}
upsertProjections(recordings);
refreshChunked(recordings);
}
@Transactional
public int refreshForPersonDbk(String personDbk) {
if (personDbk == null || personDbk.isBlank()) {
return 0;
}
List<LeitstandTimeRecording> recordings = timeRecordingRepository
.findByPersonDbkAndTimeEntryIsNotNullOrderByRecordedFromAscDbkAsc(personDbk);
upsertProjections(recordings);
refreshChunked(recordings);
return recordings.size();
}
@Transactional
public int refreshAll() {
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findByTimeEntryIsNotNull();
upsertProjections(recordings);
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findByTimeEntryIsNotNull().stream()
.sorted(Comparator.comparing(LeitstandTimeRecording::getRecordedFrom, Comparator.nullsLast(Comparator.naturalOrder()))
.thenComparing(LeitstandTimeRecording::getDbk))
.toList();
refreshChunked(recordings);
return recordings.size();
}
private void upsertProjections(List<LeitstandTimeRecording> recordings) {
for (LeitstandTimeRecording recording : recordings) {
TimeEntrySearchProjection projection = buildProjection(recording);
TimeEntrySearchProjection saved = projectionRepository.save(projection);
if (properties.getLeitstand().isBuildRepresentations()) {
representationMaterializationService.upsertRepresentations(saved);
}
/**
 * Splits the given recordings into consecutive slices and passes each slice to
 * {@code refreshChunk}.
 *
 * <p>The slice length comes from the configured materialization chunk size and is clamped
 * to at least one element. Slices are {@code subList} views of the input list, not copies.
 *
 * @param recordings recordings to refresh; {@code null} or empty input is a no-op
 */
private void refreshChunked(List<LeitstandTimeRecording> recordings) {
    final boolean nothingToDo = recordings == null || recordings.isEmpty();
    if (nothingToDo) {
        return;
    }

    final int sliceLength = Math.max(1, properties.getLeitstand().getMaterializationChunkSize());
    int from = 0;
    while (from < recordings.size()) {
        final int to = Math.min(from + sliceLength, recordings.size());
        final List<LeitstandTimeRecording> slice = recordings.subList(from, to);
        refreshChunk(slice);
        from += sliceLength;
    }
}
private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording) {
TimeEntry timeEntry = timeEntryRepository.findById(recording.getTimeEntry().getId())
.orElseThrow(() -> new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId()));
Document document = timeEntry.getDocument();
/**
 * Builds and persists the search projections for one chunk of recordings.
 *
 * <p>Lookup data for the whole chunk is loaded once via {@code preloadContext}; each
 * recording is then turned into a projection, the projections are saved in one batch and
 * flushed, and finally - if representation building is enabled in the Leitstand
 * configuration - the saved projections are passed to the representation materialization
 * service.
 *
 * <p>NOTE(review): this method is declared with {@code REQUIRES_NEW}, but it is invoked
 * directly from {@code refreshChunked} in the same class. With Spring's default proxy-based
 * transaction handling such a self-invocation does not pass through the transactional proxy,
 * so the chunk would presumably run inside the caller's transaction instead of its own.
 * Confirm the AOP mode in use, or route the call through a separate bean or a
 * {@code TransactionTemplate}.
 *
 * @param recordings the recordings of a single chunk; {@code null} or empty input is a no-op
 */
@Transactional(propagation = Propagation.REQUIRES_NEW)
protected void refreshChunk(List<LeitstandTimeRecording> recordings) {
if (recordings == null || recordings.isEmpty()) {
return;
}
// One preload per chunk replaces per-recording repository lookups while building.
ProjectionBuildContext ctx = preloadContext(recordings);
List<TimeEntrySearchProjection> projections = new ArrayList<>(recordings.size());
for (LeitstandTimeRecording recording : recordings) {
projections.add(buildProjection(recording, ctx));
}
List<TimeEntrySearchProjection> saved = projectionRepository.saveAll(projections);
// Flush before the representation step so the projection rows are written out first.
projectionRepository.flush();
if (properties.getLeitstand().isBuildRepresentations()) {
representationMaterializationService.upsertRepresentations(saved);
}
}
LeitstandPerson person = recording.getPersonDbk() == null ? null : personRepository.findById(recording.getPersonDbk()).orElse(null);
LeitstandActivityType activityType = recording.getActivityTypeId() == null ? null : activityTypeRepository.findById(recording.getActivityTypeId()).orElse(null);
private ProjectionBuildContext preloadContext(List<LeitstandTimeRecording> recordings) {
List<String> recordingDbks = recordings.stream().map(LeitstandTimeRecording::getDbk).toList();
List<LeitstandTimeRecordingAssignment> assignments = timeRecordingAssignmentRepository
.findByTimeRecordingDbkInOrderByTimeRecordingDbkAscDbkAsc(recordingDbks);
Map<String, List<LeitstandTimeRecordingAssignment>> assignmentsByRecordingDbk = assignments.stream()
.collect(Collectors.groupingBy(LeitstandTimeRecordingAssignment::getTimeRecordingDbk, LinkedHashMap::new, Collectors.toList()));
List<LeitstandTimeRecordingAssignment> assignments = timeRecordingAssignmentRepository.findByTimeRecordingDbkOrderByDbkAsc(recording.getDbk());
List<LeitstandPersonTaskAssignment> personTaskAssignments = personTaskAssignmentRepository.findAllById(assignments.stream()
List<String> personTaskAssignmentIds = assignments.stream()
.map(LeitstandTimeRecordingAssignment::getPersonTaskAssignmentDbk)
.filter(Objects::nonNull)
.distinct()
.toList());
Map<String, LeitstandPersonTaskAssignment> ptaByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk);
.toList();
List<LeitstandPersonTaskAssignment> personTaskAssignments = personTaskAssignmentRepository.findAllById(personTaskAssignmentIds);
Map<String, LeitstandPersonTaskAssignment> personTaskAssignmentsByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk);
Map<String, LeitstandTask> tasksByDbk = indexBy(taskRepository.findAllById(personTaskAssignments.stream()
.map(LeitstandPersonTaskAssignment::getTaskDbk)
.filter(Objects::nonNull)
.distinct()
.toList()), LeitstandTask::getDbk);
List<String> taskIds = personTaskAssignments.stream().map(LeitstandPersonTaskAssignment::getTaskDbk).filter(Objects::nonNull).distinct().toList();
Map<String, LeitstandTask> tasksByDbk = indexBy(taskRepository.findAllById(taskIds), LeitstandTask::getDbk);
Map<String, LeitstandCostUnit> costUnitsByDbk = indexBy(costUnitRepository.findAllById(personTaskAssignments.stream()
.map(LeitstandPersonTaskAssignment::getCostUnitDbk)
.filter(Objects::nonNull)
.distinct()
.toList()), LeitstandCostUnit::getDbk);
List<String> costUnitIds = personTaskAssignments.stream().map(LeitstandPersonTaskAssignment::getCostUnitDbk).filter(Objects::nonNull).distinct().toList();
Map<String, LeitstandCostUnit> costUnitsByDbk = indexBy(costUnitRepository.findAllById(costUnitIds), LeitstandCostUnit::getDbk);
Map<String, LeitstandContract> contractsByDbk = indexBy(contractRepository.findAllById(costUnitsByDbk.values().stream()
.map(LeitstandCostUnit::getContractDbk)
.filter(Objects::nonNull)
.distinct()
.toList()), LeitstandContract::getDbk);
List<String> contractIds = costUnitsByDbk.values().stream().map(LeitstandCostUnit::getContractDbk).filter(Objects::nonNull).distinct().toList();
Map<String, LeitstandContract> contractsByDbk = indexBy(contractRepository.findAllById(contractIds), LeitstandContract::getDbk);
Map<String, LeitstandContractPosition> contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(costUnitsByDbk.values().stream()
.map(LeitstandCostUnit::getContractPositionDbk)
.filter(Objects::nonNull)
.distinct()
.toList()), LeitstandContractPosition::getDbk);
List<String> contractPositionIds = costUnitsByDbk.values().stream().map(LeitstandCostUnit::getContractPositionDbk).filter(Objects::nonNull).distinct().toList();
Map<String, LeitstandContractPosition> contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(contractPositionIds), LeitstandContractPosition::getDbk);
Set<String> organizationDbks = new LinkedHashSet<>();
costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add);
contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add);
if (person != null && person.getOrganizationDbk() != null) {
organizationDbks.add(person.getOrganizationDbk());
Set<String> organizationIds = new LinkedHashSet<>();
costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
recordings.stream().map(LeitstandTimeRecording::getPersonDbk).filter(Objects::nonNull).forEach(id -> {});
List<String> personIds = recordings.stream().map(LeitstandTimeRecording::getPersonDbk).filter(Objects::nonNull).distinct().toList();
Map<String, LeitstandPerson> personsByDbk = indexBy(personRepository.findAllById(personIds), LeitstandPerson::getDbk);
personsByDbk.values().stream().map(LeitstandPerson::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
Map<String, LeitstandOrganization> organizationsByDbk = indexBy(organizationRepository.findAllById(organizationIds), LeitstandOrganization::getDbk);
List<Integer> activityTypeIds = recordings.stream().map(LeitstandTimeRecording::getActivityTypeId).filter(Objects::nonNull).distinct().toList();
Map<Integer, LeitstandActivityType> activityTypesById = indexBy(activityTypeRepository.findAllById(activityTypeIds), LeitstandActivityType::getId);
List<UUID> timeEntryIds = recordings.stream().map(LeitstandTimeRecording::getTimeEntry).filter(Objects::nonNull).map(TimeEntry::getId).filter(Objects::nonNull).distinct().toList();
Map<UUID, TimeEntry> timeEntriesById = timeEntryRepository.findAllById(timeEntryIds).stream().collect(Collectors.toMap(TimeEntry::getId, Function.identity()));
Map<UUID, TimeEntrySearchProjection> existingProjectionsByTimeEntryId = projectionRepository.findByTimeEntry_IdIn(timeEntryIds).stream().collect(Collectors.toMap(p -> p.getTimeEntry().getId(), Function.identity()));
return new ProjectionBuildContext(assignmentsByRecordingDbk, personTaskAssignmentsByDbk, tasksByDbk, costUnitsByDbk,
contractsByDbk, contractPositionsByDbk, organizationsByDbk, personsByDbk, activityTypesById,
timeEntriesById, existingProjectionsByTimeEntryId);
}
private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording, ProjectionBuildContext ctx) {
TimeEntry timeEntry = ctx.timeEntriesById.get(recording.getTimeEntry().getId());
if (timeEntry == null) {
throw new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId());
}
Map<String, LeitstandOrganization> organizationsByDbk = indexBy(organizationRepository.findAllById(organizationDbks), LeitstandOrganization::getDbk);
Document document = timeEntry.getDocument();
LeitstandPerson person = recording.getPersonDbk() == null ? null : ctx.personsByDbk.get(recording.getPersonDbk());
LeitstandActivityType activityType = recording.getActivityTypeId() == null ? null : ctx.activityTypesById.get(recording.getActivityTypeId());
List<LeitstandTimeRecordingAssignment> assignments = ctx.assignmentsByRecordingDbk.getOrDefault(recording.getDbk(), List.of());
List<LeitstandPersonTaskAssignment> personTaskAssignments = assignments.stream()
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
.filter(Objects::nonNull)
.distinct()
.toList();
List<LeitstandTask> orderedTasks = assignments.stream()
.map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk()))
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
.filter(Objects::nonNull)
.map(pta -> tasksByDbk.get(pta.getTaskDbk()))
.map(pta -> ctx.tasksByDbk.get(pta.getTaskDbk()))
.filter(Objects::nonNull)
.distinct()
.toList();
List<LeitstandCostUnit> orderedCostUnits = assignments.stream()
.map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk()))
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
.filter(Objects::nonNull)
.map(pta -> costUnitsByDbk.get(pta.getCostUnitDbk()))
.map(pta -> ctx.costUnitsByDbk.get(pta.getCostUnitDbk()))
.filter(Objects::nonNull)
.distinct()
.toList();
List<LeitstandContract> orderedContracts = orderedCostUnits.stream()
.map(cu -> contractsByDbk.get(cu.getContractDbk()))
.map(cu -> ctx.contractsByDbk.get(cu.getContractDbk()))
.filter(Objects::nonNull)
.distinct()
.toList();
List<LeitstandContractPosition> orderedContractPositions = orderedCostUnits.stream()
.map(cu -> contractPositionsByDbk.get(cu.getContractPositionDbk()))
.map(cu -> ctx.contractPositionsByDbk.get(cu.getContractPositionDbk()))
.filter(Objects::nonNull)
.distinct()
.toList();
List<LeitstandOrganization> orderedOrganizations = new ArrayList<>();
orderedCostUnits.stream().map(cu -> organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
orderedContracts.stream().map(c -> organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
orderedCostUnits.stream().map(cu -> ctx.organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
orderedContracts.stream().map(c -> ctx.organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
if (person != null && person.getOrganizationDbk() != null) {
LeitstandOrganization personOrg = organizationsByDbk.get(person.getOrganizationDbk());
LeitstandOrganization personOrg = ctx.organizationsByDbk.get(person.getOrganizationDbk());
if (personOrg != null && !orderedOrganizations.contains(personOrg)) orderedOrganizations.add(personOrg);
}
@ -176,8 +210,7 @@ public class LeitstandTimeProjectionService {
String summary = buildSummary(recording, primaryTask, primaryCostUnit, primaryOrganization, person);
String semanticText = buildSemanticText(timeEntry, recording, person, activityType, orderedTasks, orderedCostUnits, orderedContracts, orderedContractPositions, orderedOrganizations);
TimeEntrySearchProjection projection = projectionRepository.findByTimeEntry_Id(timeEntry.getId())
.orElseGet(() -> TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build());
TimeEntrySearchProjection projection = ctx.existingProjectionsByTimeEntryId.getOrDefault(timeEntry.getId(), TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build());
projection.setDocument(document);
projection.setTimeEntry(timeEntry);
projection.setSourceSystem(TimeSourceSystem.LEITSTAND);
@ -229,6 +262,19 @@ public class LeitstandTimeProjectionService {
return projection;
}
/**
 * Lookup maps preloaded once per chunk of recordings and then read while building each
 * projection, so that building a single projection needs no further repository calls for
 * these entities.
 *
 * @param assignmentsByRecordingDbk time-recording assignments grouped by the dbk of their time recording
 * @param personTaskAssignmentsByDbk person-task assignments indexed by their dbk
 * @param tasksByDbk tasks indexed by their dbk
 * @param costUnitsByDbk cost units indexed by their dbk
 * @param contractsByDbk contracts indexed by their dbk
 * @param contractPositionsByDbk contract positions indexed by their dbk
 * @param organizationsByDbk organizations indexed by their dbk
 * @param personsByDbk persons indexed by their dbk
 * @param activityTypesById activity types indexed by their integer id
 * @param timeEntriesById TIME entries indexed by their UUID
 * @param existingProjectionsByTimeEntryId already persisted projections indexed by the UUID of their TIME entry
 */
private record ProjectionBuildContext(
Map<String, List<LeitstandTimeRecordingAssignment>> assignmentsByRecordingDbk,
Map<String, LeitstandPersonTaskAssignment> personTaskAssignmentsByDbk,
Map<String, LeitstandTask> tasksByDbk,
Map<String, LeitstandCostUnit> costUnitsByDbk,
Map<String, LeitstandContract> contractsByDbk,
Map<String, LeitstandContractPosition> contractPositionsByDbk,
Map<String, LeitstandOrganization> organizationsByDbk,
Map<String, LeitstandPerson> personsByDbk,
Map<Integer, LeitstandActivityType> activityTypesById,
Map<UUID, TimeEntry> timeEntriesById,
Map<UUID, TimeEntrySearchProjection> existingProjectionsByTimeEntryId) {
}
private String buildSummary(LeitstandTimeRecording recording,
LeitstandTask primaryTask,
LeitstandCostUnit primaryCostUnit,
@ -283,7 +329,7 @@ public class LeitstandTimeProjectionService {
return sb.toString().trim();
}
private <T> Map<String, T> indexBy(Collection<T> rows, Function<T, String> id) {
private <K, T> Map<K, T> indexBy(Collection<T> rows, Function<T, K> id) {
return rows.stream()
.filter(Objects::nonNull)
.collect(Collectors.toMap(id, Function.identity(), (a, b) -> a, LinkedHashMap::new));

View File

@ -13,10 +13,16 @@ import at.procon.dip.embedding.config.EmbeddingProperties;
import at.procon.dip.embedding.registry.EmbeddingModelRegistry;
import at.procon.dip.embedding.service.RepresentationEmbeddingOrchestrator;
import at.procon.dip.search.service.DocumentLexicalIndexService;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
@Service
@ -35,69 +41,141 @@ public class TimeEntryRepresentationMaterializationService {
private final EmbeddingProperties embeddingProperties;
private final EmbeddingModelRegistry modelRegistry;
//@Transactional
public void upsertRepresentations(TimeEntrySearchProjection projection) {
if (projection.getSemanticText() == null || projection.getSemanticText().isBlank()) {
log.debug("Skipping TIME representation for document {} because semantic text is blank", projection.getDocument().getId());
if (projection == null) {
return;
}
upsertRepresentations(List.of(projection));
}
@Transactional(propagation = Propagation.REQUIRES_NEW)
public void upsertRepresentations(List<TimeEntrySearchProjection> projections) {
if (projections == null || projections.isEmpty()) {
return;
}
Document document = projection.getDocument();
document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
document.setSummary(projection.getSummaryText());
document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
if (document.getMimeType() == null || document.getMimeType().isBlank()) {
document.setMimeType("application/x-time-entry");
List<TimeEntrySearchProjection> eligible = projections.stream()
.filter(projection -> documentId(projection) != null)
.filter(projection -> projection.getSemanticText() != null && !projection.getSemanticText().isBlank())
.toList();
if (eligible.isEmpty()) {
return;
}
document = documentRepository.save(document);
Optional<DocumentTextRepresentation> existing = representationRepository
.findByDocument_IdAndRepresentationType(document.getId(), RepresentationType.SEMANTIC_TEXT)
.stream()
.filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation())
.findFirst();
List<UUID> documentIds = eligible.stream()
.map(this::documentId)
.distinct()
.toList();
Map<UUID, Document> documentsById = documentRepository.findAllById(documentIds).stream()
.collect(java.util.stream.Collectors.toMap(Document::getId, java.util.function.Function.identity(), (a, b) -> a, LinkedHashMap::new));
List<Document> documentsToSave = new ArrayList<>();
for (TimeEntrySearchProjection projection : eligible) {
UUID documentId = documentId(projection);
Document document = documentsById.get(documentId);
if (document == null || documentsToSave.contains(document)) {
continue;
}
document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
document.setSummary(projection.getSummaryText());
document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
if (document.getMimeType() == null || document.getMimeType().isBlank()) {
document.setMimeType("application/x-time-entry");
}
documentsToSave.add(document);
}
if (!documentsToSave.isEmpty()) {
documentRepository.saveAll(documentsToSave);
documentRepository.flush();
}
boolean changed = existing.isEmpty()
|| !projection.getSemanticText().equals(existing.get().getTextBody())
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
List<DocumentTextRepresentation> changedExisting = new ArrayList<>();
List<TimeEntrySearchProjection> newRepresentationProjections = new ArrayList<>();
List<UUID> changedRepresentationIds = new ArrayList<>();
List<DocumentTextRepresentation> newlyCreatedRepresentations = new ArrayList<>();
Document finalDocument = document;
DocumentTextRepresentation semantic = existing
.map(found -> changed ? updateRepresentation(found, projection) : found)
.orElseGet(() -> documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
finalDocument.getId(),
null,
RepresentationType.SEMANTIC_TEXT,
BUILDER_KEY,
projection.getLanguageCode(),
null,
null,
null,
null,
true,
projection.getSemanticText(),
false
)));
for (TimeEntrySearchProjection projection : eligible) {
Document document = documentsById.get(documentId(projection));
if (document == null) {
continue;
}
Optional<DocumentTextRepresentation> existing = representationRepository
.findByDocument_IdAndRepresentationType(document.getId(), RepresentationType.SEMANTIC_TEXT)
.stream()
.filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation())
.findFirst();
if (changed
&& embeddingProperties.isEnabled()
boolean changed = existing.isEmpty()
|| !projection.getSemanticText().equals(existing.get().getTextBody())
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
if (!changed) {
continue;
}
if (existing.isPresent()) {
DocumentTextRepresentation found = existing.get();
found.setBuilderKey(BUILDER_KEY);
found.setLanguageCode(projection.getLanguageCode());
found.setPrimaryRepresentation(true);
found.setTextBody(projection.getSemanticText());
found.setCharCount(projection.getSemanticText().length());
changedExisting.add(found);
} else {
newRepresentationProjections.add(projection);
}
}
if (!changedExisting.isEmpty()) {
representationRepository.saveAll(changedExisting);
representationRepository.flush();
changedExisting.stream().map(DocumentTextRepresentation::getId).forEach(changedRepresentationIds::add);
}
for (TimeEntrySearchProjection projection : newRepresentationProjections) {
Document document = documentsById.get(documentId(projection));
if (document == null) {
continue;
}
DocumentTextRepresentation created = documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
document.getId(),
null,
RepresentationType.SEMANTIC_TEXT,
BUILDER_KEY,
projection.getLanguageCode(),
null,
null,
null,
null,
true,
projection.getSemanticText(),
false
));
newlyCreatedRepresentations.add(created);
changedRepresentationIds.add(created.getId());
}
for (UUID representationId : changedRepresentationIds) {
lexicalIndexService.indexRepresentation(representationId);
}
if (embeddingProperties.isEnabled()
&& timeDomainProperties.getLeitstand().isQueueEmbeddings()
&& embeddingProperties.getDefaultDocumentModel() != null && !embeddingProperties.getDefaultDocumentModel().isBlank()) {
&& embeddingProperties.getDefaultDocumentModel() != null
&& !embeddingProperties.getDefaultDocumentModel().isBlank()) {
String modelKey = modelRegistry.getRequiredDefaultDocumentModelKey();
embeddingOrchestrator.enqueueRepresentation(document.getId(), semantic.getId(), modelKey);
for (DocumentTextRepresentation representation : changedExisting) {
embeddingOrchestrator.enqueueRepresentation(representation.getDocument().getId(), representation.getId(), modelKey);
}
for (DocumentTextRepresentation representation : newlyCreatedRepresentations) {
embeddingOrchestrator.enqueueRepresentation(representation.getDocument().getId(), representation.getId(), modelKey);
}
}
}
private DocumentTextRepresentation updateRepresentation(DocumentTextRepresentation existing, TimeEntrySearchProjection projection) {
existing.setBuilderKey(BUILDER_KEY);
existing.setLanguageCode(projection.getLanguageCode());
existing.setPrimaryRepresentation(true);
existing.setTextBody(projection.getSemanticText());
existing.setCharCount(projection.getSemanticText().length());
DocumentTextRepresentation saved = representationRepository.saveAndFlush(existing);
lexicalIndexService.indexRepresentation(saved.getId());
return saved;
/**
 * Null-safe accessor for the id of the document attached to a projection.
 *
 * @param projection the projection to inspect; may be {@code null}
 * @return the id of the projection's document, or {@code null} when the projection or its
 *         document is {@code null}
 */
private UUID documentId(TimeEntrySearchProjection projection) {
    if (projection == null) {
        return null;
    }
    Document attached = projection.getDocument();
    if (attached == null) {
        return null;
    }
    return attached.getId();
}
private boolean equalsNullable(String left, String right) {

View File

@ -37,6 +37,8 @@ public class LeitstandTimeSelectiveMaterializationStartupRunner implements Appli
log.info("Completed selective Leitstand TIME materialization for personNumber={}. Processed {} recordings", cfg.getSelectiveMaterializationPersonNumber(), count);
return;
}
throw new IllegalStateException("dip.time.leitstand.startup-selective-materialization-enabled=true requires either selective-materialization-person-dbk or selective-materialization-person-number");
log.info("Starting Leitstand TIME materialization for all imported recordings (rebuildProjection={})", rebuildProjection);
int count = importService.materializeCanonicalTimeEntriesForAll(rebuildProjection);
log.info("Completed Leitstand TIME materialization for all imported recordings. Processed {} recordings", count);
}
}

View File

@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
@Override
public List<SearchHit> search(SearchExecutionContext context, int limit) {
String effectiveConfigExpr = effectiveConfigExpression("dtr");
String tsQueryExpr = tsQueryExpression(effectiveConfigExpr);
StringBuilder sql = new StringBuilder("""
SELECT
d.id AS document_id,
dtr.id AS representation_id,
CAST(dtr.representation_type AS text) AS representation_type,
dtr.is_primary AS is_primary,
dtr.chunk_index AS chunk_index,
dtr.chunk_start_offset AS chunk_start_offset,
dtr.chunk_end_offset AS chunk_end_offset,
CAST(d.document_type AS text) AS document_type,
CAST(d.document_family AS text) AS document_family,
CAST(d.visibility AS text) AS visibility,
@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
d.created_at AS created_at,
d.updated_at AS updated_at,
ts_headline(
CASE
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
ELSE dtr.search_config::regconfig
END,
"""
).append(effectiveConfigExpr).append("""
,
COALESCE(dtr.text_body, ''),
websearch_to_tsquery(
CASE
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
ELSE dtr.search_config::regconfig
END,
:queryText
),
""").append(tsQueryExpr).append("""
,
'MaxFragments=2, MinWords=5, MaxWords=20'
) AS snippet,
ts_rank_cd(
dtr.search_vector,
websearch_to_tsquery(
CASE
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
ELSE dtr.search_config::regconfig
END,
:queryText
)
) AS score
FROM doc.doc_text_representation dtr
JOIN doc.doc_document d ON d.id = dtr.document_id
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
WHERE dtr.search_vector IS NOT NULL
AND dtr.search_vector @@ websearch_to_tsquery(
CASE
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
ELSE dtr.search_config::regconfig
END,
:queryText
)
ranked.score AS score
FROM (
SELECT
d.id AS document_id,
dtr.id AS representation_id,
ts_rank_cd(
dtr.search_vector,
""").append(tsQueryExpr).append("""
) AS score,
d.updated_at AS updated_at
FROM doc.doc_text_representation dtr
JOIN doc.doc_document d ON d.id = dtr.document_id
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
WHERE dtr.search_vector IS NOT NULL
AND dtr.search_vector @@ """).append(tsQueryExpr).append("""
""");
MapSqlParameterSource params = new MapSqlParameterSource();
@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
sql.append("""
ORDER BY score DESC, d.updated_at DESC
LIMIT :limit
) ranked
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
JOIN doc.doc_document d ON d.id = ranked.document_id
ORDER BY ranked.score DESC, d.updated_at DESC
""");
params.addValue("limit", limit);
return jdbcTemplate.query(
@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
);
}
}
/**
 * Renders the SQL {@code CASE} expression that picks the text-search configuration
 * for a representation row: {@code 'simple'} when the row's {@code search_config}
 * is null or empty, otherwise the row's own {@code search_config} cast to
 * {@code regconfig}.
 *
 * @param representationAlias SQL alias of the text-representation table
 * @return the expression with surrounding whitespace trimmed
 */
private static String effectiveConfigExpression(String representationAlias) {
    String template = """
            CASE
            WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig
            ELSE %s.search_config::regconfig
            END
            """;
    // The alias is substituted twice: once in the NULLIF test, once in the ELSE branch.
    String rendered = template.formatted(representationAlias, representationAlias);
    return rendered.trim();
}
/**
 * Renders a {@code websearch_to_tsquery(...)} call that parses the
 * {@code :queryText} named parameter using the given configuration expression.
 *
 * @param configExpression SQL expression yielding the text-search configuration
 * @return the function-call expression with surrounding whitespace trimmed
 */
private static String tsQueryExpression(String configExpression) {
    String template = """
            websearch_to_tsquery(
            %s,
            :queryText
            )
            """;
    String rendered = template.formatted(configExpression);
    return rendered.trim();
}
}

View File

@ -18,42 +18,112 @@ public class DocumentTrigramSearchRepositoryImpl implements DocumentTrigramSearc
@Override
public List<SearchHit> search(SearchExecutionContext context, int limit, double threshold) {
String scoreExpr = "GREATEST(" +
"doc.similarity(COALESCE(d.title, ''), :queryText), " +
"doc.similarity(COALESCE(d.summary, ''), :queryText), " +
"doc.similarity(COALESCE(dtr.text_body, ''), :queryText))";
StringBuilder sql = new StringBuilder("SELECT " +
"d.id AS document_id, " +
"dtr.id AS representation_id, " +
"CAST(d.document_type AS text) AS document_type, " +
"CAST(d.document_family AS text) AS document_family, " +
"CAST(d.visibility AS text) AS visibility, " +
"d.title AS title, " +
"d.summary AS summary, " +
"COALESCE(dtr.language_code, d.language_code) AS language_code, " +
"d.mime_type AS mime_type, " +
"d.created_at AS created_at, " +
"d.updated_at AS updated_at, " +
"LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet, " +
scoreExpr + " AS score, " +
"CASE " +
"WHEN doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(d.summary, ''), :queryText) " +
" AND doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_TITLE' " +
"WHEN doc.similarity(COALESCE(d.summary, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_SUMMARY' " +
"ELSE 'REPRESENTATION_TEXT' END AS matched_field " +
"FROM doc.doc_text_representation dtr " +
"JOIN doc.doc_document d ON d.id = dtr.document_id " +
"LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id " +
"WHERE " + scoreExpr + " >= :threshold");
StringBuilder sql = new StringBuilder("""
WITH title_candidates AS (
SELECT
d.id AS document_id,
dtr.id AS representation_id,
'DOCUMENT_TITLE' AS matched_field,
public.similarity(d.title, :queryText) AS score,
d.updated_at AS updated_at
FROM doc.doc_text_representation dtr
JOIN doc.doc_document d ON d.id = dtr.document_id
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
WHERE d.title IS NOT NULL
AND d.title OPERATOR(public.%) :queryText
""");
MapSqlParameterSource params = new MapSqlParameterSource();
params.addValue("queryText", context.getRequest().getQueryText());
params.addValue("threshold", threshold);
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
params.addValue("branchLimit", limit);
params.addValue("limit", limit);
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
sql.append("""
ORDER BY score DESC, d.updated_at DESC
LIMIT :branchLimit
),
summary_candidates AS (
SELECT
d.id AS document_id,
dtr.id AS representation_id,
'DOCUMENT_SUMMARY' AS matched_field,
public.similarity(d.summary, :queryText) AS score,
d.updated_at AS updated_at
FROM doc.doc_text_representation dtr
JOIN doc.doc_document d ON d.id = dtr.document_id
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
WHERE d.summary IS NOT NULL
AND d.summary OPERATOR(public.%) :queryText
""");
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
sql.append("""
ORDER BY score DESC, d.updated_at DESC
LIMIT :branchLimit
),
text_candidates AS (
SELECT
d.id AS document_id,
dtr.id AS representation_id,
'REPRESENTATION_TEXT' AS matched_field,
public.similarity(dtr.text_body, :queryText) AS score,
d.updated_at AS updated_at
FROM doc.doc_text_representation dtr
JOIN doc.doc_document d ON d.id = dtr.document_id
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
WHERE dtr.text_body IS NOT NULL
AND dtr.text_body OPERATOR(public.%) :queryText
""");
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
sql.append("""
ORDER BY score DESC, d.updated_at DESC
LIMIT :branchLimit
),
ranked AS (
SELECT DISTINCT ON (representation_id)
document_id,
representation_id,
matched_field,
score,
updated_at
FROM (
SELECT * FROM title_candidates
UNION ALL
SELECT * FROM summary_candidates
UNION ALL
SELECT * FROM text_candidates
) all_candidates
WHERE score >= :threshold
ORDER BY representation_id, score DESC, updated_at DESC
)
SELECT
d.id AS document_id,
dtr.id AS representation_id,
CAST(dtr.representation_type AS text) AS representation_type,
dtr.is_primary AS is_primary,
dtr.chunk_index AS chunk_index,
dtr.chunk_start_offset AS chunk_start_offset,
dtr.chunk_end_offset AS chunk_end_offset,
CAST(d.document_type AS text) AS document_type,
CAST(d.document_family AS text) AS document_family,
CAST(d.visibility AS text) AS visibility,
d.title AS title,
d.summary AS summary,
COALESCE(dtr.language_code, d.language_code) AS language_code,
d.mime_type AS mime_type,
d.created_at AS created_at,
d.updated_at AS updated_at,
LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet,
ranked.score AS score,
ranked.matched_field AS matched_field
FROM ranked
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
JOIN doc.doc_document d ON d.id = ranked.document_id
ORDER BY ranked.score DESC, d.updated_at DESC
LIMIT :limit
""");
return jdbcTemplate.query(sql.toString(), params,
new SearchHitRowMapper(SearchEngineType.POSTGRES_TRIGRAM, SearchMatchField.REPRESENTATION_TEXT));
}

View File

@ -34,20 +34,17 @@ final class SearchSqlFilterSupport {
boolean tenantJoinPresent) {
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
if (!CollectionUtils.isEmpty(documentTypes)) {
sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)");
params.addValue("documentTypes", enumNames(documentTypes));
appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes");
}
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
if (!CollectionUtils.isEmpty(documentFamilies)) {
sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)");
params.addValue("documentFamilies", enumNames(documentFamilies));
appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies");
}
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
if (!CollectionUtils.isEmpty(visibilities)) {
sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)");
params.addValue("visibilities", enumNames(visibilities));
appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities");
}
Set<String> languageCodes = context.getRequest().getLanguageCodes();
@ -62,8 +59,7 @@ final class SearchSqlFilterSupport {
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
if (!CollectionUtils.isEmpty(representationTypes)) {
sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)");
params.addValue("representationTypes", enumNames(representationTypes));
appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes");
} else {
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
if (selectionMode == null) {
@ -242,6 +238,15 @@ final class SearchSqlFilterSupport {
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
}
/**
 * Appends an {@code AND CAST(<column> AS text) IN (:<parameter>)} clause to the
 * query and binds the enum constant names under that parameter name.
 *
 * @param sql              query being assembled; the clause is appended to it
 * @param params           parameter source that receives the list of enum names
 * @param columnExpression qualified column expression to compare, e.g. {@code d.document_type}
 * @param values           enum constants accepted by the filter
 * @param parameterName    named-parameter key used both in the SQL and in {@code params}
 */
private static void appendTextEnumFilter(StringBuilder sql,
                                         MapSqlParameterSource params,
                                         String columnExpression,
                                         Collection<? extends Enum<?>> values,
                                         String parameterName) {
    String clause = " AND CAST(" + columnExpression + " AS text) IN (:" + parameterName + ")";
    sql.append(clause);
    params.addValue(parameterName, enumNames(values));
}
/**
 * Converts enum constants to their declared names, preserving the iteration
 * order of the input collection.
 *
 * @param values enum constants to convert
 * @return the {@link Enum#name()} of every element
 */
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
    return values.stream()
            .map(value -> value.name())
            .collect(Collectors.toList());
}

View File

@ -1,9 +1,12 @@
package at.procon.ted.startup;
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
import at.procon.dip.runtime.config.RuntimeMode;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.context.annotation.Profile;
import org.springframework.core.annotation.Order;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
@ -19,6 +22,7 @@ import org.springframework.stereotype.Component;
@Component
@Order(1) // Run before other startup runners
@RequiredArgsConstructor
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
@Slf4j
public class OrganizationSchemaFixRunner implements ApplicationRunner {

View File

@ -33,7 +33,7 @@ dip:
max-chunks-per-document: 12
# Startup backfill limit for missing lexical vectors
startup-lexical-backfill-limit: 500
scheduled-lexical-backfill-enabled: true
scheduled-lexical-backfill-enabled: false
scheduled-lexical-backfill-delay-ms: 30000
scheduled-lexical-backfill-batch-size: 200
# Number of top hits per engine returned by /search/debug
@ -333,8 +333,8 @@ dip:
leitstand:
enabled: false
startup-sync-enabled: false
startup-selective-materialization-enabled: true
selective-materialization-person-dbk: 100920031023144811001000
startup-selective-materialization-enabled: false
selective-materialization-person-dbk: #100920031023144811001000
selective-materialization-person-number:
selective-materialization-build-projection: true
create-canonical-time-entries: true

View File

@ -43,7 +43,7 @@ spring:
order_updates: true
flyway:
enabled: true
enabled: false
locations: classpath:db/migration
baseline-on-migrate: true
create-schemas: true

View File

@ -1,7 +1,7 @@
-- Slice 1 + Slice 2 generic search support for DOC documents.
-- Adds lexical-search support columns/indexes and pg_trgm extension.
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
ALTER TABLE DOC.doc_text_representation
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
@ -15,12 +15,12 @@ CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
ON DOC.doc_document
USING GIN (title DOC.gin_trgm_ops);
USING GIN (title public.gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
ON DOC.doc_document
USING GIN (summary DOC.gin_trgm_ops);
USING GIN (summary public.gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
ON DOC.doc_text_representation
USING GIN (text_body DOC.gin_trgm_ops);
USING GIN (text_body public.gin_trgm_ops);

View File

@ -5,6 +5,9 @@ ALTER TABLE DOC.doc_embedding
ADD CONSTRAINT ck_doc_embedding_dimensions_positive
CHECK (embedding_dimensions IS NULL OR embedding_dimensions > 0);
ALTER TABLE DOC.doc_embedding
ADD COLUMN IF NOT EXISTS embedding_vector public.vector;
DO $$
BEGIN
IF NOT EXISTS (

View File

@ -2,7 +2,7 @@
-- This makes migration, audit, and repair flows package-aware without having to derive the
-- package membership from source paths at query time.
SET search_path TO TED, DOC, public;
SET search_path TO ted, doc, public;
ALTER TABLE IF EXISTS TED.ted_notice_projection
ADD COLUMN IF NOT EXISTS package_identifier VARCHAR(20);

View File

@ -1,7 +1,7 @@
-- Slice 1 generic lexical search support.
-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
ALTER TABLE doc.doc_text_representation
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
@ -15,12 +15,12 @@ CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
ON doc.doc_document
USING GIN (title doc.gin_trgm_ops);
USING GIN (title public.gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
ON doc.doc_document
USING GIN (summary doc.gin_trgm_ops);
USING GIN (summary public.gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
ON doc.doc_text_representation
USING GIN (text_body doc.gin_trgm_ops);
USING GIN (text_body public.gin_trgm_ops);

View File

@ -306,7 +306,7 @@ CREATE INDEX idx_doc_procedure_type ON procurement_document(procedure_type);
CREATE INDEX idx_doc_cpv_codes ON procurement_document USING GIN(cpv_codes);
-- Full-text search on textual content
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content gin_trgm_ops);
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content public.gin_trgm_ops);
-- Vector similarity search using IVFFlat index (efficient for approximate nearest neighbor)
-- Lists parameter: sqrt(number_of_vectors) for optimal performance

View File

@ -44,34 +44,6 @@ BEGIN
END
$$;
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_enum e
JOIN pg_type t ON t.oid = e.enumtypid
JOIN pg_namespace n ON n.oid = t.typnamespace
WHERE n.nspname = 'doc' AND t.typname = 'doc_document_type' AND e.enumlabel = 'TIME_ENTRY'
) THEN
ALTER TYPE doc.doc_document_type ADD VALUE 'TIME_ENTRY';
END IF;
END
$$;
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1
FROM pg_enum e
JOIN pg_type t ON t.oid = e.enumtypid
JOIN pg_namespace n ON n.oid = t.typnamespace
WHERE n.nspname = 'doc' AND t.typname = 'doc_document_family' AND e.enumlabel = 'TIME'
) THEN
ALTER TYPE doc.doc_document_family ADD VALUE 'TIME';
END IF;
END
$$;
CREATE TABLE IF NOT EXISTS "time".time_entry (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
document_id UUID NOT NULL UNIQUE REFERENCES doc.doc_document(id) ON DELETE CASCADE,

View File

@ -0,0 +1,7 @@
-- Align TIME projection source text fields with real Leitstand payload lengths.
-- Both statements change the column type to TEXT, which carries no declared
-- length limit. The previous column types are not visible in this script.
-- Description column.
ALTER TABLE "time".time_entry_search_projection
ALTER COLUMN time_recording_desc TYPE TEXT;
-- Remark column.
ALTER TABLE "time".time_entry_search_projection
ALTER COLUMN time_recording_remark TYPE TEXT;

View File

@ -0,0 +1,80 @@
-- Repair DOC document enum/check alignment for TIME documents on databases
-- that still carry the pre-TIME family/type constraints.
--
-- The script has three guarded steps; each step is skipped when the object it
-- targets does not exist:
--   1. extend the doc.doc_document_type enum (only if that enum type exists),
--   2. extend the doc.doc_document_family enum (only if that enum type exists),
--   3. drop and recreate the two CHECK constraints on doc.doc_document.
--
-- NOTE(review): PostgreSQL documents that an enum value added with
-- ALTER TYPE ... ADD VALUE inside a transaction block cannot be used until that
-- transaction has committed. If document_type/document_family are enum columns
-- and this script runs in a single transaction, the new labels referenced by the
-- CHECK constraints in step 3 may be rejected. Confirm against the migration
-- tool's transaction handling.

-- Step 1: add the missing document type labels; IF NOT EXISTS keeps the step re-runnable.
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM pg_type t
JOIN pg_namespace n ON n.oid = t.typnamespace
WHERE n.nspname = 'doc'
AND t.typname = 'doc_document_type'
) THEN
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_PACKAGE';
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_NOTICE_LOT';
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TIME_ENTRY';
END IF;
END
$$;
-- Step 2: add the TIME label to the document family enum.
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM pg_type t
JOIN pg_namespace n ON n.oid = t.typnamespace
WHERE n.nspname = 'doc'
AND t.typname = 'doc_document_family'
) THEN
ALTER TYPE DOC.doc_document_family ADD VALUE IF NOT EXISTS 'TIME';
END IF;
END
$$;
-- Step 3: replace both CHECK constraints with versions that also accept the
-- TIME-related values. Runs only when doc.doc_document exists.
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM information_schema.tables
WHERE table_schema = 'doc'
AND table_name = 'doc_document'
) THEN
-- Drop-then-add so the constraint definition is replaced regardless of its previous contents.
ALTER TABLE DOC.doc_document DROP CONSTRAINT IF EXISTS doc_document_document_type_check;
ALTER TABLE DOC.doc_document
ADD CONSTRAINT doc_document_document_type_check
CHECK (
document_type IN (
'TED_PACKAGE',
'TED_NOTICE',
'TED_NOTICE_LOT',
'TIME_ENTRY',
'EMAIL',
'MIME_MESSAGE',
'PDF',
'DOCX',
'HTML',
'XML_GENERIC',
'TEXT',
'MARKDOWN',
'ZIP_ARCHIVE',
'GENERIC_BINARY',
'UNKNOWN'
)
);
ALTER TABLE DOC.doc_document DROP CONSTRAINT IF EXISTS doc_document_document_family_check;
ALTER TABLE DOC.doc_document
ADD CONSTRAINT doc_document_document_family_check
CHECK (
document_family IN (
'PROCUREMENT',
'TIME',
'MAIL',
'ATTACHMENT',
'KNOWLEDGE',
'GENERIC'
)
);
END IF;
END
$$;

View File

@ -0,0 +1,10 @@
-- Search performance support indexes for filtered DOC fulltext lookups.
-- All statements use IF NOT EXISTS, so re-running the script is a no-op for
-- indexes that are already present.

-- Document lookup by type + family, with updated_at (descending) and id as trailing keys.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated
ON DOC.doc_document(document_type, document_family, updated_at DESC, id);
-- Same as above with visibility as an additional leading filter column.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated
ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id);
-- Representation lookup by owning document, primary flag and representation type.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type
ON DOC.doc_text_representation(document_id, is_primary, representation_type);

View File

@ -0,0 +1,17 @@
-- Support cast-to-text search filters on installations where DOC type columns are varchar.
-- These indexes align with the query shape used by generic search filters.
--
-- Each indexed expression is CAST(<column> AS text), i.e. the same expression form
-- the search filters compare against.
-- NOTE(review): PostgreSQL requires index expressions to be IMMUTABLE, and the
-- enum-to-text cast is not. These statements are therefore expected to work only
-- where the columns are varchar/text, not enum types. Confirm for every target
-- installation.

-- Document lookup by type + family (as text), then updated_at descending and id.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated
ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id);
-- Same as above with visibility (as text) as an additional filter column.
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated
ON DOC.doc_document (
(CAST(document_type AS text)),
(CAST(document_family AS text)),
(CAST(visibility AS text)),
updated_at DESC,
id
);
-- Representation lookup by primary flag and representation type (as text), then document id.
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document
ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id);

View File

@ -0,0 +1,135 @@
package at.procon.dip.migration;
import static org.assertj.core.api.Assertions.assertThat;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import org.flywaydb.core.Flyway;
import org.flywaydb.core.api.MigrationVersion;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.PostgreSQLContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
/**
 * Migration test that runs the Flyway scripts from
 * {@code src/main/resources/db/migration} against a PostgreSQL container that was
 * pre-seeded with a legacy {@code doc.doc_document} layout (enum types and CHECK
 * constraints without the TIME values), then verifies that a TIME document can be
 * inserted and that the family CHECK constraint mentions TIME.
 */
@Testcontainers
class DocDocumentTimeEnumConstraintRepairMigrationTest {
// Shared PostgreSQL 16 container for this test class.
@Container
static PostgreSQLContainer<?> postgres = new PostgreSQLContainer<>("postgres:16-alpine")
.withDatabaseName("dip_migration_test")
.withUsername("test")
.withPassword("test");
/**
 * Seeds the legacy schema, runs the migrations, and checks that a
 * {@code TIME_ENTRY}/{@code TIME} row is accepted afterwards.
 *
 * @throws SQLException if any JDBC statement fails
 */
@Test
void repairMigrationExpandsLegacyDocDocumentChecksForTimeDocuments() throws SQLException {
createLegacyDocDocumentState();
// The doc schema is already populated by the seeding step, so Flyway baselines it
// (baselineOnMigrate) at version 42; presumably only migrations above that version
// are applied. TODO confirm against the migration directory.
Flyway.configure()
.dataSource(postgres.getJdbcUrl(), postgres.getUsername(), postgres.getPassword())
.locations("filesystem:src/main/resources/db/migration")
.schemas("doc")
.defaultSchema("doc")
.baselineOnMigrate(true)
.baselineVersion(MigrationVersion.fromVersion("42"))
.load()
.migrate();
// The legacy enum types and CHECK constraints created below do not contain
// TIME_ENTRY / TIME, so this insert only succeeds if the migrations extended them.
try (Connection connection = openConnection();
Statement statement = connection.createStatement()) {
statement.executeUpdate("""
INSERT INTO doc.doc_document (id, document_type, document_family)
VALUES ('709e388b-19d9-4c21-8d06-82b295b33505', 'TIME_ENTRY', 'TIME')
""");
}
// Read back the definition of the family CHECK constraint from the catalog.
try (Connection connection = openConnection();
var preparedStatement = connection.prepareStatement("""
SELECT pg_get_constraintdef(oid)
FROM pg_constraint
WHERE conrelid = 'doc.doc_document'::regclass
AND conname = ?
""")) {
preparedStatement.setString(1, "doc_document_document_family_check");
try (var resultSet = preparedStatement.executeQuery()) {
// The constraint must exist and its definition must mention TIME.
assertThat(resultSet.next()).isTrue();
assertThat(resultSet.getString(1)).contains("TIME");
}
}
}
/**
 * Creates the pre-TIME database state: the {@code doc} schema, the
 * {@code doc_document_type} and {@code doc_document_family} enum types, and a
 * minimal {@code doc.doc_document} table whose CHECK constraints list only the
 * legacy values.
 *
 * @throws SQLException if any DDL statement fails
 */
private void createLegacyDocDocumentState() throws SQLException {
try (Connection connection = openConnection();
Statement statement = connection.createStatement()) {
statement.execute("CREATE SCHEMA doc");
// Legacy document type enum: no TED_PACKAGE, TED_NOTICE_LOT or TIME_ENTRY.
statement.execute("""
CREATE TYPE doc.doc_document_type AS ENUM (
'TED_NOTICE',
'EMAIL',
'MIME_MESSAGE',
'PDF',
'DOCX',
'HTML',
'XML_GENERIC',
'TEXT',
'MARKDOWN',
'ZIP_ARCHIVE',
'GENERIC_BINARY',
'UNKNOWN'
)
""");
// Legacy document family enum: no TIME.
statement.execute("""
CREATE TYPE doc.doc_document_family AS ENUM (
'PROCUREMENT',
'MAIL',
'ATTACHMENT',
'KNOWLEDGE',
'GENERIC'
)
""");
// Minimal table with the two named CHECK constraints.
statement.execute("""
CREATE TABLE doc.doc_document (
id UUID PRIMARY KEY,
document_type doc.doc_document_type NOT NULL,
document_family doc.doc_document_family NOT NULL,
CONSTRAINT doc_document_document_type_check
CHECK (
document_type IN (
'TED_NOTICE',
'EMAIL',
'MIME_MESSAGE',
'PDF',
'DOCX',
'HTML',
'XML_GENERIC',
'TEXT',
'MARKDOWN',
'ZIP_ARCHIVE',
'GENERIC_BINARY',
'UNKNOWN'
)
),
CONSTRAINT doc_document_document_family_check
CHECK (
document_family IN (
'PROCUREMENT',
'MAIL',
'ATTACHMENT',
'KNOWLEDGE',
'GENERIC'
)
)
)
""");
}
}
/**
 * Opens a new JDBC connection to the test container; callers are responsible
 * for closing it.
 *
 * @return a fresh connection using the container's URL and credentials
 * @throws SQLException if the connection cannot be established
 */
private Connection openConnection() throws SQLException {
return DriverManager.getConnection(
postgres.getJdbcUrl(),
postgres.getUsername(),
postgres.getPassword()
);
}
}

View File

@ -2,10 +2,13 @@ package at.procon.dip.search.repository;
import static org.assertj.core.api.Assertions.assertThat;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.search.api.SearchExecutionContext;
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
import at.procon.dip.search.dto.SearchRequest;
import at.procon.dip.search.spi.SearchDocumentScope;
import org.junit.jupiter.api.Test;
@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest {
assertThat(params.getValue("attributeName1")).isEqualTo("version");
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
}
/**
 * Checks the SQL fragments and bound parameters produced by
 * {@code appendCommonFilters} for a request that filters on document type and
 * family and selects the PRIMARY_AND_CHUNKS representation mode.
 *
 * <p>Note: despite the method name, the asserted fragments compare
 * {@code CAST(... AS text)} values, not typed enum columns.
 */
@Test
void shouldUseTypedEnumComparisonsForIndexedFilters() {
    SearchDocumentScope emptyScope =
            new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null);
    SearchRequest searchRequest = SearchRequest.builder()
            .queryText("vertrieb")
            .documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY))
            .documentFamilies(java.util.Set.of(DocumentFamily.TIME))
            .representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
            .build();
    SearchExecutionContext executionContext = SearchExecutionContext.builder()
            .request(searchRequest)
            .scope(emptyScope)
            .page(0)
            .size(20)
            .build();
    MapSqlParameterSource boundParams = new MapSqlParameterSource();
    StringBuilder query = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");

    SearchSqlFilterSupport.appendCommonFilters(query, boundParams, executionContext, "d", "dtr", false);

    // The generated WHERE clause must contain the text-cast filters.
    assertThat(query.toString())
            .contains("CAST(d.document_type AS text) IN (:documentTypes)")
            .contains("CAST(d.document_family AS text) IN (:documentFamilies)")
            .contains("CAST(dtr.representation_type AS text) = 'CHUNK'");
    // Enum filters are bound as lists of constant names.
    assertThat(boundParams.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY"));
    assertThat(boundParams.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME"));
}
}

View File

@ -87,13 +87,13 @@ public abstract class AbstractSearchIntegrationTest {
protected void ensureSearchColumnsAndIndexes() {
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public");
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
}
protected void cleanupDatabase() {

View File

@ -121,14 +121,14 @@ public abstract class AbstractSemanticSearchIntegrationTest {
}
protected void ensureSearchColumnsAndIndexes() {
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA doc");
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public");
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public");
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
jdbcTemplate.execute("ALTER TABLE doc.doc_embedding ADD COLUMN IF NOT EXISTS embedding_vector public.vector");
}

View File

@ -82,13 +82,13 @@ public abstract class AbstractTedStructuredSearchIntegrationTest {
protected void ensureSearchColumnsAndIndexes() {
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS ted");
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public");
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
}
protected void cleanupDatabase() {

View File

@ -1,3 +1,3 @@
CREATE SCHEMA IF NOT EXISTS DOC;
CREATE SCHEMA IF NOT EXISTS TED;
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;