Compare commits
6 Commits
f9df7c8d22
...
d1d81fd478
| Author | SHA1 | Date |
|---|---|---|
|
|
d1d81fd478 | |
|
|
9da416dbe4 | |
|
|
142b0a5809 | |
|
|
253845e9ea | |
|
|
430885b5af | |
|
|
5c3133d19d |
|
|
@ -0,0 +1,120 @@
|
||||||
|
# TED Procurement Processor - Git Ignore
|
||||||
|
# Author: Martin.Schweitzer@procon.co.at and claude.ai
|
||||||
|
|
||||||
|
# Compiled class files
|
||||||
|
*.class
|
||||||
|
|
||||||
|
# Maven
|
||||||
|
target/
|
||||||
|
pom.xml.tag
|
||||||
|
pom.xml.releaseBackup
|
||||||
|
pom.xml.versionsBackup
|
||||||
|
pom.xml.next
|
||||||
|
release.properties
|
||||||
|
dependency-reduced-pom.xml
|
||||||
|
buildNumber.properties
|
||||||
|
.mvn/timing.properties
|
||||||
|
.mvn/wrapper/maven-wrapper.jar
|
||||||
|
|
||||||
|
# Gradle
|
||||||
|
.gradle
|
||||||
|
build/
|
||||||
|
|
||||||
|
# IDE - IntelliJ IDEA
|
||||||
|
.idea/
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
out/
|
||||||
|
|
||||||
|
# IDE - Eclipse
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
bin/
|
||||||
|
|
||||||
|
# IDE - NetBeans
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
|
||||||
|
# IDE - VS Code
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# OS Files
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# Application
|
||||||
|
application-local.yml
|
||||||
|
application-dev.yml
|
||||||
|
application-prod.yml
|
||||||
|
|
||||||
|
# Docker
|
||||||
|
.docker/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
venv/
|
||||||
|
.venv/
|
||||||
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
.eggs/
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# Model cache
|
||||||
|
models/
|
||||||
|
.cache/
|
||||||
|
|
||||||
|
# Test data
|
||||||
|
test-data/
|
||||||
|
*.xml.bak
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.tmp
|
||||||
|
*.temp
|
||||||
|
*.swp
|
||||||
|
*~
|
||||||
|
|
||||||
|
# Secrets
|
||||||
|
*.pem
|
||||||
|
*.key
|
||||||
|
secrets/
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
|
||||||
|
# Database
|
||||||
|
*.db
|
||||||
|
*.sqlite
|
||||||
|
*.sqlite3
|
||||||
|
|
||||||
|
# Processed files (Camel)
|
||||||
|
.processed/
|
||||||
|
.error/
|
||||||
|
|
||||||
|
*.bak
|
||||||
|
.claude
|
||||||
|
|
@ -16,8 +16,8 @@ import org.springframework.scheduling.annotation.EnableAsync;
|
||||||
*/
|
*/
|
||||||
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
|
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
|
||||||
@EnableAsync
|
@EnableAsync
|
||||||
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", /*"at.procon.dip.domain.time.entity",*/ "at.procon.dip.clustering.entity"})
|
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", "at.procon.dip.domain.time.entity",/**/ "at.procon.dip.clustering.entity"})
|
||||||
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", /*"at.procon.dip.domain.time.repository",*/ "at.procon.dip.clustering.repository"})
|
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", "at.procon.dip.domain.time.repository",/**/ "at.procon.dip.clustering.repository"})
|
||||||
public class DocumentIntelligencePlatformApplication {
|
public class DocumentIntelligencePlatformApplication {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ public class TimeDomainProperties {
|
||||||
private String selectiveMaterializationPersonDbk;
|
private String selectiveMaterializationPersonDbk;
|
||||||
private Integer selectiveMaterializationPersonNumber;
|
private Integer selectiveMaterializationPersonNumber;
|
||||||
private boolean selectiveMaterializationBuildProjection = true;
|
private boolean selectiveMaterializationBuildProjection = true;
|
||||||
|
private int materializationChunkSize = 200;
|
||||||
private String representationLanguageCode = "de";
|
private String representationLanguageCode = "de";
|
||||||
private String scopeKey = "leitstand-default";
|
private String scopeKey = "leitstand-default";
|
||||||
private JdbcProperties jdbc = new JdbcProperties();
|
private JdbcProperties jdbc = new JdbcProperties();
|
||||||
|
|
|
||||||
|
|
@ -80,10 +80,10 @@ public class TimeEntrySearchProjection {
|
||||||
@Column(name = "time_recording_mcl_id", length = 255)
|
@Column(name = "time_recording_mcl_id", length = 255)
|
||||||
private String timeRecordingMclId;
|
private String timeRecordingMclId;
|
||||||
|
|
||||||
@Column(name = "time_recording_desc", length = 255)
|
@Column(name = "time_recording_desc", columnDefinition = "TEXT")
|
||||||
private String timeRecordingDesc;
|
private String timeRecordingDesc;
|
||||||
|
|
||||||
@Column(name = "time_recording_remark", length = 255)
|
@Column(name = "time_recording_remark", columnDefinition = "TEXT")
|
||||||
private String timeRecordingRemark;
|
private String timeRecordingRemark;
|
||||||
|
|
||||||
@Column(name = "time_recording_url", length = 1000)
|
@Column(name = "time_recording_url", length = 1000)
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,13 @@
|
||||||
package at.procon.dip.domain.time.repository.leitstand;
|
package at.procon.dip.domain.time.repository.leitstand;
|
||||||
|
|
||||||
import at.procon.dip.domain.time.entity.leitstand.LeitstandTimeRecordingAssignment;
|
import at.procon.dip.domain.time.entity.leitstand.LeitstandTimeRecordingAssignment;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import org.springframework.data.jpa.repository.JpaRepository;
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
public interface LeitstandTimeRecordingAssignmentRepository extends JpaRepository<LeitstandTimeRecordingAssignment, String> {
|
public interface LeitstandTimeRecordingAssignmentRepository extends JpaRepository<LeitstandTimeRecordingAssignment, String> {
|
||||||
|
|
||||||
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkOrderByDbkAsc(String timeRecordingDbk);
|
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkOrderByDbkAsc(String timeRecordingDbk);
|
||||||
|
|
||||||
|
List<LeitstandTimeRecordingAssignment> findByTimeRecordingDbkInOrderByTimeRecordingDbkAscDbkAsc(Collection<String> timeRecordingDbks);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,8 @@ public interface LeitstandTimeRecordingRepository extends JpaRepository<Leitstan
|
||||||
|
|
||||||
Optional<LeitstandTimeRecording> findByTimeEntry_Id(UUID timeEntryId);
|
Optional<LeitstandTimeRecording> findByTimeEntry_Id(UUID timeEntryId);
|
||||||
|
|
||||||
|
List<LeitstandTimeRecording> findAllByOrderByRecordedFromAscDbkAsc();
|
||||||
|
|
||||||
List<LeitstandTimeRecording> findByTimeEntryIsNotNull();
|
List<LeitstandTimeRecording> findByTimeEntryIsNotNull();
|
||||||
|
|
||||||
List<LeitstandTimeRecording> findByPersonDbkOrderByRecordedFromAscDbkAsc(String personDbk);
|
List<LeitstandTimeRecording> findByPersonDbkOrderByRecordedFromAscDbkAsc(String personDbk);
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ import org.springframework.transaction.annotation.Propagation;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
//@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||||
@ConditionalOnProperty(prefix = "dip.time.leitstand", name = "enabled", havingValue = "true")
|
@ConditionalOnProperty(prefix = "dip.time.leitstand", name = "enabled", havingValue = "true")
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
|
@ -144,14 +144,26 @@ public class LeitstandTimeImportService {
|
||||||
log.info("No Leitstand time recordings found for personDbk={}", personDbk);
|
log.info("No Leitstand time recordings found for personDbk={}", personDbk);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
//upsertCanonicalTimeEntriesForImportedRecordings(recordings);
|
upsertCanonicalTimeEntriesForImportedRecordings(recordings);
|
||||||
if (rebuildProjection && properties.getLeitstand().isBuildSearchProjection()) {
|
if (rebuildProjection && properties.getLeitstand().isBuildSearchProjection()) {
|
||||||
projectionService.refreshForPersonDbk(personDbk);
|
projectionService.refreshForPersonDbk(personDbk);
|
||||||
}
|
}
|
||||||
return recordings.size();
|
return recordings.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Transactional
|
public int materializeCanonicalTimeEntriesForAll(boolean rebuildProjection) {
|
||||||
|
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findAllByOrderByRecordedFromAscDbkAsc();
|
||||||
|
if (recordings.isEmpty()) {
|
||||||
|
log.info("No Leitstand time recordings found for full materialization");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
upsertCanonicalTimeEntriesForImportedRecordings(recordings);
|
||||||
|
if (rebuildProjection && properties.getLeitstand().isBuildSearchProjection()) {
|
||||||
|
projectionService.refreshAll();
|
||||||
|
}
|
||||||
|
return recordings.size();
|
||||||
|
}
|
||||||
|
|
||||||
public int materializeCanonicalTimeEntriesForPersonNumber(Integer personNumber, boolean rebuildProjection) {
|
public int materializeCanonicalTimeEntriesForPersonNumber(Integer personNumber, boolean rebuildProjection) {
|
||||||
if (personNumber == null) {
|
if (personNumber == null) {
|
||||||
throw new IllegalArgumentException("personNumber must not be null");
|
throw new IllegalArgumentException("personNumber must not be null");
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Propagation;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
|
|
@ -44,126 +45,159 @@ public class LeitstandTimeProjectionService {
|
||||||
private final TimeEntrySearchProjectionRepository projectionRepository;
|
private final TimeEntrySearchProjectionRepository projectionRepository;
|
||||||
private final TimeEntryRepresentationMaterializationService representationMaterializationService;
|
private final TimeEntryRepresentationMaterializationService representationMaterializationService;
|
||||||
|
|
||||||
@Transactional
|
|
||||||
public void refreshForLeitstandRecordingDbks(Collection<String> recordingDbks) {
|
public void refreshForLeitstandRecordingDbks(Collection<String> recordingDbks) {
|
||||||
if (recordingDbks == null || recordingDbks.isEmpty()) {
|
if (recordingDbks == null || recordingDbks.isEmpty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findAllById(recordingDbks).stream()
|
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findAllById(recordingDbks).stream()
|
||||||
.filter(recording -> recording.getTimeEntry() != null)
|
.filter(recording -> recording.getTimeEntry() != null)
|
||||||
|
.sorted(Comparator.comparing(LeitstandTimeRecording::getRecordedFrom, Comparator.nullsLast(Comparator.naturalOrder()))
|
||||||
|
.thenComparing(LeitstandTimeRecording::getDbk))
|
||||||
.toList();
|
.toList();
|
||||||
if (recordings.isEmpty()) {
|
refreshChunked(recordings);
|
||||||
return;
|
|
||||||
}
|
|
||||||
upsertProjections(recordings);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Transactional
|
|
||||||
public int refreshForPersonDbk(String personDbk) {
|
public int refreshForPersonDbk(String personDbk) {
|
||||||
if (personDbk == null || personDbk.isBlank()) {
|
if (personDbk == null || personDbk.isBlank()) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository
|
List<LeitstandTimeRecording> recordings = timeRecordingRepository
|
||||||
.findByPersonDbkAndTimeEntryIsNotNullOrderByRecordedFromAscDbkAsc(personDbk);
|
.findByPersonDbkAndTimeEntryIsNotNullOrderByRecordedFromAscDbkAsc(personDbk);
|
||||||
upsertProjections(recordings);
|
refreshChunked(recordings);
|
||||||
return recordings.size();
|
return recordings.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Transactional
|
|
||||||
public int refreshAll() {
|
public int refreshAll() {
|
||||||
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findByTimeEntryIsNotNull();
|
List<LeitstandTimeRecording> recordings = timeRecordingRepository.findByTimeEntryIsNotNull().stream()
|
||||||
upsertProjections(recordings);
|
.sorted(Comparator.comparing(LeitstandTimeRecording::getRecordedFrom, Comparator.nullsLast(Comparator.naturalOrder()))
|
||||||
|
.thenComparing(LeitstandTimeRecording::getDbk))
|
||||||
|
.toList();
|
||||||
|
refreshChunked(recordings);
|
||||||
return recordings.size();
|
return recordings.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void upsertProjections(List<LeitstandTimeRecording> recordings) {
|
private void refreshChunked(List<LeitstandTimeRecording> recordings) {
|
||||||
for (LeitstandTimeRecording recording : recordings) {
|
if (recordings == null || recordings.isEmpty()) {
|
||||||
TimeEntrySearchProjection projection = buildProjection(recording);
|
return;
|
||||||
TimeEntrySearchProjection saved = projectionRepository.save(projection);
|
}
|
||||||
if (properties.getLeitstand().isBuildRepresentations()) {
|
int chunkSize = Math.max(1, properties.getLeitstand().getMaterializationChunkSize());
|
||||||
representationMaterializationService.upsertRepresentations(saved);
|
for (int start = 0; start < recordings.size(); start += chunkSize) {
|
||||||
}
|
List<LeitstandTimeRecording> chunk = recordings.subList(start, Math.min(start + chunkSize, recordings.size()));
|
||||||
|
refreshChunk(chunk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording) {
|
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||||
TimeEntry timeEntry = timeEntryRepository.findById(recording.getTimeEntry().getId())
|
protected void refreshChunk(List<LeitstandTimeRecording> recordings) {
|
||||||
.orElseThrow(() -> new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId()));
|
if (recordings == null || recordings.isEmpty()) {
|
||||||
Document document = timeEntry.getDocument();
|
return;
|
||||||
|
}
|
||||||
|
ProjectionBuildContext ctx = preloadContext(recordings);
|
||||||
|
List<TimeEntrySearchProjection> projections = new ArrayList<>(recordings.size());
|
||||||
|
for (LeitstandTimeRecording recording : recordings) {
|
||||||
|
projections.add(buildProjection(recording, ctx));
|
||||||
|
}
|
||||||
|
List<TimeEntrySearchProjection> saved = projectionRepository.saveAll(projections);
|
||||||
|
projectionRepository.flush();
|
||||||
|
if (properties.getLeitstand().isBuildRepresentations()) {
|
||||||
|
representationMaterializationService.upsertRepresentations(saved);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
LeitstandPerson person = recording.getPersonDbk() == null ? null : personRepository.findById(recording.getPersonDbk()).orElse(null);
|
private ProjectionBuildContext preloadContext(List<LeitstandTimeRecording> recordings) {
|
||||||
LeitstandActivityType activityType = recording.getActivityTypeId() == null ? null : activityTypeRepository.findById(recording.getActivityTypeId()).orElse(null);
|
List<String> recordingDbks = recordings.stream().map(LeitstandTimeRecording::getDbk).toList();
|
||||||
|
List<LeitstandTimeRecordingAssignment> assignments = timeRecordingAssignmentRepository
|
||||||
|
.findByTimeRecordingDbkInOrderByTimeRecordingDbkAscDbkAsc(recordingDbks);
|
||||||
|
Map<String, List<LeitstandTimeRecordingAssignment>> assignmentsByRecordingDbk = assignments.stream()
|
||||||
|
.collect(Collectors.groupingBy(LeitstandTimeRecordingAssignment::getTimeRecordingDbk, LinkedHashMap::new, Collectors.toList()));
|
||||||
|
|
||||||
List<LeitstandTimeRecordingAssignment> assignments = timeRecordingAssignmentRepository.findByTimeRecordingDbkOrderByDbkAsc(recording.getDbk());
|
List<String> personTaskAssignmentIds = assignments.stream()
|
||||||
List<LeitstandPersonTaskAssignment> personTaskAssignments = personTaskAssignmentRepository.findAllById(assignments.stream()
|
|
||||||
.map(LeitstandTimeRecordingAssignment::getPersonTaskAssignmentDbk)
|
.map(LeitstandTimeRecordingAssignment::getPersonTaskAssignmentDbk)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.toList());
|
.toList();
|
||||||
Map<String, LeitstandPersonTaskAssignment> ptaByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk);
|
List<LeitstandPersonTaskAssignment> personTaskAssignments = personTaskAssignmentRepository.findAllById(personTaskAssignmentIds);
|
||||||
|
Map<String, LeitstandPersonTaskAssignment> personTaskAssignmentsByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk);
|
||||||
|
|
||||||
Map<String, LeitstandTask> tasksByDbk = indexBy(taskRepository.findAllById(personTaskAssignments.stream()
|
List<String> taskIds = personTaskAssignments.stream().map(LeitstandPersonTaskAssignment::getTaskDbk).filter(Objects::nonNull).distinct().toList();
|
||||||
.map(LeitstandPersonTaskAssignment::getTaskDbk)
|
Map<String, LeitstandTask> tasksByDbk = indexBy(taskRepository.findAllById(taskIds), LeitstandTask::getDbk);
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.distinct()
|
|
||||||
.toList()), LeitstandTask::getDbk);
|
|
||||||
|
|
||||||
Map<String, LeitstandCostUnit> costUnitsByDbk = indexBy(costUnitRepository.findAllById(personTaskAssignments.stream()
|
List<String> costUnitIds = personTaskAssignments.stream().map(LeitstandPersonTaskAssignment::getCostUnitDbk).filter(Objects::nonNull).distinct().toList();
|
||||||
.map(LeitstandPersonTaskAssignment::getCostUnitDbk)
|
Map<String, LeitstandCostUnit> costUnitsByDbk = indexBy(costUnitRepository.findAllById(costUnitIds), LeitstandCostUnit::getDbk);
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.distinct()
|
|
||||||
.toList()), LeitstandCostUnit::getDbk);
|
|
||||||
|
|
||||||
Map<String, LeitstandContract> contractsByDbk = indexBy(contractRepository.findAllById(costUnitsByDbk.values().stream()
|
List<String> contractIds = costUnitsByDbk.values().stream().map(LeitstandCostUnit::getContractDbk).filter(Objects::nonNull).distinct().toList();
|
||||||
.map(LeitstandCostUnit::getContractDbk)
|
Map<String, LeitstandContract> contractsByDbk = indexBy(contractRepository.findAllById(contractIds), LeitstandContract::getDbk);
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.distinct()
|
|
||||||
.toList()), LeitstandContract::getDbk);
|
|
||||||
|
|
||||||
Map<String, LeitstandContractPosition> contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(costUnitsByDbk.values().stream()
|
List<String> contractPositionIds = costUnitsByDbk.values().stream().map(LeitstandCostUnit::getContractPositionDbk).filter(Objects::nonNull).distinct().toList();
|
||||||
.map(LeitstandCostUnit::getContractPositionDbk)
|
Map<String, LeitstandContractPosition> contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(contractPositionIds), LeitstandContractPosition::getDbk);
|
||||||
.filter(Objects::nonNull)
|
|
||||||
.distinct()
|
|
||||||
.toList()), LeitstandContractPosition::getDbk);
|
|
||||||
|
|
||||||
Set<String> organizationDbks = new LinkedHashSet<>();
|
Set<String> organizationIds = new LinkedHashSet<>();
|
||||||
costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add);
|
costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
|
||||||
contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add);
|
contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
|
||||||
if (person != null && person.getOrganizationDbk() != null) {
|
recordings.stream().map(LeitstandTimeRecording::getPersonDbk).filter(Objects::nonNull).forEach(id -> {});
|
||||||
organizationDbks.add(person.getOrganizationDbk());
|
List<String> personIds = recordings.stream().map(LeitstandTimeRecording::getPersonDbk).filter(Objects::nonNull).distinct().toList();
|
||||||
|
Map<String, LeitstandPerson> personsByDbk = indexBy(personRepository.findAllById(personIds), LeitstandPerson::getDbk);
|
||||||
|
personsByDbk.values().stream().map(LeitstandPerson::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationIds::add);
|
||||||
|
Map<String, LeitstandOrganization> organizationsByDbk = indexBy(organizationRepository.findAllById(organizationIds), LeitstandOrganization::getDbk);
|
||||||
|
|
||||||
|
List<Integer> activityTypeIds = recordings.stream().map(LeitstandTimeRecording::getActivityTypeId).filter(Objects::nonNull).distinct().toList();
|
||||||
|
Map<Integer, LeitstandActivityType> activityTypesById = indexBy(activityTypeRepository.findAllById(activityTypeIds), LeitstandActivityType::getId);
|
||||||
|
|
||||||
|
List<UUID> timeEntryIds = recordings.stream().map(LeitstandTimeRecording::getTimeEntry).filter(Objects::nonNull).map(TimeEntry::getId).filter(Objects::nonNull).distinct().toList();
|
||||||
|
Map<UUID, TimeEntry> timeEntriesById = timeEntryRepository.findAllById(timeEntryIds).stream().collect(Collectors.toMap(TimeEntry::getId, Function.identity()));
|
||||||
|
Map<UUID, TimeEntrySearchProjection> existingProjectionsByTimeEntryId = projectionRepository.findByTimeEntry_IdIn(timeEntryIds).stream().collect(Collectors.toMap(p -> p.getTimeEntry().getId(), Function.identity()));
|
||||||
|
|
||||||
|
return new ProjectionBuildContext(assignmentsByRecordingDbk, personTaskAssignmentsByDbk, tasksByDbk, costUnitsByDbk,
|
||||||
|
contractsByDbk, contractPositionsByDbk, organizationsByDbk, personsByDbk, activityTypesById,
|
||||||
|
timeEntriesById, existingProjectionsByTimeEntryId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording, ProjectionBuildContext ctx) {
|
||||||
|
TimeEntry timeEntry = ctx.timeEntriesById.get(recording.getTimeEntry().getId());
|
||||||
|
if (timeEntry == null) {
|
||||||
|
throw new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId());
|
||||||
}
|
}
|
||||||
Map<String, LeitstandOrganization> organizationsByDbk = indexBy(organizationRepository.findAllById(organizationDbks), LeitstandOrganization::getDbk);
|
Document document = timeEntry.getDocument();
|
||||||
|
|
||||||
|
LeitstandPerson person = recording.getPersonDbk() == null ? null : ctx.personsByDbk.get(recording.getPersonDbk());
|
||||||
|
LeitstandActivityType activityType = recording.getActivityTypeId() == null ? null : ctx.activityTypesById.get(recording.getActivityTypeId());
|
||||||
|
|
||||||
|
List<LeitstandTimeRecordingAssignment> assignments = ctx.assignmentsByRecordingDbk.getOrDefault(recording.getDbk(), List.of());
|
||||||
|
List<LeitstandPersonTaskAssignment> personTaskAssignments = assignments.stream()
|
||||||
|
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||||
|
.filter(Objects::nonNull)
|
||||||
|
.distinct()
|
||||||
|
.toList();
|
||||||
|
|
||||||
List<LeitstandTask> orderedTasks = assignments.stream()
|
List<LeitstandTask> orderedTasks = assignments.stream()
|
||||||
.map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk()))
|
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(pta -> tasksByDbk.get(pta.getTaskDbk()))
|
.map(pta -> ctx.tasksByDbk.get(pta.getTaskDbk()))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.toList();
|
.toList();
|
||||||
List<LeitstandCostUnit> orderedCostUnits = assignments.stream()
|
List<LeitstandCostUnit> orderedCostUnits = assignments.stream()
|
||||||
.map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk()))
|
.map(a -> ctx.personTaskAssignmentsByDbk.get(a.getPersonTaskAssignmentDbk()))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(pta -> costUnitsByDbk.get(pta.getCostUnitDbk()))
|
.map(pta -> ctx.costUnitsByDbk.get(pta.getCostUnitDbk()))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.toList();
|
.toList();
|
||||||
List<LeitstandContract> orderedContracts = orderedCostUnits.stream()
|
List<LeitstandContract> orderedContracts = orderedCostUnits.stream()
|
||||||
.map(cu -> contractsByDbk.get(cu.getContractDbk()))
|
.map(cu -> ctx.contractsByDbk.get(cu.getContractDbk()))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.toList();
|
.toList();
|
||||||
List<LeitstandContractPosition> orderedContractPositions = orderedCostUnits.stream()
|
List<LeitstandContractPosition> orderedContractPositions = orderedCostUnits.stream()
|
||||||
.map(cu -> contractPositionsByDbk.get(cu.getContractPositionDbk()))
|
.map(cu -> ctx.contractPositionsByDbk.get(cu.getContractPositionDbk()))
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.distinct()
|
.distinct()
|
||||||
.toList();
|
.toList();
|
||||||
List<LeitstandOrganization> orderedOrganizations = new ArrayList<>();
|
List<LeitstandOrganization> orderedOrganizations = new ArrayList<>();
|
||||||
orderedCostUnits.stream().map(cu -> organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
orderedCostUnits.stream().map(cu -> ctx.organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
||||||
orderedContracts.stream().map(c -> organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
orderedContracts.stream().map(c -> ctx.organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); });
|
||||||
if (person != null && person.getOrganizationDbk() != null) {
|
if (person != null && person.getOrganizationDbk() != null) {
|
||||||
LeitstandOrganization personOrg = organizationsByDbk.get(person.getOrganizationDbk());
|
LeitstandOrganization personOrg = ctx.organizationsByDbk.get(person.getOrganizationDbk());
|
||||||
if (personOrg != null && !orderedOrganizations.contains(personOrg)) orderedOrganizations.add(personOrg);
|
if (personOrg != null && !orderedOrganizations.contains(personOrg)) orderedOrganizations.add(personOrg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -176,8 +210,7 @@ public class LeitstandTimeProjectionService {
|
||||||
String summary = buildSummary(recording, primaryTask, primaryCostUnit, primaryOrganization, person);
|
String summary = buildSummary(recording, primaryTask, primaryCostUnit, primaryOrganization, person);
|
||||||
String semanticText = buildSemanticText(timeEntry, recording, person, activityType, orderedTasks, orderedCostUnits, orderedContracts, orderedContractPositions, orderedOrganizations);
|
String semanticText = buildSemanticText(timeEntry, recording, person, activityType, orderedTasks, orderedCostUnits, orderedContracts, orderedContractPositions, orderedOrganizations);
|
||||||
|
|
||||||
TimeEntrySearchProjection projection = projectionRepository.findByTimeEntry_Id(timeEntry.getId())
|
TimeEntrySearchProjection projection = ctx.existingProjectionsByTimeEntryId.getOrDefault(timeEntry.getId(), TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build());
|
||||||
.orElseGet(() -> TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build());
|
|
||||||
projection.setDocument(document);
|
projection.setDocument(document);
|
||||||
projection.setTimeEntry(timeEntry);
|
projection.setTimeEntry(timeEntry);
|
||||||
projection.setSourceSystem(TimeSourceSystem.LEITSTAND);
|
projection.setSourceSystem(TimeSourceSystem.LEITSTAND);
|
||||||
|
|
@ -229,6 +262,19 @@ public class LeitstandTimeProjectionService {
|
||||||
return projection;
|
return projection;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private record ProjectionBuildContext(
|
||||||
|
Map<String, List<LeitstandTimeRecordingAssignment>> assignmentsByRecordingDbk,
|
||||||
|
Map<String, LeitstandPersonTaskAssignment> personTaskAssignmentsByDbk,
|
||||||
|
Map<String, LeitstandTask> tasksByDbk,
|
||||||
|
Map<String, LeitstandCostUnit> costUnitsByDbk,
|
||||||
|
Map<String, LeitstandContract> contractsByDbk,
|
||||||
|
Map<String, LeitstandContractPosition> contractPositionsByDbk,
|
||||||
|
Map<String, LeitstandOrganization> organizationsByDbk,
|
||||||
|
Map<String, LeitstandPerson> personsByDbk,
|
||||||
|
Map<Integer, LeitstandActivityType> activityTypesById,
|
||||||
|
Map<UUID, TimeEntry> timeEntriesById,
|
||||||
|
Map<UUID, TimeEntrySearchProjection> existingProjectionsByTimeEntryId) {
|
||||||
|
}
|
||||||
private String buildSummary(LeitstandTimeRecording recording,
|
private String buildSummary(LeitstandTimeRecording recording,
|
||||||
LeitstandTask primaryTask,
|
LeitstandTask primaryTask,
|
||||||
LeitstandCostUnit primaryCostUnit,
|
LeitstandCostUnit primaryCostUnit,
|
||||||
|
|
@ -283,7 +329,7 @@ public class LeitstandTimeProjectionService {
|
||||||
return sb.toString().trim();
|
return sb.toString().trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
private <T> Map<String, T> indexBy(Collection<T> rows, Function<T, String> id) {
|
private <K, T> Map<K, T> indexBy(Collection<T> rows, Function<T, K> id) {
|
||||||
return rows.stream()
|
return rows.stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.collect(Collectors.toMap(id, Function.identity(), (a, b) -> a, LinkedHashMap::new));
|
.collect(Collectors.toMap(id, Function.identity(), (a, b) -> a, LinkedHashMap::new));
|
||||||
|
|
|
||||||
|
|
@ -13,10 +13,16 @@ import at.procon.dip.embedding.config.EmbeddingProperties;
|
||||||
import at.procon.dip.embedding.registry.EmbeddingModelRegistry;
|
import at.procon.dip.embedding.registry.EmbeddingModelRegistry;
|
||||||
import at.procon.dip.embedding.service.RepresentationEmbeddingOrchestrator;
|
import at.procon.dip.embedding.service.RepresentationEmbeddingOrchestrator;
|
||||||
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
import at.procon.dip.search.service.DocumentLexicalIndexService;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Propagation;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
|
|
@ -35,69 +41,141 @@ public class TimeEntryRepresentationMaterializationService {
|
||||||
private final EmbeddingProperties embeddingProperties;
|
private final EmbeddingProperties embeddingProperties;
|
||||||
private final EmbeddingModelRegistry modelRegistry;
|
private final EmbeddingModelRegistry modelRegistry;
|
||||||
|
|
||||||
//@Transactional
|
|
||||||
public void upsertRepresentations(TimeEntrySearchProjection projection) {
|
public void upsertRepresentations(TimeEntrySearchProjection projection) {
|
||||||
if (projection.getSemanticText() == null || projection.getSemanticText().isBlank()) {
|
if (projection == null) {
|
||||||
log.debug("Skipping TIME representation for document {} because semantic text is blank", projection.getDocument().getId());
|
return;
|
||||||
|
}
|
||||||
|
upsertRepresentations(List.of(projection));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||||
|
public void upsertRepresentations(List<TimeEntrySearchProjection> projections) {
|
||||||
|
if (projections == null || projections.isEmpty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Document document = projection.getDocument();
|
List<TimeEntrySearchProjection> eligible = projections.stream()
|
||||||
document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
|
.filter(projection -> documentId(projection) != null)
|
||||||
document.setSummary(projection.getSummaryText());
|
.filter(projection -> projection.getSemanticText() != null && !projection.getSemanticText().isBlank())
|
||||||
document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
|
.toList();
|
||||||
if (document.getMimeType() == null || document.getMimeType().isBlank()) {
|
if (eligible.isEmpty()) {
|
||||||
document.setMimeType("application/x-time-entry");
|
return;
|
||||||
}
|
}
|
||||||
document = documentRepository.save(document);
|
|
||||||
|
|
||||||
Optional<DocumentTextRepresentation> existing = representationRepository
|
List<UUID> documentIds = eligible.stream()
|
||||||
.findByDocument_IdAndRepresentationType(document.getId(), RepresentationType.SEMANTIC_TEXT)
|
.map(this::documentId)
|
||||||
.stream()
|
.distinct()
|
||||||
.filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation())
|
.toList();
|
||||||
.findFirst();
|
Map<UUID, Document> documentsById = documentRepository.findAllById(documentIds).stream()
|
||||||
|
.collect(java.util.stream.Collectors.toMap(Document::getId, java.util.function.Function.identity(), (a, b) -> a, LinkedHashMap::new));
|
||||||
|
List<Document> documentsToSave = new ArrayList<>();
|
||||||
|
for (TimeEntrySearchProjection projection : eligible) {
|
||||||
|
UUID documentId = documentId(projection);
|
||||||
|
Document document = documentsById.get(documentId);
|
||||||
|
if (document == null || documentsToSave.contains(document)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
|
||||||
|
document.setSummary(projection.getSummaryText());
|
||||||
|
document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
|
||||||
|
if (document.getMimeType() == null || document.getMimeType().isBlank()) {
|
||||||
|
document.setMimeType("application/x-time-entry");
|
||||||
|
}
|
||||||
|
documentsToSave.add(document);
|
||||||
|
}
|
||||||
|
if (!documentsToSave.isEmpty()) {
|
||||||
|
documentRepository.saveAll(documentsToSave);
|
||||||
|
documentRepository.flush();
|
||||||
|
}
|
||||||
|
|
||||||
boolean changed = existing.isEmpty()
|
List<DocumentTextRepresentation> changedExisting = new ArrayList<>();
|
||||||
|| !projection.getSemanticText().equals(existing.get().getTextBody())
|
List<TimeEntrySearchProjection> newRepresentationProjections = new ArrayList<>();
|
||||||
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|
List<UUID> changedRepresentationIds = new ArrayList<>();
|
||||||
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
|
List<DocumentTextRepresentation> newlyCreatedRepresentations = new ArrayList<>();
|
||||||
|
|
||||||
Document finalDocument = document;
|
for (TimeEntrySearchProjection projection : eligible) {
|
||||||
DocumentTextRepresentation semantic = existing
|
Document document = documentsById.get(documentId(projection));
|
||||||
.map(found -> changed ? updateRepresentation(found, projection) : found)
|
if (document == null) {
|
||||||
.orElseGet(() -> documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
continue;
|
||||||
finalDocument.getId(),
|
}
|
||||||
null,
|
Optional<DocumentTextRepresentation> existing = representationRepository
|
||||||
RepresentationType.SEMANTIC_TEXT,
|
.findByDocument_IdAndRepresentationType(document.getId(), RepresentationType.SEMANTIC_TEXT)
|
||||||
BUILDER_KEY,
|
.stream()
|
||||||
projection.getLanguageCode(),
|
.filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation())
|
||||||
null,
|
.findFirst();
|
||||||
null,
|
|
||||||
null,
|
|
||||||
null,
|
|
||||||
true,
|
|
||||||
projection.getSemanticText(),
|
|
||||||
false
|
|
||||||
)));
|
|
||||||
|
|
||||||
if (changed
|
boolean changed = existing.isEmpty()
|
||||||
&& embeddingProperties.isEnabled()
|
|| !projection.getSemanticText().equals(existing.get().getTextBody())
|
||||||
|
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|
||||||
|
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
|
||||||
|
|
||||||
|
if (!changed) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (existing.isPresent()) {
|
||||||
|
DocumentTextRepresentation found = existing.get();
|
||||||
|
found.setBuilderKey(BUILDER_KEY);
|
||||||
|
found.setLanguageCode(projection.getLanguageCode());
|
||||||
|
found.setPrimaryRepresentation(true);
|
||||||
|
found.setTextBody(projection.getSemanticText());
|
||||||
|
found.setCharCount(projection.getSemanticText().length());
|
||||||
|
changedExisting.add(found);
|
||||||
|
} else {
|
||||||
|
newRepresentationProjections.add(projection);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!changedExisting.isEmpty()) {
|
||||||
|
representationRepository.saveAll(changedExisting);
|
||||||
|
representationRepository.flush();
|
||||||
|
changedExisting.stream().map(DocumentTextRepresentation::getId).forEach(changedRepresentationIds::add);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (TimeEntrySearchProjection projection : newRepresentationProjections) {
|
||||||
|
Document document = documentsById.get(documentId(projection));
|
||||||
|
if (document == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
DocumentTextRepresentation created = documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
||||||
|
document.getId(),
|
||||||
|
null,
|
||||||
|
RepresentationType.SEMANTIC_TEXT,
|
||||||
|
BUILDER_KEY,
|
||||||
|
projection.getLanguageCode(),
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
true,
|
||||||
|
projection.getSemanticText(),
|
||||||
|
false
|
||||||
|
));
|
||||||
|
newlyCreatedRepresentations.add(created);
|
||||||
|
changedRepresentationIds.add(created.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (UUID representationId : changedRepresentationIds) {
|
||||||
|
lexicalIndexService.indexRepresentation(representationId);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (embeddingProperties.isEnabled()
|
||||||
&& timeDomainProperties.getLeitstand().isQueueEmbeddings()
|
&& timeDomainProperties.getLeitstand().isQueueEmbeddings()
|
||||||
&& embeddingProperties.getDefaultDocumentModel() != null && !embeddingProperties.getDefaultDocumentModel().isBlank()) {
|
&& embeddingProperties.getDefaultDocumentModel() != null
|
||||||
|
&& !embeddingProperties.getDefaultDocumentModel().isBlank()) {
|
||||||
String modelKey = modelRegistry.getRequiredDefaultDocumentModelKey();
|
String modelKey = modelRegistry.getRequiredDefaultDocumentModelKey();
|
||||||
embeddingOrchestrator.enqueueRepresentation(document.getId(), semantic.getId(), modelKey);
|
for (DocumentTextRepresentation representation : changedExisting) {
|
||||||
|
embeddingOrchestrator.enqueueRepresentation(representation.getDocument().getId(), representation.getId(), modelKey);
|
||||||
|
}
|
||||||
|
for (DocumentTextRepresentation representation : newlyCreatedRepresentations) {
|
||||||
|
embeddingOrchestrator.enqueueRepresentation(representation.getDocument().getId(), representation.getId(), modelKey);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private DocumentTextRepresentation updateRepresentation(DocumentTextRepresentation existing, TimeEntrySearchProjection projection) {
|
private UUID documentId(TimeEntrySearchProjection projection) {
|
||||||
existing.setBuilderKey(BUILDER_KEY);
|
Document document = projection == null ? null : projection.getDocument();
|
||||||
existing.setLanguageCode(projection.getLanguageCode());
|
return document == null ? null : document.getId();
|
||||||
existing.setPrimaryRepresentation(true);
|
|
||||||
existing.setTextBody(projection.getSemanticText());
|
|
||||||
existing.setCharCount(projection.getSemanticText().length());
|
|
||||||
DocumentTextRepresentation saved = representationRepository.saveAndFlush(existing);
|
|
||||||
lexicalIndexService.indexRepresentation(saved.getId());
|
|
||||||
return saved;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean equalsNullable(String left, String right) {
|
private boolean equalsNullable(String left, String right) {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,8 @@ public class LeitstandTimeSelectiveMaterializationStartupRunner implements Appli
|
||||||
log.info("Completed selective Leitstand TIME materialization for personNumber={}. Processed {} recordings", cfg.getSelectiveMaterializationPersonNumber(), count);
|
log.info("Completed selective Leitstand TIME materialization for personNumber={}. Processed {} recordings", cfg.getSelectiveMaterializationPersonNumber(), count);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
throw new IllegalStateException("dip.time.leitstand.startup-selective-materialization-enabled=true requires either selective-materialization-person-dbk or selective-materialization-person-number");
|
log.info("Starting Leitstand TIME materialization for all imported recordings (rebuildProjection={})", rebuildProjection);
|
||||||
|
int count = importService.materializeCanonicalTimeEntriesForAll(rebuildProjection);
|
||||||
|
log.info("Completed Leitstand TIME materialization for all imported recordings. Processed {} recordings", count);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,11 +18,18 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<SearchHit> search(SearchExecutionContext context, int limit) {
|
public List<SearchHit> search(SearchExecutionContext context, int limit) {
|
||||||
|
String effectiveConfigExpr = effectiveConfigExpression("dtr");
|
||||||
|
String tsQueryExpr = tsQueryExpression(effectiveConfigExpr);
|
||||||
|
|
||||||
StringBuilder sql = new StringBuilder("""
|
StringBuilder sql = new StringBuilder("""
|
||||||
SELECT
|
SELECT
|
||||||
d.id AS document_id,
|
d.id AS document_id,
|
||||||
dtr.id AS representation_id,
|
dtr.id AS representation_id,
|
||||||
CAST(dtr.representation_type AS text) AS representation_type,
|
CAST(dtr.representation_type AS text) AS representation_type,
|
||||||
|
dtr.is_primary AS is_primary,
|
||||||
|
dtr.chunk_index AS chunk_index,
|
||||||
|
dtr.chunk_start_offset AS chunk_start_offset,
|
||||||
|
dtr.chunk_end_offset AS chunk_end_offset,
|
||||||
CAST(d.document_type AS text) AS document_type,
|
CAST(d.document_type AS text) AS document_type,
|
||||||
CAST(d.document_family AS text) AS document_family,
|
CAST(d.document_family AS text) AS document_family,
|
||||||
CAST(d.visibility AS text) AS visibility,
|
CAST(d.visibility AS text) AS visibility,
|
||||||
|
|
@ -33,41 +40,29 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
d.created_at AS created_at,
|
d.created_at AS created_at,
|
||||||
d.updated_at AS updated_at,
|
d.updated_at AS updated_at,
|
||||||
ts_headline(
|
ts_headline(
|
||||||
CASE
|
"""
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
).append(effectiveConfigExpr).append("""
|
||||||
ELSE dtr.search_config::regconfig
|
,
|
||||||
END,
|
|
||||||
COALESCE(dtr.text_body, ''),
|
COALESCE(dtr.text_body, ''),
|
||||||
websearch_to_tsquery(
|
""").append(tsQueryExpr).append("""
|
||||||
CASE
|
,
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
|
||||||
ELSE dtr.search_config::regconfig
|
|
||||||
END,
|
|
||||||
:queryText
|
|
||||||
),
|
|
||||||
'MaxFragments=2, MinWords=5, MaxWords=20'
|
'MaxFragments=2, MinWords=5, MaxWords=20'
|
||||||
) AS snippet,
|
) AS snippet,
|
||||||
ts_rank_cd(
|
ranked.score AS score
|
||||||
dtr.search_vector,
|
FROM (
|
||||||
websearch_to_tsquery(
|
SELECT
|
||||||
CASE
|
d.id AS document_id,
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
dtr.id AS representation_id,
|
||||||
ELSE dtr.search_config::regconfig
|
ts_rank_cd(
|
||||||
END,
|
dtr.search_vector,
|
||||||
:queryText
|
""").append(tsQueryExpr).append("""
|
||||||
)
|
) AS score,
|
||||||
) AS score
|
d.updated_at AS updated_at
|
||||||
FROM doc.doc_text_representation dtr
|
FROM doc.doc_text_representation dtr
|
||||||
JOIN doc.doc_document d ON d.id = dtr.document_id
|
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||||
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
WHERE dtr.search_vector IS NOT NULL
|
WHERE dtr.search_vector IS NOT NULL
|
||||||
AND dtr.search_vector @@ websearch_to_tsquery(
|
AND dtr.search_vector @@ """).append(tsQueryExpr).append("""
|
||||||
CASE
|
|
||||||
WHEN NULLIF(dtr.search_config, '') IS NULL THEN 'simple'::regconfig
|
|
||||||
ELSE dtr.search_config::regconfig
|
|
||||||
END,
|
|
||||||
:queryText
|
|
||||||
)
|
|
||||||
""");
|
""");
|
||||||
|
|
||||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||||
|
|
@ -75,7 +70,14 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
|
|
||||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||||
|
|
||||||
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
sql.append("""
|
||||||
|
ORDER BY score DESC, d.updated_at DESC
|
||||||
|
LIMIT :limit
|
||||||
|
) ranked
|
||||||
|
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
|
||||||
|
JOIN doc.doc_document d ON d.id = ranked.document_id
|
||||||
|
ORDER BY ranked.score DESC, d.updated_at DESC
|
||||||
|
""");
|
||||||
params.addValue("limit", limit);
|
params.addValue("limit", limit);
|
||||||
|
|
||||||
return jdbcTemplate.query(
|
return jdbcTemplate.query(
|
||||||
|
|
@ -84,4 +86,22 @@ public class DocumentFullTextSearchRepositoryImpl implements DocumentFullTextSea
|
||||||
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
|
new SearchHitRowMapper(SearchEngineType.POSTGRES_FULLTEXT, SearchMatchField.REPRESENTATION_TEXT)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
private static String effectiveConfigExpression(String representationAlias) {
|
||||||
|
return """
|
||||||
|
CASE
|
||||||
|
WHEN NULLIF(%s.search_config, '') IS NULL THEN 'simple'::regconfig
|
||||||
|
ELSE %s.search_config::regconfig
|
||||||
|
END
|
||||||
|
""".formatted(representationAlias, representationAlias).trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String tsQueryExpression(String configExpression) {
|
||||||
|
return """
|
||||||
|
websearch_to_tsquery(
|
||||||
|
%s,
|
||||||
|
:queryText
|
||||||
|
)
|
||||||
|
""".formatted(configExpression).trim();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,42 +18,112 @@ public class DocumentTrigramSearchRepositoryImpl implements DocumentTrigramSearc
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<SearchHit> search(SearchExecutionContext context, int limit, double threshold) {
|
public List<SearchHit> search(SearchExecutionContext context, int limit, double threshold) {
|
||||||
String scoreExpr = "GREATEST(" +
|
StringBuilder sql = new StringBuilder("""
|
||||||
"doc.similarity(COALESCE(d.title, ''), :queryText), " +
|
WITH title_candidates AS (
|
||||||
"doc.similarity(COALESCE(d.summary, ''), :queryText), " +
|
SELECT
|
||||||
"doc.similarity(COALESCE(dtr.text_body, ''), :queryText))";
|
d.id AS document_id,
|
||||||
|
dtr.id AS representation_id,
|
||||||
StringBuilder sql = new StringBuilder("SELECT " +
|
'DOCUMENT_TITLE' AS matched_field,
|
||||||
"d.id AS document_id, " +
|
public.similarity(d.title, :queryText) AS score,
|
||||||
"dtr.id AS representation_id, " +
|
d.updated_at AS updated_at
|
||||||
"CAST(d.document_type AS text) AS document_type, " +
|
FROM doc.doc_text_representation dtr
|
||||||
"CAST(d.document_family AS text) AS document_family, " +
|
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||||
"CAST(d.visibility AS text) AS visibility, " +
|
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
"d.title AS title, " +
|
WHERE d.title IS NOT NULL
|
||||||
"d.summary AS summary, " +
|
AND d.title OPERATOR(public.%) :queryText
|
||||||
"COALESCE(dtr.language_code, d.language_code) AS language_code, " +
|
""");
|
||||||
"d.mime_type AS mime_type, " +
|
|
||||||
"d.created_at AS created_at, " +
|
|
||||||
"d.updated_at AS updated_at, " +
|
|
||||||
"LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet, " +
|
|
||||||
scoreExpr + " AS score, " +
|
|
||||||
"CASE " +
|
|
||||||
"WHEN doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(d.summary, ''), :queryText) " +
|
|
||||||
" AND doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_TITLE' " +
|
|
||||||
"WHEN doc.similarity(COALESCE(d.summary, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_SUMMARY' " +
|
|
||||||
"ELSE 'REPRESENTATION_TEXT' END AS matched_field " +
|
|
||||||
"FROM doc.doc_text_representation dtr " +
|
|
||||||
"JOIN doc.doc_document d ON d.id = dtr.document_id " +
|
|
||||||
"LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id " +
|
|
||||||
"WHERE " + scoreExpr + " >= :threshold");
|
|
||||||
|
|
||||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||||
params.addValue("queryText", context.getRequest().getQueryText());
|
params.addValue("queryText", context.getRequest().getQueryText());
|
||||||
params.addValue("threshold", threshold);
|
params.addValue("threshold", threshold);
|
||||||
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
params.addValue("branchLimit", limit);
|
||||||
sql.append(" ORDER BY score DESC, d.updated_at DESC LIMIT :limit");
|
|
||||||
params.addValue("limit", limit);
|
params.addValue("limit", limit);
|
||||||
|
|
||||||
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||||
|
sql.append("""
|
||||||
|
ORDER BY score DESC, d.updated_at DESC
|
||||||
|
LIMIT :branchLimit
|
||||||
|
),
|
||||||
|
summary_candidates AS (
|
||||||
|
SELECT
|
||||||
|
d.id AS document_id,
|
||||||
|
dtr.id AS representation_id,
|
||||||
|
'DOCUMENT_SUMMARY' AS matched_field,
|
||||||
|
public.similarity(d.summary, :queryText) AS score,
|
||||||
|
d.updated_at AS updated_at
|
||||||
|
FROM doc.doc_text_representation dtr
|
||||||
|
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||||
|
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
|
WHERE d.summary IS NOT NULL
|
||||||
|
AND d.summary OPERATOR(public.%) :queryText
|
||||||
|
""");
|
||||||
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||||
|
sql.append("""
|
||||||
|
ORDER BY score DESC, d.updated_at DESC
|
||||||
|
LIMIT :branchLimit
|
||||||
|
),
|
||||||
|
text_candidates AS (
|
||||||
|
SELECT
|
||||||
|
d.id AS document_id,
|
||||||
|
dtr.id AS representation_id,
|
||||||
|
'REPRESENTATION_TEXT' AS matched_field,
|
||||||
|
public.similarity(dtr.text_body, :queryText) AS score,
|
||||||
|
d.updated_at AS updated_at
|
||||||
|
FROM doc.doc_text_representation dtr
|
||||||
|
JOIN doc.doc_document d ON d.id = dtr.document_id
|
||||||
|
LEFT JOIN doc.doc_tenant dt ON dt.id = d.owner_tenant_id
|
||||||
|
WHERE dtr.text_body IS NOT NULL
|
||||||
|
AND dtr.text_body OPERATOR(public.%) :queryText
|
||||||
|
""");
|
||||||
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", true);
|
||||||
|
sql.append("""
|
||||||
|
ORDER BY score DESC, d.updated_at DESC
|
||||||
|
LIMIT :branchLimit
|
||||||
|
),
|
||||||
|
ranked AS (
|
||||||
|
SELECT DISTINCT ON (representation_id)
|
||||||
|
document_id,
|
||||||
|
representation_id,
|
||||||
|
matched_field,
|
||||||
|
score,
|
||||||
|
updated_at
|
||||||
|
FROM (
|
||||||
|
SELECT * FROM title_candidates
|
||||||
|
UNION ALL
|
||||||
|
SELECT * FROM summary_candidates
|
||||||
|
UNION ALL
|
||||||
|
SELECT * FROM text_candidates
|
||||||
|
) all_candidates
|
||||||
|
WHERE score >= :threshold
|
||||||
|
ORDER BY representation_id, score DESC, updated_at DESC
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
d.id AS document_id,
|
||||||
|
dtr.id AS representation_id,
|
||||||
|
CAST(dtr.representation_type AS text) AS representation_type,
|
||||||
|
dtr.is_primary AS is_primary,
|
||||||
|
dtr.chunk_index AS chunk_index,
|
||||||
|
dtr.chunk_start_offset AS chunk_start_offset,
|
||||||
|
dtr.chunk_end_offset AS chunk_end_offset,
|
||||||
|
CAST(d.document_type AS text) AS document_type,
|
||||||
|
CAST(d.document_family AS text) AS document_family,
|
||||||
|
CAST(d.visibility AS text) AS visibility,
|
||||||
|
d.title AS title,
|
||||||
|
d.summary AS summary,
|
||||||
|
COALESCE(dtr.language_code, d.language_code) AS language_code,
|
||||||
|
d.mime_type AS mime_type,
|
||||||
|
d.created_at AS created_at,
|
||||||
|
d.updated_at AS updated_at,
|
||||||
|
LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet,
|
||||||
|
ranked.score AS score,
|
||||||
|
ranked.matched_field AS matched_field
|
||||||
|
FROM ranked
|
||||||
|
JOIN doc.doc_text_representation dtr ON dtr.id = ranked.representation_id
|
||||||
|
JOIN doc.doc_document d ON d.id = ranked.document_id
|
||||||
|
ORDER BY ranked.score DESC, d.updated_at DESC
|
||||||
|
LIMIT :limit
|
||||||
|
""");
|
||||||
|
|
||||||
return jdbcTemplate.query(sql.toString(), params,
|
return jdbcTemplate.query(sql.toString(), params,
|
||||||
new SearchHitRowMapper(SearchEngineType.POSTGRES_TRIGRAM, SearchMatchField.REPRESENTATION_TEXT));
|
new SearchHitRowMapper(SearchEngineType.POSTGRES_TRIGRAM, SearchMatchField.REPRESENTATION_TEXT));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,20 +34,17 @@ final class SearchSqlFilterSupport {
|
||||||
boolean tenantJoinPresent) {
|
boolean tenantJoinPresent) {
|
||||||
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
|
Set<DocumentType> documentTypes = firstNonEmpty(context.getRequest().getDocumentTypes(), context.getScope().documentTypes());
|
||||||
if (!CollectionUtils.isEmpty(documentTypes)) {
|
if (!CollectionUtils.isEmpty(documentTypes)) {
|
||||||
sql.append(" AND CAST(").append(documentAlias).append(".document_type AS text) IN (:documentTypes)");
|
appendTextEnumFilter(sql, params, documentAlias + ".document_type", documentTypes, "documentTypes");
|
||||||
params.addValue("documentTypes", enumNames(documentTypes));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
|
Set<DocumentFamily> documentFamilies = firstNonEmpty(context.getRequest().getDocumentFamilies(), context.getScope().documentFamilies());
|
||||||
if (!CollectionUtils.isEmpty(documentFamilies)) {
|
if (!CollectionUtils.isEmpty(documentFamilies)) {
|
||||||
sql.append(" AND CAST(").append(documentAlias).append(".document_family AS text) IN (:documentFamilies)");
|
appendTextEnumFilter(sql, params, documentAlias + ".document_family", documentFamilies, "documentFamilies");
|
||||||
params.addValue("documentFamilies", enumNames(documentFamilies));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
|
Set<DocumentVisibility> visibilities = firstNonEmpty(context.getRequest().getVisibilities(), context.getScope().visibilities());
|
||||||
if (!CollectionUtils.isEmpty(visibilities)) {
|
if (!CollectionUtils.isEmpty(visibilities)) {
|
||||||
sql.append(" AND CAST(").append(documentAlias).append(".visibility AS text) IN (:visibilities)");
|
appendTextEnumFilter(sql, params, documentAlias + ".visibility", visibilities, "visibilities");
|
||||||
params.addValue("visibilities", enumNames(visibilities));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<String> languageCodes = context.getRequest().getLanguageCodes();
|
Set<String> languageCodes = context.getRequest().getLanguageCodes();
|
||||||
|
|
@ -62,8 +59,7 @@ final class SearchSqlFilterSupport {
|
||||||
|
|
||||||
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
|
Set<RepresentationType> representationTypes = context.getRequest().getRepresentationTypes();
|
||||||
if (!CollectionUtils.isEmpty(representationTypes)) {
|
if (!CollectionUtils.isEmpty(representationTypes)) {
|
||||||
sql.append(" AND CAST(").append(representationAlias).append(".representation_type AS text) IN (:representationTypes)");
|
appendTextEnumFilter(sql, params, representationAlias + ".representation_type", representationTypes, "representationTypes");
|
||||||
params.addValue("representationTypes", enumNames(representationTypes));
|
|
||||||
} else {
|
} else {
|
||||||
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
|
SearchRepresentationSelectionMode selectionMode = context.getRequest().getRepresentationSelectionMode();
|
||||||
if (selectionMode == null) {
|
if (selectionMode == null) {
|
||||||
|
|
@ -242,6 +238,15 @@ final class SearchSqlFilterSupport {
|
||||||
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
|
return !CollectionUtils.isEmpty(primary) ? primary : fallback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void appendTextEnumFilter(StringBuilder sql,
|
||||||
|
MapSqlParameterSource params,
|
||||||
|
String columnExpression,
|
||||||
|
Collection<? extends Enum<?>> values,
|
||||||
|
String parameterName) {
|
||||||
|
sql.append(" AND CAST(").append(columnExpression).append(" AS text) IN (:").append(parameterName).append(")");
|
||||||
|
params.addValue(parameterName, enumNames(values));
|
||||||
|
}
|
||||||
|
|
||||||
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
||||||
return values.stream().map(Enum::name).collect(Collectors.toList());
|
return values.stream().map(Enum::name).collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,12 @@
|
||||||
package at.procon.ted.startup;
|
package at.procon.ted.startup;
|
||||||
|
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.boot.ApplicationArguments;
|
import org.springframework.boot.ApplicationArguments;
|
||||||
import org.springframework.boot.ApplicationRunner;
|
import org.springframework.boot.ApplicationRunner;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.core.annotation.Order;
|
import org.springframework.core.annotation.Order;
|
||||||
import org.springframework.jdbc.core.JdbcTemplate;
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
@ -19,6 +22,7 @@ import org.springframework.stereotype.Component;
|
||||||
@Component
|
@Component
|
||||||
@Order(1) // Run before other startup runners
|
@Order(1) // Run before other startup runners
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class OrganizationSchemaFixRunner implements ApplicationRunner {
|
public class OrganizationSchemaFixRunner implements ApplicationRunner {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ dip:
|
||||||
max-chunks-per-document: 12
|
max-chunks-per-document: 12
|
||||||
# Startup backfill limit for missing lexical vectors
|
# Startup backfill limit for missing lexical vectors
|
||||||
startup-lexical-backfill-limit: 500
|
startup-lexical-backfill-limit: 500
|
||||||
scheduled-lexical-backfill-enabled: true
|
scheduled-lexical-backfill-enabled: false
|
||||||
scheduled-lexical-backfill-delay-ms: 30000
|
scheduled-lexical-backfill-delay-ms: 30000
|
||||||
scheduled-lexical-backfill-batch-size: 200
|
scheduled-lexical-backfill-batch-size: 200
|
||||||
# Number of top hits per engine returned by /search/debug
|
# Number of top hits per engine returned by /search/debug
|
||||||
|
|
@ -333,8 +333,8 @@ dip:
|
||||||
leitstand:
|
leitstand:
|
||||||
enabled: false
|
enabled: false
|
||||||
startup-sync-enabled: false
|
startup-sync-enabled: false
|
||||||
startup-selective-materialization-enabled: true
|
startup-selective-materialization-enabled: false
|
||||||
selective-materialization-person-dbk: 100920031023144811001000
|
selective-materialization-person-dbk: #100920031023144811001000
|
||||||
selective-materialization-person-number:
|
selective-materialization-person-number:
|
||||||
selective-materialization-build-projection: true
|
selective-materialization-build-projection: true
|
||||||
create-canonical-time-entries: true
|
create-canonical-time-entries: true
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ spring:
|
||||||
order_updates: true
|
order_updates: true
|
||||||
|
|
||||||
flyway:
|
flyway:
|
||||||
enabled: true
|
enabled: false
|
||||||
locations: classpath:db/migration
|
locations: classpath:db/migration
|
||||||
baseline-on-migrate: true
|
baseline-on-migrate: true
|
||||||
create-schemas: true
|
create-schemas: true
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
-- Slice 1 + Slice 2 generic search support for DOC documents.
|
-- Slice 1 + Slice 2 generic search support for DOC documents.
|
||||||
-- Adds lexical-search support columns/indexes and pg_trgm extension.
|
-- Adds lexical-search support columns/indexes and pg_trgm extension.
|
||||||
|
|
||||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
|
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
|
||||||
|
|
||||||
ALTER TABLE DOC.doc_text_representation
|
ALTER TABLE DOC.doc_text_representation
|
||||||
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
|
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
|
||||||
|
|
@ -15,12 +15,12 @@ CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
|
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
|
||||||
ON DOC.doc_document
|
ON DOC.doc_document
|
||||||
USING GIN (title DOC.gin_trgm_ops);
|
USING GIN (title public.gin_trgm_ops);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
|
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
|
||||||
ON DOC.doc_document
|
ON DOC.doc_document
|
||||||
USING GIN (summary DOC.gin_trgm_ops);
|
USING GIN (summary public.gin_trgm_ops);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
|
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
|
||||||
ON DOC.doc_text_representation
|
ON DOC.doc_text_representation
|
||||||
USING GIN (text_body DOC.gin_trgm_ops);
|
USING GIN (text_body public.gin_trgm_ops);
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,9 @@ ALTER TABLE DOC.doc_embedding
|
||||||
ADD CONSTRAINT ck_doc_embedding_dimensions_positive
|
ADD CONSTRAINT ck_doc_embedding_dimensions_positive
|
||||||
CHECK (embedding_dimensions IS NULL OR embedding_dimensions > 0);
|
CHECK (embedding_dimensions IS NULL OR embedding_dimensions > 0);
|
||||||
|
|
||||||
|
ALTER TABLE DOC.doc_embedding
|
||||||
|
ADD COLUMN IF NOT EXISTS embedding_vector public.vector;
|
||||||
|
|
||||||
DO $$
|
DO $$
|
||||||
BEGIN
|
BEGIN
|
||||||
IF NOT EXISTS (
|
IF NOT EXISTS (
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
-- This makes migration, audit, and repair flows package-aware without having to derive the
|
-- This makes migration, audit, and repair flows package-aware without having to derive the
|
||||||
-- package membership from source paths at query time.
|
-- package membership from source paths at query time.
|
||||||
|
|
||||||
SET search_path TO TED, DOC, public;
|
SET search_path TO ted, doc, public;
|
||||||
|
|
||||||
ALTER TABLE IF EXISTS TED.ted_notice_projection
|
ALTER TABLE IF EXISTS TED.ted_notice_projection
|
||||||
ADD COLUMN IF NOT EXISTS package_identifier VARCHAR(20);
|
ADD COLUMN IF NOT EXISTS package_identifier VARCHAR(20);
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
-- Slice 1 generic lexical search support.
|
-- Slice 1 generic lexical search support.
|
||||||
-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
|
-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
|
||||||
|
|
||||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
|
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
|
||||||
|
|
||||||
ALTER TABLE doc.doc_text_representation
|
ALTER TABLE doc.doc_text_representation
|
||||||
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
|
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
|
||||||
|
|
@ -15,12 +15,12 @@ CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
|
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
|
||||||
ON doc.doc_document
|
ON doc.doc_document
|
||||||
USING GIN (title doc.gin_trgm_ops);
|
USING GIN (title public.gin_trgm_ops);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
|
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
|
||||||
ON doc.doc_document
|
ON doc.doc_document
|
||||||
USING GIN (summary doc.gin_trgm_ops);
|
USING GIN (summary public.gin_trgm_ops);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
|
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
|
||||||
ON doc.doc_text_representation
|
ON doc.doc_text_representation
|
||||||
USING GIN (text_body doc.gin_trgm_ops);
|
USING GIN (text_body public.gin_trgm_ops);
|
||||||
|
|
|
||||||
|
|
@ -306,7 +306,7 @@ CREATE INDEX idx_doc_procedure_type ON procurement_document(procedure_type);
|
||||||
CREATE INDEX idx_doc_cpv_codes ON procurement_document USING GIN(cpv_codes);
|
CREATE INDEX idx_doc_cpv_codes ON procurement_document USING GIN(cpv_codes);
|
||||||
|
|
||||||
-- Full-text search on textual content
|
-- Full-text search on textual content
|
||||||
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content gin_trgm_ops);
|
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content public.gin_trgm_ops);
|
||||||
|
|
||||||
-- Vector similarity search using IVFFlat index (efficient for approximate nearest neighbor)
|
-- Vector similarity search using IVFFlat index (efficient for approximate nearest neighbor)
|
||||||
-- Lists parameter: sqrt(number_of_vectors) for optimal performance
|
-- Lists parameter: sqrt(number_of_vectors) for optimal performance
|
||||||
|
|
|
||||||
|
|
@ -44,34 +44,6 @@ BEGIN
|
||||||
END
|
END
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
DO $$
|
|
||||||
BEGIN
|
|
||||||
IF NOT EXISTS (
|
|
||||||
SELECT 1
|
|
||||||
FROM pg_enum e
|
|
||||||
JOIN pg_type t ON t.oid = e.enumtypid
|
|
||||||
JOIN pg_namespace n ON n.oid = t.typnamespace
|
|
||||||
WHERE n.nspname = 'doc' AND t.typname = 'doc_document_type' AND e.enumlabel = 'TIME_ENTRY'
|
|
||||||
) THEN
|
|
||||||
ALTER TYPE doc.doc_document_type ADD VALUE 'TIME_ENTRY';
|
|
||||||
END IF;
|
|
||||||
END
|
|
||||||
$$;
|
|
||||||
|
|
||||||
DO $$
|
|
||||||
BEGIN
|
|
||||||
IF NOT EXISTS (
|
|
||||||
SELECT 1
|
|
||||||
FROM pg_enum e
|
|
||||||
JOIN pg_type t ON t.oid = e.enumtypid
|
|
||||||
JOIN pg_namespace n ON n.oid = t.typnamespace
|
|
||||||
WHERE n.nspname = 'doc' AND t.typname = 'doc_document_family' AND e.enumlabel = 'TIME'
|
|
||||||
) THEN
|
|
||||||
ALTER TYPE doc.doc_document_family ADD VALUE 'TIME';
|
|
||||||
END IF;
|
|
||||||
END
|
|
||||||
$$;
|
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS "time".time_entry (
|
CREATE TABLE IF NOT EXISTS "time".time_entry (
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
document_id UUID NOT NULL UNIQUE REFERENCES doc.doc_document(id) ON DELETE CASCADE,
|
document_id UUID NOT NULL UNIQUE REFERENCES doc.doc_document(id) ON DELETE CASCADE,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
-- Align TIME projection source text fields with real Leitstand payload lengths.
|
||||||
|
|
||||||
|
ALTER TABLE "time".time_entry_search_projection
|
||||||
|
ALTER COLUMN time_recording_desc TYPE TEXT;
|
||||||
|
|
||||||
|
ALTER TABLE "time".time_entry_search_projection
|
||||||
|
ALTER COLUMN time_recording_remark TYPE TEXT;
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
-- Repair DOC document enum/check alignment for TIME documents on databases
|
||||||
|
-- that still carry the pre-TIME family/type constraints.
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM pg_type t
|
||||||
|
JOIN pg_namespace n ON n.oid = t.typnamespace
|
||||||
|
WHERE n.nspname = 'doc'
|
||||||
|
AND t.typname = 'doc_document_type'
|
||||||
|
) THEN
|
||||||
|
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_PACKAGE';
|
||||||
|
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_NOTICE_LOT';
|
||||||
|
ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TIME_ENTRY';
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM pg_type t
|
||||||
|
JOIN pg_namespace n ON n.oid = t.typnamespace
|
||||||
|
WHERE n.nspname = 'doc'
|
||||||
|
AND t.typname = 'doc_document_family'
|
||||||
|
) THEN
|
||||||
|
ALTER TYPE DOC.doc_document_family ADD VALUE IF NOT EXISTS 'TIME';
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF EXISTS (
|
||||||
|
SELECT 1
|
||||||
|
FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'doc'
|
||||||
|
AND table_name = 'doc_document'
|
||||||
|
) THEN
|
||||||
|
ALTER TABLE DOC.doc_document DROP CONSTRAINT IF EXISTS doc_document_document_type_check;
|
||||||
|
ALTER TABLE DOC.doc_document
|
||||||
|
ADD CONSTRAINT doc_document_document_type_check
|
||||||
|
CHECK (
|
||||||
|
document_type IN (
|
||||||
|
'TED_PACKAGE',
|
||||||
|
'TED_NOTICE',
|
||||||
|
'TED_NOTICE_LOT',
|
||||||
|
'TIME_ENTRY',
|
||||||
|
'EMAIL',
|
||||||
|
'MIME_MESSAGE',
|
||||||
|
'PDF',
|
||||||
|
'DOCX',
|
||||||
|
'HTML',
|
||||||
|
'XML_GENERIC',
|
||||||
|
'TEXT',
|
||||||
|
'MARKDOWN',
|
||||||
|
'ZIP_ARCHIVE',
|
||||||
|
'GENERIC_BINARY',
|
||||||
|
'UNKNOWN'
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
ALTER TABLE DOC.doc_document DROP CONSTRAINT IF EXISTS doc_document_document_family_check;
|
||||||
|
ALTER TABLE DOC.doc_document
|
||||||
|
ADD CONSTRAINT doc_document_document_family_check
|
||||||
|
CHECK (
|
||||||
|
document_family IN (
|
||||||
|
'PROCUREMENT',
|
||||||
|
'TIME',
|
||||||
|
'MAIL',
|
||||||
|
'ATTACHMENT',
|
||||||
|
'KNOWLEDGE',
|
||||||
|
'GENERIC'
|
||||||
|
)
|
||||||
|
);
|
||||||
|
END IF;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
-- Search performance support indexes for filtered DOC fulltext lookups.
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_updated
|
||||||
|
ON DOC.doc_document(document_type, document_family, updated_at DESC, id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_updated
|
||||||
|
ON DOC.doc_document(document_type, document_family, visibility, updated_at DESC, id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_document_primary_type
|
||||||
|
ON DOC.doc_text_representation(document_id, is_primary, representation_type);
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
-- Support cast-to-text search filters on installations where DOC type columns are varchar.
|
||||||
|
-- These indexes align with the query shape used by generic search filters.
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_text_updated
|
||||||
|
ON DOC.doc_document ((CAST(document_type AS text)), (CAST(document_family AS text)), updated_at DESC, id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_document_type_family_visibility_text_updated
|
||||||
|
ON DOC.doc_document (
|
||||||
|
(CAST(document_type AS text)),
|
||||||
|
(CAST(document_family AS text)),
|
||||||
|
(CAST(visibility AS text)),
|
||||||
|
updated_at DESC,
|
||||||
|
id
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_primary_type_text_document
|
||||||
|
ON DOC.doc_text_representation (is_primary, (CAST(representation_type AS text)), document_id);
|
||||||
|
|
@ -0,0 +1,135 @@
|
||||||
|
package at.procon.dip.migration;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import org.flywaydb.core.Flyway;
|
||||||
|
import org.flywaydb.core.api.MigrationVersion;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.testcontainers.containers.PostgreSQLContainer;
|
||||||
|
import org.testcontainers.junit.jupiter.Container;
|
||||||
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
|
|
||||||
|
@Testcontainers
|
||||||
|
class DocDocumentTimeEnumConstraintRepairMigrationTest {
|
||||||
|
|
||||||
|
@Container
|
||||||
|
static PostgreSQLContainer<?> postgres = new PostgreSQLContainer<>("postgres:16-alpine")
|
||||||
|
.withDatabaseName("dip_migration_test")
|
||||||
|
.withUsername("test")
|
||||||
|
.withPassword("test");
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void repairMigrationExpandsLegacyDocDocumentChecksForTimeDocuments() throws SQLException {
|
||||||
|
createLegacyDocDocumentState();
|
||||||
|
|
||||||
|
Flyway.configure()
|
||||||
|
.dataSource(postgres.getJdbcUrl(), postgres.getUsername(), postgres.getPassword())
|
||||||
|
.locations("filesystem:src/main/resources/db/migration")
|
||||||
|
.schemas("doc")
|
||||||
|
.defaultSchema("doc")
|
||||||
|
.baselineOnMigrate(true)
|
||||||
|
.baselineVersion(MigrationVersion.fromVersion("42"))
|
||||||
|
.load()
|
||||||
|
.migrate();
|
||||||
|
|
||||||
|
try (Connection connection = openConnection();
|
||||||
|
Statement statement = connection.createStatement()) {
|
||||||
|
statement.executeUpdate("""
|
||||||
|
INSERT INTO doc.doc_document (id, document_type, document_family)
|
||||||
|
VALUES ('709e388b-19d9-4c21-8d06-82b295b33505', 'TIME_ENTRY', 'TIME')
|
||||||
|
""");
|
||||||
|
}
|
||||||
|
|
||||||
|
try (Connection connection = openConnection();
|
||||||
|
var preparedStatement = connection.prepareStatement("""
|
||||||
|
SELECT pg_get_constraintdef(oid)
|
||||||
|
FROM pg_constraint
|
||||||
|
WHERE conrelid = 'doc.doc_document'::regclass
|
||||||
|
AND conname = ?
|
||||||
|
""")) {
|
||||||
|
preparedStatement.setString(1, "doc_document_document_family_check");
|
||||||
|
try (var resultSet = preparedStatement.executeQuery()) {
|
||||||
|
assertThat(resultSet.next()).isTrue();
|
||||||
|
assertThat(resultSet.getString(1)).contains("TIME");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createLegacyDocDocumentState() throws SQLException {
|
||||||
|
try (Connection connection = openConnection();
|
||||||
|
Statement statement = connection.createStatement()) {
|
||||||
|
statement.execute("CREATE SCHEMA doc");
|
||||||
|
statement.execute("""
|
||||||
|
CREATE TYPE doc.doc_document_type AS ENUM (
|
||||||
|
'TED_NOTICE',
|
||||||
|
'EMAIL',
|
||||||
|
'MIME_MESSAGE',
|
||||||
|
'PDF',
|
||||||
|
'DOCX',
|
||||||
|
'HTML',
|
||||||
|
'XML_GENERIC',
|
||||||
|
'TEXT',
|
||||||
|
'MARKDOWN',
|
||||||
|
'ZIP_ARCHIVE',
|
||||||
|
'GENERIC_BINARY',
|
||||||
|
'UNKNOWN'
|
||||||
|
)
|
||||||
|
""");
|
||||||
|
statement.execute("""
|
||||||
|
CREATE TYPE doc.doc_document_family AS ENUM (
|
||||||
|
'PROCUREMENT',
|
||||||
|
'MAIL',
|
||||||
|
'ATTACHMENT',
|
||||||
|
'KNOWLEDGE',
|
||||||
|
'GENERIC'
|
||||||
|
)
|
||||||
|
""");
|
||||||
|
statement.execute("""
|
||||||
|
CREATE TABLE doc.doc_document (
|
||||||
|
id UUID PRIMARY KEY,
|
||||||
|
document_type doc.doc_document_type NOT NULL,
|
||||||
|
document_family doc.doc_document_family NOT NULL,
|
||||||
|
CONSTRAINT doc_document_document_type_check
|
||||||
|
CHECK (
|
||||||
|
document_type IN (
|
||||||
|
'TED_NOTICE',
|
||||||
|
'EMAIL',
|
||||||
|
'MIME_MESSAGE',
|
||||||
|
'PDF',
|
||||||
|
'DOCX',
|
||||||
|
'HTML',
|
||||||
|
'XML_GENERIC',
|
||||||
|
'TEXT',
|
||||||
|
'MARKDOWN',
|
||||||
|
'ZIP_ARCHIVE',
|
||||||
|
'GENERIC_BINARY',
|
||||||
|
'UNKNOWN'
|
||||||
|
)
|
||||||
|
),
|
||||||
|
CONSTRAINT doc_document_document_family_check
|
||||||
|
CHECK (
|
||||||
|
document_family IN (
|
||||||
|
'PROCUREMENT',
|
||||||
|
'MAIL',
|
||||||
|
'ATTACHMENT',
|
||||||
|
'KNOWLEDGE',
|
||||||
|
'GENERIC'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
""");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Connection openConnection() throws SQLException {
|
||||||
|
return DriverManager.getConnection(
|
||||||
|
postgres.getJdbcUrl(),
|
||||||
|
postgres.getUsername(),
|
||||||
|
postgres.getPassword()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -2,10 +2,13 @@ package at.procon.dip.search.repository;
|
||||||
|
|
||||||
import static org.assertj.core.api.Assertions.assertThat;
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||||
import at.procon.dip.search.api.SearchExecutionContext;
|
import at.procon.dip.search.api.SearchExecutionContext;
|
||||||
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
||||||
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
||||||
|
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||||
import at.procon.dip.search.dto.SearchRequest;
|
import at.procon.dip.search.dto.SearchRequest;
|
||||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
@ -54,4 +57,31 @@ class SearchSqlFilterSupportTest {
|
||||||
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
||||||
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void shouldUseTypedEnumComparisonsForIndexedFilters() {
|
||||||
|
SearchRequest request = SearchRequest.builder()
|
||||||
|
.queryText("vertrieb")
|
||||||
|
.documentTypes(java.util.Set.of(DocumentType.TIME_ENTRY))
|
||||||
|
.documentFamilies(java.util.Set.of(DocumentFamily.TIME))
|
||||||
|
.representationSelectionMode(SearchRepresentationSelectionMode.PRIMARY_AND_CHUNKS)
|
||||||
|
.build();
|
||||||
|
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||||
|
.request(request)
|
||||||
|
.scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null))
|
||||||
|
.page(0)
|
||||||
|
.size(20)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
|
||||||
|
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||||
|
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false);
|
||||||
|
|
||||||
|
String rendered = sql.toString();
|
||||||
|
assertThat(rendered).contains("CAST(d.document_type AS text) IN (:documentTypes)");
|
||||||
|
assertThat(rendered).contains("CAST(d.document_family AS text) IN (:documentFamilies)");
|
||||||
|
assertThat(rendered).contains("CAST(dtr.representation_type AS text) = 'CHUNK'");
|
||||||
|
assertThat(params.getValue("documentTypes")).isEqualTo(java.util.List.of("TIME_ENTRY"));
|
||||||
|
assertThat(params.getValue("documentFamilies")).isEqualTo(java.util.List.of("TIME"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -87,13 +87,13 @@ public abstract class AbstractSearchIntegrationTest {
|
||||||
|
|
||||||
protected void ensureSearchColumnsAndIndexes() {
|
protected void ensureSearchColumnsAndIndexes() {
|
||||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
||||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
|
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void cleanupDatabase() {
|
protected void cleanupDatabase() {
|
||||||
|
|
|
||||||
|
|
@ -121,14 +121,14 @@ public abstract class AbstractSemanticSearchIntegrationTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void ensureSearchColumnsAndIndexes() {
|
protected void ensureSearchColumnsAndIndexes() {
|
||||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA doc");
|
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public");
|
||||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public");
|
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_embedding ADD COLUMN IF NOT EXISTS embedding_vector public.vector");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_embedding ADD COLUMN IF NOT EXISTS embedding_vector public.vector");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -82,13 +82,13 @@ public abstract class AbstractTedStructuredSearchIntegrationTest {
|
||||||
protected void ensureSearchColumnsAndIndexes() {
|
protected void ensureSearchColumnsAndIndexes() {
|
||||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS doc");
|
||||||
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS ted");
|
jdbcTemplate.execute("CREATE SCHEMA IF NOT EXISTS ted");
|
||||||
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc");
|
jdbcTemplate.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_config VARCHAR(64)");
|
||||||
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
jdbcTemplate.execute("ALTER TABLE doc.doc_text_representation ADD COLUMN IF NOT EXISTS search_vector tsvector");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector_test ON doc.doc_text_representation USING GIN (search_vector)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm_test ON doc.doc_document USING GIN (title public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm_test ON doc.doc_document USING GIN (summary public.gin_trgm_ops)");
|
||||||
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body doc.gin_trgm_ops)");
|
jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm_test ON doc.doc_text_representation USING GIN (text_body public.gin_trgm_ops)");
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void cleanupDatabase() {
|
protected void cleanupDatabase() {
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,3 @@
|
||||||
CREATE SCHEMA IF NOT EXISTS DOC;
|
CREATE SCHEMA IF NOT EXISTS DOC;
|
||||||
CREATE SCHEMA IF NOT EXISTS TED;
|
CREATE SCHEMA IF NOT EXISTS TED;
|
||||||
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
|
CREATE EXTENSION IF NOT EXISTS pg_trgm with schema public;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue