From 6ca9936b87c4caa10dc8de46c083dd7e2c182094 Mon Sep 17 00:00:00 2001 From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com> Date: Wed, 22 Apr 2026 11:40:42 +0200 Subject: [PATCH] TIME-domain foundation - Leitstand - T3 representations, search projection --- ...IME_PHASE_T3_PROJECTION_REPRESENTATIONS.md | 60 ++++ .../time/config/TimeDomainProperties.java | 7 +- .../entity/TimeEntrySearchProjection.java | 194 ++++++++++ .../TimeEntrySearchProjectionRepository.java | 17 + ...tandTimeRecordingAssignmentRepository.java | 3 +- .../LeitstandTimeRecordingRepository.java | 4 + .../service/LeitstandTimeImportService.java | 8 +- .../LeitstandTimeProjectionService.java | 340 ++++++++++++++++++ ...yRepresentationMaterializationService.java | 113 ++++++ .../LeitstandTimeProjectionStartupRunner.java | 30 ++ src/main/resources/application-new.yml | 16 +- ...26__time_t3_projection_representations.sql | 61 ++++ 12 files changed, 847 insertions(+), 6 deletions(-) create mode 100644 docs/TIME_PHASE_T3_PROJECTION_REPRESENTATIONS.md create mode 100644 src/main/java/at/procon/dip/domain/time/entity/TimeEntrySearchProjection.java create mode 100644 src/main/java/at/procon/dip/domain/time/repository/TimeEntrySearchProjectionRepository.java create mode 100644 src/main/java/at/procon/dip/domain/time/service/LeitstandTimeProjectionService.java create mode 100644 src/main/java/at/procon/dip/domain/time/service/TimeEntryRepresentationMaterializationService.java create mode 100644 src/main/java/at/procon/dip/domain/time/startup/LeitstandTimeProjectionStartupRunner.java create mode 100644 src/main/resources/db/migration/V26__time_t3_projection_representations.sql diff --git a/docs/TIME_PHASE_T3_PROJECTION_REPRESENTATIONS.md b/docs/TIME_PHASE_T3_PROJECTION_REPRESENTATIONS.md new file mode 100644 index 0000000..3ed6087 --- /dev/null +++ b/docs/TIME_PHASE_T3_PROJECTION_REPRESENTATIONS.md @@ -0,0 +1,60 @@ +# TIME Phase T3 — Projection, representations, and embedding enqueueing + +This 
phase adds the NEW-only TIME projection layer on top of the Leitstand import foundation from T2. + +## Scope + +Included in T3: +- `TIME.time_entry_search_projection` +- Leitstand projection builder from imported `TIME.ls_*` tables +- semantic text materialization for canonical `DOC.doc_document` roots of `TIME.time_entry` +- lexical indexing through `DOC.doc_text_representation.search_vector` +- embedding job enqueueing for the TIME primary semantic representation + +Not included yet: +- structured search endpoint +- dedicated TIME search DTOs +- Toggl import +- cross-source search + +## Representation strategy + +One root semantic representation is generated per canonical `TIME.time_entry` document. + +The semantic text is built from: +- time recording description / remark / ID / URL +- person display name +- activity type and record type +- task IDs / names / descriptions +- cost unit IDs / names / descriptions +- contract names / references +- contract position names / references +- organization names +- recorded time window and duration + +This keeps the searchable unit aligned with the future common TIME search concept: one canonical time-entry document enriched with source-specific business context. + +## Operational notes + +After a T2 import run, T3 refreshes the projection rows for all imported Leitstand time recordings that are linked to a canonical time entry, provided `create-canonical-time-entries` and `build-search-projection` are enabled. 
+ +A separate startup runner is also available for rebuilding all TIME projections and semantic representations for already-imported rows: + +```yaml +id: time-t3-startup + +dip: + time: + leitstand: + startup-projection-rebuild-enabled: false + build-search-projection: true + build-representations: true + representation-language-code: de +``` + +## Files added in this phase + +- `TIME.time_entry_search_projection` +- `LeitstandTimeProjectionService` +- `TimeEntryRepresentationMaterializationService` +- `LeitstandTimeProjectionStartupRunner` diff --git a/src/main/java/at/procon/dip/domain/time/config/TimeDomainProperties.java b/src/main/java/at/procon/dip/domain/time/config/TimeDomainProperties.java index bffc459..37c746f 100644 --- a/src/main/java/at/procon/dip/domain/time/config/TimeDomainProperties.java +++ b/src/main/java/at/procon/dip/domain/time/config/TimeDomainProperties.java @@ -9,7 +9,7 @@ import org.springframework.context.annotation.Configuration; @Data public class TimeDomainProperties { - private boolean enabled = false; + private boolean enabled = false; private LeitstandProperties leitstand = new LeitstandProperties(); private SourceProperties togglTrack = new SourceProperties(); @@ -25,6 +25,11 @@ public class TimeDomainProperties { private boolean startupSyncEnabled = false; private boolean createCanonicalTimeEntries = true; private boolean incrementalEnabled = true; + private boolean buildSearchProjection = true; + private boolean buildRepresentations = true; + private boolean queueEmbeddings = true; + private boolean startupProjectionRebuildEnabled = false; + private String representationLanguageCode = "de"; private String scopeKey = "leitstand-default"; private JdbcProperties jdbc = new JdbcProperties(); } diff --git a/src/main/java/at/procon/dip/domain/time/entity/TimeEntrySearchProjection.java b/src/main/java/at/procon/dip/domain/time/entity/TimeEntrySearchProjection.java new file mode 100644 index 0000000..adea24c --- /dev/null +++ 
b/src/main/java/at/procon/dip/domain/time/entity/TimeEntrySearchProjection.java @@ -0,0 +1,194 @@ +package at.procon.dip.domain.time.entity; + +import at.procon.dip.architecture.SchemaNames; +import at.procon.dip.domain.document.entity.Document; +import jakarta.persistence.*; +import java.time.OffsetDateTime; +import java.util.UUID; +import lombok.*; + +@Entity +@Table(schema = SchemaNames.TIME, name = "time_entry_search_projection", indexes = { + @Index(name = "idx_time_entry_search_projection_document", columnList = "document_id"), + @Index(name = "idx_time_entry_search_projection_source", columnList = "source_system, external_id"), + @Index(name = "idx_time_entry_search_projection_entry_start", columnList = "entry_start"), + @Index(name = "idx_time_entry_search_projection_person", columnList = "person_external_id"), + @Index(name = "idx_time_entry_search_projection_activity_type", columnList = "activity_type_id") +}) +@Getter +@Setter +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class TimeEntrySearchProjection { + + @Id + @GeneratedValue(strategy = GenerationType.UUID) + private UUID id; + + @OneToOne(fetch = FetchType.LAZY, optional = false) + @JoinColumn(name = "time_entry_id", nullable = false, unique = true) + private TimeEntry timeEntry; + + @OneToOne(fetch = FetchType.LAZY, optional = false) + @JoinColumn(name = "document_id", nullable = false, unique = true) + private Document document; + + @Enumerated(EnumType.STRING) + @Column(name = "source_system", nullable = false, length = 32) + private TimeSourceSystem sourceSystem; + + @Column(name = "external_id", nullable = false, length = 255) + private String externalId; + + @Column(name = "language_code", length = 16) + private String languageCode; + + @Column(name = "entry_start") + private OffsetDateTime entryStart; + + @Column(name = "entry_end") + private OffsetDateTime entryEnd; + + @Column(name = "duration_seconds") + private Long durationSeconds; + + @Column(name = "person_external_id", 
length = 255) + private String personExternalId; + + @Column(name = "person_display_name", length = 255) + private String personDisplayName; + + @Column(name = "activity_type_id") + private Integer activityTypeId; + + @Column(name = "activity_type_code", length = 64) + private String activityTypeCode; + + @Column(name = "activity_type_name", length = 255) + private String activityTypeName; + + @Column(name = "record_type", length = 64) + private String recordType; + + @Column(name = "time_recording_dbk", length = 24) + private String timeRecordingDbk; + + @Column(name = "time_recording_mcl_id", length = 255) + private String timeRecordingMclId; + + @Column(name = "time_recording_desc", length = 255) + private String timeRecordingDesc; + + @Column(name = "time_recording_remark", length = 255) + private String timeRecordingRemark; + + @Column(name = "time_recording_url", length = 1000) + private String timeRecordingUrl; + + @Column(name = "primary_task_dbk", length = 24) + private String primaryTaskDbk; + + @Column(name = "primary_task_mcl_id", length = 255) + private String primaryTaskMclId; + + @Column(name = "primary_task_name", length = 255) + private String primaryTaskName; + + @Column(name = "primary_task_desc", columnDefinition = "TEXT") + private String primaryTaskDesc; + + @Column(name = "primary_cost_unit_dbk", length = 24) + private String primaryCostUnitDbk; + + @Column(name = "primary_cost_unit_mcl_id", length = 255) + private String primaryCostUnitMclId; + + @Column(name = "primary_cost_unit_name", length = 255) + private String primaryCostUnitName; + + @Column(name = "primary_cost_unit_desc", length = 255) + private String primaryCostUnitDesc; + + @Column(name = "primary_contract_dbk", length = 24) + private String primaryContractDbk; + + @Column(name = "primary_contract_name", length = 255) + private String primaryContractName; + + @Column(name = "primary_contract_iref", length = 255) + private String primaryContractIref; + + @Column(name = 
"primary_contract_position_dbk", length = 24) + private String primaryContractPositionDbk; + + @Column(name = "primary_contract_position_name", length = 255) + private String primaryContractPositionName; + + @Column(name = "primary_contract_position_iref", length = 255) + private String primaryContractPositionIref; + + @Column(name = "primary_organization_dbk", length = 24) + private String primaryOrganizationDbk; + + @Column(name = "primary_organization_name", length = 255) + private String primaryOrganizationName; + + @Column(name = "task_ids_text", columnDefinition = "TEXT") + private String taskIdsText; + + @Column(name = "task_names_text", columnDefinition = "TEXT") + private String taskNamesText; + + @Column(name = "task_descriptions_text", columnDefinition = "TEXT") + private String taskDescriptionsText; + + @Column(name = "cost_unit_ids_text", columnDefinition = "TEXT") + private String costUnitIdsText; + + @Column(name = "cost_unit_names_text", columnDefinition = "TEXT") + private String costUnitNamesText; + + @Column(name = "cost_unit_descriptions_text", columnDefinition = "TEXT") + private String costUnitDescriptionsText; + + @Column(name = "contract_names_text", columnDefinition = "TEXT") + private String contractNamesText; + + @Column(name = "contract_irefs_text", columnDefinition = "TEXT") + private String contractIrefsText; + + @Column(name = "contract_position_names_text", columnDefinition = "TEXT") + private String contractPositionNamesText; + + @Column(name = "contract_position_irefs_text", columnDefinition = "TEXT") + private String contractPositionIrefsText; + + @Column(name = "organization_names_text", columnDefinition = "TEXT") + private String organizationNamesText; + + @Column(name = "semantic_text", columnDefinition = "TEXT") + private String semanticText; + + @Column(name = "summary_text", columnDefinition = "TEXT") + private String summaryText; + + @Builder.Default + @Column(name = "created_at", nullable = false, updatable = false) + 
private OffsetDateTime createdAt = OffsetDateTime.now(); + + @Builder.Default + @Column(name = "updated_at", nullable = false) + private OffsetDateTime updatedAt = OffsetDateTime.now(); + + @PrePersist + protected void onCreate() { + createdAt = OffsetDateTime.now(); + updatedAt = OffsetDateTime.now(); + } + + @PreUpdate + protected void onUpdate() { + updatedAt = OffsetDateTime.now(); + } +} diff --git a/src/main/java/at/procon/dip/domain/time/repository/TimeEntrySearchProjectionRepository.java b/src/main/java/at/procon/dip/domain/time/repository/TimeEntrySearchProjectionRepository.java new file mode 100644 index 0000000..4f5a8c0 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/time/repository/TimeEntrySearchProjectionRepository.java @@ -0,0 +1,17 @@ +package at.procon.dip.domain.time.repository; + +import at.procon.dip.domain.time.entity.TimeEntrySearchProjection; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.springframework.data.jpa.repository.JpaRepository; + +public interface TimeEntrySearchProjectionRepository extends JpaRepository { + + Optional findByTimeEntry_Id(UUID timeEntryId); + + Optional findByDocument_Id(UUID documentId); + + List findByTimeEntry_IdIn(Collection timeEntryIds); +} diff --git a/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingAssignmentRepository.java b/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingAssignmentRepository.java index 42adaa6..c87ac3c 100644 --- a/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingAssignmentRepository.java +++ b/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingAssignmentRepository.java @@ -5,5 +5,6 @@ import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; public interface LeitstandTimeRecordingAssignmentRepository extends JpaRepository { - List 
findByTimeRecordingDbk(String timeRecordingDbk); + + List findByTimeRecordingDbkOrderByDbkAsc(String timeRecordingDbk); } diff --git a/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingRepository.java b/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingRepository.java index a3ff201..2ec0463 100644 --- a/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingRepository.java +++ b/src/main/java/at/procon/dip/domain/time/repository/leitstand/LeitstandTimeRecordingRepository.java @@ -1,10 +1,14 @@ package at.procon.dip.domain.time.repository.leitstand; import at.procon.dip.domain.time.entity.leitstand.LeitstandTimeRecording; +import java.util.List; import java.util.Optional; import java.util.UUID; import org.springframework.data.jpa.repository.JpaRepository; public interface LeitstandTimeRecordingRepository extends JpaRepository { + Optional findByTimeEntry_Id(UUID timeEntryId); + + List findByTimeEntryIsNotNull(); } diff --git a/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeImportService.java b/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeImportService.java index 68ad4c1..27d45c3 100644 --- a/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeImportService.java +++ b/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeImportService.java @@ -49,6 +49,7 @@ public class LeitstandTimeImportService { private final DocumentRepository documentRepository; private final TimeEntryRepository timeEntryRepository; private final TimeEntrySourceLinkRepository sourceLinkRepository; + private final LeitstandTimeProjectionService projectionService; public LeitstandTimeImportService( @Qualifier("applicationJdbcTemplate") JdbcTemplate targetJdbcTemplate, @@ -58,7 +59,8 @@ public class LeitstandTimeImportService { TimeSyncStateRepository syncStateRepository, DocumentRepository documentRepository, TimeEntryRepository timeEntryRepository, - 
TimeEntrySourceLinkRepository sourceLinkRepository + TimeEntrySourceLinkRepository sourceLinkRepository, + LeitstandTimeProjectionService projectionService ) { this. properties = properties; this.jdbcTemplate = targetJdbcTemplate; @@ -68,6 +70,7 @@ public class LeitstandTimeImportService { this.documentRepository = documentRepository; this.timeEntryRepository = timeEntryRepository; this.sourceLinkRepository = sourceLinkRepository; + this.projectionService = projectionService; } public void runSync() { @@ -91,6 +94,9 @@ public class LeitstandTimeImportService { syncTimeRecordingAssignments(run); if (properties.getLeitstand().isCreateCanonicalTimeEntries()) { upsertCanonicalTimeEntries(recordings); + if (properties.getLeitstand().isBuildSearchProjection()) { + projectionService.refreshAll(); + } } run.setStatus(TimeSyncRunStatus.COMPLETED); run.setFinishedAt(OffsetDateTime.now()); diff --git a/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeProjectionService.java b/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeProjectionService.java new file mode 100644 index 0000000..b7e97f3 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/time/service/LeitstandTimeProjectionService.java @@ -0,0 +1,340 @@ +package at.procon.dip.domain.time.service; + +import at.procon.dip.domain.document.entity.Document; +import at.procon.dip.domain.time.config.TimeDomainProperties; +import at.procon.dip.domain.time.entity.TimeEntry; +import at.procon.dip.domain.time.entity.TimeEntrySearchProjection; +import at.procon.dip.domain.time.entity.TimeSourceSystem; +import at.procon.dip.domain.time.entity.leitstand.*; +import at.procon.dip.domain.time.repository.TimeEntryRepository; +import at.procon.dip.domain.time.repository.TimeEntrySearchProjectionRepository; +import at.procon.dip.domain.time.repository.leitstand.*; +import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; +import at.procon.dip.runtime.config.RuntimeMode; +import java.time.OffsetDateTime; 
+import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +@Service +@ConditionalOnRuntimeMode(RuntimeMode.NEW) +@ConditionalOnProperty(prefix = "dip.time.leitstand", name = "enabled", havingValue = "true") +@RequiredArgsConstructor +@Slf4j +public class LeitstandTimeProjectionService { + + private final TimeDomainProperties properties; + private final LeitstandTimeRecordingRepository timeRecordingRepository; + private final LeitstandTimeRecordingAssignmentRepository timeRecordingAssignmentRepository; + private final LeitstandPersonTaskAssignmentRepository personTaskAssignmentRepository; + private final LeitstandTaskRepository taskRepository; + private final LeitstandCostUnitRepository costUnitRepository; + private final LeitstandContractRepository contractRepository; + private final LeitstandContractPositionRepository contractPositionRepository; + private final LeitstandOrganizationRepository organizationRepository; + private final LeitstandPersonRepository personRepository; + private final LeitstandActivityTypeRepository activityTypeRepository; + private final TimeEntryRepository timeEntryRepository; + private final TimeEntrySearchProjectionRepository projectionRepository; + private final TimeEntryRepresentationMaterializationService representationMaterializationService; + + @Transactional + public void refreshForLeitstandRecordingDbks(Collection recordingDbks) { + if (recordingDbks == null || recordingDbks.isEmpty()) { + return; + } + List recordings = timeRecordingRepository.findAllById(recordingDbks).stream() + .filter(recording -> recording.getTimeEntry() != null) + .toList(); + if (recordings.isEmpty()) { + 
return; + } + upsertProjections(recordings); + } + + @Transactional + public int refreshAll() { + List recordings = timeRecordingRepository.findByTimeEntryIsNotNull(); + upsertProjections(recordings); + return recordings.size(); + } + + private void upsertProjections(List recordings) { + for (LeitstandTimeRecording recording : recordings) { + TimeEntrySearchProjection projection = buildProjection(recording); + TimeEntrySearchProjection saved = projectionRepository.save(projection); + if (properties.getLeitstand().isBuildRepresentations()) { + representationMaterializationService.upsertRepresentations(saved); + } + } + } + + private TimeEntrySearchProjection buildProjection(LeitstandTimeRecording recording) { + TimeEntry timeEntry = timeEntryRepository.findById(recording.getTimeEntry().getId()) + .orElseThrow(() -> new IllegalArgumentException("Unknown TIME entry id: " + recording.getTimeEntry().getId())); + Document document = timeEntry.getDocument(); + + LeitstandPerson person = recording.getPersonDbk() == null ? null : personRepository.findById(recording.getPersonDbk()).orElse(null); + LeitstandActivityType activityType = recording.getActivityTypeId() == null ? 
null : activityTypeRepository.findById(recording.getActivityTypeId()).orElse(null); + + List assignments = timeRecordingAssignmentRepository.findByTimeRecordingDbkOrderByDbkAsc(recording.getDbk()); + List personTaskAssignments = personTaskAssignmentRepository.findAllById(assignments.stream() + .map(LeitstandTimeRecordingAssignment::getPersonTaskAssignmentDbk) + .filter(Objects::nonNull) + .distinct() + .toList()); + Map ptaByDbk = indexBy(personTaskAssignments, LeitstandPersonTaskAssignment::getDbk); + + Map tasksByDbk = indexBy(taskRepository.findAllById(personTaskAssignments.stream() + .map(LeitstandPersonTaskAssignment::getTaskDbk) + .filter(Objects::nonNull) + .distinct() + .toList()), LeitstandTask::getDbk); + + Map costUnitsByDbk = indexBy(costUnitRepository.findAllById(personTaskAssignments.stream() + .map(LeitstandPersonTaskAssignment::getCostUnitDbk) + .filter(Objects::nonNull) + .distinct() + .toList()), LeitstandCostUnit::getDbk); + + Map contractsByDbk = indexBy(contractRepository.findAllById(costUnitsByDbk.values().stream() + .map(LeitstandCostUnit::getContractDbk) + .filter(Objects::nonNull) + .distinct() + .toList()), LeitstandContract::getDbk); + + Map contractPositionsByDbk = indexBy(contractPositionRepository.findAllById(costUnitsByDbk.values().stream() + .map(LeitstandCostUnit::getContractPositionDbk) + .filter(Objects::nonNull) + .distinct() + .toList()), LeitstandContractPosition::getDbk); + + Set organizationDbks = new LinkedHashSet<>(); + costUnitsByDbk.values().stream().map(LeitstandCostUnit::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add); + contractsByDbk.values().stream().map(LeitstandContract::getOrganizationDbk).filter(Objects::nonNull).forEach(organizationDbks::add); + if (person != null && person.getOrganizationDbk() != null) { + organizationDbks.add(person.getOrganizationDbk()); + } + Map organizationsByDbk = indexBy(organizationRepository.findAllById(organizationDbks), LeitstandOrganization::getDbk); + + 
List orderedTasks = assignments.stream() + .map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk())) + .filter(Objects::nonNull) + .map(pta -> tasksByDbk.get(pta.getTaskDbk())) + .filter(Objects::nonNull) + .distinct() + .toList(); + List orderedCostUnits = assignments.stream() + .map(a -> ptaByDbk.get(a.getPersonTaskAssignmentDbk())) + .filter(Objects::nonNull) + .map(pta -> costUnitsByDbk.get(pta.getCostUnitDbk())) + .filter(Objects::nonNull) + .distinct() + .toList(); + List orderedContracts = orderedCostUnits.stream() + .map(cu -> contractsByDbk.get(cu.getContractDbk())) + .filter(Objects::nonNull) + .distinct() + .toList(); + List orderedContractPositions = orderedCostUnits.stream() + .map(cu -> contractPositionsByDbk.get(cu.getContractPositionDbk())) + .filter(Objects::nonNull) + .distinct() + .toList(); + List orderedOrganizations = new ArrayList<>(); + orderedCostUnits.stream().map(cu -> organizationsByDbk.get(cu.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); }); + orderedContracts.stream().map(c -> organizationsByDbk.get(c.getOrganizationDbk())).filter(Objects::nonNull).forEach(org -> { if (!orderedOrganizations.contains(org)) orderedOrganizations.add(org); }); + if (person != null && person.getOrganizationDbk() != null) { + LeitstandOrganization personOrg = organizationsByDbk.get(person.getOrganizationDbk()); + if (personOrg != null && !orderedOrganizations.contains(personOrg)) orderedOrganizations.add(personOrg); + } + + LeitstandTask primaryTask = orderedTasks.isEmpty() ? null : orderedTasks.getFirst(); + LeitstandCostUnit primaryCostUnit = orderedCostUnits.isEmpty() ? null : orderedCostUnits.getFirst(); + LeitstandContract primaryContract = orderedContracts.isEmpty() ? null : orderedContracts.getFirst(); + LeitstandContractPosition primaryContractPosition = orderedContractPositions.isEmpty() ? 
null : orderedContractPositions.getFirst(); + LeitstandOrganization primaryOrganization = orderedOrganizations.isEmpty() ? null : orderedOrganizations.getFirst(); + + String summary = buildSummary(recording, primaryTask, primaryCostUnit, primaryOrganization, person); + String semanticText = buildSemanticText(timeEntry, recording, person, activityType, orderedTasks, orderedCostUnits, orderedContracts, orderedContractPositions, orderedOrganizations); + + TimeEntrySearchProjection projection = projectionRepository.findByTimeEntry_Id(timeEntry.getId()) + .orElseGet(() -> TimeEntrySearchProjection.builder().timeEntry(timeEntry).document(document).build()); + projection.setDocument(document); + projection.setTimeEntry(timeEntry); + projection.setSourceSystem(TimeSourceSystem.LEITSTAND); + projection.setExternalId(timeEntry.getExternalId()); + projection.setLanguageCode(properties.getLeitstand().getRepresentationLanguageCode()); + projection.setEntryStart(timeEntry.getEntryStart()); + projection.setEntryEnd(timeEntry.getEntryEnd()); + projection.setDurationSeconds(timeEntry.getDurationSeconds()); + projection.setPersonExternalId(timeEntry.getPersonExternalId()); + projection.setPersonDisplayName(firstNonBlank(timeEntry.getPersonDisplayName(), formatPerson(person))); + projection.setActivityTypeId(recording.getActivityTypeId()); + projection.setActivityTypeCode(activityType == null ? null : activityType.getLCode()); + projection.setActivityTypeName(activityType == null ? null : activityType.getBez()); + projection.setRecordType(recording.getRecordType()); + projection.setTimeRecordingDbk(recording.getDbk()); + projection.setTimeRecordingMclId(recording.getMclId()); + projection.setTimeRecordingDesc(recording.getMclDesc()); + projection.setTimeRecordingRemark(recording.getRemark()); + projection.setTimeRecordingUrl(recording.getUrl()); + projection.setPrimaryTaskDbk(primaryTask == null ? null : primaryTask.getDbk()); + projection.setPrimaryTaskMclId(primaryTask == null ? 
null : primaryTask.getMclId()); + projection.setPrimaryTaskName(primaryTask == null ? null : primaryTask.getMclName()); + projection.setPrimaryTaskDesc(primaryTask == null ? null : primaryTask.getMclDesc()); + projection.setPrimaryCostUnitDbk(primaryCostUnit == null ? null : primaryCostUnit.getDbk()); + projection.setPrimaryCostUnitMclId(primaryCostUnit == null ? null : primaryCostUnit.getMclId()); + projection.setPrimaryCostUnitName(primaryCostUnit == null ? null : primaryCostUnit.getMclName()); + projection.setPrimaryCostUnitDesc(primaryCostUnit == null ? null : primaryCostUnit.getMclDesc()); + projection.setPrimaryContractDbk(primaryContract == null ? null : primaryContract.getDbk()); + projection.setPrimaryContractName(primaryContract == null ? null : primaryContract.getName()); + projection.setPrimaryContractIref(primaryContract == null ? null : primaryContract.getIref()); + projection.setPrimaryContractPositionDbk(primaryContractPosition == null ? null : primaryContractPosition.getDbk()); + projection.setPrimaryContractPositionName(primaryContractPosition == null ? null : primaryContractPosition.getName()); + projection.setPrimaryContractPositionIref(primaryContractPosition == null ? null : primaryContractPosition.getIref()); + projection.setPrimaryOrganizationDbk(primaryOrganization == null ? null : primaryOrganization.getDbk()); + projection.setPrimaryOrganizationName(primaryOrganization == null ? 
null : primaryOrganization.getName()); + projection.setTaskIdsText(joinDistinct(orderedTasks.stream().map(LeitstandTask::getMclId).toList())); + projection.setTaskNamesText(joinDistinct(orderedTasks.stream().map(LeitstandTask::getMclName).toList())); + projection.setTaskDescriptionsText(joinDistinct(orderedTasks.stream().map(LeitstandTask::getMclDesc).toList())); + projection.setCostUnitIdsText(joinDistinct(orderedCostUnits.stream().map(LeitstandCostUnit::getMclId).toList())); + projection.setCostUnitNamesText(joinDistinct(orderedCostUnits.stream().map(LeitstandCostUnit::getMclName).toList())); + projection.setCostUnitDescriptionsText(joinDistinct(orderedCostUnits.stream().map(LeitstandCostUnit::getMclDesc).toList())); + projection.setContractNamesText(joinDistinct(orderedContracts.stream().map(LeitstandContract::getName).toList())); + projection.setContractIrefsText(joinDistinct(orderedContracts.stream().map(LeitstandContract::getIref).toList())); + projection.setContractPositionNamesText(joinDistinct(orderedContractPositions.stream().map(LeitstandContractPosition::getName).toList())); + projection.setContractPositionIrefsText(joinDistinct(orderedContractPositions.stream().map(LeitstandContractPosition::getIref).toList())); + projection.setOrganizationNamesText(joinDistinct(orderedOrganizations.stream().map(LeitstandOrganization::getName).toList())); + projection.setSummaryText(summary); + projection.setSemanticText(semanticText); + return projection; + } + + private String buildSummary(LeitstandTimeRecording recording, + LeitstandTask primaryTask, + LeitstandCostUnit primaryCostUnit, + LeitstandOrganization primaryOrganization, + LeitstandPerson person) { + List parts = new ArrayList<>(); + add(parts, formatPerson(person)); + add(parts, recording.getMclDesc()); + add(parts, primaryTask == null ? null : primaryTask.getMclName()); + add(parts, primaryCostUnit == null ? null : primaryCostUnit.getMclName()); + add(parts, primaryOrganization == null ? 
null : primaryOrganization.getName()); + add(parts, formatPeriod(recording.getRecordedFrom(), recording.getRecordedTo())); + return String.join(" | ", parts); + } + + private String buildSemanticText(TimeEntry timeEntry, + LeitstandTimeRecording recording, + LeitstandPerson person, + LeitstandActivityType activityType, + List tasks, + List costUnits, + List contracts, + List contractPositions, + List organizations) { + StringBuilder sb = new StringBuilder(); + sb.append("Document type: TIME_ENTRY\n"); + sb.append("Source system: LEITSTAND\n"); + sb.append("External ID: ").append(timeEntry.getExternalId()).append('\n'); + addLine(sb, "Person", firstNonBlank(formatPerson(person), timeEntry.getPersonDisplayName())); /* fall back to the time entry's display name when no Leitstand person was resolved */ + addLine(sb, "Activity type", activityType == null ? null : firstNonBlank(activityType.getBez(), activityType.getLCode())); + addLine(sb, "Record type", recording.getRecordType()); + addLine(sb, "Start", formatTimestamp(recording.getRecordedFrom())); + addLine(sb, "End", formatTimestamp(recording.getRecordedTo())); + if (timeEntry.getDurationSeconds() != null) { + addLine(sb, "Duration seconds", String.valueOf(timeEntry.getDurationSeconds())); + } + addLine(sb, "Time recording title", recording.getMclDesc()); + addLine(sb, "Time recording ID", recording.getMclId()); + addLine(sb, "Remark", recording.getRemark()); + addLine(sb, "URL", recording.getUrl()); + addLine(sb, "Task IDs", joinDistinct(tasks.stream().map(LeitstandTask::getMclId).toList())); + addLine(sb, "Task names", joinDistinct(tasks.stream().map(LeitstandTask::getMclName).toList())); + addLine(sb, "Task descriptions", joinDistinct(tasks.stream().map(LeitstandTask::getMclDesc).toList())); + addLine(sb, "Cost unit IDs", joinDistinct(costUnits.stream().map(LeitstandCostUnit::getMclId).toList())); + addLine(sb, "Cost unit names", joinDistinct(costUnits.stream().map(LeitstandCostUnit::getMclName).toList())); + addLine(sb, "Cost unit descriptions", joinDistinct(costUnits.stream().map(LeitstandCostUnit::getMclDesc).toList())); + addLine(sb, 
"Contracts", joinDistinct(contracts.stream().map(LeitstandContract::getName).toList())); + addLine(sb, "Contract references", joinDistinct(contracts.stream().map(LeitstandContract::getIref).toList())); + addLine(sb, "Contract positions", joinDistinct(contractPositions.stream().map(LeitstandContractPosition::getName).toList())); + addLine(sb, "Contract position references", joinDistinct(contractPositions.stream().map(LeitstandContractPosition::getIref).toList())); + addLine(sb, "Organizations", joinDistinct(organizations.stream().map(LeitstandOrganization::getName).toList())); + return sb.toString().trim(); + } + + private Map indexBy(Collection rows, Function id) { + return rows.stream() + .filter(Objects::nonNull) + .collect(Collectors.toMap(id, Function.identity(), (a, b) -> a, LinkedHashMap::new)); + } + + private void addLine(StringBuilder sb, String label, String value) { + if (value != null && !value.isBlank()) { + sb.append(label).append(": ").append(value.trim()).append('\n'); + } + } + + private void add(List parts, String value) { + if (value != null && !value.isBlank()) { + parts.add(value.trim()); + } + } + + private String formatPerson(LeitstandPerson person) { + if (person == null) { + return null; + } + return firstNonBlank(joinName(person.getFirstName(), person.getLastName()), person.getPersonNumber() == null ? null : String.valueOf(person.getPersonNumber())); + } + + private String joinName(String firstName, String lastName) { + String left = firstName == null ? "" : firstName.trim(); + String right = lastName == null ? "" : lastName.trim(); + return (left + (left.isEmpty() || right.isEmpty() ? 
"" : " ") + right).trim(); + } + + private String formatPeriod(OffsetDateTime from, OffsetDateTime to) { + if (from == null && to == null) { + return null; + } + if (from != null && to != null) { + return formatTimestamp(from) + " - " + formatTimestamp(to); + } + return formatTimestamp(firstNonNull(from, to)); + } + + private OffsetDateTime firstNonNull(OffsetDateTime a, OffsetDateTime b) { + return a != null ? a : b; + } + + private String formatTimestamp(OffsetDateTime timestamp) { + return timestamp == null ? null : timestamp.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + } + + private String firstNonBlank(String... values) { + for (String value : values) { + if (value != null && !value.trim().isEmpty()) { + return value.trim(); + } + } + return null; + } + + private String joinDistinct(Collection values) { + return values.stream() + .filter(Objects::nonNull) + .map(String::trim) + .filter(v -> !v.isEmpty()) + .distinct() + .collect(Collectors.joining(" | ")); + } +} diff --git a/src/main/java/at/procon/dip/domain/time/service/TimeEntryRepresentationMaterializationService.java b/src/main/java/at/procon/dip/domain/time/service/TimeEntryRepresentationMaterializationService.java new file mode 100644 index 0000000..1f18a4d --- /dev/null +++ b/src/main/java/at/procon/dip/domain/time/service/TimeEntryRepresentationMaterializationService.java @@ -0,0 +1,113 @@ +package at.procon.dip.domain.time.service; + +import at.procon.dip.domain.document.RepresentationType; +import at.procon.dip.domain.document.entity.Document; +import at.procon.dip.domain.document.entity.DocumentTextRepresentation; +import at.procon.dip.domain.document.repository.DocumentRepository; +import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; +import at.procon.dip.domain.document.service.DocumentRepresentationService; +import at.procon.dip.domain.document.service.command.AddDocumentTextRepresentationCommand; +import 
at.procon.dip.domain.time.config.TimeDomainProperties;
+import at.procon.dip.domain.time.entity.TimeEntrySearchProjection;
+import at.procon.dip.embedding.config.EmbeddingProperties;
+import at.procon.dip.embedding.registry.EmbeddingModelRegistry;
+import at.procon.dip.embedding.service.RepresentationEmbeddingOrchestrator;
+import at.procon.dip.search.service.DocumentLexicalIndexService;
+import java.util.Optional;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+
+@Service
+@RequiredArgsConstructor
+@Slf4j
+public class TimeEntryRepresentationMaterializationService { // writes a time-entry projection onto its Document and SEMANTIC_TEXT representation
+
+    public static final String BUILDER_KEY = "time-entry-structured-text"; // builder key stamped on representations written by this service
+
+    private final TimeDomainProperties timeDomainProperties;
+    private final DocumentRepository documentRepository;
+    private final DocumentTextRepresentationRepository representationRepository;
+    private final DocumentRepresentationService documentRepresentationService;
+    private final DocumentLexicalIndexService lexicalIndexService;
+    private final RepresentationEmbeddingOrchestrator embeddingOrchestrator;
+    private final EmbeddingProperties embeddingProperties;
+    private final EmbeddingModelRegistry modelRegistry;
+
+    @Transactional
+    public void upsertRepresentations(TimeEntrySearchProjection projection) { // no-op when the projection's semantic text is blank
+        if (projection.getSemanticText() == null || projection.getSemanticText().isBlank()) {
+            log.debug("Skipping TIME representation for document {} because semantic text is blank", projection.getDocument().getId());
+            return;
+        }
+
+        Document document = projection.getDocument();
+        document.setTitle(firstNonBlank(projection.getSummaryText(), projection.getTimeRecordingDesc(), projection.getPrimaryTaskName(), projection.getExternalId()));
+        document.setSummary(projection.getSummaryText());
+        document.setLanguageCode(firstNonBlank(projection.getLanguageCode(), document.getLanguageCode()));
+        if (document.getMimeType() == null || document.getMimeType().isBlank()) {
+            document.setMimeType("application/x-time-entry");
+        }
+        Document savedDocument = documentRepository.save(document); // own variable: the orElseGet lambda below may only capture effectively final locals
+
+        Optional<DocumentTextRepresentation> existing = representationRepository
+                .findByDocument_IdAndRepresentationType(savedDocument.getId(), RepresentationType.SEMANTIC_TEXT)
+                .stream()
+                .filter(r -> BUILDER_KEY.equals(r.getBuilderKey()) || r.isPrimaryRepresentation()) // NOTE(review): a primary row from another builder can be picked ahead of this builder's own row - confirm intended
+                .findFirst();
+
+        boolean changed = existing.isEmpty() // true when the row is missing or differs in text, language or builder key
+                || !projection.getSemanticText().equals(existing.get().getTextBody())
+                || !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
+                || !BUILDER_KEY.equals(existing.get().getBuilderKey());
+
+        DocumentTextRepresentation semantic = existing
+                .map(found -> changed ? updateRepresentation(found, projection) : found)
+                .orElseGet(() -> documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
+                        savedDocument.getId(),
+                        null,
+                        RepresentationType.SEMANTIC_TEXT,
+                        BUILDER_KEY,
+                        projection.getLanguageCode(),
+                        null,
+                        null,
+                        null,
+                        null,
+                        true,
+                        projection.getSemanticText()
+                )));
+
+        if (changed // enqueue only for new/changed text, and only when embeddings are enabled and a default model is configured
+                && embeddingProperties.isEnabled()
+                && timeDomainProperties.getLeitstand().isQueueEmbeddings()
+                && embeddingProperties.getDefaultDocumentModel() != null && !embeddingProperties.getDefaultDocumentModel().isBlank()) {
+            String modelKey = modelRegistry.getRequiredDefaultDocumentModelKey();
+            embeddingOrchestrator.enqueueRepresentation(savedDocument.getId(), semantic.getId(), modelKey);
+        }
+    }
+
+    private DocumentTextRepresentation updateRepresentation(DocumentTextRepresentation existing, TimeEntrySearchProjection projection) { // overwrites the row, flushes it, then re-indexes it lexically
+        existing.setBuilderKey(BUILDER_KEY);
+        existing.setLanguageCode(projection.getLanguageCode());
+        existing.setPrimaryRepresentation(true);
+        existing.setTextBody(projection.getSemanticText());
+        existing.setCharCount(projection.getSemanticText().length());
+        DocumentTextRepresentation saved = representationRepository.saveAndFlush(existing);
+        lexicalIndexService.indexRepresentation(saved.getId());
+        return saved;
+    }
+
+    private boolean equalsNullable(String left, String right) { // null-safe equality; two nulls compare equal
+        return left == null ? right == null : left.equals(right);
+    }
+
+    private String firstNonBlank(String... values) { // first value with non-whitespace content, trimmed; null when there is none
+        for (String value : values) {
+            if (value != null && !value.trim().isEmpty()) {
+                return value.trim();
+            }
+        }
+        return null;
+    }
+}
diff --git a/src/main/java/at/procon/dip/domain/time/startup/LeitstandTimeProjectionStartupRunner.java b/src/main/java/at/procon/dip/domain/time/startup/LeitstandTimeProjectionStartupRunner.java
new file mode 100644
index 0000000..9ed2133
--- /dev/null
+++ b/src/main/java/at/procon/dip/domain/time/startup/LeitstandTimeProjectionStartupRunner.java
@@ -0,0 +1,30 @@
+package at.procon.dip.domain.time.startup;
+
+import at.procon.dip.domain.time.config.TimeDomainProperties;
+import at.procon.dip.domain.time.service.LeitstandTimeProjectionService;
+import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
+import at.procon.dip.runtime.config.RuntimeMode;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.boot.ApplicationRunner;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.stereotype.Component;
+
+@Component
+@ConditionalOnRuntimeMode(RuntimeMode.NEW)
+@ConditionalOnProperty(prefix = "dip.time.leitstand", name = {"enabled", "startup-projection-rebuild-enabled"}, havingValue = "true")
+@RequiredArgsConstructor
+@Slf4j
+public class LeitstandTimeProjectionStartupRunner implements ApplicationRunner { // startup hook that triggers a full projection refresh
+
+    private final TimeDomainProperties properties;
+    private final LeitstandTimeProjectionService projectionService;
+
+    @Override
+    public void run(ApplicationArguments args) { // calls projectionService.refreshAll() and logs the returned count; args are not read
+        log.info("Starting Leitstand TIME projection rebuild (scope={})", properties.getLeitstand().getScopeKey());
+        int refreshed = projectionService.refreshAll();
+        log.info("Completed Leitstand TIME projection rebuild. Refreshed {} time-entry projections", refreshed);
+    }
+}
diff --git a/src/main/resources/application-new.yml b/src/main/resources/application-new.yml
index ebfc3e5..825101e 100644
--- a/src/main/resources/application-new.yml
+++ b/src/main/resources/application-new.yml
@@ -296,16 +296,26 @@ dip: time: enabled: true leitstand: - enabled: true - startup-sync-enabled: true + enabled: false + startup-sync-enabled: false + create-canonical-time-entries: true + build-search-projection: true + build-representations: true + queue-embeddings: true + startup-projection-rebuild-enabled: false + representation-language-code: de + incremental-enabled: true + scope-key: leitstand-default import-batch-id: time-leitstand reconcile-lookback-days: 7 - create-canonical-time-entries: true jdbc: url: jdbc:jtds:sqlserver://mag2:1433;databaseName=spc username: sa password: jhcbxr driver-class-name: net.sourceforge.jtds.jdbc.Driver + fetch-size: 500 + query-timeout-seconds: 300 + toggl-track: enabled: false import-batch-id: time-toggl
diff --git a/src/main/resources/db/migration/V26__time_t3_projection_representations.sql b/src/main/resources/db/migration/V26__time_t3_projection_representations.sql
new file mode 100644
index 0000000..e7029a5
--- /dev/null
+++ b/src/main/resources/db/migration/V26__time_t3_projection_representations.sql
@@ -0,0 +1,61 @@
+-- TIME Phase T3: search projection and representation materialization foundation for time entries.
+
+CREATE TABLE IF NOT EXISTS TIME.time_entry_search_projection ( -- search projection row for a time entry
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    time_entry_id UUID NOT NULL UNIQUE REFERENCES TIME.time_entry(id) ON DELETE CASCADE, -- at most one row per time entry; removed together with it
+    document_id UUID NOT NULL UNIQUE REFERENCES DOC.doc_document(id) ON DELETE CASCADE, -- at most one row per document; removed together with it
+    source_system TIME.time_source_system NOT NULL,
+    external_id VARCHAR(255) NOT NULL,
+    language_code VARCHAR(16),
+    entry_start TIMESTAMP WITH TIME ZONE,
+    entry_end TIMESTAMP WITH TIME ZONE,
+    duration_seconds BIGINT,
+    person_external_id VARCHAR(255),
+    person_display_name VARCHAR(255),
+    activity_type_id INTEGER,
+    activity_type_code VARCHAR(64),
+    activity_type_name VARCHAR(255),
+    record_type VARCHAR(64),
+    time_recording_dbk VARCHAR(24),
+    time_recording_mcl_id VARCHAR(255),
+    time_recording_desc VARCHAR(255),
+    time_recording_remark VARCHAR(255), -- NOTE(review): free-text remark capped at 255 chars - confirm the source column cannot be longer
+    time_recording_url VARCHAR(1000),
+    primary_task_dbk VARCHAR(24),
+    primary_task_mcl_id VARCHAR(255),
+    primary_task_name VARCHAR(255),
+    primary_task_desc TEXT,
+    primary_cost_unit_dbk VARCHAR(24),
+    primary_cost_unit_mcl_id VARCHAR(255),
+    primary_cost_unit_name VARCHAR(255),
+    primary_cost_unit_desc VARCHAR(255),
+    primary_contract_dbk VARCHAR(24),
+    primary_contract_name VARCHAR(255),
+    primary_contract_iref VARCHAR(255),
+    primary_contract_position_dbk VARCHAR(24),
+    primary_contract_position_name VARCHAR(255),
+    primary_contract_position_iref VARCHAR(255),
+    primary_organization_dbk VARCHAR(24),
+    primary_organization_name VARCHAR(255),
+    task_ids_text TEXT, -- the *_text columns below presumably hold the joined multi-value lists; confirm against the projection service
+    task_names_text TEXT,
+    task_descriptions_text TEXT,
+    cost_unit_ids_text TEXT,
+    cost_unit_names_text TEXT,
+    cost_unit_descriptions_text TEXT,
+    contract_names_text TEXT,
+    contract_irefs_text TEXT,
+    contract_position_names_text TEXT,
+    contract_position_irefs_text TEXT,
+    organization_names_text TEXT,
+    semantic_text TEXT,
+    summary_text TEXT,
+    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP -- default applies on insert only; nothing in this migration refreshes it on update
+);
+
+CREATE INDEX IF NOT EXISTS idx_time_entry_search_projection_document ON TIME.time_entry_search_projection(document_id); -- NOTE(review): document_id is already UNIQUE above; if this is PostgreSQL that constraint has its own index, so this one looks redundant - confirm
+CREATE INDEX IF NOT EXISTS idx_time_entry_search_projection_source ON TIME.time_entry_search_projection(source_system, external_id);
+CREATE INDEX IF NOT EXISTS idx_time_entry_search_projection_entry_start ON TIME.time_entry_search_projection(entry_start DESC);
+CREATE INDEX IF NOT EXISTS idx_time_entry_search_projection_person ON TIME.time_entry_search_projection(person_external_id);
+CREATE INDEX IF NOT EXISTS idx_time_entry_search_projection_activity_type ON TIME.time_entry_search_projection(activity_type_id);