Refactor phases 3

master
trifonovt 1 month ago
parent 71fb43a5ea
commit adc4f2da43

@ -0,0 +1,46 @@
# Phase 3 - TED projection model
## Goal
Move TED from being the implicit root data model to being a typed projection on top of the generic
canonical document model.
## New persistence model
### Generic root
- `DOC.doc_document`
- `DOC.doc_content`
- `DOC.doc_text_representation`
- `DOC.doc_embedding`
### TED-specific projection
- `TED.ted_notice_projection`
- `TED.ted_notice_lot`
- `TED.ted_notice_organization`
## Relationship model
- one generic `DOC.doc_document`
- zero or one `TED.ted_notice_projection` per document
- zero to many `TED.ted_notice_lot` per projection
- zero to many `TED.ted_notice_organization` per projection
The projection also keeps an optional back-reference to the legacy `TED.procurement_document` row to
support incremental migration and validation.
## Runtime behavior
When a new TED XML document is imported:
1. it is parsed into the existing legacy `ProcurementDocument`
2. the generic DOC root is ensured/refreshed
3. the primary text representation is ensured
4. if the generic vectorization pipeline is enabled, a pending embedding is ensured
5. the TED structured projection tables are refreshed from the parsed legacy document
## Why this phase matters
This is the first phase where TED is explicitly modeled as a document type projection instead of the
platform's canonical root entity. That makes the next steps possible:
- generic semantic search across multiple document types
- future non-TED projections
- migration of TED structured search to the new projection tables

@ -18,8 +18,8 @@ import org.springframework.scheduling.annotation.EnableAsync;
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"}) @SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
@EnableAsync @EnableAsync
//@EnableConfigurationProperties(TedProcessorProperties.class) //@EnableConfigurationProperties(TedProcessorProperties.class)
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity"}) @EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity"})
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository"}) @EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository"})
public class DocumentIntelligencePlatformApplication { public class DocumentIntelligencePlatformApplication {
public static void main(String[] args) { public static void main(String[] args) {

@ -0,0 +1,30 @@
# Phase 3 - TED as a structured projection on the generic document core
Phase 3 makes TED a proper type-specific projection layered on top of the generic `DOC.doc_document`
root introduced in Phase 1 and the generic vectorization model introduced in Phase 2.
## What is implemented
- `TED.ted_notice_projection`
- `TED.ted_notice_lot`
- `TED.ted_notice_organization`
- `TedNoticeProjectionService`
- optional startup backfill of missing TED projections
- processing flow updated so freshly imported TED notices dual-write to:
  - the `DOC` generic document/content/representation model
  - the `TED` structured projection tables
## Core intent
TED is no longer the root model of the platform. Instead:
- `DOC.doc_document` is the canonical document root
- `TED.ted_notice_projection` holds TED-specific structured metadata
- `TED.ted_notice_lot` and `TED.ted_notice_organization` hold normalized child structures
## Compatibility
This phase is additive:
- legacy `TED.procurement_document` remains in place
- existing search and API behavior continue to work
- new imports are now representable in both the legacy model and the new projection model
## Important limitation
Structured search endpoints still read from the legacy TED model. Moving TED structured reads to the
new projection tables is the next migration step.

@ -0,0 +1,95 @@
package at.procon.dip.domain.ted.entity;
import at.procon.dip.architecture.SchemaNames;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.FetchType;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.Index;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import jakarta.persistence.UniqueConstraint;
import java.math.BigDecimal;
import java.time.OffsetDateTime;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
/**
 * Lot-level child row of a {@link TedNoticeProjection}, mapped to the {@code ted_notice_lot} table.
 *
 * <p>The table-level unique constraint permits each {@code lot_id} only once per projection.
 */
@Builder
@AllArgsConstructor
@NoArgsConstructor
@Setter
@Getter
@Entity
@Table(
        name = "ted_notice_lot",
        schema = SchemaNames.TED,
        uniqueConstraints = {
            @UniqueConstraint(
                    name = "uq_ted_notice_lot_projection_lot",
                    columnNames = {"notice_projection_id", "lot_id"})
        },
        indexes = {
            @Index(name = "idx_ted_notice_lot_projection", columnList = "notice_projection_id")
        })
public class TedNoticeLot {

    /** Surrogate primary key, generated as a UUID. */
    @GeneratedValue(strategy = GenerationType.UUID)
    @Id
    private UUID id;

    /** Owning projection; mandatory and loaded lazily. */
    @JoinColumn(nullable = false, name = "notice_projection_id")
    @ManyToOne(fetch = FetchType.LAZY)
    private TedNoticeProjection noticeProjection;

    /** Lot identifier; mandatory, at most 50 characters. */
    @Column(length = 50, nullable = false, name = "lot_id")
    private String lotId;

    @Column(columnDefinition = "TEXT", name = "internal_id")
    private String internalId;

    @Column(columnDefinition = "TEXT", name = "title")
    private String title;

    @Column(columnDefinition = "TEXT", name = "description")
    private String description;

    /** CPV codes, persisted as a SQL array column. */
    @JdbcTypeCode(SqlTypes.ARRAY)
    @Column(columnDefinition = "VARCHAR(100)[]", name = "cpv_codes")
    private String[] cpvCodes;

    /** NUTS codes, persisted as a SQL array column. */
    @JdbcTypeCode(SqlTypes.ARRAY)
    @Column(columnDefinition = "VARCHAR(20)[]", name = "nuts_codes")
    private String[] nutsCodes;

    @Column(scale = 2, precision = 20, name = "estimated_value")
    private BigDecimal estimatedValue;

    @Column(length = 3, name = "estimated_value_currency")
    private String estimatedValueCurrency;

    @Column(name = "duration_value")
    private Double durationValue;

    @Column(length = 20, name = "duration_unit")
    private String durationUnit;

    @Column(name = "submission_deadline")
    private OffsetDateTime submissionDeadline;

    /** Defaults to {@code false} when the builder is not given a value. */
    @Builder.Default
    @Column(name = "eu_funded")
    private Boolean euFunded = false;

    /** Creation timestamp; never updated after insert. */
    @Column(updatable = false, nullable = false, name = "created_at")
    @Builder.Default
    private OffsetDateTime createdAt = OffsetDateTime.now();

    /** Stamps the creation time right before the row is first persisted. */
    @PrePersist
    protected void onCreate() {
        this.createdAt = OffsetDateTime.now();
    }
}

@ -0,0 +1,90 @@
package at.procon.dip.domain.ted.entity;
import at.procon.dip.architecture.SchemaNames;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.FetchType;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.Index;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import jakarta.persistence.UniqueConstraint;
import java.time.OffsetDateTime;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * Organization child row of a {@link TedNoticeProjection}, mapped to the
 * {@code ted_notice_organization} table.
 *
 * <p>The table-level unique constraint covers the pair {@code notice_projection_id} /
 * {@code org_reference}.
 */
@Builder
@AllArgsConstructor
@NoArgsConstructor
@Setter
@Getter
@Entity
@Table(
        name = "ted_notice_organization",
        schema = SchemaNames.TED,
        uniqueConstraints = {
            @UniqueConstraint(
                    name = "uq_ted_notice_org_projection_ref",
                    columnNames = {"notice_projection_id", "org_reference"})
        },
        indexes = {
            @Index(name = "idx_ted_notice_org_projection", columnList = "notice_projection_id"),
            @Index(name = "idx_ted_notice_org_country", columnList = "country_code")
        })
public class TedNoticeOrganization {

    /** Surrogate primary key, generated as a UUID. */
    @GeneratedValue(strategy = GenerationType.UUID)
    @Id
    private UUID id;

    /** Owning projection; mandatory and loaded lazily. */
    @JoinColumn(nullable = false, name = "notice_projection_id")
    @ManyToOne(fetch = FetchType.LAZY)
    private TedNoticeProjection noticeProjection;

    /** Reference of the organization; part of the unique constraint. */
    @Column(length = 50, name = "org_reference")
    private String orgReference;

    @Column(length = 50, name = "role")
    private String role;

    @Column(columnDefinition = "TEXT", name = "name")
    private String name;

    @Column(length = 1000, name = "company_id")
    private String companyId;

    @Column(length = 10, name = "country_code")
    private String countryCode;

    @Column(length = 255, name = "city")
    private String city;

    @Column(length = 255, name = "postal_code")
    private String postalCode;

    @Column(columnDefinition = "TEXT", name = "street_name")
    private String streetName;

    @Column(length = 10, name = "nuts_code")
    private String nutsCode;

    @Column(columnDefinition = "TEXT", name = "website_uri")
    private String websiteUri;

    @Column(length = 255, name = "email")
    private String email;

    @Column(length = 50, name = "phone")
    private String phone;

    /** Creation timestamp; never updated after insert. */
    @Column(updatable = false, nullable = false, name = "created_at")
    @Builder.Default
    private OffsetDateTime createdAt = OffsetDateTime.now();

    /** Stamps the creation time right before the row is first persisted. */
    @PrePersist
    protected void onCreate() {
        this.createdAt = OffsetDateTime.now();
    }
}

@ -0,0 +1,194 @@
package at.procon.dip.domain.ted.entity;
import at.procon.dip.architecture.SchemaNames;
import at.procon.dip.domain.document.entity.Document;
import at.procon.ted.model.entity.ContractNature;
import at.procon.ted.model.entity.NoticeType;
import at.procon.ted.model.entity.ProcedureType;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.FetchType;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.Index;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.OneToOne;
import jakarta.persistence.PrePersist;
import jakarta.persistence.PreUpdate;
import jakarta.persistence.Table;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
/**
 * Phase 3 TED-specific projection that sits on top of the generic DOC document root.
 *
 * <p>Each row references exactly one generic {@link Document} (unique {@code document_id}) and may
 * carry the id of the legacy procurement document it was derived from (unique when present).
 */
@Entity
@Table(schema = SchemaNames.TED, name = "ted_notice_projection", indexes = {
        @Index(name = "idx_ted_proj_document", columnList = "document_id"),
        @Index(name = "idx_ted_proj_legacy_doc", columnList = "legacy_procurement_document_id"),
        @Index(name = "idx_ted_proj_publication_id", columnList = "publication_id"),
        @Index(name = "idx_ted_proj_notice_type", columnList = "notice_type"),
        @Index(name = "idx_ted_proj_buyer_country", columnList = "buyer_country_code"),
        @Index(name = "idx_ted_proj_publication_date", columnList = "publication_date")
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class TedNoticeProjection {

    /** Surrogate primary key, generated as a UUID. */
    @Id
    @GeneratedValue(strategy = GenerationType.UUID)
    private UUID id;

    /** Generic DOC root this projection belongs to; mandatory, unique, lazily loaded. */
    @OneToOne(fetch = FetchType.LAZY)
    @JoinColumn(name = "document_id", nullable = false, unique = true)
    private Document document;

    /** Optional back-reference to the legacy procurement document row; unique when set. */
    @Column(name = "legacy_procurement_document_id", unique = true)
    private UUID legacyProcurementDocumentId;

    // --- notice identity -------------------------------------------------------------------

    @Column(name = "notice_id", length = 100)
    private String noticeId;

    @Column(name = "publication_id", length = 50)
    private String publicationId;

    @Column(name = "notice_url", length = 255)
    private String noticeUrl;

    @Column(name = "ojs_id", length = 20)
    private String ojsId;

    @Column(name = "contract_folder_id", length = 100)
    private String contractFolderId;

    /** Stored by enum name; mandatory column, defaults to {@link NoticeType#OTHER}. */
    @Enumerated(EnumType.STRING)
    @Column(name = "notice_type", nullable = false, length = 50)
    @Builder.Default
    private NoticeType noticeType = NoticeType.OTHER;

    @Column(name = "notice_subtype_code", length = 10)
    private String noticeSubtypeCode;

    @Column(name = "sdk_version", length = 20)
    private String sdkVersion;

    @Column(name = "ubl_version", length = 10)
    private String ublVersion;

    @Column(name = "language_code", length = 10)
    private String languageCode;

    // --- dates -----------------------------------------------------------------------------

    @Column(name = "issue_datetime")
    private OffsetDateTime issueDateTime;

    @Column(name = "publication_date")
    private LocalDate publicationDate;

    @Column(name = "submission_deadline")
    private OffsetDateTime submissionDeadline;

    // --- buyer -----------------------------------------------------------------------------

    @Column(name = "buyer_name", columnDefinition = "TEXT")
    private String buyerName;

    @Column(name = "buyer_country_code", length = 10)
    private String buyerCountryCode;

    @Column(name = "buyer_city", length = 255)
    private String buyerCity;

    @Column(name = "buyer_postal_code", length = 100)
    private String buyerPostalCode;

    @Column(name = "buyer_nuts_code", length = 10)
    private String buyerNutsCode;

    @Column(name = "buyer_activity_type", length = 50)
    private String buyerActivityType;

    @Column(name = "buyer_legal_type", length = 50)
    private String buyerLegalType;

    // --- project / procedure ---------------------------------------------------------------

    @Column(name = "project_title", columnDefinition = "TEXT")
    private String projectTitle;

    @Column(name = "project_description", columnDefinition = "TEXT")
    private String projectDescription;

    @Column(name = "internal_reference", length = 500)
    private String internalReference;

    /** Stored by enum name; mandatory column, defaults to {@link ContractNature#UNKNOWN}. */
    @Enumerated(EnumType.STRING)
    @Column(name = "contract_nature", nullable = false, length = 50)
    @Builder.Default
    private ContractNature contractNature = ContractNature.UNKNOWN;

    /** Stored by enum name; defaults to {@link ProcedureType#OTHER}. */
    @Enumerated(EnumType.STRING)
    @Column(name = "procedure_type", length = 50)
    @Builder.Default
    private ProcedureType procedureType = ProcedureType.OTHER;

    /** CPV codes, persisted as a SQL array column. */
    @Column(name = "cpv_codes", columnDefinition = "VARCHAR(100)[]")
    @JdbcTypeCode(SqlTypes.ARRAY)
    private String[] cpvCodes;

    /** NUTS codes, persisted as a SQL array column. */
    @Column(name = "nuts_codes", columnDefinition = "VARCHAR(20)[]")
    @JdbcTypeCode(SqlTypes.ARRAY)
    private String[] nutsCodes;

    @Column(name = "estimated_value", precision = 20, scale = 2)
    private BigDecimal estimatedValue;

    @Column(name = "estimated_value_currency", length = 3)
    private String estimatedValueCurrency;

    @Column(name = "total_lots")
    @Builder.Default
    private Integer totalLots = 0;

    @Column(name = "max_lots_awarded")
    private Integer maxLotsAwarded;

    @Column(name = "max_lots_submitted")
    private Integer maxLotsSubmitted;

    @Column(name = "regulatory_domain", length = 50)
    private String regulatoryDomain;

    @Column(name = "eu_funded")
    @Builder.Default
    private Boolean euFunded = false;

    // --- audit -----------------------------------------------------------------------------

    /** Creation timestamp; never updated after insert. */
    @Builder.Default
    @Column(name = "created_at", nullable = false, updatable = false)
    private OffsetDateTime createdAt = OffsetDateTime.now();

    /** Last-modification timestamp; refreshed on every update. */
    @Builder.Default
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt = OffsetDateTime.now();

    /**
     * Stamps both audit columns right before the row is first persisted.
     *
     * <p>A single instant is used for both columns so that a freshly inserted row has
     * {@code created_at == updated_at}; two separate clock reads would differ slightly.
     */
    @PrePersist
    protected void onCreate() {
        OffsetDateTime now = OffsetDateTime.now();
        createdAt = now;
        updatedAt = now;
    }

    /** Refreshes the modification timestamp right before an update is written. */
    @PreUpdate
    protected void onUpdate() {
        updatedAt = OffsetDateTime.now();
    }
}

@ -0,0 +1,13 @@
package at.procon.dip.domain.ted.repository;
import at.procon.dip.domain.ted.entity.TedNoticeLot;
import java.util.List;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Spring Data JPA repository for {@link TedNoticeLot} entities keyed by {@link UUID}.
 */
public interface TedNoticeLotRepository extends JpaRepository<TedNoticeLot, UUID> {
/** Returns all lots whose owning projection has the given id. */
List<TedNoticeLot> findByNoticeProjection_Id(UUID noticeProjectionId);
/**
 * Deletes all lots whose owning projection has the given id.
 * NOTE(review): presumably must run inside an active transaction; the service in this change
 * calls it from a {@code @Transactional} method - confirm for any other call site.
 */
void deleteByNoticeProjection_Id(UUID noticeProjectionId);
}

@ -0,0 +1,13 @@
package at.procon.dip.domain.ted.repository;
import at.procon.dip.domain.ted.entity.TedNoticeOrganization;
import java.util.List;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Spring Data JPA repository for {@link TedNoticeOrganization} entities keyed by {@link UUID}.
 */
public interface TedNoticeOrganizationRepository extends JpaRepository<TedNoticeOrganization, UUID> {
/** Returns all organizations whose owning projection has the given id. */
List<TedNoticeOrganization> findByNoticeProjection_Id(UUID noticeProjectionId);
/**
 * Deletes all organizations whose owning projection has the given id.
 * NOTE(review): presumably must run inside an active transaction; the service in this change
 * calls it from a {@code @Transactional} method - confirm for any other call site.
 */
void deleteByNoticeProjection_Id(UUID noticeProjectionId);
}

@ -0,0 +1,15 @@
package at.procon.dip.domain.ted.repository;
import at.procon.dip.domain.ted.entity.TedNoticeProjection;
import java.util.Optional;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Spring Data JPA repository for {@link TedNoticeProjection} entities keyed by {@link UUID}.
 */
public interface TedNoticeProjectionRepository extends JpaRepository<TedNoticeProjection, UUID> {
/** Looks up the projection attached to the generic DOC document with the given id. */
Optional<TedNoticeProjection> findByDocument_Id(UUID documentId);
/** Looks up the projection that references the given legacy procurement document id. */
Optional<TedNoticeProjection> findByLegacyProcurementDocumentId(UUID legacyProcurementDocumentId);
/** Returns whether a projection referencing the given legacy procurement document id exists. */
boolean existsByLegacyProcurementDocumentId(UUID legacyProcurementDocumentId);
}

@ -0,0 +1,181 @@
package at.procon.dip.domain.ted.service;
import at.procon.dip.domain.document.entity.Document;
import at.procon.dip.domain.document.repository.DocumentRepository;
import at.procon.dip.domain.ted.entity.TedNoticeLot;
import at.procon.dip.domain.ted.entity.TedNoticeOrganization;
import at.procon.dip.domain.ted.entity.TedNoticeProjection;
import at.procon.dip.domain.ted.repository.TedNoticeLotRepository;
import at.procon.dip.domain.ted.repository.TedNoticeOrganizationRepository;
import at.procon.dip.domain.ted.repository.TedNoticeProjectionRepository;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.Organization;
import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.ProcurementLot;
import at.procon.ted.service.TedPhase2GenericDocumentService;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
/**
 * Phase 3 service that materializes TED-specific structured projections on top of the generic DOC document root.
 *
 * <p>For a legacy {@link ProcurementDocument} it creates or refreshes one
 * {@link TedNoticeProjection} and replaces that projection's lot and organization child rows.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class TedNoticeProjectionService {

    private final TedProcessorProperties properties;
    private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService;
    private final DocumentRepository documentRepository;
    private final TedNoticeProjectionRepository projectionRepository;
    private final TedNoticeLotRepository lotRepository;
    private final TedNoticeOrganizationRepository organizationRepository;

    /**
     * Syncs the legacy document into the generic DOC model and then creates or refreshes its
     * TED projection.
     *
     * @param legacyDocument parsed legacy TED document
     * @return id of the saved projection, or {@code null} when the projection feature is disabled
     */
    @Transactional
    public UUID registerOrRefreshProjection(ProcurementDocument legacyDocument) {
        if (!properties.getProjection().isEnabled()) {
            return null;
        }
        TedPhase2GenericDocumentService.TedGenericDocumentSyncResult syncResult =
                tedPhase2GenericDocumentService.syncTedDocument(legacyDocument);
        return registerOrRefreshProjection(legacyDocument, syncResult.documentId());
    }

    /**
     * Creates or refreshes the TED projection for a legacy document against a known generic
     * DOC document.
     *
     * @param legacyDocument parsed legacy TED document
     * @param genericDocumentId id of the generic DOC document; when {@code null} the generic
     *                          document is ensured first
     * @return id of the saved projection, or {@code null} when the projection feature is disabled
     * @throws IllegalArgumentException if no DOC document exists for the resolved id
     */
    @Transactional
    public UUID registerOrRefreshProjection(ProcurementDocument legacyDocument, UUID genericDocumentId) {
        if (!properties.getProjection().isEnabled()) {
            return null;
        }
        UUID resolvedDocumentId = genericDocumentId != null
                ? genericDocumentId
                : tedPhase2GenericDocumentService.ensureGenericTedDocument(legacyDocument);
        Document genericDocument = documentRepository.findById(resolvedDocumentId)
                .orElseThrow(() -> new IllegalArgumentException("Unknown DOC document id: " + resolvedDocumentId));

        // Reuse an existing projection, matched first by legacy id and then by generic document id.
        TedNoticeProjection projection = projectionRepository.findByLegacyProcurementDocumentId(legacyDocument.getId())
                .or(() -> projectionRepository.findByDocument_Id(genericDocument.getId()))
                .orElseGet(TedNoticeProjection::new);
        mapProjection(projection, genericDocument, legacyDocument);
        projection = projectionRepository.save(projection);
        replaceLots(projection, legacyDocument.getLots());
        replaceOrganizations(projection, legacyDocument.getOrganizations());
        log.debug("Phase 3 TED projection ensured for legacy {} -> projection {} / doc {}",
                legacyDocument.getId(), projection.getId(), genericDocument.getId());
        return projection.getId();
    }

    /**
     * Looks up the projection that references the given legacy procurement document.
     *
     * @param legacyDocumentId id of the legacy procurement document
     * @return the matching projection, if any
     */
    @Transactional(readOnly = true)
    public Optional<TedNoticeProjection> findByLegacyProcurementDocumentId(UUID legacyDocumentId) {
        return projectionRepository.findByLegacyProcurementDocumentId(legacyDocumentId);
    }

    /** Copies all scalar and array fields of the legacy document onto the projection. */
    private void mapProjection(TedNoticeProjection projection, Document genericDocument, ProcurementDocument legacyDocument) {
        projection.setDocument(genericDocument);
        projection.setLegacyProcurementDocumentId(legacyDocument.getId());
        projection.setNoticeId(legacyDocument.getNoticeId());
        projection.setPublicationId(legacyDocument.getPublicationId());
        projection.setNoticeUrl(legacyDocument.getNoticeUrl());
        projection.setOjsId(legacyDocument.getOjsId());
        projection.setContractFolderId(legacyDocument.getContractFolderId());
        projection.setNoticeType(legacyDocument.getNoticeType());
        projection.setNoticeSubtypeCode(legacyDocument.getNoticeSubtypeCode());
        projection.setSdkVersion(legacyDocument.getSdkVersion());
        projection.setUblVersion(legacyDocument.getUblVersion());
        projection.setLanguageCode(legacyDocument.getLanguageCode());
        projection.setIssueDateTime(legacyDocument.getIssueDateTime());
        projection.setPublicationDate(legacyDocument.getPublicationDate());
        projection.setSubmissionDeadline(legacyDocument.getSubmissionDeadline());
        projection.setBuyerName(legacyDocument.getBuyerName());
        projection.setBuyerCountryCode(legacyDocument.getBuyerCountryCode());
        projection.setBuyerCity(legacyDocument.getBuyerCity());
        projection.setBuyerPostalCode(legacyDocument.getBuyerPostalCode());
        projection.setBuyerNutsCode(legacyDocument.getBuyerNutsCode());
        projection.setBuyerActivityType(legacyDocument.getBuyerActivityType());
        projection.setBuyerLegalType(legacyDocument.getBuyerLegalType());
        projection.setProjectTitle(legacyDocument.getProjectTitle());
        projection.setProjectDescription(legacyDocument.getProjectDescription());
        projection.setInternalReference(legacyDocument.getInternalReference());
        projection.setContractNature(legacyDocument.getContractNature());
        projection.setProcedureType(legacyDocument.getProcedureType());
        projection.setCpvCodes(copyArray(legacyDocument.getCpvCodes()));
        projection.setNutsCodes(copyArray(legacyDocument.getNutsCodes()));
        projection.setEstimatedValue(legacyDocument.getEstimatedValue());
        projection.setEstimatedValueCurrency(legacyDocument.getEstimatedValueCurrency());
        projection.setTotalLots(legacyDocument.getTotalLots());
        projection.setMaxLotsAwarded(legacyDocument.getMaxLotsAwarded());
        projection.setMaxLotsSubmitted(legacyDocument.getMaxLotsSubmitted());
        projection.setRegulatoryDomain(legacyDocument.getRegulatoryDomain());
        projection.setEuFunded(legacyDocument.getEuFunded());
    }

    /** Replaces all lot rows of the projection with copies of the given legacy lots. */
    private void replaceLots(TedNoticeProjection projection, List<ProcurementLot> legacyLots) {
        lotRepository.deleteByNoticeProjection_Id(projection.getId());
        // Push the DELETEs to the database before new rows are queued. Hibernate otherwise runs
        // pending INSERTs before pending DELETEs at flush time, which would violate the unique
        // (notice_projection_id, lot_id) constraint when an existing projection is refreshed.
        lotRepository.flush();
        if (legacyLots == null || legacyLots.isEmpty()) {
            return;
        }
        List<TedNoticeLot> projectedLots = new ArrayList<>(legacyLots.size());
        for (ProcurementLot lot : legacyLots) {
            projectedLots.add(TedNoticeLot.builder()
                    .noticeProjection(projection)
                    .lotId(lot.getLotId())
                    .internalId(lot.getInternalId())
                    .title(lot.getTitle())
                    .description(lot.getDescription())
                    .cpvCodes(copyArray(lot.getCpvCodes()))
                    .nutsCodes(copyArray(lot.getNutsCodes()))
                    .estimatedValue(lot.getEstimatedValue())
                    .estimatedValueCurrency(lot.getEstimatedValueCurrency())
                    .durationValue(lot.getDurationValue())
                    .durationUnit(lot.getDurationUnit())
                    .submissionDeadline(lot.getSubmissionDeadline())
                    .euFunded(lot.getEuFunded())
                    .build());
        }
        lotRepository.saveAll(projectedLots);
    }

    /** Replaces all organization rows of the projection with copies of the given legacy organizations. */
    private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) {
        organizationRepository.deleteByNoticeProjection_Id(projection.getId());
        // Same ordering concern as for lots: flush the DELETEs first so re-inserted rows cannot
        // collide with the unique (notice_projection_id, org_reference) constraint.
        organizationRepository.flush();
        if (legacyOrganizations == null || legacyOrganizations.isEmpty()) {
            return;
        }
        List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>(legacyOrganizations.size());
        for (Organization organization : legacyOrganizations) {
            projectedOrganizations.add(TedNoticeOrganization.builder()
                    .noticeProjection(projection)
                    .orgReference(organization.getOrgReference())
                    .role(organization.getRole())
                    .name(organization.getName())
                    .companyId(organization.getCompanyId())
                    .countryCode(organization.getCountryCode())
                    .city(organization.getCity())
                    .postalCode(organization.getPostalCode())
                    .streetName(organization.getStreetName())
                    .nutsCode(organization.getNutsCode())
                    .websiteUri(organization.getWebsiteUri())
                    .email(organization.getEmail())
                    .phone(organization.getPhone())
                    .build());
        }
        organizationRepository.saveAll(projectedOrganizations);
    }

    /** Returns a defensive copy of the array so the projection does not share it with the legacy entity. */
    private String[] copyArray(String[] source) {
        return source == null ? null : source.clone();
    }
}

@ -0,0 +1,51 @@
package at.procon.dip.domain.ted.startup;
import at.procon.dip.domain.ted.repository.TedNoticeProjectionRepository;
import at.procon.dip.domain.ted.service.TedNoticeProjectionService;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.repository.ProcurementDocumentRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
/**
 * Optional startup backfill for Phase 3 TED projections.
 *
 * <p>Runs only when both the projection feature and the startup backfill flag are enabled. It
 * walks legacy documents oldest first and creates projections for those that have none, stopping
 * once the configured limit of newly created projections is reached or no documents remain.
 */
@Component
@RequiredArgsConstructor
@Slf4j
public class TedProjectionStartupRunner implements ApplicationRunner {

    private final TedProcessorProperties properties;
    private final ProcurementDocumentRepository procurementDocumentRepository;
    private final TedNoticeProjectionRepository projectionRepository;
    private final TedNoticeProjectionService projectionService;

    /**
     * Backfills missing TED projections from legacy documents.
     *
     * @param args application arguments (unused)
     */
    @Override
    public void run(ApplicationArguments args) {
        var projectionSettings = properties.getProjection();
        if (!projectionSettings.isEnabled() || !projectionSettings.isStartupBackfillEnabled()) {
            return;
        }
        int limit = projectionSettings.getStartupBackfillLimit();
        if (limit < 1) {
            // PageRequest rejects a page size below one; skip instead of failing startup.
            log.warn("Phase 3 startup backfill skipped - startup backfill limit must be positive but was {}", limit);
            return;
        }
        log.info("Phase 3 startup backfill enabled - ensuring TED projections for up to {} documents", limit);

        Sort oldestFirst = Sort.by(Sort.Direction.ASC, "createdAt");
        int synced = 0;
        int pageIndex = 0;
        boolean morePages = true;
        // Keep paging past documents that already have a projection. Reading only the first page
        // would stop making progress once the oldest `limit` documents are projected.
        while (morePages && synced < limit) {
            var page = procurementDocumentRepository.findAll(PageRequest.of(pageIndex, limit, oldestFirst));
            for (var legacyDocument : page.getContent()) {
                if (synced >= limit) {
                    break;
                }
                if (projectionRepository.existsByLegacyProcurementDocumentId(legacyDocument.getId())) {
                    continue;
                }
                projectionService.registerOrRefreshProjection(legacyDocument);
                synced++;
            }
            morePages = page.hasNext();
            pageIndex++;
        }
        log.info("Phase 3 startup backfill completed - synced {} TED projections", synced);
    }
}

@ -27,6 +27,7 @@ public class TedProcessorProperties {
private DownloadProperties download = new DownloadProperties(); private DownloadProperties download = new DownloadProperties();
private MailProperties mail = new MailProperties(); private MailProperties mail = new MailProperties();
private SolutionBriefProperties solutionBrief = new SolutionBriefProperties(); private SolutionBriefProperties solutionBrief = new SolutionBriefProperties();
private ProjectionProperties projection = new ProjectionProperties();
/** /**
* Input directory configuration for Apache Camel file consumer. * Input directory configuration for Apache Camel file consumer.
@ -405,6 +406,30 @@ public class TedProcessorProperties {
private long mimeInputPollInterval = 10000; private long mimeInputPollInterval = 10000;
} }
/**
* Phase 3 TED projection configuration.
* Holds the switches for the structured projection dual-write and for the optional
* startup backfill, plus the backfill size limit.
*/
@Data
public static class ProjectionProperties {
/**
* Enable/disable Phase 3 TED structured projection dual-write.
* Defaults to {@code true}.
*/
private boolean enabled = true;
/**
* Optional startup backfill of missing projections from legacy TED documents.
* Defaults to {@code false}.
*/
private boolean startupBackfillEnabled = false;
/**
* Maximum number of legacy TED documents to backfill during startup.
* Defaults to {@code 250}.
* NOTE(review): {@code @Positive} presumably only takes effect if the enclosing properties
* class is bean-validated - confirm.
*/
@Positive
private int startupBackfillLimit = 250;
}
/** /**
* Solution Brief processing configuration. * Solution Brief processing configuration.
* Scans PDF files and generates Excel reports with similar TED documents. * Scans PDF files and generates Excel reports with similar TED documents.

@ -1,5 +1,7 @@
package at.procon.ted.service; package at.procon.ted.service;
import at.procon.dip.domain.ted.service.TedNoticeProjectionService;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.ProcurementDocument; import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.ProcessingLog; import at.procon.ted.model.entity.ProcessingLog;
import at.procon.ted.repository.ProcurementDocumentRepository; import at.procon.ted.repository.ProcurementDocumentRepository;
@ -38,7 +40,9 @@ public class BatchDocumentProcessingService {
private final XmlParserService xmlParserService; private final XmlParserService xmlParserService;
private final ProcurementDocumentRepository documentRepository; private final ProcurementDocumentRepository documentRepository;
private final ProcessingLogService processingLogService; private final ProcessingLogService processingLogService;
private final TedProcessorProperties properties;
private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService; private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService;
private final TedNoticeProjectionService tedNoticeProjectionService;
/** /**
* Process a batch of XML files from a Daily Package. * Process a batch of XML files from a Daily Package.
@ -132,9 +136,13 @@ public class BatchDocumentProcessingService {
doc.getSourceFilename(), 0); doc.getSourceFilename(), 0);
if (doc.getDocumentHash() != null) { if (doc.getDocumentHash() != null) {
if (properties.getProjection().isEnabled()) {
tedNoticeProjectionService.registerOrRefreshProjection(doc);
} else if (properties.getVectorization().isGenericPipelineEnabled()) {
tedPhase2GenericDocumentService.registerOrRefreshTedDocument(doc); tedPhase2GenericDocumentService.registerOrRefreshTedDocument(doc);
} }
} }
}
log.info("Successfully inserted {} documents in batch", savedDocuments.size()); log.info("Successfully inserted {} documents in batch", savedDocuments.size());
} }

@ -1,5 +1,6 @@
package at.procon.ted.service; package at.procon.ted.service;
import at.procon.dip.domain.ted.service.TedNoticeProjectionService;
import at.procon.ted.config.TedProcessorProperties; import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.event.DocumentSavedEvent; import at.procon.ted.event.DocumentSavedEvent;
import at.procon.ted.model.entity.*; import at.procon.ted.model.entity.*;
@ -37,6 +38,7 @@ public class DocumentProcessingService {
private final TedProcessorProperties properties; private final TedProcessorProperties properties;
private final ApplicationEventPublisher eventPublisher; private final ApplicationEventPublisher eventPublisher;
private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService; private final TedPhase2GenericDocumentService tedPhase2GenericDocumentService;
private final TedNoticeProjectionService tedNoticeProjectionService;
/** /**
* Process an XML document from the file system. * Process an XML document from the file system.
@ -88,7 +90,16 @@ public class DocumentProcessingService {
"Document parsed and stored successfully", null, filename, "Document parsed and stored successfully", null, filename,
(int) (System.currentTimeMillis() - startTime)); (int) (System.currentTimeMillis() - startTime));
if (properties.getVectorization().isGenericPipelineEnabled()) { if (properties.getProjection().isEnabled()) {
tedNoticeProjectionService.registerOrRefreshProjection(document);
log.debug("Document saved successfully, Phase 3 TED projection ensured: {}", document.getId());
if (!properties.getVectorization().isGenericPipelineEnabled()) {
// Keep legacy vectorization behavior when the generic embedding pipeline is disabled.
eventPublisher.publishEvent(new DocumentSavedEvent(document.getId(), document.getPublicationId()));
log.debug("Document saved successfully, legacy vectorization event published: {}", document.getId());
}
} else if (properties.getVectorization().isGenericPipelineEnabled()) {
tedPhase2GenericDocumentService.registerOrRefreshTedDocument(document); tedPhase2GenericDocumentService.registerOrRefreshTedDocument(document);
log.debug("Document saved successfully, Phase 2 generic vectorization record ensured: {}", document.getId()); log.debug("Document saved successfully, Phase 2 generic vectorization record ensured: {}", document.getId());
} else { } else {
@ -147,7 +158,9 @@ public class DocumentProcessingService {
documentRepository.save(updated); documentRepository.save(updated);
if (properties.getVectorization().isGenericPipelineEnabled()) { if (properties.getProjection().isEnabled()) {
tedNoticeProjectionService.registerOrRefreshProjection(updated);
} else if (properties.getVectorization().isGenericPipelineEnabled()) {
tedPhase2GenericDocumentService.registerOrRefreshTedDocument(updated); tedPhase2GenericDocumentService.registerOrRefreshTedDocument(updated);
} }

@ -34,6 +34,8 @@ import org.springframework.transaction.annotation.Transactional;
/** /**
* Phase 2 bridge that dual-writes TED documents into the generic DOC persistence backbone. * Phase 2 bridge that dual-writes TED documents into the generic DOC persistence backbone.
* In Phase 3 the same bridge also becomes the generic root for TED projections even when the
* generic vectorization route is disabled.
*/ */
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
@ -49,10 +51,28 @@ public class TedPhase2GenericDocumentService {
private final DocumentService documentService; private final DocumentService documentService;
private final DocumentEmbeddingService embeddingService; private final DocumentEmbeddingService embeddingService;
/**
* Phase 2 compatibility API used by manual trigger flows. Returns the embedding id when the
* generic vectorization pipeline is enabled; otherwise returns {@code null} after ensuring the
* generic document root exists.
*/
@Transactional @Transactional
public UUID registerOrRefreshTedDocument(ProcurementDocument tedDocument) { public UUID registerOrRefreshTedDocument(ProcurementDocument tedDocument) {
if (!properties.getVectorization().isGenericPipelineEnabled()) { return syncTedDocument(tedDocument).embeddingId();
return null; }
/**
 * Synchronizes the given legacy TED document into the generic DOC model and reports the id of
 * the generic document that backs it.
 *
 * @param tedDocument legacy TED document whose current state is mirrored into the DOC model
 * @return id of the generic document, or {@code null} when the sync is skipped because neither
 *         the generic vectorization pipeline nor the TED projection is enabled
 */
@Transactional
public UUID ensureGenericTedDocument(ProcurementDocument tedDocument) {
    TedGenericDocumentSyncResult syncResult = syncTedDocument(tedDocument);
    return syncResult.documentId();
}
@Transactional
public TedGenericDocumentSyncResult syncTedDocument(ProcurementDocument tedDocument) {
if (!isGenericTedSyncEnabled()) {
return new TedGenericDocumentSyncResult(null, null, null);
} }
Document document = documentRepository.findByDedupHash(tedDocument.getDocumentHash()) Document document = documentRepository.findByDedupHash(tedDocument.getDocumentHash())
@ -73,10 +93,21 @@ public class TedPhase2GenericDocumentService {
ensureTedSource(document, tedDocument); ensureTedSource(document, tedDocument);
DocumentContent originalContent = ensureOriginalContent(document, tedDocument); DocumentContent originalContent = ensureOriginalContent(document, tedDocument);
DocumentTextRepresentation representation = ensurePrimaryRepresentation(document, originalContent, tedDocument); DocumentTextRepresentation representation = ensurePrimaryRepresentation(document, originalContent, tedDocument);
UUID embeddingId = null;
if (properties.getVectorization().isGenericPipelineEnabled()) {
DocumentEmbedding embedding = ensurePendingEmbedding(document, representation); DocumentEmbedding embedding = ensurePendingEmbedding(document, representation);
embeddingId = embedding.getId();
log.debug("Phase 2 DOC bridge ensured generic TED document {} -> embedding {}", document.getId(), embeddingId);
} else {
log.debug("Phase 2 DOC bridge ensured generic TED document {} without embedding queue", document.getId());
}
log.debug("Phase 2 DOC bridge ensured generic TED document {} -> embedding {}", document.getId(), embedding.getId()); return new TedGenericDocumentSyncResult(document.getId(), embeddingId, representation.getId());
return embedding.getId(); }
private boolean isGenericTedSyncEnabled() {
return properties.getVectorization().isGenericPipelineEnabled() || properties.getProjection().isEnabled();
} }
private Document createGenericDocument(ProcurementDocument tedDocument) { private Document createGenericDocument(ProcurementDocument tedDocument) {
@ -194,4 +225,11 @@ public class TedPhase2GenericDocumentService {
} }
return "TED:hash:" + tedDocument.getDocumentHash(); return "TED:hash:" + tedDocument.getDocumentHash();
} }
/**
 * Identifiers produced by one synchronization of a legacy TED document into the generic DOC model.
 *
 * <p>All components are {@code null} when the sync is skipped because it is disabled.
 * {@code embeddingId} is additionally {@code null} whenever the generic vectorization pipeline
 * is disabled, since no pending embedding is ensured in that case.
 *
 * @param documentId              id of the generic document
 * @param embeddingId             id of the ensured pending embedding, if any
 * @param primaryRepresentationId id of the primary text representation
 */
public record TedGenericDocumentSyncResult(UUID documentId, UUID embeddingId, UUID primaryRepresentationId) {
}
} }

@ -195,6 +195,15 @@ ted:
# Polling interval for MIME input directory (milliseconds) # Polling interval for MIME input directory (milliseconds)
mime-input-poll-interval: 10000 mime-input-poll-interval: 10000
# Phase 3 TED projection configuration
projection:
# Enable/disable dual-write into the TED projection model on top of DOC.doc_document
enabled: true
# Optional startup backfill for legacy TED documents without a projection row yet
startup-backfill-enabled: false
# Maximum number of legacy TED documents to backfill during startup
startup-backfill-limit: 250
# Solution Brief processing configuration # Solution Brief processing configuration
solution-brief: solution-brief:
# Enable/disable Solution Brief processing # Enable/disable Solution Brief processing

@ -0,0 +1,105 @@
-- Phase 3: TED becomes a structured projection on top of the generic DOC document root.
-- Additive migration; legacy TED tables remain in place for compatibility.
SET search_path TO TED, DOC, public;
-- TED notice header projection: the TED-specific structured view of one generic DOC.doc_document.
-- NOTE(review): gen_random_uuid() is built in from PostgreSQL 13 onwards; older servers need the
-- pgcrypto extension - confirm the target server version.
CREATE TABLE IF NOT EXISTS TED.ted_notice_projection (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Owning generic document. UNIQUE allows at most one projection per document; the projection
-- row is deleted together with its document.
document_id UUID NOT NULL UNIQUE REFERENCES DOC.doc_document(id) ON DELETE CASCADE,
-- Optional back-reference to the legacy row; set to NULL (not cascaded) when that row is deleted.
legacy_procurement_document_id UUID UNIQUE REFERENCES TED.procurement_document(id) ON DELETE SET NULL,
-- Notice identification and versioning.
notice_id VARCHAR(100),
publication_id VARCHAR(50),
notice_url VARCHAR(255),
ojs_id VARCHAR(20),
contract_folder_id VARCHAR(100),
notice_type VARCHAR(50) NOT NULL DEFAULT 'OTHER',
notice_subtype_code VARCHAR(10),
sdk_version VARCHAR(20),
ubl_version VARCHAR(10),
language_code VARCHAR(10),
issue_datetime TIMESTAMP WITH TIME ZONE,
publication_date DATE,
submission_deadline TIMESTAMP WITH TIME ZONE,
-- Buyer attributes stored directly on the notice row.
buyer_name TEXT,
buyer_country_code VARCHAR(10),
buyer_city VARCHAR(255),
buyer_postal_code VARCHAR(100),
buyer_nuts_code VARCHAR(10),
buyer_activity_type VARCHAR(50),
buyer_legal_type VARCHAR(50),
-- Project / procedure description.
project_title TEXT,
project_description TEXT,
internal_reference VARCHAR(500),
contract_nature VARCHAR(50) NOT NULL DEFAULT 'UNKNOWN',
procedure_type VARCHAR(50) DEFAULT 'OTHER',
-- Classification codes are stored as PostgreSQL arrays.
cpv_codes VARCHAR(100)[],
nuts_codes VARCHAR(20)[],
estimated_value NUMERIC(20,2),
estimated_value_currency VARCHAR(3),
total_lots INTEGER NOT NULL DEFAULT 0,
max_lots_awarded INTEGER,
max_lots_submitted INTEGER,
regulatory_domain VARCHAR(50),
eu_funded BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- NOTE(review): the default only applies on insert and this migration defines no trigger, so
-- updated_at presumably has to be maintained by the application - confirm.
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- Secondary indexes for the lookup/filter columns of the notice projection.
-- document_id and legacy_procurement_document_id are deliberately not indexed here: their UNIQUE
-- column constraints already make PostgreSQL create a unique B-tree index on each column, so
-- explicit indexes on the same columns would only duplicate them (extra storage and write cost).
CREATE INDEX IF NOT EXISTS idx_ted_notice_projection_publication_id
ON TED.ted_notice_projection(publication_id);
CREATE INDEX IF NOT EXISTS idx_ted_notice_projection_notice_type
ON TED.ted_notice_projection(notice_type);
CREATE INDEX IF NOT EXISTS idx_ted_notice_projection_buyer_country
ON TED.ted_notice_projection(buyer_country_code);
-- Descending order matches newest-first listing by publication date.
CREATE INDEX IF NOT EXISTS idx_ted_notice_projection_publication_date
ON TED.ted_notice_projection(publication_date DESC);
-- Lots of a TED notice: zero to many rows per TED.ted_notice_projection row.
CREATE TABLE IF NOT EXISTS TED.ted_notice_lot (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Parent projection; lots are deleted together with their projection row.
notice_projection_id UUID NOT NULL REFERENCES TED.ted_notice_projection(id) ON DELETE CASCADE,
lot_id VARCHAR(50) NOT NULL,
internal_id TEXT,
title TEXT,
description TEXT,
cpv_codes VARCHAR(100)[],
nuts_codes VARCHAR(20)[],
estimated_value NUMERIC(20,2),
estimated_value_currency VARCHAR(3),
duration_value DOUBLE PRECISION,
duration_unit VARCHAR(20),
submission_deadline TIMESTAMP WITH TIME ZONE,
eu_funded BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- A lot_id may appear only once within a given projection.
CONSTRAINT uq_ted_notice_lot_projection_lot UNIQUE (notice_projection_id, lot_id)
);
-- NOTE(review): the unique constraint above already yields an index whose leading column is
-- notice_projection_id, so this single-column index may be redundant - confirm before keeping.
CREATE INDEX IF NOT EXISTS idx_ted_notice_lot_projection
ON TED.ted_notice_lot(notice_projection_id);
-- Organizations referenced by a TED notice: zero to many rows per TED.ted_notice_projection row.
CREATE TABLE IF NOT EXISTS TED.ted_notice_organization (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Parent projection; organizations are deleted together with their projection row.
notice_projection_id UUID NOT NULL REFERENCES TED.ted_notice_projection(id) ON DELETE CASCADE,
org_reference VARCHAR(50),
role VARCHAR(50),
name TEXT,
company_id VARCHAR(1000),
country_code VARCHAR(10),
city VARCHAR(255),
postal_code VARCHAR(255),
street_name TEXT,
nuts_code VARCHAR(10),
website_uri TEXT,
email VARCHAR(255),
phone VARCHAR(50),
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- NOTE(review): org_reference is nullable and PostgreSQL treats NULLs as distinct in unique
-- constraints by default, so several rows with a NULL reference can coexist per projection -
-- confirm this is intended.
CONSTRAINT uq_ted_notice_org_projection_ref UNIQUE (notice_projection_id, org_reference)
);
-- NOTE(review): the unique constraint above already yields an index whose leading column is
-- notice_projection_id, so this single-column index may be redundant - confirm before keeping.
CREATE INDEX IF NOT EXISTS idx_ted_notice_org_projection
ON TED.ted_notice_organization(notice_projection_id);
CREATE INDEX IF NOT EXISTS idx_ted_notice_org_country
ON TED.ted_notice_organization(country_code);
Loading…
Cancel
Save