TED legacy documents: audit and repair
parent
6ae39b4ea5
commit
00ad3aad38
@ -0,0 +1,47 @@
|
|||||||
|
package at.procon.dip.migration.audit.config;
|
||||||
|
|
||||||
|
import jakarta.validation.constraints.Min;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.validation.annotation.Validated;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@ConfigurationProperties(prefix = "dip.migration.legacy-audit")
|
||||||
|
@Data
|
||||||
|
public class LegacyTedAuditProperties {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enables the Wave 1 / Milestone A legacy TED audit subsystem.
|
||||||
|
*/
|
||||||
|
private boolean enabled = true;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Automatically runs the read-only audit on application startup.
|
||||||
|
*/
|
||||||
|
private boolean startupRunEnabled = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of legacy TED documents to scan during startup.
|
||||||
|
* 0 means no limit.
|
||||||
|
*/
|
||||||
|
@Min(0)
|
||||||
|
private int startupRunLimit = 500;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Batch size for legacy TED document paging.
|
||||||
|
*/
|
||||||
|
@Min(1)
|
||||||
|
private int pageSize = 100;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hard cap for persisted findings in a single run to avoid runaway audit volume.
|
||||||
|
*/
|
||||||
|
@Min(1)
|
||||||
|
private int maxFindingsPerRun = 10000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of duplicate/grouped samples recorded for global aggregate checks.
|
||||||
|
*/
|
||||||
|
@Min(1)
|
||||||
|
private int maxDuplicateSamples = 100;
|
||||||
|
}
|
||||||
@ -0,0 +1,87 @@
|
|||||||
|
package at.procon.dip.migration.audit.entity;
|
||||||
|
|
||||||
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.FetchType;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.Index;
|
||||||
|
import jakarta.persistence.JoinColumn;
|
||||||
|
import jakarta.persistence.ManyToOne;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(schema = SchemaNames.DOC, name = "doc_legacy_audit_finding", indexes = {
|
||||||
|
@Index(name = "idx_doc_legacy_audit_find_run", columnList = "run_id"),
|
||||||
|
@Index(name = "idx_doc_legacy_audit_find_type", columnList = "finding_type"),
|
||||||
|
@Index(name = "idx_doc_legacy_audit_find_severity", columnList = "severity"),
|
||||||
|
@Index(name = "idx_doc_legacy_audit_find_legacy_doc", columnList = "legacy_procurement_document_id"),
|
||||||
|
@Index(name = "idx_doc_legacy_audit_find_document", columnList = "document_id")
|
||||||
|
})
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class LegacyTedAuditFinding {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.UUID)
|
||||||
|
private UUID id;
|
||||||
|
|
||||||
|
@ManyToOne(fetch = FetchType.LAZY, optional = false)
|
||||||
|
@JoinColumn(name = "run_id", nullable = false)
|
||||||
|
private LegacyTedAuditRun run;
|
||||||
|
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
@Column(name = "severity", nullable = false, length = 16)
|
||||||
|
private LegacyTedAuditSeverity severity;
|
||||||
|
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
@Column(name = "finding_type", nullable = false, length = 64)
|
||||||
|
private LegacyTedAuditFindingType findingType;
|
||||||
|
|
||||||
|
@Column(name = "package_identifier", length = 20)
|
||||||
|
private String packageIdentifier;
|
||||||
|
|
||||||
|
@Column(name = "legacy_procurement_document_id")
|
||||||
|
private UUID legacyProcurementDocumentId;
|
||||||
|
|
||||||
|
@Column(name = "document_id")
|
||||||
|
private UUID documentId;
|
||||||
|
|
||||||
|
@Column(name = "ted_notice_projection_id")
|
||||||
|
private UUID tedNoticeProjectionId;
|
||||||
|
|
||||||
|
@Column(name = "reference_key", length = 255)
|
||||||
|
private String referenceKey;
|
||||||
|
|
||||||
|
@Column(name = "message", nullable = false, columnDefinition = "TEXT")
|
||||||
|
private String message;
|
||||||
|
|
||||||
|
@Column(name = "details_text", columnDefinition = "TEXT")
|
||||||
|
private String detailsText;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "created_at", nullable = false, updatable = false)
|
||||||
|
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
if (createdAt == null) {
|
||||||
|
createdAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,28 @@
|
|||||||
|
package at.procon.dip.migration.audit.entity;
|
||||||
|
|
||||||
|
/**
 * Categories of findings that a legacy TED audit can persist.
 *
 * <p>Constants are persisted by name; the declaration order is kept stable anyway.
 */
public enum LegacyTedAuditFindingType {

    // --- TED daily package checks ---

    /** Missing package serial numbers within a year, or a whole year without package rows. */
    PACKAGE_SEQUENCE_GAP,

    /** Package is not fully imported and should be considered for re-import. */
    PACKAGE_INCOMPLETE,

    /** Package is marked COMPLETED but has no processedAt timestamp. */
    PACKAGE_COMPLETED_WITHOUT_PROCESSED_AT,

    /** Processed plus failed document counts disagree with the package's xmlFileCount. */
    PACKAGE_COMPLETED_COUNT_MISMATCH,

    /** Package is marked COMPLETED but has no xmlFileCount. */
    PACKAGE_MISSING_XML_FILE_COUNT,

    /** Package has no file hash recorded. */
    PACKAGE_MISSING_FILE_HASH,

    /** Package is marked FAILED but carries no error message. */
    PACKAGE_FAILED_WITHOUT_ERROR_MESSAGE,

    // --- Global aggregate checks ---

    /** The same publicationId appears on more than one legacy TED document. */
    LEGACY_PUBLICATION_ID_DUPLICATE,

    DOC_DEDUP_HASH_DUPLICATE,

    // --- Per-document checks on legacy TED documents ---

    /** Legacy TED document has no documentHash. */
    LEGACY_DOCUMENT_MISSING_HASH,

    /** Legacy TED document has no xmlDocument payload. */
    LEGACY_DOCUMENT_MISSING_XML,

    /** Legacy TED document has no normalized textContent. */
    LEGACY_DOCUMENT_MISSING_TEXT,

    /** Legacy TED document has no publicationId. */
    LEGACY_DOCUMENT_MISSING_PUBLICATION_ID,

    // --- DOC_* and TED_PROJECTION_* types ---
    // NOTE(review): the checks that raise these are not visible next to this enum;
    // their exact meaning should be confirmed against the code that records them.

    DOC_DOCUMENT_MISSING,
    DOC_DOCUMENT_DUPLICATE,
    DOC_SOURCE_MISSING,
    DOC_ORIGINAL_CONTENT_MISSING,
    DOC_ORIGINAL_CONTENT_DUPLICATE,
    DOC_PRIMARY_REPRESENTATION_MISSING,
    DOC_PRIMARY_REPRESENTATION_DUPLICATE,
    TED_PROJECTION_MISSING,
    TED_PROJECTION_MISSING_LEGACY_LINK,
    TED_PROJECTION_DOCUMENT_MISMATCH,

    /** Marker recorded when the per-run finding limit is reached and further findings are suppressed. */
    FINDINGS_TRUNCATED
}
|
||||||
@ -0,0 +1,110 @@
|
|||||||
|
package at.procon.dip.migration.audit.entity;
|
||||||
|
|
||||||
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.Index;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.PreUpdate;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(schema = SchemaNames.DOC, name = "doc_legacy_audit_run", indexes = {
|
||||||
|
@Index(name = "idx_doc_legacy_audit_run_status", columnList = "status"),
|
||||||
|
@Index(name = "idx_doc_legacy_audit_run_started", columnList = "started_at")
|
||||||
|
})
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class LegacyTedAuditRun {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.UUID)
|
||||||
|
private UUID id;
|
||||||
|
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
@Column(name = "status", nullable = false, length = 32)
|
||||||
|
private LegacyTedAuditRunStatus status;
|
||||||
|
|
||||||
|
@Column(name = "requested_limit")
|
||||||
|
private Integer requestedLimit;
|
||||||
|
|
||||||
|
@Column(name = "page_size", nullable = false)
|
||||||
|
private Integer pageSize;
|
||||||
|
|
||||||
|
@Column(name = "scanned_packages", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private Integer scannedPackages = 0;
|
||||||
|
|
||||||
|
@Column(name = "scanned_legacy_documents", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private Integer scannedLegacyDocuments = 0;
|
||||||
|
|
||||||
|
@Column(name = "finding_count", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private Integer findingCount = 0;
|
||||||
|
|
||||||
|
@Column(name = "info_count", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private Integer infoCount = 0;
|
||||||
|
|
||||||
|
@Column(name = "warning_count", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private Integer warningCount = 0;
|
||||||
|
|
||||||
|
@Column(name = "error_count", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private Integer errorCount = 0;
|
||||||
|
|
||||||
|
@Column(name = "started_at", nullable = false)
|
||||||
|
private OffsetDateTime startedAt;
|
||||||
|
|
||||||
|
@Column(name = "completed_at")
|
||||||
|
private OffsetDateTime completedAt;
|
||||||
|
|
||||||
|
@Column(name = "summary_text", columnDefinition = "TEXT")
|
||||||
|
private String summaryText;
|
||||||
|
|
||||||
|
@Column(name = "failure_message", columnDefinition = "TEXT")
|
||||||
|
private String failureMessage;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "created_at", nullable = false, updatable = false)
|
||||||
|
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "updated_at", nullable = false)
|
||||||
|
private OffsetDateTime updatedAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
if (startedAt == null) {
|
||||||
|
startedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
if (createdAt == null) {
|
||||||
|
createdAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
if (updatedAt == null) {
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@PreUpdate
|
||||||
|
protected void onUpdate() {
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.migration.audit.entity;
|
||||||
|
|
||||||
|
/** Lifecycle states of a legacy TED audit run. */
public enum LegacyTedAuditRunStatus {

    /** The run has been started and has not finished yet. */
    RUNNING,

    /** The run finished without an exception. */
    COMPLETED,

    /** The run was aborted by an exception. */
    FAILED
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.migration.audit.entity;
|
||||||
|
|
||||||
|
/** Severity levels attached to legacy TED audit findings, from least to most severe. */
public enum LegacyTedAuditSeverity {

    /** Informational finding. */
    INFO,

    /** Finding that should be reviewed. */
    WARNING,

    /** Finding that indicates an inconsistency. */
    ERROR
}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package at.procon.dip.migration.audit.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditFinding;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
|
public interface LegacyTedAuditFindingRepository extends JpaRepository<LegacyTedAuditFinding, UUID> {
|
||||||
|
}
|
||||||
@ -0,0 +1,8 @@
|
|||||||
|
package at.procon.dip.migration.audit.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditRun;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
|
public interface LegacyTedAuditRunRepository extends JpaRepository<LegacyTedAuditRun, UUID> {
|
||||||
|
}
|
||||||
@ -0,0 +1,610 @@
|
|||||||
|
package at.procon.dip.migration.audit.service;
|
||||||
|
|
||||||
|
import at.procon.dip.migration.audit.config.LegacyTedAuditProperties;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditFinding;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditFindingType;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditRun;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditRunStatus;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditSeverity;
|
||||||
|
import at.procon.dip.migration.audit.repository.LegacyTedAuditFindingRepository;
|
||||||
|
import at.procon.dip.migration.audit.repository.LegacyTedAuditRunRepository;
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
|
import at.procon.ted.model.entity.ProcurementDocument;
|
||||||
|
import at.procon.ted.model.entity.TedDailyPackage;
|
||||||
|
import at.procon.ted.repository.ProcurementDocumentRepository;
|
||||||
|
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.time.Year;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.data.domain.Page;
|
||||||
|
import org.springframework.data.domain.PageRequest;
|
||||||
|
import org.springframework.data.domain.Sort;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class LegacyTedAuditService {
|
||||||
|
|
||||||
|
private final LegacyTedAuditProperties properties;
|
||||||
|
private final TedDailyPackageRepository tedDailyPackageRepository;
|
||||||
|
private final ProcurementDocumentRepository procurementDocumentRepository;
|
||||||
|
private final LegacyTedAuditRunRepository runRepository;
|
||||||
|
private final LegacyTedAuditFindingRepository findingRepository;
|
||||||
|
private final JdbcTemplate jdbcTemplate;
|
||||||
|
|
||||||
|
public LegacyTedAuditRun executeAudit() {
|
||||||
|
return executeAudit(properties.getStartupRunLimit());
|
||||||
|
}
|
||||||
|
|
||||||
|
public LegacyTedAuditRun executeAudit(int requestedLimit) {
|
||||||
|
if (!properties.isEnabled()) {
|
||||||
|
throw new IllegalStateException("Legacy TED audit is disabled by configuration");
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer effectiveLimit = requestedLimit > 0 ? requestedLimit : null;
|
||||||
|
int pageSize = properties.getPageSize();
|
||||||
|
AuditAccumulator accumulator = new AuditAccumulator();
|
||||||
|
|
||||||
|
LegacyTedAuditRun run = LegacyTedAuditRun.builder()
|
||||||
|
.status(LegacyTedAuditRunStatus.RUNNING)
|
||||||
|
.requestedLimit(effectiveLimit)
|
||||||
|
.pageSize(pageSize)
|
||||||
|
.startedAt(OffsetDateTime.now())
|
||||||
|
.build();
|
||||||
|
run = runRepository.save(run);
|
||||||
|
|
||||||
|
try {
|
||||||
|
int scannedPackages = auditPackages(run, accumulator);
|
||||||
|
auditGlobalDuplicates(run, accumulator);
|
||||||
|
int scannedLegacyDocuments = 0;//auditLegacyDocuments(run, accumulator, effectiveLimit, pageSize);
|
||||||
|
|
||||||
|
run.setStatus(LegacyTedAuditRunStatus.COMPLETED);
|
||||||
|
run.setCompletedAt(OffsetDateTime.now());
|
||||||
|
run.setScannedPackages(scannedPackages);
|
||||||
|
run.setScannedLegacyDocuments(scannedLegacyDocuments);
|
||||||
|
run.setFindingCount(accumulator.totalFindings());
|
||||||
|
run.setInfoCount(accumulator.infoCount());
|
||||||
|
run.setWarningCount(accumulator.warningCount());
|
||||||
|
run.setErrorCount(accumulator.errorCount());
|
||||||
|
run.setSummaryText(buildSummary(scannedPackages, scannedLegacyDocuments, accumulator));
|
||||||
|
run.setFailureMessage(null);
|
||||||
|
run = runRepository.save(run);
|
||||||
|
|
||||||
|
log.info("Wave 1 / Milestone A legacy-only audit completed: runId={}, packages={}, documents={}, findings={}, warnings={}, errors={}",
|
||||||
|
run.getId(), scannedPackages, scannedLegacyDocuments, accumulator.totalFindings(),
|
||||||
|
accumulator.warningCount(), accumulator.errorCount());
|
||||||
|
return run;
|
||||||
|
} catch (RuntimeException ex) {
|
||||||
|
run.setStatus(LegacyTedAuditRunStatus.FAILED);
|
||||||
|
run.setCompletedAt(OffsetDateTime.now());
|
||||||
|
run.setScannedPackages(accumulator.scannedPackages());
|
||||||
|
run.setScannedLegacyDocuments(accumulator.scannedLegacyDocuments());
|
||||||
|
run.setFindingCount(accumulator.totalFindings());
|
||||||
|
run.setInfoCount(accumulator.infoCount());
|
||||||
|
run.setWarningCount(accumulator.warningCount());
|
||||||
|
run.setErrorCount(accumulator.errorCount());
|
||||||
|
run.setFailureMessage(ex.getMessage());
|
||||||
|
run.setSummaryText(buildSummary(accumulator.scannedPackages(), accumulator.scannedLegacyDocuments(), accumulator));
|
||||||
|
runRepository.save(run);
|
||||||
|
log.error("Wave 1 / Milestone A legacy-only audit failed: runId={}", run.getId(), ex);
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int auditPackages(LegacyTedAuditRun run, AuditAccumulator accumulator) {
|
||||||
|
List<TedDailyPackage> packages = tedDailyPackageRepository.findAll(Sort.by(Sort.Direction.ASC, "year", "serialNumber"));
|
||||||
|
if (packages.isEmpty()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Integer, List<TedDailyPackage>> packagesByYear = new TreeMap<>();
|
||||||
|
for (TedDailyPackage dailyPackage : packages) {
|
||||||
|
packagesByYear.computeIfAbsent(dailyPackage.getYear(), ignored -> new ArrayList<>()).add(dailyPackage);
|
||||||
|
}
|
||||||
|
|
||||||
|
int firstYear = packagesByYear.keySet().iterator().next();
|
||||||
|
int currentYear = Year.now().getValue();
|
||||||
|
|
||||||
|
for (int year = firstYear; year <= currentYear; year++) {
|
||||||
|
List<TedDailyPackage> yearPackages = packagesByYear.get(year);
|
||||||
|
if (yearPackages == null || yearPackages.isEmpty()) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
"year:" + year,
|
||||||
|
"No TED package rows exist for this year inside the audited interval",
|
||||||
|
"year=" + year + ", intervalStartYear=" + firstYear + ", intervalEndYear=" + currentYear);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
auditYearPackageSequence(run, accumulator, year, yearPackages);
|
||||||
|
|
||||||
|
for (TedDailyPackage dailyPackage : yearPackages) {
|
||||||
|
accumulator.incrementScannedPackages();
|
||||||
|
auditSinglePackage(run, accumulator, dailyPackage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return packages.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void auditYearPackageSequence(LegacyTedAuditRun run,
|
||||||
|
AuditAccumulator accumulator,
|
||||||
|
int year,
|
||||||
|
List<TedDailyPackage> yearPackages) {
|
||||||
|
yearPackages.sort((left, right) -> Integer.compare(safeInt(left.getSerialNumber()), safeInt(right.getSerialNumber())));
|
||||||
|
|
||||||
|
int firstSerial = safeInt(yearPackages.getFirst().getSerialNumber());
|
||||||
|
if (firstSerial > 1) {
|
||||||
|
recordMissingPackageRange(run, accumulator, year, 1, firstSerial - 1,
|
||||||
|
"TED package year starts after serial 1");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 1; i < yearPackages.size(); i++) {
|
||||||
|
int previousSerial = safeInt(yearPackages.get(i - 1).getSerialNumber());
|
||||||
|
int currentSerial = safeInt(yearPackages.get(i).getSerialNumber());
|
||||||
|
if (currentSerial > previousSerial + 1) {
|
||||||
|
recordMissingPackageRange(run, accumulator, year, previousSerial + 1, currentSerial - 1,
|
||||||
|
"TED package sequence gap detected");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void recordMissingPackageRange(LegacyTedAuditRun run,
|
||||||
|
AuditAccumulator accumulator,
|
||||||
|
int year,
|
||||||
|
int startSerial,
|
||||||
|
int endSerial,
|
||||||
|
String message) {
|
||||||
|
String startPackageId = formatPackageIdentifier(year, startSerial);
|
||||||
|
String endPackageId = formatPackageIdentifier(year, endSerial);
|
||||||
|
String referenceKey = startSerial == endSerial ? startPackageId : startPackageId + "-" + endPackageId;
|
||||||
|
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP,
|
||||||
|
startSerial == endSerial ? startPackageId : null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
referenceKey,
|
||||||
|
message,
|
||||||
|
"year=" + year + ", missingStartSerial=" + startSerial + ", missingEndSerial=" + endSerial);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void auditSinglePackage(LegacyTedAuditRun run,
|
||||||
|
AuditAccumulator accumulator,
|
||||||
|
TedDailyPackage dailyPackage) {
|
||||||
|
String packageIdentifier = dailyPackage.getPackageIdentifier();
|
||||||
|
int processedCount = safeInt(dailyPackage.getProcessedCount());
|
||||||
|
int failedCount = safeInt(dailyPackage.getFailedCount());
|
||||||
|
int accountedDocuments = processedCount + failedCount;
|
||||||
|
|
||||||
|
if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED
|
||||||
|
&& dailyPackage.getProcessedAt() == null) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_COMPLETED_WITHOUT_PROCESSED_AT,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package is marked COMPLETED but processedAt is null",
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED
|
||||||
|
&& dailyPackage.getXmlFileCount() == null) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_MISSING_XML_FILE_COUNT,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package is marked COMPLETED but xmlFileCount is null",
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.DOWNLOADED
|
||||||
|
|| dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.PROCESSING
|
||||||
|
|| dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED)
|
||||||
|
&& !StringUtils.hasText(dailyPackage.getFileHash())) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_MISSING_FILE_HASH,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package has no file hash recorded",
|
||||||
|
"downloadStatus=" + dailyPackage.getDownloadStatus());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.FAILED
|
||||||
|
&& !StringUtils.hasText(dailyPackage.getErrorMessage())) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_FAILED_WITHOUT_ERROR_MESSAGE,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package is marked FAILED but has no error message",
|
||||||
|
null);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dailyPackage.getXmlFileCount() != null) {
|
||||||
|
if (accountedDocuments > dailyPackage.getXmlFileCount()) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.ERROR,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_COMPLETED_COUNT_MISMATCH,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package accounting exceeds xmlFileCount",
|
||||||
|
"xmlFileCount=" + dailyPackage.getXmlFileCount()
|
||||||
|
+ ", processedCount=" + processedCount
|
||||||
|
+ ", failedCount=" + failedCount);
|
||||||
|
} else if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED
|
||||||
|
&& accountedDocuments < dailyPackage.getXmlFileCount()) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_COMPLETED_COUNT_MISMATCH,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package accounting is below xmlFileCount",
|
||||||
|
"xmlFileCount=" + dailyPackage.getXmlFileCount()
|
||||||
|
+ ", processedCount=" + processedCount
|
||||||
|
+ ", failedCount=" + failedCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isPackageIncompleteForReimport(dailyPackage, processedCount, failedCount, accountedDocuments)) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.FAILED
|
||||||
|
? LegacyTedAuditSeverity.ERROR
|
||||||
|
: LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_INCOMPLETE,
|
||||||
|
packageIdentifier,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
packageIdentifier,
|
||||||
|
"TED package is not fully imported and should be considered for re-import",
|
||||||
|
buildIncompletePackageDetails(dailyPackage, processedCount, failedCount, accountedDocuments));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isPackageIncompleteForReimport(TedDailyPackage dailyPackage,
|
||||||
|
int processedCount,
|
||||||
|
int failedCount,
|
||||||
|
int accountedDocuments) {
|
||||||
|
TedDailyPackage.DownloadStatus status = dailyPackage.getDownloadStatus();
|
||||||
|
if (status == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (status == TedDailyPackage.DownloadStatus.NOT_FOUND) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (status == TedDailyPackage.DownloadStatus.PENDING
|
||||||
|
|| status == TedDailyPackage.DownloadStatus.DOWNLOADING
|
||||||
|
|| status == TedDailyPackage.DownloadStatus.DOWNLOADED
|
||||||
|
|| status == TedDailyPackage.DownloadStatus.PROCESSING
|
||||||
|
|| status == TedDailyPackage.DownloadStatus.FAILED) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (status != TedDailyPackage.DownloadStatus.COMPLETED) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (dailyPackage.getXmlFileCount() == null) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (failedCount > 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return processedCount < dailyPackage.getXmlFileCount()
|
||||||
|
|| accountedDocuments != dailyPackage.getXmlFileCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildIncompletePackageDetails(TedDailyPackage dailyPackage,
|
||||||
|
int processedCount,
|
||||||
|
int failedCount,
|
||||||
|
int accountedDocuments) {
|
||||||
|
return "status=" + dailyPackage.getDownloadStatus()
|
||||||
|
+ ", xmlFileCount=" + dailyPackage.getXmlFileCount()
|
||||||
|
+ ", processedCount=" + processedCount
|
||||||
|
+ ", failedCount=" + failedCount
|
||||||
|
+ ", accountedDocuments=" + accountedDocuments;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void auditGlobalDuplicates(LegacyTedAuditRun run, AuditAccumulator accumulator) {
|
||||||
|
int limit = properties.getMaxDuplicateSamples();
|
||||||
|
|
||||||
|
jdbcTemplate.query(
|
||||||
|
"""
|
||||||
|
SELECT publication_id, COUNT(*) AS duplicate_count
|
||||||
|
FROM ted.procurement_document
|
||||||
|
WHERE publication_id IS NOT NULL AND publication_id <> ''
|
||||||
|
GROUP BY publication_id
|
||||||
|
HAVING COUNT(*) > 1
|
||||||
|
ORDER BY duplicate_count DESC, publication_id ASC
|
||||||
|
LIMIT ?
|
||||||
|
""",
|
||||||
|
ps -> ps.setInt(1, limit),
|
||||||
|
(rs, rowNum) -> {
|
||||||
|
String publicationId = rs.getString("publication_id");
|
||||||
|
long duplicateCount = rs.getLong("duplicate_count");
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.ERROR,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_PUBLICATION_ID_DUPLICATE,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
publicationId,
|
||||||
|
"Legacy TED publicationId appears multiple times",
|
||||||
|
"publicationId=" + publicationId + ", duplicateCount=" + duplicateCount);
|
||||||
|
return null;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
private int auditLegacyDocuments(LegacyTedAuditRun run,
|
||||||
|
AuditAccumulator accumulator,
|
||||||
|
Integer requestedLimit,
|
||||||
|
int pageSize) {
|
||||||
|
int processed = 0;
|
||||||
|
int pageNumber = 0;
|
||||||
|
|
||||||
|
while (requestedLimit == null || processed < requestedLimit) {
|
||||||
|
Page<ProcurementDocument> page = procurementDocumentRepository.findAll(
|
||||||
|
PageRequest.of(pageNumber, pageSize, Sort.by(Sort.Direction.ASC, "createdAt", "id")));
|
||||||
|
|
||||||
|
if (page.isEmpty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (ProcurementDocument legacyDocument : page.getContent()) {
|
||||||
|
auditSingleLegacyDocument(run, accumulator, legacyDocument);
|
||||||
|
accumulator.incrementScannedLegacyDocuments();
|
||||||
|
processed++;
|
||||||
|
if (requestedLimit != null && processed >= requestedLimit) {
|
||||||
|
return processed;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!page.hasNext()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pageNumber++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return processed;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void auditSingleLegacyDocument(LegacyTedAuditRun run,
|
||||||
|
AuditAccumulator accumulator,
|
||||||
|
ProcurementDocument legacyDocument) {
|
||||||
|
UUID legacyDocumentId = legacyDocument.getId();
|
||||||
|
String referenceKey = buildReferenceKey(legacyDocument);
|
||||||
|
String documentHash = legacyDocument.getDocumentHash();
|
||||||
|
|
||||||
|
if (!StringUtils.hasText(documentHash)) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.ERROR,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_HASH,
|
||||||
|
null,
|
||||||
|
legacyDocumentId,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
referenceKey,
|
||||||
|
"Legacy TED document has no documentHash",
|
||||||
|
null);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!StringUtils.hasText(legacyDocument.getXmlDocument())) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.ERROR,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_XML,
|
||||||
|
null,
|
||||||
|
legacyDocumentId,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
referenceKey,
|
||||||
|
"Legacy TED document has no xmlDocument payload",
|
||||||
|
"documentHash=" + documentHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!StringUtils.hasText(legacyDocument.getTextContent())) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_TEXT,
|
||||||
|
null,
|
||||||
|
legacyDocumentId,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
referenceKey,
|
||||||
|
"Legacy TED document has no normalized textContent",
|
||||||
|
"documentHash=" + documentHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!StringUtils.hasText(legacyDocument.getPublicationId())) {
|
||||||
|
recordFinding(run, accumulator,
|
||||||
|
LegacyTedAuditSeverity.WARNING,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_PUBLICATION_ID,
|
||||||
|
null,
|
||||||
|
legacyDocumentId,
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
referenceKey,
|
||||||
|
"Legacy TED document has no publicationId",
|
||||||
|
"documentHash=" + documentHash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void recordFinding(LegacyTedAuditRun run,
|
||||||
|
AuditAccumulator accumulator,
|
||||||
|
LegacyTedAuditSeverity severity,
|
||||||
|
LegacyTedAuditFindingType findingType,
|
||||||
|
String packageIdentifier,
|
||||||
|
UUID legacyProcurementDocumentId,
|
||||||
|
UUID genericDocumentId,
|
||||||
|
UUID tedProjectionId,
|
||||||
|
String referenceKey,
|
||||||
|
String message,
|
||||||
|
String detailsText) {
|
||||||
|
if (accumulator.totalFindings() >= properties.getMaxFindingsPerRun()) {
|
||||||
|
accumulator.markTruncated();
|
||||||
|
if (!accumulator.truncationRecorded()) {
|
||||||
|
LegacyTedAuditFinding truncatedFinding = LegacyTedAuditFinding.builder()
|
||||||
|
.run(run)
|
||||||
|
.severity(LegacyTedAuditSeverity.INFO)
|
||||||
|
.findingType(LegacyTedAuditFindingType.FINDINGS_TRUNCATED)
|
||||||
|
.referenceKey(referenceKey != null ? referenceKey : "max-findings-per-run")
|
||||||
|
.message("Legacy TED audit finding limit reached; additional findings were suppressed")
|
||||||
|
.detailsText("maxFindingsPerRun=" + properties.getMaxFindingsPerRun())
|
||||||
|
.build();
|
||||||
|
findingRepository.save(truncatedFinding);
|
||||||
|
accumulator.recordFinding(LegacyTedAuditSeverity.INFO, true);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LegacyTedAuditFinding finding = LegacyTedAuditFinding.builder()
|
||||||
|
.run(run)
|
||||||
|
.severity(severity)
|
||||||
|
.findingType(findingType)
|
||||||
|
.packageIdentifier(packageIdentifier)
|
||||||
|
.legacyProcurementDocumentId(legacyProcurementDocumentId)
|
||||||
|
.documentId(genericDocumentId)
|
||||||
|
.tedNoticeProjectionId(tedProjectionId)
|
||||||
|
.referenceKey(referenceKey)
|
||||||
|
.message(message)
|
||||||
|
.detailsText(detailsText)
|
||||||
|
.build();
|
||||||
|
findingRepository.save(finding);
|
||||||
|
accumulator.recordFinding(severity, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildReferenceKey(ProcurementDocument legacyDocument) {
|
||||||
|
if (StringUtils.hasText(legacyDocument.getPublicationId())) {
|
||||||
|
return legacyDocument.getPublicationId();
|
||||||
|
}
|
||||||
|
if (StringUtils.hasText(legacyDocument.getNoticeId())) {
|
||||||
|
return legacyDocument.getNoticeId();
|
||||||
|
}
|
||||||
|
if (StringUtils.hasText(legacyDocument.getSourceFilename())) {
|
||||||
|
return legacyDocument.getSourceFilename();
|
||||||
|
}
|
||||||
|
return String.valueOf(legacyDocument.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
private int safeInt(Integer value) {
|
||||||
|
return value != null ? value : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String formatPackageIdentifier(int year, int serialNumber) {
|
||||||
|
return "%04d%05d".formatted(year, serialNumber);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildSummary(int scannedPackages,
|
||||||
|
int scannedLegacyDocuments,
|
||||||
|
AuditAccumulator accumulator) {
|
||||||
|
return "packages=" + scannedPackages
|
||||||
|
+ ", legacyDocuments=" + scannedLegacyDocuments
|
||||||
|
+ ", findings=" + accumulator.totalFindings()
|
||||||
|
+ ", warnings=" + accumulator.warningCount()
|
||||||
|
+ ", errors=" + accumulator.errorCount()
|
||||||
|
+ (accumulator.truncated() ? ", truncated=true" : "");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final class AuditAccumulator {
|
||||||
|
private int scannedPackages;
|
||||||
|
private int scannedLegacyDocuments;
|
||||||
|
private int infoCount;
|
||||||
|
private int warningCount;
|
||||||
|
private int errorCount;
|
||||||
|
private boolean truncated;
|
||||||
|
private boolean truncationRecorded;
|
||||||
|
|
||||||
|
void incrementScannedPackages() {
|
||||||
|
scannedPackages++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void incrementScannedLegacyDocuments() {
|
||||||
|
scannedLegacyDocuments++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void recordFinding(LegacyTedAuditSeverity severity, boolean truncationFindingRecordedNow) {
|
||||||
|
switch (severity) {
|
||||||
|
case INFO -> infoCount++;
|
||||||
|
case WARNING -> warningCount++;
|
||||||
|
case ERROR -> errorCount++;
|
||||||
|
}
|
||||||
|
if (truncationFindingRecordedNow) {
|
||||||
|
truncationRecorded = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void markTruncated() {
|
||||||
|
truncated = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int totalFindings() {
|
||||||
|
return infoCount + warningCount + errorCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int infoCount() {
|
||||||
|
return infoCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int warningCount() {
|
||||||
|
return warningCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int errorCount() {
|
||||||
|
return errorCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scannedPackages() {
|
||||||
|
return scannedPackages;
|
||||||
|
}
|
||||||
|
|
||||||
|
int scannedLegacyDocuments() {
|
||||||
|
return scannedLegacyDocuments;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean truncated() {
|
||||||
|
return truncated;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean truncationRecorded() {
|
||||||
|
return truncationRecorded;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,33 @@
|
|||||||
|
package at.procon.dip.migration.audit.startup;
|
||||||
|
|
||||||
|
import at.procon.dip.migration.audit.config.LegacyTedAuditProperties;
|
||||||
|
import at.procon.dip.migration.audit.service.LegacyTedAuditService;
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.ApplicationArguments;
|
||||||
|
import org.springframework.boot.ApplicationRunner;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class LegacyTedAuditStartupRunner implements ApplicationRunner {
|
||||||
|
|
||||||
|
private final LegacyTedAuditProperties properties;
|
||||||
|
private final LegacyTedAuditService legacyTedAuditService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run(ApplicationArguments args) {
|
||||||
|
if (!properties.isEnabled() || !properties.isStartupRunEnabled()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int requestedLimit = properties.getStartupRunLimit();
|
||||||
|
log.info("Wave 1 / Milestone A startup audit enabled - scanning legacy TED data with limit {}",
|
||||||
|
requestedLimit > 0 ? requestedLimit : "unbounded");
|
||||||
|
legacyTedAuditService.executeAudit(requestedLimit);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,446 @@
|
|||||||
|
package at.procon.ted.repair;
|
||||||
|
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
|
import at.procon.ted.config.TedProcessorProperties;
|
||||||
|
import at.procon.ted.model.entity.TedDailyPackage;
|
||||||
|
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||||
|
import at.procon.ted.service.BatchDocumentProcessingService;
|
||||||
|
import at.procon.ted.service.TedPackageDownloadService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.data.domain.Sort;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Startup tool for repairing / re-importing incomplete legacy TED daily packages.
|
||||||
|
*
|
||||||
|
* Strategy:
|
||||||
|
* - Identify incomplete package rows from {@code ted.ted_daily_package}
|
||||||
|
* - Optionally include missing sequence numbers inside a configured package range
|
||||||
|
* - Reuse existing batch XML processing so already-imported XML documents are skipped by hash,
|
||||||
|
* while missing documents are inserted during the repair run
|
||||||
|
*/
|
||||||
|
@Service
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class TedPackageRepairService {
|
||||||
|
|
||||||
|
private static final Pattern PACKAGE_IDENTIFIER_PATTERN = Pattern.compile("\\d{9}");
|
||||||
|
private static final int PROCESSING_CHUNK_SIZE = 25;
|
||||||
|
|
||||||
|
private final TedProcessorProperties properties;
|
||||||
|
private final TedDailyPackageRepository packageRepository;
|
||||||
|
private final TedPackageDownloadService downloadService;
|
||||||
|
private final BatchDocumentProcessingService batchProcessingService;
|
||||||
|
|
||||||
|
public RepairSummary repairConfiguredPackages() {
|
||||||
|
TedProcessorProperties.RepairProperties repairProperties = properties.getRepair();
|
||||||
|
List<RepairCandidate> candidates = resolveCandidates(repairProperties);
|
||||||
|
|
||||||
|
if (candidates.isEmpty()) {
|
||||||
|
log.info("TED package repair found no matching incomplete packages");
|
||||||
|
return new RepairSummary(0, 0, 0, 0, List.of());
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("TED package repair selected {} package candidates (dryRun={})", candidates.size(), repairProperties.isDryRun());
|
||||||
|
candidates.forEach(candidate -> log.info("Repair candidate: {} [{}]", candidate.packageIdentifier(), candidate.reason()));
|
||||||
|
|
||||||
|
if (repairProperties.isDryRun()) {
|
||||||
|
return new RepairSummary(candidates.size(), 0, 0, 0,
|
||||||
|
candidates.stream().map(RepairCandidate::packageIdentifier).toList());
|
||||||
|
}
|
||||||
|
|
||||||
|
int succeeded = 0;
|
||||||
|
int failed = 0;
|
||||||
|
int notFound = 0;
|
||||||
|
List<String> processed = new ArrayList<>();
|
||||||
|
|
||||||
|
for (RepairCandidate candidate : candidates) {
|
||||||
|
try {
|
||||||
|
RepairExecutionResult result = repairCandidate(candidate, repairProperties);
|
||||||
|
processed.add(candidate.packageIdentifier());
|
||||||
|
switch (result.outcome()) {
|
||||||
|
case COMPLETED -> succeeded++;
|
||||||
|
case NOT_FOUND -> notFound++;
|
||||||
|
case FAILED -> failed++;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
failed++;
|
||||||
|
log.error("TED package repair failed for {}: {}", candidate.packageIdentifier(), e.getMessage(), e);
|
||||||
|
markExistingPackageFailure(candidate.existingPackage(), "Repair run failed: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("TED package repair finished: selected={}, succeeded={}, failed={}, notFound={}",
|
||||||
|
candidates.size(), succeeded, failed, notFound);
|
||||||
|
return new RepairSummary(candidates.size(), succeeded, failed, notFound, processed);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<RepairCandidate> resolveCandidates(TedProcessorProperties.RepairProperties repairProperties) {
|
||||||
|
List<TedDailyPackage> existingPackages = packageRepository.findAll(Sort.by(Sort.Direction.ASC, "year", "serialNumber"));
|
||||||
|
Map<String, TedDailyPackage> existingByIdentifier = existingPackages.stream()
|
||||||
|
.collect(Collectors.toMap(TedDailyPackage::getPackageIdentifier, pkg -> pkg, (left, right) -> left, LinkedHashMap::new));
|
||||||
|
|
||||||
|
if (!repairProperties.getPackageIdentifiers().isEmpty()) {
|
||||||
|
return resolveExplicitCandidates(repairProperties.getPackageIdentifiers(), existingByIdentifier, repairProperties.getMaxPackages());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (existingPackages.isEmpty()) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
List<RepairCandidate> candidates = new ArrayList<>();
|
||||||
|
Set<String> seen = new LinkedHashSet<>();
|
||||||
|
|
||||||
|
boolean inspectSequenceRange = repairProperties.isIncludeMissingSequenceGaps()
|
||||||
|
|| hasText(repairProperties.getFromPackageIdentifier())
|
||||||
|
|| hasText(repairProperties.getToPackageIdentifier());
|
||||||
|
|
||||||
|
if (!inspectSequenceRange) {
|
||||||
|
for (TedDailyPackage pkg : existingPackages) {
|
||||||
|
if (isIncomplete(pkg) && seen.add(pkg.getPackageIdentifier())) {
|
||||||
|
candidates.add(RepairCandidate.existing(pkg, repairReasonFor(pkg)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return limitCandidates(candidates, repairProperties.getMaxPackages());
|
||||||
|
}
|
||||||
|
|
||||||
|
PackageCoordinates first = parseIdentifier(
|
||||||
|
hasText(repairProperties.getFromPackageIdentifier())
|
||||||
|
? repairProperties.getFromPackageIdentifier()
|
||||||
|
: existingPackages.getFirst().getPackageIdentifier());
|
||||||
|
|
||||||
|
PackageCoordinates last = parseIdentifier(
|
||||||
|
hasText(repairProperties.getToPackageIdentifier())
|
||||||
|
? repairProperties.getToPackageIdentifier()
|
||||||
|
: existingPackages.getLast().getPackageIdentifier());
|
||||||
|
|
||||||
|
if (first.compareTo(last) > 0) {
|
||||||
|
throw new IllegalArgumentException("Repair package range is invalid: from > to");
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Integer, Integer> observedMaxByYear = existingPackages.stream()
|
||||||
|
.collect(Collectors.groupingBy(TedDailyPackage::getYear,
|
||||||
|
LinkedHashMap::new,
|
||||||
|
Collectors.collectingAndThen(
|
||||||
|
Collectors.maxBy(Comparator.comparingInt(TedDailyPackage::getSerialNumber)),
|
||||||
|
optional -> optional.map(TedDailyPackage::getSerialNumber).orElse(0))));
|
||||||
|
|
||||||
|
for (int year = first.year(); year <= last.year(); year++) {
|
||||||
|
int startSerial = year == first.year() ? first.serialNumber() : 1;
|
||||||
|
int defaultEndSerial = observedMaxByYear.getOrDefault(year, 0);
|
||||||
|
int endSerial = year == last.year() ? last.serialNumber() : defaultEndSerial;
|
||||||
|
|
||||||
|
if (endSerial < startSerial || endSerial <= 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int serial = startSerial; serial <= endSerial; serial++) {
|
||||||
|
String packageIdentifier = formatPackageIdentifier(year, serial);
|
||||||
|
TedDailyPackage existingPackage = existingByIdentifier.get(packageIdentifier);
|
||||||
|
if (existingPackage != null) {
|
||||||
|
if (isIncomplete(existingPackage) && seen.add(packageIdentifier)) {
|
||||||
|
candidates.add(RepairCandidate.existing(existingPackage, repairReasonFor(existingPackage)));
|
||||||
|
}
|
||||||
|
} else if (repairProperties.isIncludeMissingSequenceGaps() && seen.add(packageIdentifier)) {
|
||||||
|
candidates.add(RepairCandidate.missing(year, serial, packageIdentifier, "MISSING_SEQUENCE_GAP"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return limitCandidates(candidates, repairProperties.getMaxPackages());
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RepairCandidate> resolveExplicitCandidates(Collection<String> packageIdentifiers,
|
||||||
|
Map<String, TedDailyPackage> existingByIdentifier,
|
||||||
|
int maxPackages) {
|
||||||
|
List<RepairCandidate> candidates = new ArrayList<>();
|
||||||
|
Set<String> seen = new LinkedHashSet<>();
|
||||||
|
|
||||||
|
for (String rawIdentifier : packageIdentifiers) {
|
||||||
|
if (!hasText(rawIdentifier)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String normalized = rawIdentifier.trim();
|
||||||
|
if (!seen.add(normalized)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
PackageCoordinates coordinates = parseIdentifier(normalized);
|
||||||
|
TedDailyPackage existing = existingByIdentifier.get(normalized);
|
||||||
|
if (existing != null) {
|
||||||
|
candidates.add(RepairCandidate.existing(existing, repairReasonFor(existing)));
|
||||||
|
} else {
|
||||||
|
candidates.add(RepairCandidate.missing(coordinates.year(), coordinates.serialNumber(), normalized, "EXPLICIT_PACKAGE"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return limitCandidates(candidates, maxPackages);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<RepairCandidate> limitCandidates(List<RepairCandidate> candidates, int maxPackages) {
|
||||||
|
if (candidates.size() <= maxPackages) {
|
||||||
|
return candidates;
|
||||||
|
}
|
||||||
|
return new ArrayList<>(candidates.subList(0, maxPackages));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
RepairExecutionResult repairCandidate(RepairCandidate candidate, TedProcessorProperties.RepairProperties repairProperties) throws Exception {
|
||||||
|
TedDailyPackage packageEntity = candidate.existingPackage() != null
|
||||||
|
? candidate.existingPackage()
|
||||||
|
: createMissingPackageRecord(candidate);
|
||||||
|
|
||||||
|
String packageIdentifier = candidate.packageIdentifier();
|
||||||
|
boolean downloadedNow = false;
|
||||||
|
long startNanos = System.nanoTime();
|
||||||
|
|
||||||
|
Path archivePath = packageArchivePath(packageIdentifier);
|
||||||
|
if (repairProperties.isForceRedownload() || !Files.exists(archivePath)) {
|
||||||
|
if (!repairProperties.isRedownloadMissingArchives()) {
|
||||||
|
String message = "Package archive is missing locally and re-download is disabled";
|
||||||
|
markFailure(packageEntity, message);
|
||||||
|
return new RepairExecutionResult(RepairOutcome.FAILED, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
Path downloadedArchive = downloadService.downloadArchive(packageIdentifier);
|
||||||
|
if (downloadedArchive == null) {
|
||||||
|
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.NOT_FOUND);
|
||||||
|
packageEntity.setErrorMessage("Package not found during repair run");
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
return new RepairExecutionResult(RepairOutcome.NOT_FOUND, "HTTP 404");
|
||||||
|
}
|
||||||
|
archivePath = downloadedArchive;
|
||||||
|
downloadedNow = true;
|
||||||
|
packageEntity.setDownloadedAt(OffsetDateTime.now());
|
||||||
|
packageEntity.setDownloadUrl(downloadService.buildDownloadUrlForPackage(packageIdentifier));
|
||||||
|
}
|
||||||
|
|
||||||
|
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.PROCESSING);
|
||||||
|
packageEntity.setErrorMessage(null);
|
||||||
|
packageEntity.setProcessedCount(0);
|
||||||
|
packageEntity.setFailedCount(0);
|
||||||
|
packageEntity.setFileHash(downloadService.calculateArchiveHash(archivePath));
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
|
||||||
|
List<Path> xmlFiles = downloadService.extractArchive(archivePath, packageIdentifier);
|
||||||
|
packageEntity.setXmlFileCount(xmlFiles.size());
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
|
||||||
|
int totalProcessed = 0;
|
||||||
|
int totalFailed = 0;
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < xmlFiles.size(); i += PROCESSING_CHUNK_SIZE) {
|
||||||
|
int end = Math.min(i + PROCESSING_CHUNK_SIZE, xmlFiles.size());
|
||||||
|
List<Path> chunk = xmlFiles.subList(i, end);
|
||||||
|
BatchDocumentProcessingService.BatchProcessingResult result = batchProcessingService.processBatch(chunk);
|
||||||
|
totalProcessed += result.insertedCount() + result.duplicateCount();
|
||||||
|
totalFailed += result.errorCount();
|
||||||
|
|
||||||
|
packageEntity.setProcessedCount(totalProcessed);
|
||||||
|
packageEntity.setFailedCount(totalFailed);
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
cleanupExtractedXmlFiles(xmlFiles);
|
||||||
|
if (downloadedNow && properties.getDownload().isDeleteAfterExtraction()) {
|
||||||
|
deleteQuietly(archivePath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
packageEntity.setProcessedAt(OffsetDateTime.now());
|
||||||
|
packageEntity.setProcessingDurationMs((System.nanoTime() - startNanos) / 1_000_000L);
|
||||||
|
packageEntity.setProcessedCount(totalProcessed);
|
||||||
|
packageEntity.setFailedCount(totalFailed);
|
||||||
|
|
||||||
|
if (totalFailed == 0 && totalProcessed == xmlFiles.size()) {
|
||||||
|
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.COMPLETED);
|
||||||
|
packageEntity.setErrorMessage(null);
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
return new RepairExecutionResult(RepairOutcome.COMPLETED, "Package repaired successfully");
|
||||||
|
}
|
||||||
|
|
||||||
|
String failureMessage = String.format(Locale.ROOT,
|
||||||
|
"Repair incomplete: xmlFiles=%d, processed=%d, failed=%d",
|
||||||
|
xmlFiles.size(), totalProcessed, totalFailed);
|
||||||
|
markFailure(packageEntity, failureMessage);
|
||||||
|
return new RepairExecutionResult(RepairOutcome.FAILED, failureMessage);
|
||||||
|
}
|
||||||
|
|
||||||
|
private TedDailyPackage createMissingPackageRecord(RepairCandidate candidate) {
|
||||||
|
TedDailyPackage pkg = TedDailyPackage.builder()
|
||||||
|
.packageIdentifier(candidate.packageIdentifier())
|
||||||
|
.year(candidate.year())
|
||||||
|
.serialNumber(candidate.serialNumber())
|
||||||
|
.downloadUrl(downloadService.buildDownloadUrlForPackage(candidate.packageIdentifier()))
|
||||||
|
.downloadStatus(TedDailyPackage.DownloadStatus.PENDING)
|
||||||
|
.build();
|
||||||
|
return packageRepository.save(pkg);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void markFailure(TedDailyPackage packageEntity, String message) {
|
||||||
|
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.FAILED);
|
||||||
|
packageEntity.setErrorMessage(message);
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void markExistingPackageFailure(TedDailyPackage packageEntity, String message) {
|
||||||
|
if (packageEntity == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.FAILED);
|
||||||
|
packageEntity.setErrorMessage(message);
|
||||||
|
packageRepository.save(packageEntity);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Path packageArchivePath(String packageIdentifier) {
|
||||||
|
return Paths.get(properties.getDownload().getDownloadDirectory()).resolve(packageIdentifier + ".tar.gz");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cleanupExtractedXmlFiles(List<Path> xmlFiles) {
|
||||||
|
if (xmlFiles.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Path packageDirectory = xmlFiles.getFirst().getParent();
|
||||||
|
for (Path xmlFile : xmlFiles) {
|
||||||
|
deleteQuietly(xmlFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (packageDirectory != null) {
|
||||||
|
try (var stream = Files.list(packageDirectory)) {
|
||||||
|
if (stream.findAny().isEmpty()) {
|
||||||
|
deleteQuietly(packageDirectory);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.debug("Could not clean extracted package directory {}: {}", packageDirectory, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteQuietly(Path path) {
|
||||||
|
try {
|
||||||
|
Files.deleteIfExists(path);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.debug("Could not delete {}: {}", path, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isIncomplete(TedDailyPackage pkg) {
|
||||||
|
if (pkg == null || pkg.getDownloadStatus() == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pkg.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pkg.getDownloadStatus() != TedDailyPackage.DownloadStatus.COMPLETED) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer xmlFileCount = pkg.getXmlFileCount();
|
||||||
|
int processedCount = pkg.getProcessedCount() != null ? pkg.getProcessedCount() : 0;
|
||||||
|
int failedCount = pkg.getFailedCount() != null ? pkg.getFailedCount() : 0;
|
||||||
|
|
||||||
|
if (xmlFileCount == null || xmlFileCount <= 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (failedCount > 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return processedCount != xmlFileCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String repairReasonFor(TedDailyPackage pkg) {
|
||||||
|
if (pkg.getDownloadStatus() != TedDailyPackage.DownloadStatus.COMPLETED) {
|
||||||
|
return "STATUS_" + pkg.getDownloadStatus();
|
||||||
|
}
|
||||||
|
if (pkg.getXmlFileCount() == null || pkg.getXmlFileCount() <= 0) {
|
||||||
|
return "MISSING_XML_COUNT";
|
||||||
|
}
|
||||||
|
if (pkg.getFailedCount() != null && pkg.getFailedCount() > 0) {
|
||||||
|
return "FAILED_DOCUMENTS";
|
||||||
|
}
|
||||||
|
return "COUNT_MISMATCH";
|
||||||
|
}
|
||||||
|
|
||||||
|
private PackageCoordinates parseIdentifier(String packageIdentifier) {
|
||||||
|
String normalized = packageIdentifier != null ? packageIdentifier.trim() : "";
|
||||||
|
if (!PACKAGE_IDENTIFIER_PATTERN.matcher(normalized).matches()) {
|
||||||
|
throw new IllegalArgumentException("Invalid package identifier: " + packageIdentifier);
|
||||||
|
}
|
||||||
|
return new PackageCoordinates(
|
||||||
|
Integer.parseInt(normalized.substring(0, 4)),
|
||||||
|
Integer.parseInt(normalized.substring(4)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String formatPackageIdentifier(int year, int serialNumber) {
|
||||||
|
return String.format(Locale.ROOT, "%04d%05d", year, serialNumber);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasText(String value) {
|
||||||
|
return value != null && !value.isBlank();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Year and serial number of a package, ordered by year first and serial number second.
 */
record PackageCoordinates(int year, int serialNumber) implements Comparable<PackageCoordinates> {

    private static final Comparator<PackageCoordinates> CHRONOLOGICAL =
            Comparator.comparingInt(PackageCoordinates::year)
                    .thenComparingInt(PackageCoordinates::serialNumber);

    @Override
    public int compareTo(PackageCoordinates other) {
        return CHRONOLOGICAL.compare(this, other);
    }
}
|
||||||
|
|
||||||
|
public record RepairCandidate(int year,
|
||||||
|
int serialNumber,
|
||||||
|
String packageIdentifier,
|
||||||
|
TedDailyPackage existingPackage,
|
||||||
|
String reason) {
|
||||||
|
static RepairCandidate existing(TedDailyPackage pkg, String reason) {
|
||||||
|
return new RepairCandidate(pkg.getYear(), pkg.getSerialNumber(), pkg.getPackageIdentifier(), pkg, reason);
|
||||||
|
}
|
||||||
|
|
||||||
|
static RepairCandidate missing(int year, int serialNumber, String packageIdentifier, String reason) {
|
||||||
|
return new RepairCandidate(year, serialNumber, packageIdentifier, null, reason);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Result category of a single package repair.
 */
enum RepairOutcome {
    /** The package was repaired. */
    COMPLETED,
    /** The repair did not complete. */
    FAILED,
    /** The package archive could not be obtained. */
    NOT_FOUND;
}
|
||||||
|
|
||||||
|
record RepairExecutionResult(RepairOutcome outcome, String message) {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Totals of a repair run.
 *
 * @param selected                    number of selected candidates
 * @param succeeded                   number of completed repairs
 * @param failed                      number of failed repairs
 * @param notFound                    number of packages that were not found
 * @param processedPackageIdentifiers identifiers of the packages reported by the run
 */
public record RepairSummary(
        int selected,
        int succeeded,
        int failed,
        int notFound,
        List<String> processedPackageIdentifiers) {
}
|
||||||
|
}
|
||||||
@ -0,0 +1,42 @@
|
|||||||
|
package at.procon.ted.startup;
|
||||||
|
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
|
import at.procon.ted.config.TedProcessorProperties;
|
||||||
|
import at.procon.ted.repair.TedPackageRepairService;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.ApplicationArguments;
|
||||||
|
import org.springframework.boot.ApplicationRunner;
|
||||||
|
import org.springframework.core.annotation.Order;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optional startup runner that repairs / re-imports incomplete legacy TED packages.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
@Order(50)
|
||||||
|
public class TedPackageRepairStartupRunner implements ApplicationRunner {
|
||||||
|
|
||||||
|
private final TedProcessorProperties properties;
|
||||||
|
private final TedPackageRepairService repairService;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run(ApplicationArguments args) {
|
||||||
|
if (!properties.getRepair().isEnabled()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (properties.getDownload().isEnabled() && !properties.getRepair().isAllowWhileDownloadEnabled()) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"ted.repair.enabled=true requires ted.download.enabled=false " +
|
||||||
|
"or ted.repair.allow-while-download-enabled=true to avoid concurrent package processing");
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Starting legacy TED package repair tool...");
|
||||||
|
repairService.repairConfiguredPackages();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,57 @@
|
|||||||
|
-- Wave 1 / Milestone A: read-only legacy audit run/finding persistence.
|
||||||
|
-- Additive tables only; no legacy business data is modified by this migration.
|
||||||
|
|
||||||
|
-- One row per audit run: status, paging parameters, counters and timestamps.
CREATE TABLE IF NOT EXISTS DOC.doc_legacy_audit_run (
    id                       UUID        PRIMARY KEY,
    status                   VARCHAR(32) NOT NULL,
    requested_limit          INTEGER,
    page_size                INTEGER     NOT NULL,
    scanned_packages         INTEGER     NOT NULL DEFAULT 0,
    scanned_legacy_documents INTEGER     NOT NULL DEFAULT 0,
    finding_count            INTEGER     NOT NULL DEFAULT 0,
    info_count               INTEGER     NOT NULL DEFAULT 0,
    warning_count            INTEGER     NOT NULL DEFAULT 0,
    error_count              INTEGER     NOT NULL DEFAULT 0,
    started_at               TIMESTAMPTZ NOT NULL,
    completed_at             TIMESTAMPTZ,
    summary_text             TEXT,
    failure_message          TEXT,
    created_at               TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at               TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Lookup of runs by status.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_run_status
    ON DOC.doc_legacy_audit_run (status);

-- Newest-first listing of runs.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_run_started
    ON DOC.doc_legacy_audit_run (started_at DESC);
|
||||||
|
|
||||||
|
-- One row per finding; findings are removed together with their run (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS DOC.doc_legacy_audit_finding (
    id                             UUID         PRIMARY KEY,
    run_id                         UUID         NOT NULL REFERENCES DOC.doc_legacy_audit_run(id) ON DELETE CASCADE,
    severity                       VARCHAR(16)  NOT NULL,
    finding_type                   VARCHAR(64)  NOT NULL,
    package_identifier             VARCHAR(20),
    legacy_procurement_document_id UUID,
    document_id                    UUID,
    ted_notice_projection_id       UUID,
    reference_key                  VARCHAR(255),
    message                        TEXT         NOT NULL,
    details_text                   TEXT,
    created_at                     TIMESTAMPTZ  NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Findings of a run.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_run
    ON DOC.doc_legacy_audit_finding (run_id);

-- Filtering by finding type.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_type
    ON DOC.doc_legacy_audit_finding (finding_type);

-- Filtering by severity.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_severity
    ON DOC.doc_legacy_audit_finding (severity);

-- Findings for a legacy procurement document.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_legacy_doc
    ON DOC.doc_legacy_audit_finding (legacy_procurement_document_id);

-- Findings for a document id.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_document
    ON DOC.doc_legacy_audit_finding (document_id);
|
||||||
@ -0,0 +1,241 @@
|
|||||||
|
package at.procon.dip.migration.audit.service;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import at.procon.dip.migration.audit.config.LegacyTedAuditProperties;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditFinding;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditFindingType;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditRun;
|
||||||
|
import at.procon.dip.migration.audit.entity.LegacyTedAuditRunStatus;
|
||||||
|
import at.procon.dip.migration.audit.repository.LegacyTedAuditFindingRepository;
|
||||||
|
import at.procon.dip.migration.audit.repository.LegacyTedAuditRunRepository;
|
||||||
|
import at.procon.ted.model.entity.NoticeType;
|
||||||
|
import at.procon.ted.model.entity.ProcurementDocument;
|
||||||
|
import at.procon.ted.model.entity.TedDailyPackage;
|
||||||
|
import at.procon.ted.repository.ProcurementDocumentRepository;
|
||||||
|
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.time.Year;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.extension.ExtendWith;
|
||||||
|
import org.mockito.Mock;
|
||||||
|
import org.mockito.junit.jupiter.MockitoExtension;
|
||||||
|
import org.springframework.data.domain.Page;
|
||||||
|
import org.springframework.data.domain.PageImpl;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
|
||||||
|
@ExtendWith(MockitoExtension.class)
|
||||||
|
class LegacyTedAuditServiceTest {
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
private TedDailyPackageRepository tedDailyPackageRepository;
|
||||||
|
@Mock
|
||||||
|
private ProcurementDocumentRepository procurementDocumentRepository;
|
||||||
|
@Mock
|
||||||
|
private LegacyTedAuditRunRepository runRepository;
|
||||||
|
@Mock
|
||||||
|
private LegacyTedAuditFindingRepository findingRepository;
|
||||||
|
@Mock
|
||||||
|
private JdbcTemplate jdbcTemplate;
|
||||||
|
|
||||||
|
private LegacyTedAuditService service;
|
||||||
|
private List<LegacyTedAuditFinding> persistedFindings;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
void setUp() {
|
||||||
|
LegacyTedAuditProperties properties = new LegacyTedAuditProperties();
|
||||||
|
properties.setEnabled(true);
|
||||||
|
properties.setPageSize(50);
|
||||||
|
properties.setMaxFindingsPerRun(100);
|
||||||
|
properties.setMaxDuplicateSamples(10);
|
||||||
|
|
||||||
|
service = new LegacyTedAuditService(
|
||||||
|
properties,
|
||||||
|
tedDailyPackageRepository,
|
||||||
|
procurementDocumentRepository,
|
||||||
|
runRepository,
|
||||||
|
findingRepository,
|
||||||
|
jdbcTemplate
|
||||||
|
);
|
||||||
|
|
||||||
|
persistedFindings = new ArrayList<>();
|
||||||
|
|
||||||
|
when(runRepository.save(any(LegacyTedAuditRun.class))).thenAnswer(invocation -> {
|
||||||
|
LegacyTedAuditRun run = invocation.getArgument(0);
|
||||||
|
if (run.getId() == null) {
|
||||||
|
run.setId(UUID.randomUUID());
|
||||||
|
}
|
||||||
|
return run;
|
||||||
|
});
|
||||||
|
|
||||||
|
when(findingRepository.save(any(LegacyTedAuditFinding.class))).thenAnswer(invocation -> {
|
||||||
|
LegacyTedAuditFinding finding = invocation.getArgument(0);
|
||||||
|
if (finding.getId() == null) {
|
||||||
|
finding.setId(UUID.randomUUID());
|
||||||
|
}
|
||||||
|
persistedFindings.add(finding);
|
||||||
|
return finding;
|
||||||
|
});
|
||||||
|
|
||||||
|
when(procurementDocumentRepository.findAll(any(org.springframework.data.domain.Pageable.class)))
|
||||||
|
.thenReturn(new PageImpl<>(List.of()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void executeAudit_should_record_package_sequence_gaps_and_incomplete_packages() {
|
||||||
|
int currentYear = Year.now().getValue();
|
||||||
|
|
||||||
|
when(tedDailyPackageRepository.findAll(any(org.springframework.data.domain.Sort.class))).thenReturn(List.of(
|
||||||
|
TedDailyPackage.builder()
|
||||||
|
.packageIdentifier(formatPackageIdentifier(currentYear, 1))
|
||||||
|
.year(currentYear)
|
||||||
|
.serialNumber(1)
|
||||||
|
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||||
|
.xmlFileCount(10)
|
||||||
|
.processedCount(10)
|
||||||
|
.failedCount(0)
|
||||||
|
.fileHash("hash-1")
|
||||||
|
.processedAt(OffsetDateTime.now())
|
||||||
|
.build(),
|
||||||
|
TedDailyPackage.builder()
|
||||||
|
.packageIdentifier(formatPackageIdentifier(currentYear, 3))
|
||||||
|
.year(currentYear)
|
||||||
|
.serialNumber(3)
|
||||||
|
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||||
|
.xmlFileCount(10)
|
||||||
|
.processedCount(9)
|
||||||
|
.failedCount(1)
|
||||||
|
.fileHash("hash-3")
|
||||||
|
.processedAt(OffsetDateTime.now())
|
||||||
|
.build(),
|
||||||
|
TedDailyPackage.builder()
|
||||||
|
.packageIdentifier(formatPackageIdentifier(currentYear, 4))
|
||||||
|
.year(currentYear)
|
||||||
|
.serialNumber(4)
|
||||||
|
.downloadStatus(TedDailyPackage.DownloadStatus.FAILED)
|
||||||
|
.xmlFileCount(12)
|
||||||
|
.processedCount(0)
|
||||||
|
.failedCount(0)
|
||||||
|
.errorMessage("processing failed")
|
||||||
|
.build()
|
||||||
|
));
|
||||||
|
|
||||||
|
LegacyTedAuditRun run = service.executeAudit(0);
|
||||||
|
|
||||||
|
assertThat(run.getStatus()).isEqualTo(LegacyTedAuditRunStatus.COMPLETED);
|
||||||
|
assertThat(run.getScannedPackages()).isEqualTo(3);
|
||||||
|
assertThat(persistedFindings)
|
||||||
|
.extracting(LegacyTedAuditFinding::getFindingType)
|
||||||
|
.contains(LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP,
|
||||||
|
LegacyTedAuditFindingType.PACKAGE_INCOMPLETE);
|
||||||
|
|
||||||
|
assertThat(persistedFindings)
|
||||||
|
.filteredOn(f -> f.getFindingType() == LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP)
|
||||||
|
.extracting(LegacyTedAuditFinding::getReferenceKey)
|
||||||
|
.contains(formatPackageIdentifier(currentYear, 2));
|
||||||
|
|
||||||
|
assertThat(persistedFindings)
|
||||||
|
.filteredOn(f -> f.getFindingType() == LegacyTedAuditFindingType.PACKAGE_INCOMPLETE)
|
||||||
|
.extracting(LegacyTedAuditFinding::getPackageIdentifier)
|
||||||
|
.contains(formatPackageIdentifier(currentYear, 3), formatPackageIdentifier(currentYear, 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void executeAudit_should_record_missing_years_inside_audited_interval() {
|
||||||
|
int currentYear = Year.now().getValue();
|
||||||
|
|
||||||
|
when(tedDailyPackageRepository.findAll(any(org.springframework.data.domain.Sort.class))).thenReturn(List.of(
|
||||||
|
TedDailyPackage.builder()
|
||||||
|
.packageIdentifier(formatPackageIdentifier(currentYear - 2, 1))
|
||||||
|
.year(currentYear - 2)
|
||||||
|
.serialNumber(1)
|
||||||
|
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||||
|
.xmlFileCount(1)
|
||||||
|
.processedCount(1)
|
||||||
|
.failedCount(0)
|
||||||
|
.fileHash("hash-a")
|
||||||
|
.processedAt(OffsetDateTime.now())
|
||||||
|
.build(),
|
||||||
|
TedDailyPackage.builder()
|
||||||
|
.packageIdentifier(formatPackageIdentifier(currentYear, 1))
|
||||||
|
.year(currentYear)
|
||||||
|
.serialNumber(1)
|
||||||
|
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||||
|
.xmlFileCount(1)
|
||||||
|
.processedCount(1)
|
||||||
|
.failedCount(0)
|
||||||
|
.fileHash("hash-b")
|
||||||
|
.processedAt(OffsetDateTime.now())
|
||||||
|
.build()
|
||||||
|
));
|
||||||
|
|
||||||
|
LegacyTedAuditRun run = service.executeAudit(0);
|
||||||
|
|
||||||
|
assertThat(run.getStatus()).isEqualTo(LegacyTedAuditRunStatus.COMPLETED);
|
||||||
|
assertThat(persistedFindings)
|
||||||
|
.filteredOn(f -> f.getFindingType() == LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP)
|
||||||
|
.extracting(LegacyTedAuditFinding::getReferenceKey)
|
||||||
|
.contains("year:" + (currentYear - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void executeAudit_should_record_legacy_document_integrity_findings_only() {
|
||||||
|
ProcurementDocument missingXml = ProcurementDocument.builder()
|
||||||
|
.id(UUID.randomUUID())
|
||||||
|
.documentHash("hash-1")
|
||||||
|
.publicationId("2025/S 001-000001")
|
||||||
|
.noticeType(NoticeType.CONTRACT_NOTICE)
|
||||||
|
.xmlDocument(null)
|
||||||
|
.textContent("hello")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
ProcurementDocument missingTextAndPublicationId = ProcurementDocument.builder()
|
||||||
|
.id(UUID.randomUUID())
|
||||||
|
.documentHash("hash-2")
|
||||||
|
.publicationId(null)
|
||||||
|
.noticeType(NoticeType.CONTRACT_NOTICE)
|
||||||
|
.xmlDocument("<xml/>")
|
||||||
|
.textContent(null)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
when(tedDailyPackageRepository.findAll(any(org.springframework.data.domain.Sort.class))).thenReturn(List.of());
|
||||||
|
when(procurementDocumentRepository.findAll(any(org.springframework.data.domain.Pageable.class)))
|
||||||
|
.thenReturn(pageOf(missingXml, missingTextAndPublicationId));
|
||||||
|
|
||||||
|
LegacyTedAuditRun run = service.executeAudit(10);
|
||||||
|
|
||||||
|
assertThat(run.getStatus()).isEqualTo(LegacyTedAuditRunStatus.COMPLETED);
|
||||||
|
assertThat(run.getScannedLegacyDocuments()).isEqualTo(2);
|
||||||
|
assertThat(persistedFindings)
|
||||||
|
.extracting(LegacyTedAuditFinding::getFindingType)
|
||||||
|
.contains(
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_XML,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_TEXT,
|
||||||
|
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_PUBLICATION_ID
|
||||||
|
)
|
||||||
|
.doesNotContain(
|
||||||
|
LegacyTedAuditFindingType.DOC_DOCUMENT_MISSING,
|
||||||
|
LegacyTedAuditFindingType.DOC_SOURCE_MISSING,
|
||||||
|
LegacyTedAuditFindingType.DOC_ORIGINAL_CONTENT_MISSING,
|
||||||
|
LegacyTedAuditFindingType.DOC_PRIMARY_REPRESENTATION_MISSING,
|
||||||
|
LegacyTedAuditFindingType.TED_PROJECTION_MISSING,
|
||||||
|
LegacyTedAuditFindingType.TED_PROJECTION_MISSING_LEGACY_LINK,
|
||||||
|
LegacyTedAuditFindingType.TED_PROJECTION_DOCUMENT_MISMATCH,
|
||||||
|
LegacyTedAuditFindingType.DOC_DEDUP_HASH_DUPLICATE
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Page<ProcurementDocument> pageOf(ProcurementDocument... documents) {
|
||||||
|
return new PageImpl<>(List.of(documents));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String formatPackageIdentifier(int year, int serialNumber) {
|
||||||
|
return "%04d%05d".formatted(year, serialNumber);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,120 @@
|
|||||||
|
package at.procon.ted.repair;
|
||||||
|
|
||||||
|
import at.procon.ted.config.TedProcessorProperties;
|
||||||
|
import at.procon.ted.model.entity.TedDailyPackage;
|
||||||
|
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||||
|
import at.procon.ted.service.BatchDocumentProcessingService;
|
||||||
|
import at.procon.ted.service.TedPackageDownloadService;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
import org.springframework.data.domain.Sort;
|
||||||
|
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.eq;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
class TedPackageRepairServiceTest {
|
||||||
|
|
||||||
|
@TempDir
|
||||||
|
Path tempDir;
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void resolveCandidatesIncludesIncompletePackagesAndMissingSequenceGaps() {
|
||||||
|
TedProcessorProperties properties = new TedProcessorProperties();
|
||||||
|
properties.getRepair().setEnabled(true);
|
||||||
|
properties.getRepair().setFromPackageIdentifier("202600001");
|
||||||
|
properties.getRepair().setToPackageIdentifier("202600003");
|
||||||
|
properties.getRepair().setIncludeMissingSequenceGaps(true);
|
||||||
|
properties.getRepair().setMaxPackages(10);
|
||||||
|
|
||||||
|
TedDailyPackageRepository repository = mock(TedDailyPackageRepository.class);
|
||||||
|
TedDailyPackage pkg1 = newPackage("202600001", 2026, 1, TedDailyPackage.DownloadStatus.COMPLETED, 20, 20, 0);
|
||||||
|
TedDailyPackage pkg3 = newPackage("202600003", 2026, 3, TedDailyPackage.DownloadStatus.PROCESSING, 20, 5, 0);
|
||||||
|
when(repository.findAll(any(Sort.class))).thenReturn(List.of(pkg1, pkg3));
|
||||||
|
|
||||||
|
TedPackageRepairService service = new TedPackageRepairService(
|
||||||
|
properties,
|
||||||
|
repository,
|
||||||
|
mock(TedPackageDownloadService.class),
|
||||||
|
mock(BatchDocumentProcessingService.class));
|
||||||
|
|
||||||
|
List<TedPackageRepairService.RepairCandidate> candidates = service.resolveCandidates(properties.getRepair());
|
||||||
|
|
||||||
|
assertThat(candidates).extracting(TedPackageRepairService.RepairCandidate::packageIdentifier)
|
||||||
|
.containsExactly("202600002", "202600003");
|
||||||
|
assertThat(candidates).extracting(TedPackageRepairService.RepairCandidate::reason)
|
||||||
|
.containsExactly("MISSING_SEQUENCE_GAP", "STATUS_PROCESSING");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void repairCandidateProcessesExistingArchiveAndMarksPackageCompleted() throws Exception {
|
||||||
|
TedProcessorProperties properties = new TedProcessorProperties();
|
||||||
|
properties.getRepair().setEnabled(true);
|
||||||
|
properties.getRepair().setRedownloadMissingArchives(false);
|
||||||
|
properties.getDownload().setDownloadDirectory(tempDir.toString());
|
||||||
|
properties.getDownload().setDeleteAfterExtraction(false);
|
||||||
|
|
||||||
|
Path archive = tempDir.resolve("202600003.tar.gz");
|
||||||
|
Files.writeString(archive, "dummy");
|
||||||
|
|
||||||
|
TedDailyPackageRepository repository = mock(TedDailyPackageRepository.class);
|
||||||
|
TedDailyPackage pkg = newPackage("202600003", 2026, 3, TedDailyPackage.DownloadStatus.PROCESSING, 3, 0, 0);
|
||||||
|
when(repository.save(any(TedDailyPackage.class))).thenAnswer(invocation -> invocation.getArgument(0));
|
||||||
|
when(repository.findByPackageIdentifier("202600003")).thenReturn(Optional.of(pkg));
|
||||||
|
|
||||||
|
TedPackageDownloadService downloadService = mock(TedPackageDownloadService.class);
|
||||||
|
Path extractedDir = Files.createDirectory(tempDir.resolve("extracted"));
|
||||||
|
Path xml1 = Files.writeString(extractedDir.resolve("a.xml"), "<a/>");
|
||||||
|
Path xml2 = Files.writeString(extractedDir.resolve("b.xml"), "<b/>");
|
||||||
|
Path xml3 = Files.writeString(extractedDir.resolve("c.xml"), "<c/>");
|
||||||
|
when(downloadService.calculateArchiveHash(eq(archive))).thenReturn("hash-1");
|
||||||
|
when(downloadService.extractArchive(eq(archive), eq("202600003"))).thenReturn(List.of(xml1, xml2, xml3));
|
||||||
|
|
||||||
|
BatchDocumentProcessingService batchService = mock(BatchDocumentProcessingService.class);
|
||||||
|
when(batchService.processBatch(any())).thenReturn(new BatchDocumentProcessingService.BatchProcessingResult(
|
||||||
|
1, 2, 0, 5L, List.of(UUID.randomUUID()), List.of()));
|
||||||
|
|
||||||
|
TedPackageRepairService service = new TedPackageRepairService(properties, repository, downloadService, batchService);
|
||||||
|
TedPackageRepairService.RepairCandidate candidate = TedPackageRepairService.RepairCandidate.existing(pkg, "STATUS_PROCESSING");
|
||||||
|
|
||||||
|
var result = service.repairCandidate(candidate, properties.getRepair());
|
||||||
|
|
||||||
|
assertThat(result.outcome()).isEqualTo(TedPackageRepairService.RepairOutcome.COMPLETED);
|
||||||
|
assertThat(pkg.getDownloadStatus()).isEqualTo(TedDailyPackage.DownloadStatus.COMPLETED);
|
||||||
|
assertThat(pkg.getProcessedCount()).isEqualTo(3);
|
||||||
|
assertThat(pkg.getFailedCount()).isZero();
|
||||||
|
assertThat(pkg.getFileHash()).isEqualTo("hash-1");
|
||||||
|
assertThat(pkg.getProcessedAt()).isNotNull();
|
||||||
|
}
|
||||||
|
|
||||||
|
private TedDailyPackage newPackage(String packageIdentifier,
|
||||||
|
int year,
|
||||||
|
int serial,
|
||||||
|
TedDailyPackage.DownloadStatus status,
|
||||||
|
Integer xmlCount,
|
||||||
|
Integer processed,
|
||||||
|
Integer failed) {
|
||||||
|
TedDailyPackage pkg = new TedDailyPackage();
|
||||||
|
pkg.setId(UUID.randomUUID());
|
||||||
|
pkg.setPackageIdentifier(packageIdentifier);
|
||||||
|
pkg.setYear(year);
|
||||||
|
pkg.setSerialNumber(serial);
|
||||||
|
pkg.setDownloadStatus(status);
|
||||||
|
pkg.setXmlFileCount(xmlCount);
|
||||||
|
pkg.setProcessedCount(processed);
|
||||||
|
pkg.setFailedCount(failed);
|
||||||
|
pkg.setDownloadUrl("https://ted.europa.eu/packages/daily/" + packageIdentifier);
|
||||||
|
pkg.setCreatedAt(OffsetDateTime.now());
|
||||||
|
pkg.setUpdatedAt(OffsetDateTime.now());
|
||||||
|
return pkg;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue