ted legacy documents: audition and repair
parent
6ae39b4ea5
commit
00ad3aad38
@ -0,0 +1,47 @@
|
||||
package at.procon.dip.migration.audit.config;
|
||||
|
||||
import jakarta.validation.constraints.Min;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
@ConfigurationProperties(prefix = "dip.migration.legacy-audit")
|
||||
@Data
|
||||
public class LegacyTedAuditProperties {
|
||||
|
||||
/**
|
||||
* Enables the Wave 1 / Milestone A legacy TED audit subsystem.
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
/**
|
||||
* Automatically runs the read-only audit on application startup.
|
||||
*/
|
||||
private boolean startupRunEnabled = false;
|
||||
|
||||
/**
|
||||
* Maximum number of legacy TED documents to scan during startup.
|
||||
* 0 means no limit.
|
||||
*/
|
||||
@Min(0)
|
||||
private int startupRunLimit = 500;
|
||||
|
||||
/**
|
||||
* Batch size for legacy TED document paging.
|
||||
*/
|
||||
@Min(1)
|
||||
private int pageSize = 100;
|
||||
|
||||
/**
|
||||
* Hard cap for persisted findings in a single run to avoid runaway audit volume.
|
||||
*/
|
||||
@Min(1)
|
||||
private int maxFindingsPerRun = 10000;
|
||||
|
||||
/**
|
||||
* Maximum number of duplicate/grouped samples recorded for global aggregate checks.
|
||||
*/
|
||||
@Min(1)
|
||||
private int maxDuplicateSamples = 100;
|
||||
}
|
||||
@ -0,0 +1,87 @@
|
||||
package at.procon.dip.migration.audit.entity;
|
||||
|
||||
import at.procon.dip.architecture.SchemaNames;
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.EnumType;
|
||||
import jakarta.persistence.Enumerated;
|
||||
import jakarta.persistence.FetchType;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.GenerationType;
|
||||
import jakarta.persistence.Id;
|
||||
import jakarta.persistence.Index;
|
||||
import jakarta.persistence.JoinColumn;
|
||||
import jakarta.persistence.ManyToOne;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.Table;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.UUID;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
@Entity
|
||||
@Table(schema = SchemaNames.DOC, name = "doc_legacy_audit_finding", indexes = {
|
||||
@Index(name = "idx_doc_legacy_audit_find_run", columnList = "run_id"),
|
||||
@Index(name = "idx_doc_legacy_audit_find_type", columnList = "finding_type"),
|
||||
@Index(name = "idx_doc_legacy_audit_find_severity", columnList = "severity"),
|
||||
@Index(name = "idx_doc_legacy_audit_find_legacy_doc", columnList = "legacy_procurement_document_id"),
|
||||
@Index(name = "idx_doc_legacy_audit_find_document", columnList = "document_id")
|
||||
})
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
public class LegacyTedAuditFinding {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.UUID)
|
||||
private UUID id;
|
||||
|
||||
@ManyToOne(fetch = FetchType.LAZY, optional = false)
|
||||
@JoinColumn(name = "run_id", nullable = false)
|
||||
private LegacyTedAuditRun run;
|
||||
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "severity", nullable = false, length = 16)
|
||||
private LegacyTedAuditSeverity severity;
|
||||
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "finding_type", nullable = false, length = 64)
|
||||
private LegacyTedAuditFindingType findingType;
|
||||
|
||||
@Column(name = "package_identifier", length = 20)
|
||||
private String packageIdentifier;
|
||||
|
||||
@Column(name = "legacy_procurement_document_id")
|
||||
private UUID legacyProcurementDocumentId;
|
||||
|
||||
@Column(name = "document_id")
|
||||
private UUID documentId;
|
||||
|
||||
@Column(name = "ted_notice_projection_id")
|
||||
private UUID tedNoticeProjectionId;
|
||||
|
||||
@Column(name = "reference_key", length = 255)
|
||||
private String referenceKey;
|
||||
|
||||
@Column(name = "message", nullable = false, columnDefinition = "TEXT")
|
||||
private String message;
|
||||
|
||||
@Column(name = "details_text", columnDefinition = "TEXT")
|
||||
private String detailsText;
|
||||
|
||||
@Builder.Default
|
||||
@Column(name = "created_at", nullable = false, updatable = false)
|
||||
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||
|
||||
@PrePersist
|
||||
protected void onCreate() {
|
||||
if (createdAt == null) {
|
||||
createdAt = OffsetDateTime.now();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,28 @@
|
||||
package at.procon.dip.migration.audit.entity;
|
||||
|
||||
/**
 * Categories of findings that a legacy TED audit run can record.
 *
 * <p>Values are persisted by name, so constants must not be renamed without a data migration.
 */
public enum LegacyTedAuditFindingType {

    // --- TED daily package checks ---

    /** A year without package rows, or a gap in the serial numbers of a year. */
    PACKAGE_SEQUENCE_GAP,
    /** The package is not fully imported and should be considered for re-import. */
    PACKAGE_INCOMPLETE,
    /** The package is marked COMPLETED but has no processedAt timestamp. */
    PACKAGE_COMPLETED_WITHOUT_PROCESSED_AT,
    /** Processed plus failed documents do not match the package's xmlFileCount. */
    PACKAGE_COMPLETED_COUNT_MISMATCH,
    /** The package is marked COMPLETED but has no xmlFileCount. */
    PACKAGE_MISSING_XML_FILE_COUNT,
    /** A downloaded, processing or completed package has no file hash. */
    PACKAGE_MISSING_FILE_HASH,
    /** The package is marked FAILED but carries no error message. */
    PACKAGE_FAILED_WITHOUT_ERROR_MESSAGE,

    // --- Global aggregate checks ---

    /** The same publicationId occurs on more than one legacy document. */
    LEGACY_PUBLICATION_ID_DUPLICATE,
    /** NOTE(review): not emitted by the audit service code reviewed here; presumably a duplicate dedup hash - confirm. */
    DOC_DEDUP_HASH_DUPLICATE,

    // --- Legacy document checks ---

    /** The legacy document has no documentHash. */
    LEGACY_DOCUMENT_MISSING_HASH,
    /** The legacy document has no xmlDocument payload. */
    LEGACY_DOCUMENT_MISSING_XML,
    /** The legacy document has no normalized textContent. */
    LEGACY_DOCUMENT_MISSING_TEXT,
    /** The legacy document has no publicationId. */
    LEGACY_DOCUMENT_MISSING_PUBLICATION_ID,

    // --- DOC / projection checks ---
    // NOTE(review): the constants in this group are not emitted by the audit service code
    // reviewed here; their exact semantics should be confirmed against their producers.

    DOC_DOCUMENT_MISSING,
    DOC_DOCUMENT_DUPLICATE,
    DOC_SOURCE_MISSING,
    DOC_ORIGINAL_CONTENT_MISSING,
    DOC_ORIGINAL_CONTENT_DUPLICATE,
    DOC_PRIMARY_REPRESENTATION_MISSING,
    DOC_PRIMARY_REPRESENTATION_DUPLICATE,
    TED_PROJECTION_MISSING,
    TED_PROJECTION_MISSING_LEGACY_LINK,
    TED_PROJECTION_DOCUMENT_MISMATCH,

    // --- Bookkeeping ---

    /** Marker recorded once when the per-run finding cap is reached and further findings are suppressed. */
    FINDINGS_TRUNCATED
}
|
||||
@ -0,0 +1,110 @@
|
||||
package at.procon.dip.migration.audit.entity;
|
||||
|
||||
import at.procon.dip.architecture.SchemaNames;
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.EnumType;
|
||||
import jakarta.persistence.Enumerated;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.GenerationType;
|
||||
import jakarta.persistence.Id;
|
||||
import jakarta.persistence.Index;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.PreUpdate;
|
||||
import jakarta.persistence.Table;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.UUID;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
|
||||
@Entity
|
||||
@Table(schema = SchemaNames.DOC, name = "doc_legacy_audit_run", indexes = {
|
||||
@Index(name = "idx_doc_legacy_audit_run_status", columnList = "status"),
|
||||
@Index(name = "idx_doc_legacy_audit_run_started", columnList = "started_at")
|
||||
})
|
||||
@Getter
|
||||
@Setter
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
public class LegacyTedAuditRun {
|
||||
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.UUID)
|
||||
private UUID id;
|
||||
|
||||
@Enumerated(EnumType.STRING)
|
||||
@Column(name = "status", nullable = false, length = 32)
|
||||
private LegacyTedAuditRunStatus status;
|
||||
|
||||
@Column(name = "requested_limit")
|
||||
private Integer requestedLimit;
|
||||
|
||||
@Column(name = "page_size", nullable = false)
|
||||
private Integer pageSize;
|
||||
|
||||
@Column(name = "scanned_packages", nullable = false)
|
||||
@Builder.Default
|
||||
private Integer scannedPackages = 0;
|
||||
|
||||
@Column(name = "scanned_legacy_documents", nullable = false)
|
||||
@Builder.Default
|
||||
private Integer scannedLegacyDocuments = 0;
|
||||
|
||||
@Column(name = "finding_count", nullable = false)
|
||||
@Builder.Default
|
||||
private Integer findingCount = 0;
|
||||
|
||||
@Column(name = "info_count", nullable = false)
|
||||
@Builder.Default
|
||||
private Integer infoCount = 0;
|
||||
|
||||
@Column(name = "warning_count", nullable = false)
|
||||
@Builder.Default
|
||||
private Integer warningCount = 0;
|
||||
|
||||
@Column(name = "error_count", nullable = false)
|
||||
@Builder.Default
|
||||
private Integer errorCount = 0;
|
||||
|
||||
@Column(name = "started_at", nullable = false)
|
||||
private OffsetDateTime startedAt;
|
||||
|
||||
@Column(name = "completed_at")
|
||||
private OffsetDateTime completedAt;
|
||||
|
||||
@Column(name = "summary_text", columnDefinition = "TEXT")
|
||||
private String summaryText;
|
||||
|
||||
@Column(name = "failure_message", columnDefinition = "TEXT")
|
||||
private String failureMessage;
|
||||
|
||||
@Builder.Default
|
||||
@Column(name = "created_at", nullable = false, updatable = false)
|
||||
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||
|
||||
@Builder.Default
|
||||
@Column(name = "updated_at", nullable = false)
|
||||
private OffsetDateTime updatedAt = OffsetDateTime.now();
|
||||
|
||||
@PrePersist
|
||||
protected void onCreate() {
|
||||
if (startedAt == null) {
|
||||
startedAt = OffsetDateTime.now();
|
||||
}
|
||||
if (createdAt == null) {
|
||||
createdAt = OffsetDateTime.now();
|
||||
}
|
||||
if (updatedAt == null) {
|
||||
updatedAt = OffsetDateTime.now();
|
||||
}
|
||||
}
|
||||
|
||||
@PreUpdate
|
||||
protected void onUpdate() {
|
||||
updatedAt = OffsetDateTime.now();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
package at.procon.dip.migration.audit.entity;
|
||||
|
||||
/**
 * Lifecycle states of a legacy TED audit run. Persisted by name.
 */
public enum LegacyTedAuditRunStatus {

    /** The run has been created and is still executing. */
    RUNNING,

    /** The run finished without an exception. */
    COMPLETED,

    /** The run was aborted by an exception. */
    FAILED
}
|
||||
@ -0,0 +1,7 @@
|
||||
package at.procon.dip.migration.audit.entity;
|
||||
|
||||
/**
 * Severity levels attached to legacy TED audit findings. Persisted by name.
 */
public enum LegacyTedAuditSeverity {

    /** Informational entry, e.g. the marker written when findings are truncated. */
    INFO,

    /** A suspicious condition that should be reviewed. */
    WARNING,

    /** An inconsistency that is treated as an error. */
    ERROR
}
|
||||
@ -0,0 +1,8 @@
|
||||
package at.procon.dip.migration.audit.repository;
|
||||
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditFinding;
|
||||
import java.util.UUID;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
|
||||
public interface LegacyTedAuditFindingRepository extends JpaRepository<LegacyTedAuditFinding, UUID> {
|
||||
}
|
||||
@ -0,0 +1,8 @@
|
||||
package at.procon.dip.migration.audit.repository;
|
||||
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditRun;
|
||||
import java.util.UUID;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
|
||||
public interface LegacyTedAuditRunRepository extends JpaRepository<LegacyTedAuditRun, UUID> {
|
||||
}
|
||||
@ -0,0 +1,610 @@
|
||||
package at.procon.dip.migration.audit.service;
|
||||
|
||||
import at.procon.dip.migration.audit.config.LegacyTedAuditProperties;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditFinding;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditFindingType;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditRun;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditRunStatus;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditSeverity;
|
||||
import at.procon.dip.migration.audit.repository.LegacyTedAuditFindingRepository;
|
||||
import at.procon.dip.migration.audit.repository.LegacyTedAuditRunRepository;
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import at.procon.ted.model.entity.ProcurementDocument;
|
||||
import at.procon.ted.model.entity.TedDailyPackage;
|
||||
import at.procon.ted.repository.ProcurementDocumentRepository;
|
||||
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.Year;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.UUID;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.PageRequest;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
@Service
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class LegacyTedAuditService {
|
||||
|
||||
private final LegacyTedAuditProperties properties;
|
||||
private final TedDailyPackageRepository tedDailyPackageRepository;
|
||||
private final ProcurementDocumentRepository procurementDocumentRepository;
|
||||
private final LegacyTedAuditRunRepository runRepository;
|
||||
private final LegacyTedAuditFindingRepository findingRepository;
|
||||
private final JdbcTemplate jdbcTemplate;
|
||||
|
||||
public LegacyTedAuditRun executeAudit() {
|
||||
return executeAudit(properties.getStartupRunLimit());
|
||||
}
|
||||
|
||||
public LegacyTedAuditRun executeAudit(int requestedLimit) {
|
||||
if (!properties.isEnabled()) {
|
||||
throw new IllegalStateException("Legacy TED audit is disabled by configuration");
|
||||
}
|
||||
|
||||
Integer effectiveLimit = requestedLimit > 0 ? requestedLimit : null;
|
||||
int pageSize = properties.getPageSize();
|
||||
AuditAccumulator accumulator = new AuditAccumulator();
|
||||
|
||||
LegacyTedAuditRun run = LegacyTedAuditRun.builder()
|
||||
.status(LegacyTedAuditRunStatus.RUNNING)
|
||||
.requestedLimit(effectiveLimit)
|
||||
.pageSize(pageSize)
|
||||
.startedAt(OffsetDateTime.now())
|
||||
.build();
|
||||
run = runRepository.save(run);
|
||||
|
||||
try {
|
||||
int scannedPackages = auditPackages(run, accumulator);
|
||||
auditGlobalDuplicates(run, accumulator);
|
||||
int scannedLegacyDocuments = 0;//auditLegacyDocuments(run, accumulator, effectiveLimit, pageSize);
|
||||
|
||||
run.setStatus(LegacyTedAuditRunStatus.COMPLETED);
|
||||
run.setCompletedAt(OffsetDateTime.now());
|
||||
run.setScannedPackages(scannedPackages);
|
||||
run.setScannedLegacyDocuments(scannedLegacyDocuments);
|
||||
run.setFindingCount(accumulator.totalFindings());
|
||||
run.setInfoCount(accumulator.infoCount());
|
||||
run.setWarningCount(accumulator.warningCount());
|
||||
run.setErrorCount(accumulator.errorCount());
|
||||
run.setSummaryText(buildSummary(scannedPackages, scannedLegacyDocuments, accumulator));
|
||||
run.setFailureMessage(null);
|
||||
run = runRepository.save(run);
|
||||
|
||||
log.info("Wave 1 / Milestone A legacy-only audit completed: runId={}, packages={}, documents={}, findings={}, warnings={}, errors={}",
|
||||
run.getId(), scannedPackages, scannedLegacyDocuments, accumulator.totalFindings(),
|
||||
accumulator.warningCount(), accumulator.errorCount());
|
||||
return run;
|
||||
} catch (RuntimeException ex) {
|
||||
run.setStatus(LegacyTedAuditRunStatus.FAILED);
|
||||
run.setCompletedAt(OffsetDateTime.now());
|
||||
run.setScannedPackages(accumulator.scannedPackages());
|
||||
run.setScannedLegacyDocuments(accumulator.scannedLegacyDocuments());
|
||||
run.setFindingCount(accumulator.totalFindings());
|
||||
run.setInfoCount(accumulator.infoCount());
|
||||
run.setWarningCount(accumulator.warningCount());
|
||||
run.setErrorCount(accumulator.errorCount());
|
||||
run.setFailureMessage(ex.getMessage());
|
||||
run.setSummaryText(buildSummary(accumulator.scannedPackages(), accumulator.scannedLegacyDocuments(), accumulator));
|
||||
runRepository.save(run);
|
||||
log.error("Wave 1 / Milestone A legacy-only audit failed: runId={}", run.getId(), ex);
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
|
||||
private int auditPackages(LegacyTedAuditRun run, AuditAccumulator accumulator) {
|
||||
List<TedDailyPackage> packages = tedDailyPackageRepository.findAll(Sort.by(Sort.Direction.ASC, "year", "serialNumber"));
|
||||
if (packages.isEmpty()) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
Map<Integer, List<TedDailyPackage>> packagesByYear = new TreeMap<>();
|
||||
for (TedDailyPackage dailyPackage : packages) {
|
||||
packagesByYear.computeIfAbsent(dailyPackage.getYear(), ignored -> new ArrayList<>()).add(dailyPackage);
|
||||
}
|
||||
|
||||
int firstYear = packagesByYear.keySet().iterator().next();
|
||||
int currentYear = Year.now().getValue();
|
||||
|
||||
for (int year = firstYear; year <= currentYear; year++) {
|
||||
List<TedDailyPackage> yearPackages = packagesByYear.get(year);
|
||||
if (yearPackages == null || yearPackages.isEmpty()) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
"year:" + year,
|
||||
"No TED package rows exist for this year inside the audited interval",
|
||||
"year=" + year + ", intervalStartYear=" + firstYear + ", intervalEndYear=" + currentYear);
|
||||
continue;
|
||||
}
|
||||
|
||||
auditYearPackageSequence(run, accumulator, year, yearPackages);
|
||||
|
||||
for (TedDailyPackage dailyPackage : yearPackages) {
|
||||
accumulator.incrementScannedPackages();
|
||||
auditSinglePackage(run, accumulator, dailyPackage);
|
||||
}
|
||||
}
|
||||
|
||||
return packages.size();
|
||||
}
|
||||
|
||||
private void auditYearPackageSequence(LegacyTedAuditRun run,
|
||||
AuditAccumulator accumulator,
|
||||
int year,
|
||||
List<TedDailyPackage> yearPackages) {
|
||||
yearPackages.sort((left, right) -> Integer.compare(safeInt(left.getSerialNumber()), safeInt(right.getSerialNumber())));
|
||||
|
||||
int firstSerial = safeInt(yearPackages.getFirst().getSerialNumber());
|
||||
if (firstSerial > 1) {
|
||||
recordMissingPackageRange(run, accumulator, year, 1, firstSerial - 1,
|
||||
"TED package year starts after serial 1");
|
||||
}
|
||||
|
||||
for (int i = 1; i < yearPackages.size(); i++) {
|
||||
int previousSerial = safeInt(yearPackages.get(i - 1).getSerialNumber());
|
||||
int currentSerial = safeInt(yearPackages.get(i).getSerialNumber());
|
||||
if (currentSerial > previousSerial + 1) {
|
||||
recordMissingPackageRange(run, accumulator, year, previousSerial + 1, currentSerial - 1,
|
||||
"TED package sequence gap detected");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void recordMissingPackageRange(LegacyTedAuditRun run,
|
||||
AuditAccumulator accumulator,
|
||||
int year,
|
||||
int startSerial,
|
||||
int endSerial,
|
||||
String message) {
|
||||
String startPackageId = formatPackageIdentifier(year, startSerial);
|
||||
String endPackageId = formatPackageIdentifier(year, endSerial);
|
||||
String referenceKey = startSerial == endSerial ? startPackageId : startPackageId + "-" + endPackageId;
|
||||
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP,
|
||||
startSerial == endSerial ? startPackageId : null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
referenceKey,
|
||||
message,
|
||||
"year=" + year + ", missingStartSerial=" + startSerial + ", missingEndSerial=" + endSerial);
|
||||
}
|
||||
|
||||
private void auditSinglePackage(LegacyTedAuditRun run,
|
||||
AuditAccumulator accumulator,
|
||||
TedDailyPackage dailyPackage) {
|
||||
String packageIdentifier = dailyPackage.getPackageIdentifier();
|
||||
int processedCount = safeInt(dailyPackage.getProcessedCount());
|
||||
int failedCount = safeInt(dailyPackage.getFailedCount());
|
||||
int accountedDocuments = processedCount + failedCount;
|
||||
|
||||
if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED
|
||||
&& dailyPackage.getProcessedAt() == null) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_COMPLETED_WITHOUT_PROCESSED_AT,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package is marked COMPLETED but processedAt is null",
|
||||
null);
|
||||
}
|
||||
|
||||
if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED
|
||||
&& dailyPackage.getXmlFileCount() == null) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_MISSING_XML_FILE_COUNT,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package is marked COMPLETED but xmlFileCount is null",
|
||||
null);
|
||||
}
|
||||
|
||||
if ((dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.DOWNLOADED
|
||||
|| dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.PROCESSING
|
||||
|| dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED)
|
||||
&& !StringUtils.hasText(dailyPackage.getFileHash())) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_MISSING_FILE_HASH,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package has no file hash recorded",
|
||||
"downloadStatus=" + dailyPackage.getDownloadStatus());
|
||||
}
|
||||
|
||||
if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.FAILED
|
||||
&& !StringUtils.hasText(dailyPackage.getErrorMessage())) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_FAILED_WITHOUT_ERROR_MESSAGE,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package is marked FAILED but has no error message",
|
||||
null);
|
||||
}
|
||||
|
||||
if (dailyPackage.getXmlFileCount() != null) {
|
||||
if (accountedDocuments > dailyPackage.getXmlFileCount()) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.ERROR,
|
||||
LegacyTedAuditFindingType.PACKAGE_COMPLETED_COUNT_MISMATCH,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package accounting exceeds xmlFileCount",
|
||||
"xmlFileCount=" + dailyPackage.getXmlFileCount()
|
||||
+ ", processedCount=" + processedCount
|
||||
+ ", failedCount=" + failedCount);
|
||||
} else if (dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.COMPLETED
|
||||
&& accountedDocuments < dailyPackage.getXmlFileCount()) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_COMPLETED_COUNT_MISMATCH,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package accounting is below xmlFileCount",
|
||||
"xmlFileCount=" + dailyPackage.getXmlFileCount()
|
||||
+ ", processedCount=" + processedCount
|
||||
+ ", failedCount=" + failedCount);
|
||||
}
|
||||
}
|
||||
|
||||
if (isPackageIncompleteForReimport(dailyPackage, processedCount, failedCount, accountedDocuments)) {
|
||||
recordFinding(run, accumulator,
|
||||
dailyPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.FAILED
|
||||
? LegacyTedAuditSeverity.ERROR
|
||||
: LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.PACKAGE_INCOMPLETE,
|
||||
packageIdentifier,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
packageIdentifier,
|
||||
"TED package is not fully imported and should be considered for re-import",
|
||||
buildIncompletePackageDetails(dailyPackage, processedCount, failedCount, accountedDocuments));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isPackageIncompleteForReimport(TedDailyPackage dailyPackage,
|
||||
int processedCount,
|
||||
int failedCount,
|
||||
int accountedDocuments) {
|
||||
TedDailyPackage.DownloadStatus status = dailyPackage.getDownloadStatus();
|
||||
if (status == null) {
|
||||
return true;
|
||||
}
|
||||
if (status == TedDailyPackage.DownloadStatus.NOT_FOUND) {
|
||||
return false;
|
||||
}
|
||||
if (status == TedDailyPackage.DownloadStatus.PENDING
|
||||
|| status == TedDailyPackage.DownloadStatus.DOWNLOADING
|
||||
|| status == TedDailyPackage.DownloadStatus.DOWNLOADED
|
||||
|| status == TedDailyPackage.DownloadStatus.PROCESSING
|
||||
|| status == TedDailyPackage.DownloadStatus.FAILED) {
|
||||
return true;
|
||||
}
|
||||
if (status != TedDailyPackage.DownloadStatus.COMPLETED) {
|
||||
return true;
|
||||
}
|
||||
if (dailyPackage.getXmlFileCount() == null) {
|
||||
return true;
|
||||
}
|
||||
if (failedCount > 0) {
|
||||
return true;
|
||||
}
|
||||
return processedCount < dailyPackage.getXmlFileCount()
|
||||
|| accountedDocuments != dailyPackage.getXmlFileCount();
|
||||
}
|
||||
|
||||
private String buildIncompletePackageDetails(TedDailyPackage dailyPackage,
|
||||
int processedCount,
|
||||
int failedCount,
|
||||
int accountedDocuments) {
|
||||
return "status=" + dailyPackage.getDownloadStatus()
|
||||
+ ", xmlFileCount=" + dailyPackage.getXmlFileCount()
|
||||
+ ", processedCount=" + processedCount
|
||||
+ ", failedCount=" + failedCount
|
||||
+ ", accountedDocuments=" + accountedDocuments;
|
||||
}
|
||||
|
||||
private void auditGlobalDuplicates(LegacyTedAuditRun run, AuditAccumulator accumulator) {
|
||||
int limit = properties.getMaxDuplicateSamples();
|
||||
|
||||
jdbcTemplate.query(
|
||||
"""
|
||||
SELECT publication_id, COUNT(*) AS duplicate_count
|
||||
FROM ted.procurement_document
|
||||
WHERE publication_id IS NOT NULL AND publication_id <> ''
|
||||
GROUP BY publication_id
|
||||
HAVING COUNT(*) > 1
|
||||
ORDER BY duplicate_count DESC, publication_id ASC
|
||||
LIMIT ?
|
||||
""",
|
||||
ps -> ps.setInt(1, limit),
|
||||
(rs, rowNum) -> {
|
||||
String publicationId = rs.getString("publication_id");
|
||||
long duplicateCount = rs.getLong("duplicate_count");
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.ERROR,
|
||||
LegacyTedAuditFindingType.LEGACY_PUBLICATION_ID_DUPLICATE,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
publicationId,
|
||||
"Legacy TED publicationId appears multiple times",
|
||||
"publicationId=" + publicationId + ", duplicateCount=" + duplicateCount);
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
private int auditLegacyDocuments(LegacyTedAuditRun run,
|
||||
AuditAccumulator accumulator,
|
||||
Integer requestedLimit,
|
||||
int pageSize) {
|
||||
int processed = 0;
|
||||
int pageNumber = 0;
|
||||
|
||||
while (requestedLimit == null || processed < requestedLimit) {
|
||||
Page<ProcurementDocument> page = procurementDocumentRepository.findAll(
|
||||
PageRequest.of(pageNumber, pageSize, Sort.by(Sort.Direction.ASC, "createdAt", "id")));
|
||||
|
||||
if (page.isEmpty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
for (ProcurementDocument legacyDocument : page.getContent()) {
|
||||
auditSingleLegacyDocument(run, accumulator, legacyDocument);
|
||||
accumulator.incrementScannedLegacyDocuments();
|
||||
processed++;
|
||||
if (requestedLimit != null && processed >= requestedLimit) {
|
||||
return processed;
|
||||
}
|
||||
}
|
||||
|
||||
if (!page.hasNext()) {
|
||||
break;
|
||||
}
|
||||
pageNumber++;
|
||||
}
|
||||
|
||||
return processed;
|
||||
}
|
||||
|
||||
private void auditSingleLegacyDocument(LegacyTedAuditRun run,
|
||||
AuditAccumulator accumulator,
|
||||
ProcurementDocument legacyDocument) {
|
||||
UUID legacyDocumentId = legacyDocument.getId();
|
||||
String referenceKey = buildReferenceKey(legacyDocument);
|
||||
String documentHash = legacyDocument.getDocumentHash();
|
||||
|
||||
if (!StringUtils.hasText(documentHash)) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.ERROR,
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_HASH,
|
||||
null,
|
||||
legacyDocumentId,
|
||||
null,
|
||||
null,
|
||||
referenceKey,
|
||||
"Legacy TED document has no documentHash",
|
||||
null);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!StringUtils.hasText(legacyDocument.getXmlDocument())) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.ERROR,
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_XML,
|
||||
null,
|
||||
legacyDocumentId,
|
||||
null,
|
||||
null,
|
||||
referenceKey,
|
||||
"Legacy TED document has no xmlDocument payload",
|
||||
"documentHash=" + documentHash);
|
||||
}
|
||||
|
||||
if (!StringUtils.hasText(legacyDocument.getTextContent())) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_TEXT,
|
||||
null,
|
||||
legacyDocumentId,
|
||||
null,
|
||||
null,
|
||||
referenceKey,
|
||||
"Legacy TED document has no normalized textContent",
|
||||
"documentHash=" + documentHash);
|
||||
}
|
||||
|
||||
if (!StringUtils.hasText(legacyDocument.getPublicationId())) {
|
||||
recordFinding(run, accumulator,
|
||||
LegacyTedAuditSeverity.WARNING,
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_PUBLICATION_ID,
|
||||
null,
|
||||
legacyDocumentId,
|
||||
null,
|
||||
null,
|
||||
referenceKey,
|
||||
"Legacy TED document has no publicationId",
|
||||
"documentHash=" + documentHash);
|
||||
}
|
||||
}
|
||||
|
||||
private void recordFinding(LegacyTedAuditRun run,
|
||||
AuditAccumulator accumulator,
|
||||
LegacyTedAuditSeverity severity,
|
||||
LegacyTedAuditFindingType findingType,
|
||||
String packageIdentifier,
|
||||
UUID legacyProcurementDocumentId,
|
||||
UUID genericDocumentId,
|
||||
UUID tedProjectionId,
|
||||
String referenceKey,
|
||||
String message,
|
||||
String detailsText) {
|
||||
if (accumulator.totalFindings() >= properties.getMaxFindingsPerRun()) {
|
||||
accumulator.markTruncated();
|
||||
if (!accumulator.truncationRecorded()) {
|
||||
LegacyTedAuditFinding truncatedFinding = LegacyTedAuditFinding.builder()
|
||||
.run(run)
|
||||
.severity(LegacyTedAuditSeverity.INFO)
|
||||
.findingType(LegacyTedAuditFindingType.FINDINGS_TRUNCATED)
|
||||
.referenceKey(referenceKey != null ? referenceKey : "max-findings-per-run")
|
||||
.message("Legacy TED audit finding limit reached; additional findings were suppressed")
|
||||
.detailsText("maxFindingsPerRun=" + properties.getMaxFindingsPerRun())
|
||||
.build();
|
||||
findingRepository.save(truncatedFinding);
|
||||
accumulator.recordFinding(LegacyTedAuditSeverity.INFO, true);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
LegacyTedAuditFinding finding = LegacyTedAuditFinding.builder()
|
||||
.run(run)
|
||||
.severity(severity)
|
||||
.findingType(findingType)
|
||||
.packageIdentifier(packageIdentifier)
|
||||
.legacyProcurementDocumentId(legacyProcurementDocumentId)
|
||||
.documentId(genericDocumentId)
|
||||
.tedNoticeProjectionId(tedProjectionId)
|
||||
.referenceKey(referenceKey)
|
||||
.message(message)
|
||||
.detailsText(detailsText)
|
||||
.build();
|
||||
findingRepository.save(finding);
|
||||
accumulator.recordFinding(severity, false);
|
||||
}
|
||||
|
||||
private String buildReferenceKey(ProcurementDocument legacyDocument) {
|
||||
if (StringUtils.hasText(legacyDocument.getPublicationId())) {
|
||||
return legacyDocument.getPublicationId();
|
||||
}
|
||||
if (StringUtils.hasText(legacyDocument.getNoticeId())) {
|
||||
return legacyDocument.getNoticeId();
|
||||
}
|
||||
if (StringUtils.hasText(legacyDocument.getSourceFilename())) {
|
||||
return legacyDocument.getSourceFilename();
|
||||
}
|
||||
return String.valueOf(legacyDocument.getId());
|
||||
}
|
||||
|
||||
private int safeInt(Integer value) {
|
||||
return value != null ? value : 0;
|
||||
}
|
||||
|
||||
private String formatPackageIdentifier(int year, int serialNumber) {
|
||||
return "%04d%05d".formatted(year, serialNumber);
|
||||
}
|
||||
|
||||
private String buildSummary(int scannedPackages,
|
||||
int scannedLegacyDocuments,
|
||||
AuditAccumulator accumulator) {
|
||||
return "packages=" + scannedPackages
|
||||
+ ", legacyDocuments=" + scannedLegacyDocuments
|
||||
+ ", findings=" + accumulator.totalFindings()
|
||||
+ ", warnings=" + accumulator.warningCount()
|
||||
+ ", errors=" + accumulator.errorCount()
|
||||
+ (accumulator.truncated() ? ", truncated=true" : "");
|
||||
}
|
||||
|
||||
private static final class AuditAccumulator {
|
||||
private int scannedPackages;
|
||||
private int scannedLegacyDocuments;
|
||||
private int infoCount;
|
||||
private int warningCount;
|
||||
private int errorCount;
|
||||
private boolean truncated;
|
||||
private boolean truncationRecorded;
|
||||
|
||||
void incrementScannedPackages() {
|
||||
scannedPackages++;
|
||||
}
|
||||
|
||||
void incrementScannedLegacyDocuments() {
|
||||
scannedLegacyDocuments++;
|
||||
}
|
||||
|
||||
void recordFinding(LegacyTedAuditSeverity severity, boolean truncationFindingRecordedNow) {
|
||||
switch (severity) {
|
||||
case INFO -> infoCount++;
|
||||
case WARNING -> warningCount++;
|
||||
case ERROR -> errorCount++;
|
||||
}
|
||||
if (truncationFindingRecordedNow) {
|
||||
truncationRecorded = true;
|
||||
}
|
||||
}
|
||||
|
||||
void markTruncated() {
|
||||
truncated = true;
|
||||
}
|
||||
|
||||
int totalFindings() {
|
||||
return infoCount + warningCount + errorCount;
|
||||
}
|
||||
|
||||
int infoCount() {
|
||||
return infoCount;
|
||||
}
|
||||
|
||||
int warningCount() {
|
||||
return warningCount;
|
||||
}
|
||||
|
||||
int errorCount() {
|
||||
return errorCount;
|
||||
}
|
||||
|
||||
int scannedPackages() {
|
||||
return scannedPackages;
|
||||
}
|
||||
|
||||
int scannedLegacyDocuments() {
|
||||
return scannedLegacyDocuments;
|
||||
}
|
||||
|
||||
boolean truncated() {
|
||||
return truncated;
|
||||
}
|
||||
|
||||
boolean truncationRecorded() {
|
||||
return truncationRecorded;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,33 @@
|
||||
package at.procon.dip.migration.audit.startup;
|
||||
|
||||
import at.procon.dip.migration.audit.config.LegacyTedAuditProperties;
|
||||
import at.procon.dip.migration.audit.service.LegacyTedAuditService;
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class LegacyTedAuditStartupRunner implements ApplicationRunner {
|
||||
|
||||
private final LegacyTedAuditProperties properties;
|
||||
private final LegacyTedAuditService legacyTedAuditService;
|
||||
|
||||
@Override
|
||||
public void run(ApplicationArguments args) {
|
||||
if (!properties.isEnabled() || !properties.isStartupRunEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
int requestedLimit = properties.getStartupRunLimit();
|
||||
log.info("Wave 1 / Milestone A startup audit enabled - scanning legacy TED data with limit {}",
|
||||
requestedLimit > 0 ? requestedLimit : "unbounded");
|
||||
legacyTedAuditService.executeAudit(requestedLimit);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,446 @@
|
||||
package at.procon.ted.repair;
|
||||
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import at.procon.ted.config.TedProcessorProperties;
|
||||
import at.procon.ted.model.entity.TedDailyPackage;
|
||||
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||
import at.procon.ted.service.BatchDocumentProcessingService;
|
||||
import at.procon.ted.service.TedPackageDownloadService;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.Sort;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Startup tool for repairing / re-importing incomplete legacy TED daily packages.
|
||||
*
|
||||
* Strategy:
|
||||
* - Identify incomplete package rows from {@code ted.ted_daily_package}
|
||||
* - Optionally include missing sequence numbers inside a configured package range
|
||||
* - Reuse existing batch XML processing so already-imported XML documents are skipped by hash,
|
||||
* while missing documents are inserted during the repair run
|
||||
*/
|
||||
@Service
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class TedPackageRepairService {
|
||||
|
||||
private static final Pattern PACKAGE_IDENTIFIER_PATTERN = Pattern.compile("\\d{9}");
|
||||
private static final int PROCESSING_CHUNK_SIZE = 25;
|
||||
|
||||
private final TedProcessorProperties properties;
|
||||
private final TedDailyPackageRepository packageRepository;
|
||||
private final TedPackageDownloadService downloadService;
|
||||
private final BatchDocumentProcessingService batchProcessingService;
|
||||
|
||||
public RepairSummary repairConfiguredPackages() {
|
||||
TedProcessorProperties.RepairProperties repairProperties = properties.getRepair();
|
||||
List<RepairCandidate> candidates = resolveCandidates(repairProperties);
|
||||
|
||||
if (candidates.isEmpty()) {
|
||||
log.info("TED package repair found no matching incomplete packages");
|
||||
return new RepairSummary(0, 0, 0, 0, List.of());
|
||||
}
|
||||
|
||||
log.info("TED package repair selected {} package candidates (dryRun={})", candidates.size(), repairProperties.isDryRun());
|
||||
candidates.forEach(candidate -> log.info("Repair candidate: {} [{}]", candidate.packageIdentifier(), candidate.reason()));
|
||||
|
||||
if (repairProperties.isDryRun()) {
|
||||
return new RepairSummary(candidates.size(), 0, 0, 0,
|
||||
candidates.stream().map(RepairCandidate::packageIdentifier).toList());
|
||||
}
|
||||
|
||||
int succeeded = 0;
|
||||
int failed = 0;
|
||||
int notFound = 0;
|
||||
List<String> processed = new ArrayList<>();
|
||||
|
||||
for (RepairCandidate candidate : candidates) {
|
||||
try {
|
||||
RepairExecutionResult result = repairCandidate(candidate, repairProperties);
|
||||
processed.add(candidate.packageIdentifier());
|
||||
switch (result.outcome()) {
|
||||
case COMPLETED -> succeeded++;
|
||||
case NOT_FOUND -> notFound++;
|
||||
case FAILED -> failed++;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
failed++;
|
||||
log.error("TED package repair failed for {}: {}", candidate.packageIdentifier(), e.getMessage(), e);
|
||||
markExistingPackageFailure(candidate.existingPackage(), "Repair run failed: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
log.info("TED package repair finished: selected={}, succeeded={}, failed={}, notFound={}",
|
||||
candidates.size(), succeeded, failed, notFound);
|
||||
return new RepairSummary(candidates.size(), succeeded, failed, notFound, processed);
|
||||
}
|
||||
|
||||
List<RepairCandidate> resolveCandidates(TedProcessorProperties.RepairProperties repairProperties) {
|
||||
List<TedDailyPackage> existingPackages = packageRepository.findAll(Sort.by(Sort.Direction.ASC, "year", "serialNumber"));
|
||||
Map<String, TedDailyPackage> existingByIdentifier = existingPackages.stream()
|
||||
.collect(Collectors.toMap(TedDailyPackage::getPackageIdentifier, pkg -> pkg, (left, right) -> left, LinkedHashMap::new));
|
||||
|
||||
if (!repairProperties.getPackageIdentifiers().isEmpty()) {
|
||||
return resolveExplicitCandidates(repairProperties.getPackageIdentifiers(), existingByIdentifier, repairProperties.getMaxPackages());
|
||||
}
|
||||
|
||||
if (existingPackages.isEmpty()) {
|
||||
return List.of();
|
||||
}
|
||||
|
||||
List<RepairCandidate> candidates = new ArrayList<>();
|
||||
Set<String> seen = new LinkedHashSet<>();
|
||||
|
||||
boolean inspectSequenceRange = repairProperties.isIncludeMissingSequenceGaps()
|
||||
|| hasText(repairProperties.getFromPackageIdentifier())
|
||||
|| hasText(repairProperties.getToPackageIdentifier());
|
||||
|
||||
if (!inspectSequenceRange) {
|
||||
for (TedDailyPackage pkg : existingPackages) {
|
||||
if (isIncomplete(pkg) && seen.add(pkg.getPackageIdentifier())) {
|
||||
candidates.add(RepairCandidate.existing(pkg, repairReasonFor(pkg)));
|
||||
}
|
||||
}
|
||||
return limitCandidates(candidates, repairProperties.getMaxPackages());
|
||||
}
|
||||
|
||||
PackageCoordinates first = parseIdentifier(
|
||||
hasText(repairProperties.getFromPackageIdentifier())
|
||||
? repairProperties.getFromPackageIdentifier()
|
||||
: existingPackages.getFirst().getPackageIdentifier());
|
||||
|
||||
PackageCoordinates last = parseIdentifier(
|
||||
hasText(repairProperties.getToPackageIdentifier())
|
||||
? repairProperties.getToPackageIdentifier()
|
||||
: existingPackages.getLast().getPackageIdentifier());
|
||||
|
||||
if (first.compareTo(last) > 0) {
|
||||
throw new IllegalArgumentException("Repair package range is invalid: from > to");
|
||||
}
|
||||
|
||||
Map<Integer, Integer> observedMaxByYear = existingPackages.stream()
|
||||
.collect(Collectors.groupingBy(TedDailyPackage::getYear,
|
||||
LinkedHashMap::new,
|
||||
Collectors.collectingAndThen(
|
||||
Collectors.maxBy(Comparator.comparingInt(TedDailyPackage::getSerialNumber)),
|
||||
optional -> optional.map(TedDailyPackage::getSerialNumber).orElse(0))));
|
||||
|
||||
for (int year = first.year(); year <= last.year(); year++) {
|
||||
int startSerial = year == first.year() ? first.serialNumber() : 1;
|
||||
int defaultEndSerial = observedMaxByYear.getOrDefault(year, 0);
|
||||
int endSerial = year == last.year() ? last.serialNumber() : defaultEndSerial;
|
||||
|
||||
if (endSerial < startSerial || endSerial <= 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int serial = startSerial; serial <= endSerial; serial++) {
|
||||
String packageIdentifier = formatPackageIdentifier(year, serial);
|
||||
TedDailyPackage existingPackage = existingByIdentifier.get(packageIdentifier);
|
||||
if (existingPackage != null) {
|
||||
if (isIncomplete(existingPackage) && seen.add(packageIdentifier)) {
|
||||
candidates.add(RepairCandidate.existing(existingPackage, repairReasonFor(existingPackage)));
|
||||
}
|
||||
} else if (repairProperties.isIncludeMissingSequenceGaps() && seen.add(packageIdentifier)) {
|
||||
candidates.add(RepairCandidate.missing(year, serial, packageIdentifier, "MISSING_SEQUENCE_GAP"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return limitCandidates(candidates, repairProperties.getMaxPackages());
|
||||
}
|
||||
|
||||
private List<RepairCandidate> resolveExplicitCandidates(Collection<String> packageIdentifiers,
|
||||
Map<String, TedDailyPackage> existingByIdentifier,
|
||||
int maxPackages) {
|
||||
List<RepairCandidate> candidates = new ArrayList<>();
|
||||
Set<String> seen = new LinkedHashSet<>();
|
||||
|
||||
for (String rawIdentifier : packageIdentifiers) {
|
||||
if (!hasText(rawIdentifier)) {
|
||||
continue;
|
||||
}
|
||||
String normalized = rawIdentifier.trim();
|
||||
if (!seen.add(normalized)) {
|
||||
continue;
|
||||
}
|
||||
PackageCoordinates coordinates = parseIdentifier(normalized);
|
||||
TedDailyPackage existing = existingByIdentifier.get(normalized);
|
||||
if (existing != null) {
|
||||
candidates.add(RepairCandidate.existing(existing, repairReasonFor(existing)));
|
||||
} else {
|
||||
candidates.add(RepairCandidate.missing(coordinates.year(), coordinates.serialNumber(), normalized, "EXPLICIT_PACKAGE"));
|
||||
}
|
||||
}
|
||||
|
||||
return limitCandidates(candidates, maxPackages);
|
||||
}
|
||||
|
||||
private List<RepairCandidate> limitCandidates(List<RepairCandidate> candidates, int maxPackages) {
|
||||
if (candidates.size() <= maxPackages) {
|
||||
return candidates;
|
||||
}
|
||||
return new ArrayList<>(candidates.subList(0, maxPackages));
|
||||
}
|
||||
|
||||
@Transactional
|
||||
RepairExecutionResult repairCandidate(RepairCandidate candidate, TedProcessorProperties.RepairProperties repairProperties) throws Exception {
|
||||
TedDailyPackage packageEntity = candidate.existingPackage() != null
|
||||
? candidate.existingPackage()
|
||||
: createMissingPackageRecord(candidate);
|
||||
|
||||
String packageIdentifier = candidate.packageIdentifier();
|
||||
boolean downloadedNow = false;
|
||||
long startNanos = System.nanoTime();
|
||||
|
||||
Path archivePath = packageArchivePath(packageIdentifier);
|
||||
if (repairProperties.isForceRedownload() || !Files.exists(archivePath)) {
|
||||
if (!repairProperties.isRedownloadMissingArchives()) {
|
||||
String message = "Package archive is missing locally and re-download is disabled";
|
||||
markFailure(packageEntity, message);
|
||||
return new RepairExecutionResult(RepairOutcome.FAILED, message);
|
||||
}
|
||||
|
||||
Path downloadedArchive = downloadService.downloadArchive(packageIdentifier);
|
||||
if (downloadedArchive == null) {
|
||||
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.NOT_FOUND);
|
||||
packageEntity.setErrorMessage("Package not found during repair run");
|
||||
packageRepository.save(packageEntity);
|
||||
return new RepairExecutionResult(RepairOutcome.NOT_FOUND, "HTTP 404");
|
||||
}
|
||||
archivePath = downloadedArchive;
|
||||
downloadedNow = true;
|
||||
packageEntity.setDownloadedAt(OffsetDateTime.now());
|
||||
packageEntity.setDownloadUrl(downloadService.buildDownloadUrlForPackage(packageIdentifier));
|
||||
}
|
||||
|
||||
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.PROCESSING);
|
||||
packageEntity.setErrorMessage(null);
|
||||
packageEntity.setProcessedCount(0);
|
||||
packageEntity.setFailedCount(0);
|
||||
packageEntity.setFileHash(downloadService.calculateArchiveHash(archivePath));
|
||||
packageRepository.save(packageEntity);
|
||||
|
||||
List<Path> xmlFiles = downloadService.extractArchive(archivePath, packageIdentifier);
|
||||
packageEntity.setXmlFileCount(xmlFiles.size());
|
||||
packageRepository.save(packageEntity);
|
||||
|
||||
int totalProcessed = 0;
|
||||
int totalFailed = 0;
|
||||
try {
|
||||
for (int i = 0; i < xmlFiles.size(); i += PROCESSING_CHUNK_SIZE) {
|
||||
int end = Math.min(i + PROCESSING_CHUNK_SIZE, xmlFiles.size());
|
||||
List<Path> chunk = xmlFiles.subList(i, end);
|
||||
BatchDocumentProcessingService.BatchProcessingResult result = batchProcessingService.processBatch(chunk);
|
||||
totalProcessed += result.insertedCount() + result.duplicateCount();
|
||||
totalFailed += result.errorCount();
|
||||
|
||||
packageEntity.setProcessedCount(totalProcessed);
|
||||
packageEntity.setFailedCount(totalFailed);
|
||||
packageRepository.save(packageEntity);
|
||||
}
|
||||
} finally {
|
||||
cleanupExtractedXmlFiles(xmlFiles);
|
||||
if (downloadedNow && properties.getDownload().isDeleteAfterExtraction()) {
|
||||
deleteQuietly(archivePath);
|
||||
}
|
||||
}
|
||||
|
||||
packageEntity.setProcessedAt(OffsetDateTime.now());
|
||||
packageEntity.setProcessingDurationMs((System.nanoTime() - startNanos) / 1_000_000L);
|
||||
packageEntity.setProcessedCount(totalProcessed);
|
||||
packageEntity.setFailedCount(totalFailed);
|
||||
|
||||
if (totalFailed == 0 && totalProcessed == xmlFiles.size()) {
|
||||
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.COMPLETED);
|
||||
packageEntity.setErrorMessage(null);
|
||||
packageRepository.save(packageEntity);
|
||||
return new RepairExecutionResult(RepairOutcome.COMPLETED, "Package repaired successfully");
|
||||
}
|
||||
|
||||
String failureMessage = String.format(Locale.ROOT,
|
||||
"Repair incomplete: xmlFiles=%d, processed=%d, failed=%d",
|
||||
xmlFiles.size(), totalProcessed, totalFailed);
|
||||
markFailure(packageEntity, failureMessage);
|
||||
return new RepairExecutionResult(RepairOutcome.FAILED, failureMessage);
|
||||
}
|
||||
|
||||
private TedDailyPackage createMissingPackageRecord(RepairCandidate candidate) {
|
||||
TedDailyPackage pkg = TedDailyPackage.builder()
|
||||
.packageIdentifier(candidate.packageIdentifier())
|
||||
.year(candidate.year())
|
||||
.serialNumber(candidate.serialNumber())
|
||||
.downloadUrl(downloadService.buildDownloadUrlForPackage(candidate.packageIdentifier()))
|
||||
.downloadStatus(TedDailyPackage.DownloadStatus.PENDING)
|
||||
.build();
|
||||
return packageRepository.save(pkg);
|
||||
}
|
||||
|
||||
private void markFailure(TedDailyPackage packageEntity, String message) {
|
||||
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.FAILED);
|
||||
packageEntity.setErrorMessage(message);
|
||||
packageRepository.save(packageEntity);
|
||||
}
|
||||
|
||||
private void markExistingPackageFailure(TedDailyPackage packageEntity, String message) {
|
||||
if (packageEntity == null) {
|
||||
return;
|
||||
}
|
||||
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.FAILED);
|
||||
packageEntity.setErrorMessage(message);
|
||||
packageRepository.save(packageEntity);
|
||||
}
|
||||
|
||||
private Path packageArchivePath(String packageIdentifier) {
|
||||
return Paths.get(properties.getDownload().getDownloadDirectory()).resolve(packageIdentifier + ".tar.gz");
|
||||
}
|
||||
|
||||
private void cleanupExtractedXmlFiles(List<Path> xmlFiles) {
|
||||
if (xmlFiles.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Path packageDirectory = xmlFiles.getFirst().getParent();
|
||||
for (Path xmlFile : xmlFiles) {
|
||||
deleteQuietly(xmlFile);
|
||||
}
|
||||
|
||||
if (packageDirectory != null) {
|
||||
try (var stream = Files.list(packageDirectory)) {
|
||||
if (stream.findAny().isEmpty()) {
|
||||
deleteQuietly(packageDirectory);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.debug("Could not clean extracted package directory {}: {}", packageDirectory, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteQuietly(Path path) {
|
||||
try {
|
||||
Files.deleteIfExists(path);
|
||||
} catch (IOException e) {
|
||||
log.debug("Could not delete {}: {}", path, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
boolean isIncomplete(TedDailyPackage pkg) {
|
||||
if (pkg == null || pkg.getDownloadStatus() == null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pkg.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pkg.getDownloadStatus() != TedDailyPackage.DownloadStatus.COMPLETED) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Integer xmlFileCount = pkg.getXmlFileCount();
|
||||
int processedCount = pkg.getProcessedCount() != null ? pkg.getProcessedCount() : 0;
|
||||
int failedCount = pkg.getFailedCount() != null ? pkg.getFailedCount() : 0;
|
||||
|
||||
if (xmlFileCount == null || xmlFileCount <= 0) {
|
||||
return true;
|
||||
}
|
||||
if (failedCount > 0) {
|
||||
return true;
|
||||
}
|
||||
return processedCount != xmlFileCount;
|
||||
}
|
||||
|
||||
private String repairReasonFor(TedDailyPackage pkg) {
|
||||
if (pkg.getDownloadStatus() != TedDailyPackage.DownloadStatus.COMPLETED) {
|
||||
return "STATUS_" + pkg.getDownloadStatus();
|
||||
}
|
||||
if (pkg.getXmlFileCount() == null || pkg.getXmlFileCount() <= 0) {
|
||||
return "MISSING_XML_COUNT";
|
||||
}
|
||||
if (pkg.getFailedCount() != null && pkg.getFailedCount() > 0) {
|
||||
return "FAILED_DOCUMENTS";
|
||||
}
|
||||
return "COUNT_MISMATCH";
|
||||
}
|
||||
|
||||
private PackageCoordinates parseIdentifier(String packageIdentifier) {
|
||||
String normalized = packageIdentifier != null ? packageIdentifier.trim() : "";
|
||||
if (!PACKAGE_IDENTIFIER_PATTERN.matcher(normalized).matches()) {
|
||||
throw new IllegalArgumentException("Invalid package identifier: " + packageIdentifier);
|
||||
}
|
||||
return new PackageCoordinates(
|
||||
Integer.parseInt(normalized.substring(0, 4)),
|
||||
Integer.parseInt(normalized.substring(4)));
|
||||
}
|
||||
|
||||
private String formatPackageIdentifier(int year, int serialNumber) {
|
||||
return String.format(Locale.ROOT, "%04d%05d", year, serialNumber);
|
||||
}
|
||||
|
||||
private boolean hasText(String value) {
|
||||
return value != null && !value.isBlank();
|
||||
}
|
||||
|
||||
/**
 * Year and serial number of a package, ordered by year first and serial number second.
 *
 * @param year         the package year
 * @param serialNumber the serial number within the year
 */
record PackageCoordinates(int year, int serialNumber) implements Comparable<PackageCoordinates> {

    /**
     * Compares by year; equal years are ordered by serial number.
     *
     * @param other the coordinates to compare with
     * @return a negative value, zero or a positive value as defined by {@link Integer#compare(int, int)}
     */
    @Override
    public int compareTo(PackageCoordinates other) {
        int byYear = Integer.compare(year, other.year);
        return byYear != 0 ? byYear : Integer.compare(serialNumber, other.serialNumber);
    }
}
|
||||
|
||||
public record RepairCandidate(int year,
|
||||
int serialNumber,
|
||||
String packageIdentifier,
|
||||
TedDailyPackage existingPackage,
|
||||
String reason) {
|
||||
static RepairCandidate existing(TedDailyPackage pkg, String reason) {
|
||||
return new RepairCandidate(pkg.getYear(), pkg.getSerialNumber(), pkg.getPackageIdentifier(), pkg, reason);
|
||||
}
|
||||
|
||||
static RepairCandidate missing(int year, int serialNumber, String packageIdentifier, String reason) {
|
||||
return new RepairCandidate(year, serialNumber, packageIdentifier, null, reason);
|
||||
}
|
||||
}
|
||||
|
||||
/** Result category of repairing a single package. */
enum RepairOutcome {
    /** The package was processed without failures and all XML files were accounted for. */
    COMPLETED,
    /** The repair could not be carried out or finished with failures or a count mismatch. */
    FAILED,
    /** The download yielded no archive for the package. */
    NOT_FOUND
}
|
||||
|
||||
record RepairExecutionResult(RepairOutcome outcome, String message) {
|
||||
}
|
||||
|
||||
/**
 * Totals of one repair run.
 *
 * @param selected                    number of candidates selected
 * @param succeeded                   number of packages repaired successfully
 * @param failed                      number of packages whose repair failed
 * @param notFound                    number of packages for which no archive was found
 * @param processedPackageIdentifiers identifiers reported for this run
 */
public record RepairSummary(
        int selected,
        int succeeded,
        int failed,
        int notFound,
        List<String> processedPackageIdentifiers) { }
|
||||
}
|
||||
@ -0,0 +1,42 @@
|
||||
package at.procon.ted.startup;
|
||||
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import at.procon.ted.config.TedProcessorProperties;
|
||||
import at.procon.ted.repair.TedPackageRepairService;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.ApplicationArguments;
|
||||
import org.springframework.boot.ApplicationRunner;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* Optional startup runner that repairs / re-imports incomplete legacy TED packages.
|
||||
*/
|
||||
@Component
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
@Order(50)
|
||||
public class TedPackageRepairStartupRunner implements ApplicationRunner {
|
||||
|
||||
private final TedProcessorProperties properties;
|
||||
private final TedPackageRepairService repairService;
|
||||
|
||||
@Override
|
||||
public void run(ApplicationArguments args) {
|
||||
if (!properties.getRepair().isEnabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (properties.getDownload().isEnabled() && !properties.getRepair().isAllowWhileDownloadEnabled()) {
|
||||
throw new IllegalStateException(
|
||||
"ted.repair.enabled=true requires ted.download.enabled=false " +
|
||||
"or ted.repair.allow-while-download-enabled=true to avoid concurrent package processing");
|
||||
}
|
||||
|
||||
log.info("Starting legacy TED package repair tool...");
|
||||
repairService.repairConfiguredPackages();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,57 @@
|
||||
-- Wave 1 / Milestone A: read-only legacy audit run/finding persistence.
-- Additive tables only; no legacy business data is modified by this migration.

-- One row per audit run: status, requested limits, scan/finding counters and timing.
CREATE TABLE IF NOT EXISTS DOC.doc_legacy_audit_run (
    id                       UUID         PRIMARY KEY,
    status                   VARCHAR(32)  NOT NULL,
    requested_limit          INTEGER,
    page_size                INTEGER      NOT NULL,
    scanned_packages         INTEGER      NOT NULL DEFAULT 0,
    scanned_legacy_documents INTEGER      NOT NULL DEFAULT 0,
    finding_count            INTEGER      NOT NULL DEFAULT 0,
    info_count               INTEGER      NOT NULL DEFAULT 0,
    warning_count            INTEGER      NOT NULL DEFAULT 0,
    error_count              INTEGER      NOT NULL DEFAULT 0,
    started_at               TIMESTAMPTZ  NOT NULL,
    completed_at             TIMESTAMPTZ,
    summary_text             TEXT,
    failure_message          TEXT,
    created_at               TIMESTAMPTZ  NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at               TIMESTAMPTZ  NOT NULL DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_run_status
    ON DOC.doc_legacy_audit_run(status);

CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_run_started
    ON DOC.doc_legacy_audit_run(started_at DESC);

-- One row per finding; findings are deleted together with their run (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS DOC.doc_legacy_audit_finding (
    id                             UUID          PRIMARY KEY,
    run_id                         UUID          NOT NULL REFERENCES DOC.doc_legacy_audit_run(id) ON DELETE CASCADE,
    severity                       VARCHAR(16)   NOT NULL,
    finding_type                   VARCHAR(64)   NOT NULL,
    package_identifier             VARCHAR(20),
    legacy_procurement_document_id UUID,
    document_id                    UUID,
    ted_notice_projection_id       UUID,
    reference_key                  VARCHAR(255),
    message                        TEXT          NOT NULL,
    details_text                   TEXT,
    created_at                     TIMESTAMPTZ   NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Lookup indexes for findings: by run, type, severity and referenced documents.
CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_run
    ON DOC.doc_legacy_audit_finding(run_id);

CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_type
    ON DOC.doc_legacy_audit_finding(finding_type);

CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_severity
    ON DOC.doc_legacy_audit_finding(severity);

CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_legacy_doc
    ON DOC.doc_legacy_audit_finding(legacy_procurement_document_id);

CREATE INDEX IF NOT EXISTS idx_doc_legacy_audit_find_document
    ON DOC.doc_legacy_audit_finding(document_id);
|
||||
@ -0,0 +1,241 @@
|
||||
package at.procon.dip.migration.audit.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import at.procon.dip.migration.audit.config.LegacyTedAuditProperties;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditFinding;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditFindingType;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditRun;
|
||||
import at.procon.dip.migration.audit.entity.LegacyTedAuditRunStatus;
|
||||
import at.procon.dip.migration.audit.repository.LegacyTedAuditFindingRepository;
|
||||
import at.procon.dip.migration.audit.repository.LegacyTedAuditRunRepository;
|
||||
import at.procon.ted.model.entity.NoticeType;
|
||||
import at.procon.ted.model.entity.ProcurementDocument;
|
||||
import at.procon.ted.model.entity.TedDailyPackage;
|
||||
import at.procon.ted.repository.ProcurementDocumentRepository;
|
||||
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.Year;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.PageImpl;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class LegacyTedAuditServiceTest {
|
||||
|
||||
@Mock
|
||||
private TedDailyPackageRepository tedDailyPackageRepository;
|
||||
@Mock
|
||||
private ProcurementDocumentRepository procurementDocumentRepository;
|
||||
@Mock
|
||||
private LegacyTedAuditRunRepository runRepository;
|
||||
@Mock
|
||||
private LegacyTedAuditFindingRepository findingRepository;
|
||||
@Mock
|
||||
private JdbcTemplate jdbcTemplate;
|
||||
|
||||
private LegacyTedAuditService service;
|
||||
private List<LegacyTedAuditFinding> persistedFindings;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
LegacyTedAuditProperties properties = new LegacyTedAuditProperties();
|
||||
properties.setEnabled(true);
|
||||
properties.setPageSize(50);
|
||||
properties.setMaxFindingsPerRun(100);
|
||||
properties.setMaxDuplicateSamples(10);
|
||||
|
||||
service = new LegacyTedAuditService(
|
||||
properties,
|
||||
tedDailyPackageRepository,
|
||||
procurementDocumentRepository,
|
||||
runRepository,
|
||||
findingRepository,
|
||||
jdbcTemplate
|
||||
);
|
||||
|
||||
persistedFindings = new ArrayList<>();
|
||||
|
||||
when(runRepository.save(any(LegacyTedAuditRun.class))).thenAnswer(invocation -> {
|
||||
LegacyTedAuditRun run = invocation.getArgument(0);
|
||||
if (run.getId() == null) {
|
||||
run.setId(UUID.randomUUID());
|
||||
}
|
||||
return run;
|
||||
});
|
||||
|
||||
when(findingRepository.save(any(LegacyTedAuditFinding.class))).thenAnswer(invocation -> {
|
||||
LegacyTedAuditFinding finding = invocation.getArgument(0);
|
||||
if (finding.getId() == null) {
|
||||
finding.setId(UUID.randomUUID());
|
||||
}
|
||||
persistedFindings.add(finding);
|
||||
return finding;
|
||||
});
|
||||
|
||||
when(procurementDocumentRepository.findAll(any(org.springframework.data.domain.Pageable.class)))
|
||||
.thenReturn(new PageImpl<>(List.of()));
|
||||
}
|
||||
|
||||
@Test
|
||||
void executeAudit_should_record_package_sequence_gaps_and_incomplete_packages() {
|
||||
int currentYear = Year.now().getValue();
|
||||
|
||||
when(tedDailyPackageRepository.findAll(any(org.springframework.data.domain.Sort.class))).thenReturn(List.of(
|
||||
TedDailyPackage.builder()
|
||||
.packageIdentifier(formatPackageIdentifier(currentYear, 1))
|
||||
.year(currentYear)
|
||||
.serialNumber(1)
|
||||
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||
.xmlFileCount(10)
|
||||
.processedCount(10)
|
||||
.failedCount(0)
|
||||
.fileHash("hash-1")
|
||||
.processedAt(OffsetDateTime.now())
|
||||
.build(),
|
||||
TedDailyPackage.builder()
|
||||
.packageIdentifier(formatPackageIdentifier(currentYear, 3))
|
||||
.year(currentYear)
|
||||
.serialNumber(3)
|
||||
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||
.xmlFileCount(10)
|
||||
.processedCount(9)
|
||||
.failedCount(1)
|
||||
.fileHash("hash-3")
|
||||
.processedAt(OffsetDateTime.now())
|
||||
.build(),
|
||||
TedDailyPackage.builder()
|
||||
.packageIdentifier(formatPackageIdentifier(currentYear, 4))
|
||||
.year(currentYear)
|
||||
.serialNumber(4)
|
||||
.downloadStatus(TedDailyPackage.DownloadStatus.FAILED)
|
||||
.xmlFileCount(12)
|
||||
.processedCount(0)
|
||||
.failedCount(0)
|
||||
.errorMessage("processing failed")
|
||||
.build()
|
||||
));
|
||||
|
||||
LegacyTedAuditRun run = service.executeAudit(0);
|
||||
|
||||
assertThat(run.getStatus()).isEqualTo(LegacyTedAuditRunStatus.COMPLETED);
|
||||
assertThat(run.getScannedPackages()).isEqualTo(3);
|
||||
assertThat(persistedFindings)
|
||||
.extracting(LegacyTedAuditFinding::getFindingType)
|
||||
.contains(LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP,
|
||||
LegacyTedAuditFindingType.PACKAGE_INCOMPLETE);
|
||||
|
||||
assertThat(persistedFindings)
|
||||
.filteredOn(f -> f.getFindingType() == LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP)
|
||||
.extracting(LegacyTedAuditFinding::getReferenceKey)
|
||||
.contains(formatPackageIdentifier(currentYear, 2));
|
||||
|
||||
assertThat(persistedFindings)
|
||||
.filteredOn(f -> f.getFindingType() == LegacyTedAuditFindingType.PACKAGE_INCOMPLETE)
|
||||
.extracting(LegacyTedAuditFinding::getPackageIdentifier)
|
||||
.contains(formatPackageIdentifier(currentYear, 3), formatPackageIdentifier(currentYear, 4));
|
||||
}
|
||||
|
||||
@Test
|
||||
void executeAudit_should_record_missing_years_inside_audited_interval() {
|
||||
int currentYear = Year.now().getValue();
|
||||
|
||||
when(tedDailyPackageRepository.findAll(any(org.springframework.data.domain.Sort.class))).thenReturn(List.of(
|
||||
TedDailyPackage.builder()
|
||||
.packageIdentifier(formatPackageIdentifier(currentYear - 2, 1))
|
||||
.year(currentYear - 2)
|
||||
.serialNumber(1)
|
||||
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||
.xmlFileCount(1)
|
||||
.processedCount(1)
|
||||
.failedCount(0)
|
||||
.fileHash("hash-a")
|
||||
.processedAt(OffsetDateTime.now())
|
||||
.build(),
|
||||
TedDailyPackage.builder()
|
||||
.packageIdentifier(formatPackageIdentifier(currentYear, 1))
|
||||
.year(currentYear)
|
||||
.serialNumber(1)
|
||||
.downloadStatus(TedDailyPackage.DownloadStatus.COMPLETED)
|
||||
.xmlFileCount(1)
|
||||
.processedCount(1)
|
||||
.failedCount(0)
|
||||
.fileHash("hash-b")
|
||||
.processedAt(OffsetDateTime.now())
|
||||
.build()
|
||||
));
|
||||
|
||||
LegacyTedAuditRun run = service.executeAudit(0);
|
||||
|
||||
assertThat(run.getStatus()).isEqualTo(LegacyTedAuditRunStatus.COMPLETED);
|
||||
assertThat(persistedFindings)
|
||||
.filteredOn(f -> f.getFindingType() == LegacyTedAuditFindingType.PACKAGE_SEQUENCE_GAP)
|
||||
.extracting(LegacyTedAuditFinding::getReferenceKey)
|
||||
.contains("year:" + (currentYear - 1));
|
||||
}
|
||||
|
||||
@Test
|
||||
void executeAudit_should_record_legacy_document_integrity_findings_only() {
|
||||
ProcurementDocument missingXml = ProcurementDocument.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.documentHash("hash-1")
|
||||
.publicationId("2025/S 001-000001")
|
||||
.noticeType(NoticeType.CONTRACT_NOTICE)
|
||||
.xmlDocument(null)
|
||||
.textContent("hello")
|
||||
.build();
|
||||
|
||||
ProcurementDocument missingTextAndPublicationId = ProcurementDocument.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.documentHash("hash-2")
|
||||
.publicationId(null)
|
||||
.noticeType(NoticeType.CONTRACT_NOTICE)
|
||||
.xmlDocument("<xml/>")
|
||||
.textContent(null)
|
||||
.build();
|
||||
|
||||
when(tedDailyPackageRepository.findAll(any(org.springframework.data.domain.Sort.class))).thenReturn(List.of());
|
||||
when(procurementDocumentRepository.findAll(any(org.springframework.data.domain.Pageable.class)))
|
||||
.thenReturn(pageOf(missingXml, missingTextAndPublicationId));
|
||||
|
||||
LegacyTedAuditRun run = service.executeAudit(10);
|
||||
|
||||
assertThat(run.getStatus()).isEqualTo(LegacyTedAuditRunStatus.COMPLETED);
|
||||
assertThat(run.getScannedLegacyDocuments()).isEqualTo(2);
|
||||
assertThat(persistedFindings)
|
||||
.extracting(LegacyTedAuditFinding::getFindingType)
|
||||
.contains(
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_XML,
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_TEXT,
|
||||
LegacyTedAuditFindingType.LEGACY_DOCUMENT_MISSING_PUBLICATION_ID
|
||||
)
|
||||
.doesNotContain(
|
||||
LegacyTedAuditFindingType.DOC_DOCUMENT_MISSING,
|
||||
LegacyTedAuditFindingType.DOC_SOURCE_MISSING,
|
||||
LegacyTedAuditFindingType.DOC_ORIGINAL_CONTENT_MISSING,
|
||||
LegacyTedAuditFindingType.DOC_PRIMARY_REPRESENTATION_MISSING,
|
||||
LegacyTedAuditFindingType.TED_PROJECTION_MISSING,
|
||||
LegacyTedAuditFindingType.TED_PROJECTION_MISSING_LEGACY_LINK,
|
||||
LegacyTedAuditFindingType.TED_PROJECTION_DOCUMENT_MISMATCH,
|
||||
LegacyTedAuditFindingType.DOC_DEDUP_HASH_DUPLICATE
|
||||
);
|
||||
}
|
||||
|
||||
private Page<ProcurementDocument> pageOf(ProcurementDocument... documents) {
|
||||
return new PageImpl<>(List.of(documents));
|
||||
}
|
||||
|
||||
private String formatPackageIdentifier(int year, int serialNumber) {
|
||||
return "%04d%05d".formatted(year, serialNumber);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,120 @@
|
||||
package at.procon.ted.repair;
|
||||
|
||||
import at.procon.ted.config.TedProcessorProperties;
|
||||
import at.procon.ted.model.entity.TedDailyPackage;
|
||||
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||
import at.procon.ted.service.BatchDocumentProcessingService;
|
||||
import at.procon.ted.service.TedPackageDownloadService;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.springframework.data.domain.Sort;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.eq;
|
||||
import static org.mockito.Mockito.mock;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
class TedPackageRepairServiceTest {
|
||||
|
||||
@TempDir
|
||||
Path tempDir;
|
||||
|
||||
@Test
|
||||
void resolveCandidatesIncludesIncompletePackagesAndMissingSequenceGaps() {
|
||||
TedProcessorProperties properties = new TedProcessorProperties();
|
||||
properties.getRepair().setEnabled(true);
|
||||
properties.getRepair().setFromPackageIdentifier("202600001");
|
||||
properties.getRepair().setToPackageIdentifier("202600003");
|
||||
properties.getRepair().setIncludeMissingSequenceGaps(true);
|
||||
properties.getRepair().setMaxPackages(10);
|
||||
|
||||
TedDailyPackageRepository repository = mock(TedDailyPackageRepository.class);
|
||||
TedDailyPackage pkg1 = newPackage("202600001", 2026, 1, TedDailyPackage.DownloadStatus.COMPLETED, 20, 20, 0);
|
||||
TedDailyPackage pkg3 = newPackage("202600003", 2026, 3, TedDailyPackage.DownloadStatus.PROCESSING, 20, 5, 0);
|
||||
when(repository.findAll(any(Sort.class))).thenReturn(List.of(pkg1, pkg3));
|
||||
|
||||
TedPackageRepairService service = new TedPackageRepairService(
|
||||
properties,
|
||||
repository,
|
||||
mock(TedPackageDownloadService.class),
|
||||
mock(BatchDocumentProcessingService.class));
|
||||
|
||||
List<TedPackageRepairService.RepairCandidate> candidates = service.resolveCandidates(properties.getRepair());
|
||||
|
||||
assertThat(candidates).extracting(TedPackageRepairService.RepairCandidate::packageIdentifier)
|
||||
.containsExactly("202600002", "202600003");
|
||||
assertThat(candidates).extracting(TedPackageRepairService.RepairCandidate::reason)
|
||||
.containsExactly("MISSING_SEQUENCE_GAP", "STATUS_PROCESSING");
|
||||
}
|
||||
|
||||
@Test
|
||||
void repairCandidateProcessesExistingArchiveAndMarksPackageCompleted() throws Exception {
|
||||
TedProcessorProperties properties = new TedProcessorProperties();
|
||||
properties.getRepair().setEnabled(true);
|
||||
properties.getRepair().setRedownloadMissingArchives(false);
|
||||
properties.getDownload().setDownloadDirectory(tempDir.toString());
|
||||
properties.getDownload().setDeleteAfterExtraction(false);
|
||||
|
||||
Path archive = tempDir.resolve("202600003.tar.gz");
|
||||
Files.writeString(archive, "dummy");
|
||||
|
||||
TedDailyPackageRepository repository = mock(TedDailyPackageRepository.class);
|
||||
TedDailyPackage pkg = newPackage("202600003", 2026, 3, TedDailyPackage.DownloadStatus.PROCESSING, 3, 0, 0);
|
||||
when(repository.save(any(TedDailyPackage.class))).thenAnswer(invocation -> invocation.getArgument(0));
|
||||
when(repository.findByPackageIdentifier("202600003")).thenReturn(Optional.of(pkg));
|
||||
|
||||
TedPackageDownloadService downloadService = mock(TedPackageDownloadService.class);
|
||||
Path extractedDir = Files.createDirectory(tempDir.resolve("extracted"));
|
||||
Path xml1 = Files.writeString(extractedDir.resolve("a.xml"), "<a/>");
|
||||
Path xml2 = Files.writeString(extractedDir.resolve("b.xml"), "<b/>");
|
||||
Path xml3 = Files.writeString(extractedDir.resolve("c.xml"), "<c/>");
|
||||
when(downloadService.calculateArchiveHash(eq(archive))).thenReturn("hash-1");
|
||||
when(downloadService.extractArchive(eq(archive), eq("202600003"))).thenReturn(List.of(xml1, xml2, xml3));
|
||||
|
||||
BatchDocumentProcessingService batchService = mock(BatchDocumentProcessingService.class);
|
||||
when(batchService.processBatch(any())).thenReturn(new BatchDocumentProcessingService.BatchProcessingResult(
|
||||
1, 2, 0, 5L, List.of(UUID.randomUUID()), List.of()));
|
||||
|
||||
TedPackageRepairService service = new TedPackageRepairService(properties, repository, downloadService, batchService);
|
||||
TedPackageRepairService.RepairCandidate candidate = TedPackageRepairService.RepairCandidate.existing(pkg, "STATUS_PROCESSING");
|
||||
|
||||
var result = service.repairCandidate(candidate, properties.getRepair());
|
||||
|
||||
assertThat(result.outcome()).isEqualTo(TedPackageRepairService.RepairOutcome.COMPLETED);
|
||||
assertThat(pkg.getDownloadStatus()).isEqualTo(TedDailyPackage.DownloadStatus.COMPLETED);
|
||||
assertThat(pkg.getProcessedCount()).isEqualTo(3);
|
||||
assertThat(pkg.getFailedCount()).isZero();
|
||||
assertThat(pkg.getFileHash()).isEqualTo("hash-1");
|
||||
assertThat(pkg.getProcessedAt()).isNotNull();
|
||||
}
|
||||
|
||||
private TedDailyPackage newPackage(String packageIdentifier,
|
||||
int year,
|
||||
int serial,
|
||||
TedDailyPackage.DownloadStatus status,
|
||||
Integer xmlCount,
|
||||
Integer processed,
|
||||
Integer failed) {
|
||||
TedDailyPackage pkg = new TedDailyPackage();
|
||||
pkg.setId(UUID.randomUUID());
|
||||
pkg.setPackageIdentifier(packageIdentifier);
|
||||
pkg.setYear(year);
|
||||
pkg.setSerialNumber(serial);
|
||||
pkg.setDownloadStatus(status);
|
||||
pkg.setXmlFileCount(xmlCount);
|
||||
pkg.setProcessedCount(processed);
|
||||
pkg.setFailedCount(failed);
|
||||
pkg.setDownloadUrl("https://ted.europa.eu/packages/daily/" + packageIdentifier);
|
||||
pkg.setCreatedAt(OffsetDateTime.now());
|
||||
pkg.setUpdatedAt(OffsetDateTime.now());
|
||||
return pkg;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue