ted legacy data migration, repair fixes
parent
61d163f8fe
commit
28c7854ead
@ -0,0 +1,113 @@
|
||||
package at.procon.dip.domain.ted.service;
|
||||
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||
import at.procon.dip.domain.document.service.DocumentService;
|
||||
import at.procon.dip.domain.document.service.command.CreateDocumentCommand;
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import at.procon.ted.model.entity.ProcurementDocument;
|
||||
import at.procon.ted.model.entity.TedDailyPackage;
|
||||
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||
import at.procon.ted.util.HashUtils;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Optional;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Service
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||
@RequiredArgsConstructor
|
||||
public class TedPackageDocumentService {
|
||||
|
||||
private static final String PACKAGE_MIME_TYPE = "application/gzip";
|
||||
|
||||
private final TedPackageIdentifierResolver packageIdentifierResolver;
|
||||
private final TedDailyPackageRepository tedDailyPackageRepository;
|
||||
private final DocumentRepository documentRepository;
|
||||
private final DocumentService documentService;
|
||||
|
||||
@Transactional
|
||||
public Optional<Document> ensurePackageDocumentForLegacyNotice(ProcurementDocument legacyDocument) {
|
||||
return packageIdentifierResolver.resolveFromSourceMetadata(legacyDocument.getSourcePath(), legacyDocument.getSourceFilename())
|
||||
.map(this::ensurePackageDocument);
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Document ensurePackageDocument(String packageIdentifier) {
|
||||
String businessKey = buildBusinessKey(packageIdentifier);
|
||||
Document document = documentRepository.findByBusinessKey(businessKey)
|
||||
.orElseGet(() -> createPackageDocument(packageIdentifier));
|
||||
|
||||
Optional<TedDailyPackage> packageEntity = tedDailyPackageRepository.findByPackageIdentifier(packageIdentifier);
|
||||
document.setVisibility(DocumentVisibility.PUBLIC);
|
||||
document.setDocumentType(DocumentType.TED_PACKAGE);
|
||||
document.setDocumentFamily(DocumentFamily.PROCUREMENT);
|
||||
document.setStatus(resolveStatus(packageEntity));
|
||||
document.setTitle(buildTitle(packageIdentifier));
|
||||
document.setSummary(buildSummary(packageIdentifier, packageEntity.orElse(null)));
|
||||
document.setMimeType(PACKAGE_MIME_TYPE);
|
||||
document.setBusinessKey(businessKey);
|
||||
document.setDedupHash(HashUtils.computeSha256(businessKey));
|
||||
return documentService.save(document);
|
||||
}
|
||||
|
||||
private Document createPackageDocument(String packageIdentifier) {
|
||||
String businessKey = buildBusinessKey(packageIdentifier);
|
||||
return documentService.create(new CreateDocumentCommand(
|
||||
null,
|
||||
DocumentVisibility.PUBLIC,
|
||||
DocumentType.TED_PACKAGE,
|
||||
DocumentFamily.PROCUREMENT,
|
||||
DocumentStatus.RECEIVED,
|
||||
buildTitle(packageIdentifier),
|
||||
null,
|
||||
null,
|
||||
PACKAGE_MIME_TYPE,
|
||||
businessKey,
|
||||
HashUtils.computeSha256(businessKey)
|
||||
));
|
||||
}
|
||||
|
||||
private DocumentStatus resolveStatus(Optional<TedDailyPackage> packageEntity) {
|
||||
if (packageEntity.isEmpty()) {
|
||||
return DocumentStatus.RECEIVED;
|
||||
}
|
||||
return switch (packageEntity.get().getDownloadStatus()) {
|
||||
case COMPLETED -> DocumentStatus.CLASSIFIED;
|
||||
case FAILED, NOT_FOUND -> DocumentStatus.FAILED;
|
||||
default -> DocumentStatus.RECEIVED;
|
||||
};
|
||||
}
|
||||
|
||||
private String buildBusinessKey(String packageIdentifier) {
|
||||
return "TED_PACKAGE:" + packageIdentifier;
|
||||
}
|
||||
|
||||
private String buildTitle(String packageIdentifier) {
|
||||
return packageIdentifier + ".tar.gz";
|
||||
}
|
||||
|
||||
private String buildSummary(String packageIdentifier, TedDailyPackage packageEntity) {
|
||||
if (packageEntity == null) {
|
||||
return "TED daily package " + packageIdentifier;
|
||||
}
|
||||
return "TED daily package %s (status=%s, xmlFileCount=%s, processedCount=%s, failedCount=%s, downloadedAt=%s)".formatted(
|
||||
packageIdentifier,
|
||||
packageEntity.getDownloadStatus(),
|
||||
packageEntity.getXmlFileCount(),
|
||||
packageEntity.getProcessedCount(),
|
||||
packageEntity.getFailedCount(),
|
||||
formatTimestamp(packageEntity.getDownloadedAt())
|
||||
);
|
||||
}
|
||||
|
||||
private String formatTimestamp(OffsetDateTime value) {
|
||||
return value == null ? null : value.toString();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
package at.procon.dip.domain.ted.service;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Resolves a TED daily package identifier (YYYYSSSSS) from legacy source metadata.
|
||||
*/
|
||||
@Component
|
||||
public class TedPackageIdentifierResolver {
|
||||
|
||||
private static final Pattern PACKAGE_IDENTIFIER_PATTERN = Pattern.compile("(?<!\\d)(20\\d{7})(?!\\d)");
|
||||
|
||||
public Optional<String> resolveFromSourceMetadata(String sourcePath, String sourceFilename) {
|
||||
return resolve(sourcePath).or(() -> resolve(sourceFilename));
|
||||
}
|
||||
|
||||
public Optional<String> resolve(String value) {
|
||||
if (!StringUtils.hasText(value)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
Matcher matcher = PACKAGE_IDENTIFIER_PATTERN.matcher(value);
|
||||
if (matcher.find()) {
|
||||
return Optional.of(matcher.group(1));
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
@ -1,9 +1,9 @@
|
||||
package at.procon.ted.repository;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.Instant;
|
||||
import java.util.UUID;
|
||||
|
||||
public interface LegacyTedMigrationCursor {
|
||||
UUID getId();
|
||||
OffsetDateTime getCreatedAt();
|
||||
Instant getCreatedAt();
|
||||
}
|
||||
|
||||
@ -0,0 +1,95 @@
|
||||
-- Backfill for legacy TED notices:
--   1. create one DOC.doc_document of type TED_PACKAGE per package identifier found in the
--      legacy source metadata,
--   2. store the package identifier on TED.ted_notice_projection,
--   3. link every notice document to its package document with a CONTAINS relation.
-- Every step is guarded (NOT EXISTS / IS DISTINCT FROM), so rows that are already present or
-- already up to date are not touched again.
--
-- Package identifier extraction mirrors the Java TedPackageIdentifierResolver:
--   * nine digits starting with "20" that are not adjacent to another digit, and
--   * source_path is tried first; source_filename is used whenever the path yields no match
--     (not only when the path is NULL).
-- The lookbehind constraint (?<!...) requires PostgreSQL 9.6 or later.
--
-- NOTE(review): the Java TedPackageDocumentService uses business key 'TED_PACKAGE:<id>' and title
-- '<id>.tar.gz', while this script uses 'TED:package:<id>' and 'TED Daily Package <id>'.
-- Confirm which form is canonical; with different keys the two paths create separate documents.
SET search_path TO TED, DOC, public;

-- Step 1: create missing package documents.
WITH legacy_package_map AS (
    SELECT
        d.id AS legacy_procurement_document_id,
        p.document_id AS child_document_id,
        coalesce(
            substring(d.source_path from '(?<![0-9])(20[0-9]{7})(?![0-9])'),
            substring(d.source_filename from '(?<![0-9])(20[0-9]{7})(?![0-9])')
        ) AS package_identifier
    FROM TED.procurement_document d
    JOIN TED.ted_notice_projection p
      ON p.legacy_procurement_document_id = d.id
), package_documents AS (
    SELECT DISTINCT
        l.package_identifier,
        'TED:package:' || l.package_identifier AS business_key,
        encode(digest('TED:package:' || l.package_identifier, 'sha256'), 'hex') AS dedup_hash
    FROM legacy_package_map l
    WHERE l.package_identifier IS NOT NULL
)
INSERT INTO DOC.doc_document (
    id, visibility, document_type, document_family, status, title, summary, mime_type, business_key, dedup_hash
)
SELECT
    gen_random_uuid(),
    'PUBLIC',
    'TED_PACKAGE',
    'PROCUREMENT',
    -- No package row, or a NULL download status, falls through to RECEIVED.
    CASE
        WHEN pkg.download_status = 'COMPLETED' THEN 'CLASSIFIED'
        WHEN pkg.download_status IN ('FAILED', 'NOT_FOUND') THEN 'FAILED'
        ELSE 'RECEIVED'
    END,
    'TED Daily Package ' || pd.package_identifier,
    CASE
        WHEN pkg.package_identifier IS NULL THEN 'TED daily package ' || pd.package_identifier
        ELSE 'TED daily package ' || pd.package_identifier ||
             ' (status=' || coalesce(pkg.download_status::text, 'UNKNOWN') ||
             ', xmlFileCount=' || coalesce(pkg.xml_file_count::text, 'null') ||
             ', processedCount=' || coalesce(pkg.processed_count::text, 'null') ||
             ', failedCount=' || coalesce(pkg.failed_count::text, 'null') || ')'
    END,
    'application/gzip',
    pd.business_key,
    pd.dedup_hash
FROM package_documents pd
LEFT JOIN TED.ted_daily_package pkg
       ON pkg.package_identifier = pd.package_identifier
WHERE NOT EXISTS (
    SELECT 1
    FROM DOC.doc_document existing
    WHERE existing.business_key = pd.business_key
);

-- Step 2: store the package identifier on the notice projection where it is missing or different.
UPDATE TED.ted_notice_projection p
SET package_identifier = src.package_identifier
FROM (
    SELECT
        d.id AS legacy_procurement_document_id,
        coalesce(
            substring(d.source_path from '(?<![0-9])(20[0-9]{7})(?![0-9])'),
            substring(d.source_filename from '(?<![0-9])(20[0-9]{7})(?![0-9])')
        ) AS package_identifier
    FROM TED.procurement_document d
) src
WHERE p.legacy_procurement_document_id = src.legacy_procurement_document_id
  AND src.package_identifier IS NOT NULL
  AND p.package_identifier IS DISTINCT FROM src.package_identifier;

-- Step 3: link each notice document to its package document.
WITH legacy_package_map AS (
    SELECT
        p.document_id AS child_document_id,
        coalesce(
            substring(d.source_path from '(?<![0-9])(20[0-9]{7})(?![0-9])'),
            substring(d.source_filename from '(?<![0-9])(20[0-9]{7})(?![0-9])')
        ) AS package_identifier
    FROM TED.procurement_document d
    JOIN TED.ted_notice_projection p
      ON p.legacy_procurement_document_id = d.id
), package_documents AS (
    SELECT
        doc.id AS parent_document_id,
        -- The identifier is recovered from the business key written in step 1.
        substring(doc.business_key from '(20[0-9]{7})') AS package_identifier
    FROM DOC.doc_document doc
    WHERE doc.document_type = 'TED_PACKAGE'
      AND doc.business_key LIKE 'TED:package:%'
)
INSERT INTO DOC.doc_relation (
    id, parent_document_id, child_document_id, relation_type, sort_order, relation_metadata
)
SELECT
    gen_random_uuid(),
    pkg.parent_document_id,
    l.child_document_id,
    'CONTAINS',
    NULL,
    'packageIdentifier=' || l.package_identifier
FROM legacy_package_map l
JOIN package_documents pkg
  ON pkg.package_identifier = l.package_identifier
WHERE l.package_identifier IS NOT NULL
  AND NOT EXISTS (
    SELECT 1
    FROM DOC.doc_relation rel
    WHERE rel.parent_document_id = pkg.parent_document_id
      AND rel.child_document_id = l.child_document_id
      AND rel.relation_type = 'CONTAINS'
);
|
||||
Loading…
Reference in New Issue