ted legacy data migration, repair fixes
parent
61d163f8fe
commit
28c7854ead
@ -0,0 +1,113 @@
|
|||||||
|
package at.procon.dip.domain.ted.service;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.access.DocumentVisibility;
|
||||||
|
import at.procon.dip.domain.document.DocumentFamily;
|
||||||
|
import at.procon.dip.domain.document.DocumentStatus;
|
||||||
|
import at.procon.dip.domain.document.DocumentType;
|
||||||
|
import at.procon.dip.domain.document.entity.Document;
|
||||||
|
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||||
|
import at.procon.dip.domain.document.service.DocumentService;
|
||||||
|
import at.procon.dip.domain.document.service.command.CreateDocumentCommand;
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
|
import at.procon.ted.model.entity.ProcurementDocument;
|
||||||
|
import at.procon.ted.model.entity.TedDailyPackage;
|
||||||
|
import at.procon.ted.repository.TedDailyPackageRepository;
|
||||||
|
import at.procon.ted.util.HashUtils;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.Optional;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class TedPackageDocumentService {
|
||||||
|
|
||||||
|
private static final String PACKAGE_MIME_TYPE = "application/gzip";
|
||||||
|
|
||||||
|
private final TedPackageIdentifierResolver packageIdentifierResolver;
|
||||||
|
private final TedDailyPackageRepository tedDailyPackageRepository;
|
||||||
|
private final DocumentRepository documentRepository;
|
||||||
|
private final DocumentService documentService;
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public Optional<Document> ensurePackageDocumentForLegacyNotice(ProcurementDocument legacyDocument) {
|
||||||
|
return packageIdentifierResolver.resolveFromSourceMetadata(legacyDocument.getSourcePath(), legacyDocument.getSourceFilename())
|
||||||
|
.map(this::ensurePackageDocument);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public Document ensurePackageDocument(String packageIdentifier) {
|
||||||
|
String businessKey = buildBusinessKey(packageIdentifier);
|
||||||
|
Document document = documentRepository.findByBusinessKey(businessKey)
|
||||||
|
.orElseGet(() -> createPackageDocument(packageIdentifier));
|
||||||
|
|
||||||
|
Optional<TedDailyPackage> packageEntity = tedDailyPackageRepository.findByPackageIdentifier(packageIdentifier);
|
||||||
|
document.setVisibility(DocumentVisibility.PUBLIC);
|
||||||
|
document.setDocumentType(DocumentType.TED_PACKAGE);
|
||||||
|
document.setDocumentFamily(DocumentFamily.PROCUREMENT);
|
||||||
|
document.setStatus(resolveStatus(packageEntity));
|
||||||
|
document.setTitle(buildTitle(packageIdentifier));
|
||||||
|
document.setSummary(buildSummary(packageIdentifier, packageEntity.orElse(null)));
|
||||||
|
document.setMimeType(PACKAGE_MIME_TYPE);
|
||||||
|
document.setBusinessKey(businessKey);
|
||||||
|
document.setDedupHash(HashUtils.computeSha256(businessKey));
|
||||||
|
return documentService.save(document);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Document createPackageDocument(String packageIdentifier) {
|
||||||
|
String businessKey = buildBusinessKey(packageIdentifier);
|
||||||
|
return documentService.create(new CreateDocumentCommand(
|
||||||
|
null,
|
||||||
|
DocumentVisibility.PUBLIC,
|
||||||
|
DocumentType.TED_PACKAGE,
|
||||||
|
DocumentFamily.PROCUREMENT,
|
||||||
|
DocumentStatus.RECEIVED,
|
||||||
|
buildTitle(packageIdentifier),
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
PACKAGE_MIME_TYPE,
|
||||||
|
businessKey,
|
||||||
|
HashUtils.computeSha256(businessKey)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
private DocumentStatus resolveStatus(Optional<TedDailyPackage> packageEntity) {
|
||||||
|
if (packageEntity.isEmpty()) {
|
||||||
|
return DocumentStatus.RECEIVED;
|
||||||
|
}
|
||||||
|
return switch (packageEntity.get().getDownloadStatus()) {
|
||||||
|
case COMPLETED -> DocumentStatus.CLASSIFIED;
|
||||||
|
case FAILED, NOT_FOUND -> DocumentStatus.FAILED;
|
||||||
|
default -> DocumentStatus.RECEIVED;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildBusinessKey(String packageIdentifier) {
|
||||||
|
return "TED_PACKAGE:" + packageIdentifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildTitle(String packageIdentifier) {
|
||||||
|
return packageIdentifier + ".tar.gz";
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildSummary(String packageIdentifier, TedDailyPackage packageEntity) {
|
||||||
|
if (packageEntity == null) {
|
||||||
|
return "TED daily package " + packageIdentifier;
|
||||||
|
}
|
||||||
|
return "TED daily package %s (status=%s, xmlFileCount=%s, processedCount=%s, failedCount=%s, downloadedAt=%s)".formatted(
|
||||||
|
packageIdentifier,
|
||||||
|
packageEntity.getDownloadStatus(),
|
||||||
|
packageEntity.getXmlFileCount(),
|
||||||
|
packageEntity.getProcessedCount(),
|
||||||
|
packageEntity.getFailedCount(),
|
||||||
|
formatTimestamp(packageEntity.getDownloadedAt())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String formatTimestamp(OffsetDateTime value) {
|
||||||
|
return value == null ? null : value.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,31 @@
|
|||||||
|
package at.procon.dip.domain.ted.service;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolves a TED daily package identifier (YYYYSSSSS) from legacy source metadata.
|
||||||
|
*/
|
||||||
|
@Component
|
||||||
|
public class TedPackageIdentifierResolver {
|
||||||
|
|
||||||
|
private static final Pattern PACKAGE_IDENTIFIER_PATTERN = Pattern.compile("(?<!\\d)(20\\d{7})(?!\\d)");
|
||||||
|
|
||||||
|
public Optional<String> resolveFromSourceMetadata(String sourcePath, String sourceFilename) {
|
||||||
|
return resolve(sourcePath).or(() -> resolve(sourceFilename));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<String> resolve(String value) {
|
||||||
|
if (!StringUtils.hasText(value)) {
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
Matcher matcher = PACKAGE_IDENTIFIER_PATTERN.matcher(value);
|
||||||
|
if (matcher.find()) {
|
||||||
|
return Optional.of(matcher.group(1));
|
||||||
|
}
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,9 +1,9 @@
|
|||||||
package at.procon.ted.repository;
|
package at.procon.ted.repository;
|
||||||
|
|
||||||
import java.time.OffsetDateTime;
|
import java.time.Instant;
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
// Diff hunk: each pair of lines below shows the previous version followed by its replacement.
// The only change is the return type of getCreatedAt(): OffsetDateTime before, Instant after.
public interface LegacyTedMigrationCursor {
|
/**
 * Read-only view exposing a row identifier and its creation timestamp.
 * NOTE(review): presumably the keyset cursor for the legacy TED migration query — confirm
 * that the repository query backing this view returns a value convertible to Instant.
 */
public interface LegacyTedMigrationCursor {
|
||||||
UUID getId();
|
UUID getId();
|
||||||
OffsetDateTime getCreatedAt();
|
Instant getCreatedAt();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,95 @@
|
|||||||
|
SET search_path TO TED, DOC, public;

-- Step 1: create one DOC.doc_document row of type TED_PACKAGE for every distinct package
-- identifier referenced by an already projected legacy notice. Identifiers whose business key
-- already exists in DOC.doc_document are skipped, so the statement can be re-run.
--
-- NOTE(review): TedPackageDocumentService builds business keys as 'TED_PACKAGE:<id>' and titles
-- as '<id>.tar.gz', while this script writes 'TED:package:<id>' / 'TED Daily Package <id>'.
-- Confirm which format is authoritative; as written the service will not find these rows.
WITH legacy_package_map AS (
    SELECT
        d.id AS legacy_procurement_document_id,
        p.document_id AS child_document_id,
        -- Same rule as TedPackageIdentifierResolver: take the first 9-digit "20xxxxxxx" run that
        -- is not embedded in a longer number from source_path, and fall back to source_filename
        -- when the path is NULL, blank or simply contains no identifier.
        coalesce(
            substring(d.source_path from '(?<![0-9])(20[0-9]{7})(?![0-9])'),
            substring(d.source_filename from '(?<![0-9])(20[0-9]{7})(?![0-9])')
        ) AS package_identifier
    FROM TED.procurement_document d
    JOIN TED.ted_notice_projection p
      ON p.legacy_procurement_document_id = d.id
), package_documents AS (
    SELECT DISTINCT
        l.package_identifier,
        'TED:package:' || l.package_identifier AS business_key,
        encode(digest('TED:package:' || l.package_identifier, 'sha256'), 'hex') AS dedup_hash
    FROM legacy_package_map l
    WHERE l.package_identifier IS NOT NULL
)
INSERT INTO DOC.doc_document (
    id, visibility, document_type, document_family, status, title, summary, mime_type, business_key, dedup_hash
)
SELECT
    gen_random_uuid(),
    'PUBLIC',
    'TED_PACKAGE',
    'PROCUREMENT',
    -- A missing package row or a NULL download_status falls through to 'RECEIVED'.
    CASE
        WHEN pkg.download_status = 'COMPLETED' THEN 'CLASSIFIED'
        WHEN pkg.download_status IN ('FAILED', 'NOT_FOUND') THEN 'FAILED'
        ELSE 'RECEIVED'
    END,
    'TED Daily Package ' || pd.package_identifier,
    -- pkg.package_identifier IS NULL means the LEFT JOIN found no ted_daily_package row.
    CASE
        WHEN pkg.package_identifier IS NULL THEN 'TED daily package ' || pd.package_identifier
        ELSE 'TED daily package ' || pd.package_identifier ||
             ' (status=' || coalesce(pkg.download_status::text, 'UNKNOWN') ||
             ', xmlFileCount=' || coalesce(pkg.xml_file_count::text, 'null') ||
             ', processedCount=' || coalesce(pkg.processed_count::text, 'null') ||
             ', failedCount=' || coalesce(pkg.failed_count::text, 'null') || ')'
    END,
    'application/gzip',
    pd.business_key,
    pd.dedup_hash
FROM package_documents pd
LEFT JOIN TED.ted_daily_package pkg
       ON pkg.package_identifier = pd.package_identifier
WHERE NOT EXISTS (
    SELECT 1
    FROM DOC.doc_document existing
    WHERE existing.business_key = pd.business_key
);
|
||||||
|
|
||||||
|
-- Step 2: store the package identifier on every notice projection whose legacy document reveals
-- one. Rows that already carry the same value are left untouched (IS DISTINCT FROM is NULL-safe).
UPDATE TED.ted_notice_projection p
SET package_identifier = src.package_identifier
FROM (
    SELECT
        d.id AS legacy_procurement_document_id,
        -- Same rule as TedPackageIdentifierResolver: first 9-digit "20xxxxxxx" run not embedded
        -- in a longer number, taken from source_path, else from source_filename.
        coalesce(
            substring(d.source_path from '(?<![0-9])(20[0-9]{7})(?![0-9])'),
            substring(d.source_filename from '(?<![0-9])(20[0-9]{7})(?![0-9])')
        ) AS package_identifier
    FROM TED.procurement_document d
) src
WHERE p.legacy_procurement_document_id = src.legacy_procurement_document_id
  AND src.package_identifier IS NOT NULL
  AND p.package_identifier IS DISTINCT FROM src.package_identifier;
|
||||||
|
|
||||||
|
-- Step 3: link every projected legacy notice document to its package document with a
-- 'CONTAINS' relation (package = parent, notice = child). Existing relations are skipped, so
-- the statement can be re-run.
--
-- NOTE(review): only package documents with a 'TED:package:%' business key are considered;
-- TedPackageDocumentService creates keys of the form 'TED_PACKAGE:<id>', which this filter
-- does not match — confirm which format is authoritative.
WITH legacy_package_map AS (
    SELECT
        p.document_id AS child_document_id,
        -- Same rule as TedPackageIdentifierResolver: first 9-digit "20xxxxxxx" run not embedded
        -- in a longer number, taken from source_path, else from source_filename.
        coalesce(
            substring(d.source_path from '(?<![0-9])(20[0-9]{7})(?![0-9])'),
            substring(d.source_filename from '(?<![0-9])(20[0-9]{7})(?![0-9])')
        ) AS package_identifier
    FROM TED.procurement_document d
    JOIN TED.ted_notice_projection p
      ON p.legacy_procurement_document_id = d.id
), package_documents AS (
    SELECT
        doc.id AS parent_document_id,
        -- The identifier is recovered from the business key rather than stored separately.
        substring(doc.business_key from '(20[0-9]{7})') AS package_identifier
    FROM DOC.doc_document doc
    WHERE doc.document_type = 'TED_PACKAGE'
      AND doc.business_key LIKE 'TED:package:%'
)
INSERT INTO DOC.doc_relation (
    id, parent_document_id, child_document_id, relation_type, sort_order, relation_metadata
)
SELECT
    gen_random_uuid(),
    pkg.parent_document_id,
    l.child_document_id,
    'CONTAINS',
    NULL,
    'packageIdentifier=' || l.package_identifier
FROM legacy_package_map l
-- The equality join drops rows whose package_identifier is NULL, so no extra filter is needed.
JOIN package_documents pkg
  ON pkg.package_identifier = l.package_identifier
WHERE NOT EXISTS (
    SELECT 1
    FROM DOC.doc_relation rel
    WHERE rel.parent_document_id = pkg.parent_document_id
      AND rel.child_document_id = l.child_document_id
      AND rel.relation_type = 'CONTAINS'
);
|
||||||
Loading…
Reference in New Issue