Fix notice lot duplication; change embedding job sort order

This commit is contained in:
trifonovt 2026-04-22 19:17:06 +02:00
parent 12f0b0604b
commit 66fb266dec
2 changed files with 79 additions and 32 deletions

View File

@ -122,12 +122,34 @@ public class TedNoticeProjectionService {
private void replaceLots(TedNoticeProjection projection, List<ProcurementLot> legacyLots) { private void replaceLots(TedNoticeProjection projection, List<ProcurementLot> legacyLots) {
lotRepository.deleteByNoticeProjection_Id(projection.getId()); lotRepository.deleteByNoticeProjection_Id(projection.getId());
lotRepository.flush();
if (legacyLots == null || legacyLots.isEmpty()) { if (legacyLots == null || legacyLots.isEmpty()) {
return; return;
} }
List<TedNoticeLot> projectedLots = new ArrayList<>(); Map<String, ProcurementLot> byLotId = new LinkedHashMap<>();
int duplicateCount = 0;
for (ProcurementLot lot : legacyLots) { for (ProcurementLot lot : legacyLots) {
String key = StringUtils.hasText(lot.getLotId()) ? lot.getLotId().trim() : lot.getInternalId();
if (!StringUtils.hasText(key)) {
key = "__row__" + byLotId.size();
}
ProcurementLot existing = byLotId.get(key);
if (existing == null) {
byLotId.put(key, lot);
} else {
duplicateCount++;
byLotId.put(key, mergeLot(existing, lot));
}
}
if (duplicateCount > 0) {
log.warn("Collapsing {} duplicate TED lot rows for projection {} before insert", duplicateCount, projection.getId());
}
List<TedNoticeLot> projectedLots = new ArrayList<>();
for (ProcurementLot lot : byLotId.values()) {
projectedLots.add(TedNoticeLot.builder() projectedLots.add(TedNoticeLot.builder()
.noticeProjection(projection) .noticeProjection(projection)
.lotId(lot.getLotId()) .lotId(lot.getLotId())
@ -147,36 +169,9 @@ public class TedNoticeProjectionService {
lotRepository.saveAll(projectedLots); lotRepository.saveAll(projectedLots);
} }
/**
 * Chooses which of two organization records to keep.
 *
 * @param left  the record currently held
 * @param right the competing record
 * @return {@code right} only when its completeness score is strictly higher
 *         than that of {@code left}; on a tie {@code left} is kept
 */
private Organization mergeOrganization(Organization left, Organization right) {
    return completenessScore(right) > completenessScore(left) ? right : left;
}
/**
 * Scores an organization by summing {@code textScore} over its twelve
 * text attributes (reference, role, name, company id, address parts,
 * NUTS code and contact details).
 *
 * @param organization the organization to score
 * @return the sum of the per-field text scores
 */
private int completenessScore(Organization organization) {
    return textScore(organization.getOrgReference())
            + textScore(organization.getRole())
            + textScore(organization.getName())
            + textScore(organization.getCompanyId())
            + textScore(organization.getCountryCode())
            + textScore(organization.getCity())
            + textScore(organization.getPostalCode())
            + textScore(organization.getStreetName())
            + textScore(organization.getNutsCode())
            + textScore(organization.getWebsiteUri())
            + textScore(organization.getEmail())
            + textScore(organization.getPhone());
}
/**
 * Converts a text value into a 0/1 contribution for completeness scoring.
 *
 * @param value the text to inspect
 * @return 1 when {@code StringUtils.hasText(value)} is true, otherwise 0
 */
private int textScore(String value) {
    if (StringUtils.hasText(value)) {
        return 1;
    }
    return 0;
}
private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) { private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) {
organizationRepository.deleteByNoticeProjection_Id(projection.getId()); organizationRepository.deleteByNoticeProjection_Id(projection.getId());
organizationRepository.flush();
if (legacyOrganizations == null || legacyOrganizations.isEmpty()) { if (legacyOrganizations == null || legacyOrganizations.isEmpty()) {
return; return;
} }
@ -219,8 +214,56 @@ public class TedNoticeProjectionService {
organizationRepository.saveAll(projectedOrganizations); organizationRepository.saveAll(projectedOrganizations);
} }
private String[] copyArray(String[] source) { private ProcurementLot mergeLot(ProcurementLot left, ProcurementLot right) {
return source == null ? null : source.clone(); if (completenessScore(right) > completenessScore(left)) {
return right;
}
return left;
}
/**
 * Resolves two competing organization records to a single one.
 * No fields are combined: one of the two inputs is returned as-is.
 *
 * @param left  the record seen first
 * @param right the record seen later
 * @return {@code right} if it scores strictly higher in
 *         {@code completenessScore}, else {@code left}
 */
private Organization mergeOrganization(Organization left, Organization right) {
    boolean rightIsMoreComplete = completenessScore(right) > completenessScore(left);
    if (rightIsMoreComplete) {
        return right;
    }
    return left;
}
/**
 * Scores a procurement lot by how many of its attributes are populated.
 * Text attributes are scored through {@code textScore}, the CPV and NUTS
 * code arrays through {@code arrayScore}, and each remaining attribute
 * adds 1 when it is non-null.
 *
 * @param lot the lot to score
 * @return the accumulated score
 */
private int completenessScore(ProcurementLot lot) {
    // Identifier and descriptive text attributes.
    int total = textScore(lot.getLotId())
            + textScore(lot.getInternalId())
            + textScore(lot.getTitle())
            + textScore(lot.getDescription());
    // Code arrays are weighted by arrayScore.
    total += arrayScore(lot.getCpvCodes()) + arrayScore(lot.getNutsCodes());
    if (lot.getEstimatedValue() != null) {
        total++;
    }
    total += textScore(lot.getEstimatedValueCurrency());
    if (lot.getDurationValue() != null) {
        total++;
    }
    total += textScore(lot.getDurationUnit());
    if (lot.getSubmissionDeadline() != null) {
        total++;
    }
    if (lot.getEuFunded() != null) {
        total++;
    }
    return total;
}
/**
 * Scores an organization by passing each of its twelve text attributes
 * through {@code textScore} and adding up the results.
 *
 * @param organization the organization to score
 * @return the accumulated score
 */
private int completenessScore(Organization organization) {
    String[] attributes = {
        organization.getOrgReference(),
        organization.getRole(),
        organization.getName(),
        organization.getCompanyId(),
        organization.getCountryCode(),
        organization.getCity(),
        organization.getPostalCode(),
        organization.getStreetName(),
        organization.getNutsCode(),
        organization.getWebsiteUri(),
        organization.getEmail(),
        organization.getPhone()
    };
    int total = 0;
    for (String attribute : attributes) {
        total += textScore(attribute);
    }
    return total;
}
private int textScore(String value) {
return StringUtils.hasText(value) ? 1 : 0;
} }
private int arrayScore(String[] values) { private int arrayScore(String[] values) {
@ -239,4 +282,8 @@ public class TedNoticeProjectionService {
} }
return "__row__" + ordinal; return "__row__" + ordinal;
} }
/**
 * Returns a shallow copy of the given array so the caller does not share
 * the source array instance.
 *
 * @param source the array to copy; may be {@code null}
 * @return a clone of {@code source}, or {@code null} when {@code source} is {@code null}
 */
private String[] copyArray(String[] source) {
    if (source == null) {
        return null;
    }
    return source.clone();
}
} }

View File

@ -25,7 +25,7 @@ public interface EmbeddingJobRepository extends JpaRepository<EmbeddingJob, UUID
FROM DOC.doc_embedding_job j FROM DOC.doc_embedding_job j
WHERE j.status IN ('PENDING', 'RETRY_SCHEDULED') WHERE j.status IN ('PENDING', 'RETRY_SCHEDULED')
AND (j.next_retry_at IS NULL OR j.next_retry_at <= :now) AND (j.next_retry_at IS NULL OR j.next_retry_at <= :now)
ORDER BY j.priority DESC, j.created_at DESC ORDER BY j.priority DESC, j.updated_at DESC
FOR UPDATE SKIP LOCKED FOR UPDATE SKIP LOCKED
LIMIT :limit LIMIT :limit
""", nativeQuery = true) """, nativeQuery = true)