From 66fb266dec0f442ee10cd7719176a83160cf4058 Mon Sep 17 00:00:00 2001 From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com> Date: Wed, 22 Apr 2026 19:17:06 +0200 Subject: [PATCH] notice lot duplication fix, changed embedded job sort order --- .../service/TedNoticeProjectionService.java | 109 +++++++++++++----- .../repository/EmbeddingJobRepository.java | 2 +- 2 files changed, 79 insertions(+), 32 deletions(-) diff --git a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java index 47206bb..6583cb1 100644 --- a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java +++ b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java @@ -122,12 +122,34 @@ public class TedNoticeProjectionService { private void replaceLots(TedNoticeProjection projection, List legacyLots) { lotRepository.deleteByNoticeProjection_Id(projection.getId()); + lotRepository.flush(); + if (legacyLots == null || legacyLots.isEmpty()) { return; } - List projectedLots = new ArrayList<>(); + Map byLotId = new LinkedHashMap<>(); + int duplicateCount = 0; for (ProcurementLot lot : legacyLots) { + String key = StringUtils.hasText(lot.getLotId()) ? lot.getLotId().trim() : lot.getInternalId(); + if (!StringUtils.hasText(key)) { + key = "__row__" + byLotId.size(); + } + ProcurementLot existing = byLotId.get(key); + if (existing == null) { + byLotId.put(key, lot); + } else { + duplicateCount++; + byLotId.put(key, mergeLot(existing, lot)); + } + } + + if (duplicateCount > 0) { + log.warn("Collapsing {} duplicate TED lot rows for projection {} before insert", duplicateCount, projection.getId()); + } + + List projectedLots = new ArrayList<>(); + for (ProcurementLot lot : byLotId.values()) { projectedLots.add(TedNoticeLot.builder() .noticeProjection(projection) .lotId(lot.getLotId()) @@ -147,36 +169,9 @@ public class TedNoticeProjectionService { lotRepository.saveAll(projectedLots); } - private Organization mergeOrganization(Organization left, Organization right) { - if (completenessScore(right) > completenessScore(left)) { - return right; - } - return left; - } - - private int completenessScore(Organization organization) { - int score = 0; - score += textScore(organization.getOrgReference()); - score += textScore(organization.getRole()); - score += textScore(organization.getName()); - score += textScore(organization.getCompanyId()); - score += textScore(organization.getCountryCode()); - score += textScore(organization.getCity()); - score += textScore(organization.getPostalCode()); - score += textScore(organization.getStreetName()); - score += textScore(organization.getNutsCode()); - score += textScore(organization.getWebsiteUri()); - score += textScore(organization.getEmail()); - score += textScore(organization.getPhone()); - return score; - } - - private int textScore(String value) { - return StringUtils.hasText(value) ? 1 : 0; - } - private void replaceOrganizations(TedNoticeProjection projection, List legacyOrganizations) { organizationRepository.deleteByNoticeProjection_Id(projection.getId()); + organizationRepository.flush(); if (legacyOrganizations == null || legacyOrganizations.isEmpty()) { return; } @@ -219,8 +214,56 @@ public class TedNoticeProjectionService { organizationRepository.saveAll(projectedOrganizations); } - private String[] copyArray(String[] source) { - return source == null ? null : source.clone(); + private ProcurementLot mergeLot(ProcurementLot left, ProcurementLot right) { + if (completenessScore(right) > completenessScore(left)) { + return right; + } + return left; + } + + private Organization mergeOrganization(Organization left, Organization right) { + if (completenessScore(right) > completenessScore(left)) { + return right; + } + return left; + } + + private int completenessScore(ProcurementLot lot) { + int score = 0; + score += textScore(lot.getLotId()); + score += textScore(lot.getInternalId()); + score += textScore(lot.getTitle()); + score += textScore(lot.getDescription()); + score += arrayScore(lot.getCpvCodes()); + score += arrayScore(lot.getNutsCodes()); + score += lot.getEstimatedValue() != null ? 1 : 0; + score += textScore(lot.getEstimatedValueCurrency()); + score += lot.getDurationValue() != null ? 1 : 0; + score += textScore(lot.getDurationUnit()); + score += lot.getSubmissionDeadline() != null ? 1 : 0; + score += lot.getEuFunded() != null ? 1 : 0; + return score; + } + + private int completenessScore(Organization organization) { + int score = 0; + score += textScore(organization.getOrgReference()); + score += textScore(organization.getRole()); + score += textScore(organization.getName()); + score += textScore(organization.getCompanyId()); + score += textScore(organization.getCountryCode()); + score += textScore(organization.getCity()); + score += textScore(organization.getPostalCode()); + score += textScore(organization.getStreetName()); + score += textScore(organization.getNutsCode()); + score += textScore(organization.getWebsiteUri()); + score += textScore(organization.getEmail()); + score += textScore(organization.getPhone()); + return score; + } + + private int textScore(String value) { + return StringUtils.hasText(value) ? 1 : 0; } private int arrayScore(String[] values) { @@ -239,4 +282,8 @@ public class TedNoticeProjectionService { } return "__row__" + ordinal; } + + private String[] copyArray(String[] source) { + return source == null ? null : source.clone(); + } } diff --git a/src/main/java/at/procon/dip/embedding/job/repository/EmbeddingJobRepository.java b/src/main/java/at/procon/dip/embedding/job/repository/EmbeddingJobRepository.java index 1e33f8a..6e75652 100644 --- a/src/main/java/at/procon/dip/embedding/job/repository/EmbeddingJobRepository.java +++ b/src/main/java/at/procon/dip/embedding/job/repository/EmbeddingJobRepository.java @@ -25,7 +25,7 @@ public interface EmbeddingJobRepository extends JpaRepository