From f9fa8aadf7f1e81db031a609f262b6762b691098 Mon Sep 17 00:00:00 2001
From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com>
Date: Thu, 16 Apr 2026 14:55:30 +0200
Subject: [PATCH] ted document import - fixed organisation duplication

---
Review revision of the submitted patch: explicit java.util imports instead of a
wildcard, unused arrayScore helper dropped, dedup keys namespaced, helpers documented.
The commit id above and the post-image blob id below are those of the original submission.

 .../service/TedNoticeProjectionService.java   | 69 ++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java
index 90eaac0..47206bb 100644
--- a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java
+++ b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java
@@ -14,13 +14,16 @@ import at.procon.dip.runtime.config.RuntimeMode;
 import at.procon.ted.model.entity.Organization;
 import at.procon.ted.model.entity.ProcurementDocument;
 import at.procon.ted.model.entity.ProcurementLot;
 import java.util.ArrayList;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.UUID;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StringUtils;
 
 /**
  * Phase 3 service that materializes TED-specific structured projections on top of the generic DOC document root.
@@ -146,14 +149,59 @@ public class TedNoticeProjectionService {
         lotRepository.saveAll(projectedLots);
     }
 
+    /**
+     * Picks which of two organization rows sharing a deduplication key is kept.
+     * Fields are not combined: the row with the higher {@link #completenessScore} wins,
+     * and {@code left} is kept on a tie.
+     */
+    private Organization mergeOrganization(Organization left, Organization right) {
+        return completenessScore(right) > completenessScore(left) ? right : left;
+    }
+
+    /** Counts how many of the text fields read below contain non-blank text on {@code organization}. */
+    private int completenessScore(Organization organization) {
+        int score = 0;
+        score += textScore(organization.getOrgReference());
+        score += textScore(organization.getRole());
+        score += textScore(organization.getName());
+        score += textScore(organization.getCompanyId());
+        score += textScore(organization.getCountryCode());
+        score += textScore(organization.getCity());
+        score += textScore(organization.getPostalCode());
+        score += textScore(organization.getStreetName());
+        score += textScore(organization.getNutsCode());
+        score += textScore(organization.getWebsiteUri());
+        score += textScore(organization.getEmail());
+        score += textScore(organization.getPhone());
+        return score;
+    }
+
+    /** Returns 1 when {@code value} contains non-whitespace text, otherwise 0. */
+    private int textScore(String value) {
+        return StringUtils.hasText(value) ? 1 : 0;
+    }
+
     private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) {
         organizationRepository.deleteByNoticeProjection_Id(projection.getId());
         if (legacyOrganizations == null || legacyOrganizations.isEmpty()) {
             return;
         }
 
-        List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>();
+        // Collapse rows that share a key; LinkedHashMap keeps first-seen order for the insert.
+        Map<String, Organization> byKey = new LinkedHashMap<>();
         for (Organization organization : legacyOrganizations) {
+            String key = organizationKey(organization, byKey.size());
+            byKey.merge(key, organization, this::mergeOrganization);
+        }
+
+        int duplicateCount = legacyOrganizations.size() - byKey.size();
+        if (duplicateCount > 0) {
+            log.warn("Collapsing {} duplicate TED organization rows for projection {} before insert",
+                    duplicateCount, projection.getId());
+        }
+
+        List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>();
+        for (Organization organization : byKey.values()) {
             projectedOrganizations.add(TedNoticeOrganization.builder()
                     .noticeProjection(projection)
                     .orgReference(organization.getOrgReference())
@@ -176,4 +224,23 @@ public class TedNoticeProjectionService {
     private String[] copyArray(String[] source) {
         return source == null ? null : source.clone();
     }
+
+    /**
+     * Builds the key under which an organization row is deduplicated: the trimmed org
+     * reference if it has text, else the company id, else the name. Each kind of key carries
+     * its own prefix so values of different kinds cannot collide. A row with none of the
+     * three gets a key derived from {@code ordinal}, which the caller must keep unique per row.
+     */
+    private String organizationKey(Organization organization, int ordinal) {
+        if (StringUtils.hasText(organization.getOrgReference())) {
+            return "ref:" + organization.getOrgReference().trim();
+        }
+        if (StringUtils.hasText(organization.getCompanyId())) {
+            return "company:" + organization.getCompanyId().trim();
+        }
+        if (StringUtils.hasText(organization.getName())) {
+            return "name:" + organization.getName().trim();
+        }
+        return "__row__" + ordinal;
+    }
 }