ted document import - fixed organisation duplication

master
trifonovt 3 days ago
parent b3fe628a02
commit f9fa8aadf7

@ -14,13 +14,14 @@ import at.procon.dip.runtime.config.RuntimeMode;
import at.procon.ted.model.entity.Organization; import at.procon.ted.model.entity.Organization;
import at.procon.ted.model.entity.ProcurementDocument; import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.ProcurementLot; import at.procon.ted.model.entity.ProcurementLot;
import java.util.ArrayList;
import java.util.List; import java.util.*;
import java.util.UUID;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;
/** /**
* Phase 3 service that materializes TED-specific structured projections on top of the generic DOC document root. * Phase 3 service that materializes TED-specific structured projections on top of the generic DOC document root.
@ -146,14 +147,59 @@ public class TedNoticeProjectionService {
lotRepository.saveAll(projectedLots); lotRepository.saveAll(projectedLots);
} }
/**
 * Picks which of two organization records to keep when both map to the same key.
 *
 * <p>No field-level merging happens here: one of the two arguments is returned as-is.
 * The choice is driven by {@code completenessScore}; {@code right} wins only when its
 * score is strictly higher, so on a tie {@code left} is kept.
 *
 * @param left  the record currently held
 * @param right the competing record
 * @return {@code right} if it scores strictly higher than {@code left}, otherwise {@code left}
 */
private Organization mergeOrganization(Organization left, Organization right) {
    final int candidateScore = completenessScore(right);
    final int currentScore = completenessScore(left);
    return candidateScore > currentScore ? right : left;
}
/**
 * Counts how many of the organization's text attributes are populated.
 *
 * <p>Twelve attributes are inspected (reference, role, name, company id, country code,
 * city, postal code, street name, NUTS code, website URI, email, phone). Each one adds
 * whatever {@code textScore} returns for it, so the result ranges from 0 to 12.
 *
 * @param organization the record to score; must not be {@code null}
 * @return the number of inspected attributes that contain text
 */
private int completenessScore(Organization organization) {
    // Same attributes, in the same order, as they are read from the record.
    final String[] scoredValues = {
        organization.getOrgReference(),
        organization.getRole(),
        organization.getName(),
        organization.getCompanyId(),
        organization.getCountryCode(),
        organization.getCity(),
        organization.getPostalCode(),
        organization.getStreetName(),
        organization.getNutsCode(),
        organization.getWebsiteUri(),
        organization.getEmail(),
        organization.getPhone()
    };

    int populated = 0;
    for (String candidate : scoredValues) {
        populated += textScore(candidate);
    }
    return populated;
}
/**
 * Converts a text value into a one-point-or-nothing score.
 *
 * @param value the text to test; may be {@code null}
 * @return {@code 1} when {@code StringUtils.hasText(value)} is true, otherwise {@code 0}
 */
private int textScore(String value) {
    if (StringUtils.hasText(value)) {
        return 1;
    }
    return 0;
}
private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) { private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) {
organizationRepository.deleteByNoticeProjection_Id(projection.getId()); organizationRepository.deleteByNoticeProjection_Id(projection.getId());
if (legacyOrganizations == null || legacyOrganizations.isEmpty()) { if (legacyOrganizations == null || legacyOrganizations.isEmpty()) {
return; return;
} }
List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>(); Map<String, Organization> byReference = new LinkedHashMap<>();
int duplicateCount = 0;
for (Organization organization : legacyOrganizations) { for (Organization organization : legacyOrganizations) {
String key = organizationKey(organization, byReference.size());
Organization existing = byReference.get(key);
if (existing == null) {
byReference.put(key, organization);
} else {
duplicateCount++;
byReference.put(key, mergeOrganization(existing, organization));
}
}
if (duplicateCount > 0) {
log.warn("Collapsing {} duplicate TED organization rows for projection {} before insert", duplicateCount, projection.getId());
}
List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>();
for (Organization organization : byReference.values()) {
projectedOrganizations.add(TedNoticeOrganization.builder() projectedOrganizations.add(TedNoticeOrganization.builder()
.noticeProjection(projection) .noticeProjection(projection)
.orgReference(organization.getOrgReference()) .orgReference(organization.getOrgReference())
@ -176,4 +222,21 @@ public class TedNoticeProjectionService {
private String[] copyArray(String[] source) { private String[] copyArray(String[] source) {
return source == null ? null : source.clone(); return source == null ? null : source.clone();
} }
/**
 * Converts an array into a one-point-or-nothing score based on whether it has elements.
 *
 * <p>Only the array length is considered; the elements themselves are not inspected.
 *
 * @param values the array to test; may be {@code null}
 * @return {@code 0} for a {@code null} or zero-length array, otherwise {@code 1}
 */
private int arrayScore(String[] values) {
    if (values == null || values.length == 0) {
        return 0;
    }
    return 1;
}
/**
 * Derives the key under which an organization record is grouped.
 *
 * <p>The first attribute that contains text is used, in this order:
 * <ol>
 *   <li>the organization reference, trimmed and used without a prefix;</li>
 *   <li>the company id, trimmed and prefixed with {@code "company:"};</li>
 *   <li>the name, trimmed and prefixed with {@code "name:"};</li>
 *   <li>otherwise {@code "__row__"} followed by {@code ordinal}.</li>
 * </ol>
 *
 * <p>NOTE(review): the reference key carries no prefix, so a reference whose text happens
 * to start with {@code "company:"}, {@code "name:"} or {@code "__row__"} would share a
 * key space with the fallbacks. Confirm that this cannot occur in the source data.
 *
 * @param organization the record to derive a key for; must not be {@code null}
 * @param ordinal      number appended to the fallback key when no attribute has text
 * @return the grouping key, never {@code null}
 */
private String organizationKey(Organization organization, int ordinal) {
    final String reference = organization.getOrgReference();
    if (StringUtils.hasText(reference)) {
        return reference.trim();
    }

    final String companyId = organization.getCompanyId();
    if (StringUtils.hasText(companyId)) {
        return "company:" + companyId.trim();
    }

    final String name = organization.getName();
    if (StringUtils.hasText(name)) {
        return "name:" + name.trim();
    }

    // Nothing identifying is available: fall back to a key built from the supplied ordinal.
    return "__row__" + ordinal;
}
} }

Loading…
Cancel
Save