TED document import: fixed organisation duplication
This commit is contained in:
parent
b3fe628a02
commit
f9fa8aadf7
|
|
@ -14,13 +14,14 @@ import at.procon.dip.runtime.config.RuntimeMode;
|
|||
import at.procon.ted.model.entity.Organization;
|
||||
import at.procon.ted.model.entity.ProcurementDocument;
|
||||
import at.procon.ted.model.entity.ProcurementLot;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Phase 3 service that materializes TED-specific structured projections on top of the generic DOC document root.
|
||||
|
|
@ -146,14 +147,59 @@ public class TedNoticeProjectionService {
|
|||
lotRepository.saveAll(projectedLots);
|
||||
}
|
||||
|
||||
private Organization mergeOrganization(Organization left, Organization right) {
|
||||
if (completenessScore(right) > completenessScore(left)) {
|
||||
return right;
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
private int completenessScore(Organization organization) {
|
||||
int score = 0;
|
||||
score += textScore(organization.getOrgReference());
|
||||
score += textScore(organization.getRole());
|
||||
score += textScore(organization.getName());
|
||||
score += textScore(organization.getCompanyId());
|
||||
score += textScore(organization.getCountryCode());
|
||||
score += textScore(organization.getCity());
|
||||
score += textScore(organization.getPostalCode());
|
||||
score += textScore(organization.getStreetName());
|
||||
score += textScore(organization.getNutsCode());
|
||||
score += textScore(organization.getWebsiteUri());
|
||||
score += textScore(organization.getEmail());
|
||||
score += textScore(organization.getPhone());
|
||||
return score;
|
||||
}
|
||||
|
||||
private int textScore(String value) {
|
||||
return StringUtils.hasText(value) ? 1 : 0;
|
||||
}
|
||||
|
||||
private void replaceOrganizations(TedNoticeProjection projection, List<Organization> legacyOrganizations) {
|
||||
organizationRepository.deleteByNoticeProjection_Id(projection.getId());
|
||||
if (legacyOrganizations == null || legacyOrganizations.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>();
|
||||
Map<String, Organization> byReference = new LinkedHashMap<>();
|
||||
int duplicateCount = 0;
|
||||
for (Organization organization : legacyOrganizations) {
|
||||
String key = organizationKey(organization, byReference.size());
|
||||
Organization existing = byReference.get(key);
|
||||
if (existing == null) {
|
||||
byReference.put(key, organization);
|
||||
} else {
|
||||
duplicateCount++;
|
||||
byReference.put(key, mergeOrganization(existing, organization));
|
||||
}
|
||||
}
|
||||
|
||||
if (duplicateCount > 0) {
|
||||
log.warn("Collapsing {} duplicate TED organization rows for projection {} before insert", duplicateCount, projection.getId());
|
||||
}
|
||||
|
||||
List<TedNoticeOrganization> projectedOrganizations = new ArrayList<>();
|
||||
for (Organization organization : byReference.values()) {
|
||||
projectedOrganizations.add(TedNoticeOrganization.builder()
|
||||
.noticeProjection(projection)
|
||||
.orgReference(organization.getOrgReference())
|
||||
|
|
@ -176,4 +222,21 @@ public class TedNoticeProjectionService {
|
|||
private String[] copyArray(String[] source) {
|
||||
return source == null ? null : source.clone();
|
||||
}
|
||||
|
||||
private int arrayScore(String[] values) {
|
||||
return values != null && values.length > 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
private String organizationKey(Organization organization, int ordinal) {
|
||||
if (StringUtils.hasText(organization.getOrgReference())) {
|
||||
return organization.getOrgReference().trim();
|
||||
}
|
||||
if (StringUtils.hasText(organization.getCompanyId())) {
|
||||
return "company:" + organization.getCompanyId().trim();
|
||||
}
|
||||
if (StringUtils.hasText(organization.getName())) {
|
||||
return "name:" + organization.getName().trim();
|
||||
}
|
||||
return "__row__" + ordinal;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue