From 1ba8cccb62b3d69c293ff2b48359b239302ac345 Mon Sep 17 00:00:00 2001
From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:46:08 +0100
Subject: [PATCH] Refactor phases 4.1

---
 ...cMimeAndExtensionDocumentTypeDetector.java | 45 ++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/src/main/java/at/procon/dip/classification/detector/BasicMimeAndExtensionDocumentTypeDetector.java b/src/main/java/at/procon/dip/classification/detector/BasicMimeAndExtensionDocumentTypeDetector.java
index 743b6c7..32df2ec 100644
--- a/src/main/java/at/procon/dip/classification/detector/BasicMimeAndExtensionDocumentTypeDetector.java
+++ b/src/main/java/at/procon/dip/classification/detector/BasicMimeAndExtensionDocumentTypeDetector.java
@@ -26,9 +26,21 @@ public class BasicMimeAndExtensionDocumentTypeDetector implements DocumentTypeDe
     @Override
     public DetectionResult detect(SourceDescriptor sourceDescriptor) {
         String normalizedMediaType = DocumentImportSupport.normalizeMediaType(sourceDescriptor.mediaType());
+
+        if (sourceDescriptor.sourceType() == at.procon.dip.domain.document.SourceType.TED_PACKAGE) {
+            Map attributes = new HashMap<>();
+            attributes.put("sourceType", sourceDescriptor.sourceType().name());
+            if (StringUtils.hasText(sourceDescriptor.fileName())) {
+                attributes.put("fileName", sourceDescriptor.fileName());
+            }
+            return new DetectionResult(DocumentType.TED_PACKAGE, DocumentFamily.PROCUREMENT,
+                    normalizedMediaType != null ? normalizedMediaType : "application/gzip", null, attributes);
+        }
+
+        DocumentType hintedType = detectByHint(sourceDescriptor);
         String extension = DocumentImportSupport.extensionOf(sourceDescriptor.fileName());
 
-        DocumentType documentType = detectByMediaType(normalizedMediaType);
+        DocumentType documentType = hintedType != null ? hintedType : detectByMediaType(normalizedMediaType);
         if (documentType == DocumentType.UNKNOWN) {
             documentType = detectByExtension(extension);
         }
@@ -46,10 +58,41 @@ public class BasicMimeAndExtensionDocumentTypeDetector implements DocumentTypeDe
         if (StringUtils.hasText(sourceDescriptor.fileName())) {
             attributes.put("fileName", sourceDescriptor.fileName());
         }
+        if (hintedType != null) {
+            attributes.put("documentTypeHint", hintedType.name());
+        }
 
         return new DetectionResult(documentType, family, normalizedMediaType, languageCode, attributes);
     }
 
+    /**
+     * Resolves an explicit document type from the {@code documentTypeHint} source attribute.
+     *
+     * <p>The hint is trimmed and upper-cased with {@link Locale#ROOT} before it is passed to
+     * {@code DocumentType.valueOf}. A hint that resolves to {@code UNKNOWN} carries no information
+     * and is treated like a missing hint, so {@code detect} still consults the media type.
+     *
+     * @param sourceDescriptor descriptor whose attributes may carry the hint; attributes may be null
+     * @return the hinted type, or {@code null} when no usable hint is present
+     */
+    private DocumentType detectByHint(SourceDescriptor sourceDescriptor) {
+        if (sourceDescriptor.attributes() == null) {
+            return null;
+        }
+        String hint = sourceDescriptor.attributes().get("documentTypeHint");
+        if (!StringUtils.hasText(hint)) {
+            return null;
+        }
+        try {
+            DocumentType hinted = DocumentType.valueOf(hint.trim().toUpperCase(Locale.ROOT));
+            // Returning UNKNOWN here would make detect() skip detectByMediaType entirely.
+            return hinted == DocumentType.UNKNOWN ? null : hinted;
+        } catch (IllegalArgumentException ignored) {
+            // Best effort: an unrecognised hint is dropped and detect() falls back to its other checks.
+            return null;
+        }
+    }
+
     private DocumentType detectByMediaType(String mediaType) {
         if (!StringUtils.hasText(mediaType)) {
             return DocumentType.UNKNOWN;