Refactor phases 4.1

2026-03-18 15:45:50 +01:00 · 2026-03-18 15:45:50 +01:00 · f337af56b5
parent ac59730f3e
commit f337af56b5
36 changed files with 1571 additions and 89 deletions
--- a/docs/architecture/PHASE4_1_ADAPTERS.md
+++ b/docs/architecture/PHASE4_1_ADAPTERS.md
@ -0,0 +1,28 @@
+# Phase 4.1 adapter extensions
+
+## Added adapters
+
+### TED package adapter
+
+- Source type: `TED_PACKAGE`
+- Root access: `PUBLIC`, no owner tenant
+- Root document type: `TED_PACKAGE`
+- Child source type: `PACKAGE_CHILD`
+- Child relation: `EXTRACTED_FROM`
+
+The adapter imports the package artifact plus its XML members into the generic `DOC` model.
+It does not replace the existing legacy TED package processing path; instead it complements it, so the later legacy TED parsing step can still enrich the same canonical child documents into proper `TED_NOTICE` projections by dedup hash.
+
+### Mail/document adapter
+
+- Source type: `MAIL`
+- Root document type: `MIME_MESSAGE`
+- Child relation: `ATTACHMENT_OF`
+- Access: configurable via `mail-default-owner-tenant-key` and `mail-default-visibility`
+
+The adapter stores the message body as the semantic root text and imports attachments as child documents. ZIP attachments can optionally be expanded recursively.
+
+## Deduplication
+
+Phase 4 deduplication by content hash is refined so the same payload is only deduplicated within the same access scope (`visibility` + `owner tenant`).
+This prevents private documents from different tenants from being merged into one canonical document accidentally.
--- a/pom.xml
+++ b/pom.xml
@ -224,6 +224,14 @@
            <artifactId>postgresql</artifactId>
            <scope>test</scope>
        </dependency>
+        <dependency>
+            <groupId>org.flywaydb</groupId>
+            <artifactId>flyway-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.flywaydb</groupId>
+            <artifactId>flyway-database-postgresql</artifactId>
+        </dependency>
    </dependencies>

    <build>
--- a/src/main/java/at/procon/dip/README_PHASE4_1.md
+++ b/src/main/java/at/procon/dip/README_PHASE4_1.md
@ -0,0 +1,34 @@
+# Phase 4.1 – TED package and mail/document adapters
+
+This phase extends the generic DOC ingestion SPI with two richer adapters:
+
+- `TedPackageDocumentIngestionAdapter`
+- `MailDocumentIngestionAdapter`
+
+## TED package adapter
+- imports the package artifact itself as a public DOC document
+- expands the `.tar.gz` package into XML child payloads
+- imports each child XML as a generic DOC child document
+- links children to the package root via `EXTRACTED_FROM`
+- keeps the existing legacy TED package processing path intact
+
+## Mail/document adapter
+- imports the MIME message as a DOC document
+- extracts subject/from/to/body into the mail root semantic text
+- imports attachments as child DOC documents
+- links attachments via `ATTACHMENT_OF`
+- optionally expands ZIP attachments recursively
+
+## Access semantics
+- TED packages and TED XML children are imported as `PUBLIC` with no owner tenant
+- mail documents use a dedicated default mail access context (`mail-default-owner-tenant-key`, `mail-default-visibility`)
+- deduplication is access-scope aware so private content is not merged across different tenants
+
+Additional note:
+- wrapper/container documents (for example TED package roots or ZIP wrapper documents expanded into child documents) can skip persistence of ORIGINAL content via `ted.generic-ingestion.store-original-content-for-wrapper-documents=false`, and adapters can now override that default per imported document through `SourceDescriptor.originalContentStoragePolicy` (`STORE` / `SKIP` / `DEFAULT`), while still keeping metadata, derived representations and child relations.
+
+- when original content storage is skipped for a document, GenericDocumentImportService now also skips extraction, derived-content persistence, representation building, and embedding queueing for that document
+
+
+Schema note:
+- `V8__doc_phase4_1_expand_document_and_source_types.sql` expands the generic `DOC` document/source type domain for `TED_PACKAGE` and `PACKAGE_CHILD`, and also repairs older local/dev schemas that used CHECK constraints instead of PostgreSQL ENUM types.
--- a/src/main/java/at/procon/dip/domain/document/DocumentType.java
+++ b/src/main/java/at/procon/dip/domain/document/DocumentType.java
@ -4,6 +4,7 @@ package at.procon.dip.domain.document;
 * Canonical technical document type.
 */
 public enum DocumentType {
+    TED_PACKAGE,
    TED_NOTICE,
    EMAIL,
    MIME_MESSAGE,
--- a/src/main/java/at/procon/dip/domain/document/SourceType.java
+++ b/src/main/java/at/procon/dip/domain/document/SourceType.java
@ -5,6 +5,7 @@ package at.procon.dip.domain.document;
 */
 public enum SourceType {
    TED_PACKAGE,
+    PACKAGE_CHILD,
    MAIL,
    FILE_SYSTEM,
    REST_UPLOAD,
--- a/src/main/java/at/procon/dip/domain/document/repository/DocumentRelationRepository.java
+++ b/src/main/java/at/procon/dip/domain/document/repository/DocumentRelationRepository.java
@ -13,4 +13,6 @@ public interface DocumentRelationRepository extends JpaRepository<DocumentRelati
    List<DocumentRelation> findByChildDocument_Id(UUID childDocumentId);

    List<DocumentRelation> findByParentDocument_IdAndRelationType(UUID parentDocumentId, RelationType relationType);
+
+    boolean existsByParentDocument_IdAndChildDocument_IdAndRelationType(UUID parentDocumentId, UUID childDocumentId, RelationType relationType);
 }
--- a/src/main/java/at/procon/dip/domain/document/repository/DocumentRepository.java
+++ b/src/main/java/at/procon/dip/domain/document/repository/DocumentRepository.java
@ -15,6 +15,8 @@ public interface DocumentRepository extends JpaRepository<Document, UUID> {

    Optional<Document> findByDedupHash(String dedupHash);

+    List<Document> findAllByDedupHash(String dedupHash);
+
    boolean existsByDedupHash(String dedupHash);

    List<Document> findByDocumentType(DocumentType documentType);
--- a/src/main/java/at/procon/dip/domain/document/service/DocumentRelationService.java
+++ b/src/main/java/at/procon/dip/domain/document/service/DocumentRelationService.java
@ -28,6 +28,19 @@ public class DocumentRelationService {
        return relationRepository.save(relation);
    }

+    public DocumentRelation ensureRelation(CreateDocumentRelationCommand command) {
+        boolean exists = relationRepository.existsByParentDocument_IdAndChildDocument_IdAndRelationType(
+                command.parentDocumentId(), command.childDocumentId(), command.relationType());
+        if (exists) {
+            return relationRepository.findByParentDocument_IdAndRelationType(command.parentDocumentId(), command.relationType())
+                    .stream()
+                    .filter(rel -> rel.getChildDocument() != null && command.childDocumentId().equals(rel.getChildDocument().getId()))
+                    .findFirst()
+                    .orElseGet(() -> createRelation(command));
+        }
+        return createRelation(command);
+    }
+
    @Transactional(readOnly = true)
    public List<DocumentRelation> findChildren(UUID parentDocumentId) {
        return relationRepository.findByParentDocument_Id(parentDocumentId);
--- a/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java
+++ b/src/main/java/at/procon/dip/domain/ted/service/TedNoticeProjectionService.java
@ -15,7 +15,6 @@ import at.procon.ted.model.entity.ProcurementLot;
 import at.procon.ted.service.TedPhase2GenericDocumentService;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Optional;
 import java.util.UUID;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
@ -63,8 +62,7 @@ public class TedNoticeProjectionService {
        Document genericDocument = documentRepository.findById(resolvedDocumentId)
                .orElseThrow(() -> new IllegalArgumentException("Unknown DOC document id: " + finalResolvedDocumentId));

-        TedNoticeProjection projection = projectionRepository.findByLegacyProcurementDocumentId(legacyDocument.getId())
-                .or(() -> projectionRepository.findByDocument_Id(genericDocument.getId()))
+        TedNoticeProjection projection = projectionRepository.findByDocument_Id(genericDocument.getId())
                .orElseGet(TedNoticeProjection::new);

        mapProjection(projection, genericDocument, legacyDocument);
@ -72,19 +70,13 @@ public class TedNoticeProjectionService {
        replaceLots(projection, legacyDocument.getLots());
        replaceOrganizations(projection, legacyDocument.getOrganizations());

-        log.debug("Phase 3 TED projection ensured for legacy {} -> projection {} / doc {}",
-                legacyDocument.getId(), projection.getId(), genericDocument.getId());
+        log.debug("Phase 3 TED projection ensured for generic doc {} -> projection {} (noticeId={}, publicationId={})",
+                genericDocument.getId(), projection.getId(), legacyDocument.getNoticeId(), legacyDocument.getPublicationId());
        return projection.getId();
    }

-    @Transactional(readOnly = true)
-    public Optional<TedNoticeProjection> findByLegacyProcurementDocumentId(UUID legacyDocumentId) {
-        return projectionRepository.findByLegacyProcurementDocumentId(legacyDocumentId);
-    }
-
    private void mapProjection(TedNoticeProjection projection, Document genericDocument, ProcurementDocument legacyDocument) {
        projection.setDocument(genericDocument);
-        projection.setLegacyProcurementDocumentId(legacyDocument.getId());
        projection.setNoticeId(legacyDocument.getNoticeId());
        projection.setPublicationId(legacyDocument.getPublicationId());
        projection.setNoticeUrl(legacyDocument.getNoticeUrl());
--- a/src/main/java/at/procon/dip/extraction/impl/MimeMessageDocumentExtractor.java
+++ b/src/main/java/at/procon/dip/extraction/impl/MimeMessageDocumentExtractor.java
@ -0,0 +1,56 @@
+package at.procon.dip.extraction.impl;
+
+import at.procon.dip.domain.document.ContentRole;
+import at.procon.dip.domain.document.DocumentType;
+import at.procon.dip.extraction.spi.DocumentExtractor;
+import at.procon.dip.extraction.spi.ExtractedStructuredPayload;
+import at.procon.dip.extraction.spi.ExtractionRequest;
+import at.procon.dip.extraction.spi.ExtractionResult;
+import at.procon.dip.ingestion.util.DocumentImportSupport;
+import java.nio.charset.StandardCharsets;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.springframework.stereotype.Component;
+import org.springframework.util.StringUtils;
+
+@Component
+public class MimeMessageDocumentExtractor implements DocumentExtractor {
+
+    @Override
+    public boolean supports(DocumentType documentType, String mimeType) {
+        return documentType == DocumentType.MIME_MESSAGE || documentType == DocumentType.EMAIL;
+    }
+
+    @Override
+    public ExtractionResult extract(ExtractionRequest extractionRequest) {
+        String text = extractionRequest.textContent();
+        if (!StringUtils.hasText(text) && extractionRequest.binaryContent() != null) {
+            text = new String(extractionRequest.binaryContent(), StandardCharsets.UTF_8);
+        }
+        text = text == null ? null : text.replace("\r\n", "\n").replace('\r', '\n').trim();
+
+        Map<String, Object> attributes = new LinkedHashMap<>();
+        if (extractionRequest.sourceDescriptor().attributes() != null) {
+            attributes.putAll(extractionRequest.sourceDescriptor().attributes());
+        }
+        String title = DocumentImportSupport.firstNonBlank(extractionRequest.sourceDescriptor().attributes(), "title", "subject");
+        if (!StringUtils.hasText(title)) {
+            title = extractionRequest.sourceDescriptor().fileName();
+        }
+        if (StringUtils.hasText(title)) {
+            attributes.put("title", title);
+        }
+
+        if (!StringUtils.hasText(text)) {
+            return new ExtractionResult(Map.of(), List.of(new ExtractedStructuredPayload("mail-message", attributes)),
+                    List.of("Mail message did not contain extractable text body"));
+        }
+
+        return new ExtractionResult(
+                Map.of(ContentRole.NORMALIZED_TEXT, text),
+                List.of(new ExtractedStructuredPayload("mail-message", attributes)),
+                List.of()
+        );
+    }
+}
--- a/src/main/java/at/procon/dip/extraction/impl/PdfDocumentExtractor.java
+++ b/src/main/java/at/procon/dip/extraction/impl/PdfDocumentExtractor.java
@ -22,7 +22,7 @@ public class PdfDocumentExtractor implements DocumentExtractor {

    @Override
    public boolean supports(DocumentType documentType, String mimeType) {
-        return documentType == DocumentType.PDF || pdfExtractionService.canHandle("dummy.pdf", mimeType);
+        return documentType == DocumentType.PDF;
    }

    @Override
--- a/src/main/java/at/procon/dip/extraction/impl/TedPackageManifestExtractor.java
+++ b/src/main/java/at/procon/dip/extraction/impl/TedPackageManifestExtractor.java
@ -0,0 +1,49 @@
+package at.procon.dip.extraction.impl;
+
+import at.procon.dip.domain.document.ContentRole;
+import at.procon.dip.domain.document.DocumentType;
+import at.procon.dip.extraction.spi.DocumentExtractor;
+import at.procon.dip.extraction.spi.ExtractedStructuredPayload;
+import at.procon.dip.extraction.spi.ExtractionRequest;
+import at.procon.dip.extraction.spi.ExtractionResult;
+import at.procon.dip.ingestion.util.DocumentImportSupport;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.springframework.stereotype.Component;
+import org.springframework.util.StringUtils;
+
+//@Component
+public class TedPackageManifestExtractor implements DocumentExtractor {
+
+    @Override
+    public boolean supports(DocumentType documentType, String mimeType) {
+        return documentType == DocumentType.TED_PACKAGE;
+    }
+
+    @Override
+    public ExtractionResult extract(ExtractionRequest extractionRequest) {
+        String manifest = extractionRequest.textContent();
+        if (!StringUtils.hasText(manifest)) {
+            manifest = "TED package: " + extractionRequest.sourceDescriptor().sourceIdentifier();
+        }
+
+        Map<String, Object> attributes = new LinkedHashMap<>();
+        if (extractionRequest.sourceDescriptor().attributes() != null) {
+            attributes.putAll(extractionRequest.sourceDescriptor().attributes());
+        }
+        String title = DocumentImportSupport.firstNonBlank(extractionRequest.sourceDescriptor().attributes(), "title", "packageId");
+        if (!StringUtils.hasText(title)) {
+            title = extractionRequest.sourceDescriptor().fileName();
+        }
+        if (StringUtils.hasText(title)) {
+            attributes.put("title", title);
+        }
+
+        return new ExtractionResult(
+                Map.of(ContentRole.NORMALIZED_TEXT, manifest),
+                List.of(new ExtractedStructuredPayload("ted-package-manifest", attributes)),
+                List.of()
+        );
+    }
+}
--- a/src/main/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapter.java
+++ b/src/main/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapter.java
@ -0,0 +1,170 @@
+package at.procon.dip.ingestion.adapter;
+
+import at.procon.dip.domain.access.DocumentAccessContext;
+import at.procon.dip.domain.access.DocumentVisibility;
+import at.procon.dip.domain.document.RelationType;
+import at.procon.dip.domain.document.SourceType;
+import at.procon.dip.domain.tenant.TenantRef;
+import at.procon.dip.domain.document.service.DocumentRelationService;
+import at.procon.dip.domain.document.service.command.CreateDocumentRelationCommand;
+import at.procon.dip.ingestion.dto.ImportedDocumentResult;
+import at.procon.dip.ingestion.service.GenericDocumentImportService;
+import at.procon.dip.ingestion.service.MailMessageExtractionService;
+import at.procon.dip.ingestion.service.MailMessageExtractionService.MailAttachment;
+import at.procon.dip.ingestion.service.MailMessageExtractionService.ParsedMailMessage;
+import at.procon.dip.ingestion.spi.DocumentIngestionAdapter;
+import at.procon.dip.ingestion.spi.IngestionResult;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
+import at.procon.dip.ingestion.util.DocumentImportSupport;
+import at.procon.ted.config.TedProcessorProperties;
+import at.procon.ted.service.attachment.AttachmentExtractor;
+import at.procon.ted.service.attachment.ZipExtractionService;
+import java.time.OffsetDateTime;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class MailDocumentIngestionAdapter implements DocumentIngestionAdapter {
+
+    private final TedProcessorProperties properties;
+    private final GenericDocumentImportService importService;
+    private final MailMessageExtractionService mailExtractionService;
+    private final DocumentRelationService relationService;
+    private final ZipExtractionService zipExtractionService;
+
+    @Override
+    public boolean supports(SourceDescriptor sourceDescriptor) {
+        return sourceDescriptor.sourceType() == SourceType.MAIL
+                && properties.getGenericIngestion().isEnabled()
+                && properties.getGenericIngestion().isMailAdapterEnabled();
+    }
+
+    @Override
+    public IngestionResult ingest(SourceDescriptor sourceDescriptor) {
+        byte[] rawMime = sourceDescriptor.binaryContent();
+        if (rawMime == null || rawMime.length == 0) {
+            throw new IllegalArgumentException("Mail adapter requires raw MIME bytes");
+        }
+        ParsedMailMessage parsed = mailExtractionService.parse(rawMime);
+        DocumentAccessContext accessContext = sourceDescriptor.accessContext() == null ? defaultMailAccessContext() : sourceDescriptor.accessContext();
+
+        Map<String, String> rootAttributes = new LinkedHashMap<>(sourceDescriptor.attributes() == null ? Map.of() : sourceDescriptor.attributes());
+        if (parsed.subject() != null) rootAttributes.put("subject", parsed.subject());
+        if (parsed.from() != null) rootAttributes.put("from", parsed.from());
+        if (!parsed.recipients().isEmpty()) rootAttributes.put("to", String.join(", ", parsed.recipients()));
+        rootAttributes.putIfAbsent("title", parsed.subject() != null ? parsed.subject() : sourceDescriptor.fileName());
+        rootAttributes.put("attachmentCount", Integer.toString(parsed.attachments().size()));
+        rootAttributes.put("importBatchId", properties.getGenericIngestion().getMailImportBatchId());
+
+        ImportedDocumentResult rootResult = importService.importDocument(new SourceDescriptor(
+                accessContext,
+                SourceType.MAIL,
+                sourceDescriptor.sourceIdentifier(),
+                sourceDescriptor.sourceUri(),
+                sourceDescriptor.fileName() != null ? sourceDescriptor.fileName() : fallbackMailFileName(parsed),
+                "message/rfc822",
+                rawMime,
+                mailExtractionService.serializeMessage(parsed),
+                parsed.receivedAt() == null ? OffsetDateTime.now() : parsed.receivedAt(),
+                OriginalContentStoragePolicy.STORE,
+                rootAttributes
+        ));
+
+        List<at.procon.dip.domain.document.CanonicalDocumentMetadata> documents = new ArrayList<>();
+        List<String> warnings = new ArrayList<>(rootResult.warnings());
+        documents.add(rootResult.document().toCanonicalMetadata());
+
+        int sortOrder = 0;
+        for (MailAttachment attachment : parsed.attachments()) {
+            importAttachment(rootResult.document().getId(), accessContext, sourceDescriptor, attachment, documents, warnings, ++sortOrder, 0);
+        }
+
+        return new IngestionResult(documents, warnings);
+    }
+
+    private void importAttachment(java.util.UUID parentDocumentId, DocumentAccessContext accessContext, SourceDescriptor parentSource,
+                                  MailAttachment attachment, List<at.procon.dip.domain.document.CanonicalDocumentMetadata> documents,
+                                  List<String> warnings, int sortOrder, int depth) {
+        boolean expandableWrapper = properties.getGenericIngestion().isExpandMailZipAttachments()
+                && zipExtractionService.canHandle(attachment.fileName(), attachment.contentType());
+
+        Map<String, String> attachmentAttributes = new LinkedHashMap<>();
+        attachmentAttributes.put("title", attachment.fileName());
+        attachmentAttributes.put("mailSourceIdentifier", parentSource.sourceIdentifier());
+        attachmentAttributes.put("importBatchId", properties.getGenericIngestion().getMailImportBatchId());
+        if (expandableWrapper) {
+            attachmentAttributes.put("wrapperDocument", Boolean.TRUE.toString());
+        }
+
+        ImportedDocumentResult attachmentResult = importService.importDocument(new SourceDescriptor(
+                accessContext,
+                SourceType.MAIL,
+                parentSource.sourceIdentifier() + ":attachment:" + depth + ":" + attachment.fileName(),
+                parentSource.sourceUri(),
+                attachment.fileName(),
+                DocumentImportSupport.normalizeMediaType(attachment.contentType()),
+                attachment.data(),
+                previewTextIfLikelyText(attachment),
+                parentSource.receivedAt() == null ? OffsetDateTime.now() : parentSource.receivedAt(),
+                expandableWrapper ? OriginalContentStoragePolicy.SKIP : OriginalContentStoragePolicy.STORE,
+                attachmentAttributes
+        ));
+        documents.add(attachmentResult.document().toCanonicalMetadata());
+        warnings.addAll(attachmentResult.warnings());
+        RelationType relationType = depth > 0 || attachment.path() != null ? RelationType.EXTRACTED_FROM : RelationType.ATTACHMENT_OF;
+        relationService.ensureRelation(new CreateDocumentRelationCommand(
+                parentDocumentId, attachmentResult.document().getId(), relationType, sortOrder, attachment.fileName()));
+
+        if (expandableWrapper) {
+            AttachmentExtractor.ExtractionResult zipResult = zipExtractionService.extract(attachment.data(), attachment.fileName(), attachment.contentType());
+            if (!zipResult.success()) {
+                warnings.add("ZIP attachment extraction failed for " + attachment.fileName() + ": " + zipResult.errorMessage());
+                return;
+            }
+            int childSort = 0;
+            for (AttachmentExtractor.ChildAttachment child : zipResult.childAttachments()) {
+                importAttachment(attachmentResult.document().getId(), accessContext, parentSource,
+                        new MailAttachment(child.filename(), child.contentType(), child.data(), child.data().length, child.pathInArchive()),
+                        documents, warnings, ++childSort, depth + 1);
+            }
+        }
+    }
+
+    private String fallbackMailFileName(ParsedMailMessage parsed) {
+        String subject = parsed.subject() == null || parsed.subject().isBlank() ? "mail-message" : parsed.subject().replaceAll("[^A-Za-z0-9._-]", "_");
+        return subject + ".eml";
+    }
+
+    private DocumentAccessContext defaultMailAccessContext() {
+        String tenantKey = properties.getGenericIngestion().getMailDefaultOwnerTenantKey();
+        if (tenantKey == null || tenantKey.isBlank()) {
+            tenantKey = properties.getGenericIngestion().getDefaultOwnerTenantKey();
+        }
+        DocumentVisibility visibility = properties.getGenericIngestion().getMailDefaultVisibility();
+        TenantRef tenant = (tenantKey == null || tenantKey.isBlank()) ? null : new TenantRef(null, tenantKey, tenantKey);
+        if (tenant == null && visibility == DocumentVisibility.TENANT) {
+            visibility = DocumentVisibility.RESTRICTED;
+        }
+        return new DocumentAccessContext(tenant, visibility);
+    }
+
+    private String previewTextIfLikelyText(MailAttachment attachment) {
+        String mime = DocumentImportSupport.normalizeMediaType(attachment.contentType());
+        if (DocumentImportSupport.isLikelyTextMime(mime)) {
+            return attachment.safeTextPreview();
+        }
+        String ext = DocumentImportSupport.extensionOf(attachment.fileName());
+        if ("txt".equals(ext) || "xml".equals(ext) || "html".equals(ext) || "htm".equals(ext) || "md".equals(ext)) {
+            return attachment.safeTextPreview();
+        }
+        return null;
+    }
+}
--- a/src/main/java/at/procon/dip/ingestion/adapter/TedPackageDocumentIngestionAdapter.java
+++ b/src/main/java/at/procon/dip/ingestion/adapter/TedPackageDocumentIngestionAdapter.java
@ -0,0 +1,130 @@
+package at.procon.dip.ingestion.adapter;
+
+import at.procon.dip.domain.access.DocumentAccessContext;
+import at.procon.dip.domain.document.RelationType;
+import at.procon.dip.domain.document.SourceType;
+import at.procon.dip.domain.document.entity.Document;
+import at.procon.dip.domain.document.service.DocumentRelationService;
+import at.procon.dip.domain.document.service.command.CreateDocumentRelationCommand;
+import at.procon.dip.ingestion.dto.ImportedDocumentResult;
+import at.procon.dip.ingestion.service.GenericDocumentImportService;
+import at.procon.dip.ingestion.service.TedPackageExpansionService;
+import at.procon.dip.ingestion.spi.DocumentIngestionAdapter;
+import at.procon.dip.ingestion.spi.IngestionResult;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
+import at.procon.ted.config.TedProcessorProperties;
+import java.nio.charset.StandardCharsets;
+import java.time.OffsetDateTime;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+import org.springframework.transaction.annotation.Transactional;
+
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class TedPackageDocumentIngestionAdapter implements DocumentIngestionAdapter {
+
+    private final TedProcessorProperties properties;
+    private final GenericDocumentImportService importService;
+    private final TedPackageExpansionService expansionService;
+    private final DocumentRelationService relationService;
+
+    @Override
+    public boolean supports(SourceDescriptor sourceDescriptor) {
+        return sourceDescriptor.sourceType() == SourceType.TED_PACKAGE
+                && properties.getGenericIngestion().isEnabled()
+                && properties.getGenericIngestion().isTedPackageAdapterEnabled();
+    }
+
+    @Override
+    public IngestionResult ingest(SourceDescriptor sourceDescriptor) {
+        byte[] packageBytes = sourceDescriptor.binaryContent();
+        if (packageBytes == null || packageBytes.length == 0) {
+            throw new IllegalArgumentException("TED package adapter requires tar.gz bytes");
+        }
+
+        TedPackageExpansionService.TedPackageExpansionResult expanded = expansionService.expand(packageBytes);
+        Map<String, String> rootAttributes = new LinkedHashMap<>(sourceDescriptor.attributes() == null ? Map.of() : sourceDescriptor.attributes());
+        rootAttributes.putIfAbsent("packageId", sourceDescriptor.sourceIdentifier());
+        rootAttributes.putIfAbsent("title", sourceDescriptor.fileName() != null ? sourceDescriptor.fileName() : sourceDescriptor.sourceIdentifier());
+        rootAttributes.put("xmlEntryCount", Integer.toString(expanded.entries().size()));
+        rootAttributes.put("wrapperDocument", Boolean.TRUE.toString());
+        rootAttributes.put("importBatchId", properties.getGenericIngestion().getTedPackageImportBatchId());
+
+        ImportedDocumentResult packageDocument = importService.importDocument(new SourceDescriptor(
+                sourceDescriptor.accessContext() == null ? DocumentAccessContext.publicDocument() : sourceDescriptor.accessContext(),
+                SourceType.TED_PACKAGE,
+                sourceDescriptor.sourceIdentifier(),
+                sourceDescriptor.sourceUri(),
+                sourceDescriptor.fileName(),
+                sourceDescriptor.mediaType() == null ? "application/gzip" : sourceDescriptor.mediaType(),
+                packageBytes,
+                expanded.manifestText(),
+                sourceDescriptor.receivedAt() == null ? OffsetDateTime.now() : sourceDescriptor.receivedAt(),
+                OriginalContentStoragePolicy.SKIP,
+                rootAttributes
+        ));
+
+        List<String> warnings = new ArrayList<>(packageDocument.warnings());
+        List<at.procon.dip.domain.document.CanonicalDocumentMetadata> documents = new ArrayList<>();
+        documents.add(packageDocument.document().toCanonicalMetadata());
+
+        int sortOrder = 0;
+        for (TedPackageExpansionService.TedPackageEntry entry : expanded.entries()) {
+            sortOrder++;
+            String childUri = "tedpkg://" + sourceDescriptor.sourceIdentifier() + "/" + entry.archivePath();
+            String childIdentifier = sourceDescriptor.sourceIdentifier() + ":" + entry.archivePath();
+            String xmlContent = resolveXmlContent(entry);
+
+            Map<String, String> childAttributes = new LinkedHashMap<>();
+            childAttributes.put("documentTypeHint", "TED_NOTICE");
+            childAttributes.put("packageId", sourceDescriptor.sourceIdentifier());
+            childAttributes.put("archivePath", entry.archivePath());
+            childAttributes.put("title", entry.fileName());
+            childAttributes.put("importBatchId", properties.getGenericIngestion().getTedPackageImportBatchId());
+
+            ImportedDocumentResult childResult = importService.importDocument(new SourceDescriptor(
+                    sourceDescriptor.accessContext() == null ? DocumentAccessContext.publicDocument() : sourceDescriptor.accessContext(),
+                    SourceType.PACKAGE_CHILD,
+                    childIdentifier,
+                    childUri,
+                    entry.fileName(),
+                    entry.mediaType() == null ? "application/xml" : entry.mediaType(),
+                    entry.data(),
+                    xmlContent,
+                    sourceDescriptor.receivedAt() == null ? OffsetDateTime.now() : sourceDescriptor.receivedAt(),
+                    OriginalContentStoragePolicy.STORE,
+                    childAttributes
+            ));
+
+            Document childDocument = childResult.document();
+            documents.add(childDocument.toCanonicalMetadata());
+            warnings.addAll(childResult.warnings());
+            if (childResult.deduplicated()) {
+                warnings.add("TED XML child already existed and was linked to package: " + entry.archivePath());
+            }
+            relationService.ensureRelation(new CreateDocumentRelationCommand(
+                    packageDocument.document().getId(),
+                    childDocument.getId(),
+                    RelationType.EXTRACTED_FROM,
+                    sortOrder,
+                    entry.archivePath()
+            ));
+        }
+
+        return new IngestionResult(documents, warnings);
+    }
+
+    private String resolveXmlContent(TedPackageExpansionService.TedPackageEntry entry) {
+        if (entry.textUtf8() != null && !entry.textUtf8().isBlank()) {
+            return entry.textUtf8();
+        }
+        return new String(entry.data(), StandardCharsets.UTF_8);
+    }
+}
--- a/src/main/java/at/procon/dip/ingestion/camel/GenericFileSystemIngestionRoute.java
+++ b/src/main/java/at/procon/dip/ingestion/camel/GenericFileSystemIngestionRoute.java
@ -5,6 +5,7 @@ import at.procon.dip.domain.access.DocumentVisibility;
 import at.procon.dip.domain.document.SourceType;
 import at.procon.dip.domain.tenant.TenantRef;
 import at.procon.dip.ingestion.service.DocumentIngestionGateway;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
 import at.procon.dip.ingestion.spi.SourceDescriptor;
 import at.procon.ted.config.TedProcessorProperties;
 import java.nio.file.Files;
@ -72,6 +73,7 @@ public class GenericFileSystemIngestionRoute extends RouteBuilder {
                payload,
                null,
                OffsetDateTime.now(),
+                OriginalContentStoragePolicy.DEFAULT,
                attributes
        );
        ingestionGateway.ingest(descriptor);
--- a/src/main/java/at/procon/dip/ingestion/controller/GenericDocumentImportController.java
+++ b/src/main/java/at/procon/dip/ingestion/controller/GenericDocumentImportController.java
@ -9,6 +9,7 @@ import at.procon.dip.ingestion.dto.GenericImportResponse;
 import at.procon.dip.ingestion.dto.GenericTextImportRequest;
 import at.procon.dip.ingestion.service.DocumentIngestionGateway;
 import at.procon.dip.ingestion.spi.IngestionResult;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
 import at.procon.dip.ingestion.spi.SourceDescriptor;
 import at.procon.ted.config.TedProcessorProperties;
 import java.time.OffsetDateTime;
@ -62,6 +63,7 @@ public class GenericDocumentImportController {
                file.getBytes(),
                null,
                OffsetDateTime.now(),
+                OriginalContentStoragePolicy.DEFAULT,
                attributes
        );
        IngestionResult result = ingestionGateway.ingest(descriptor);
@ -89,6 +91,7 @@ public class GenericDocumentImportController {
                request.text() == null ? null : request.text().getBytes(java.nio.charset.StandardCharsets.UTF_8),
                request.text(),
                OffsetDateTime.now(),
+                OriginalContentStoragePolicy.DEFAULT,
                attributes
        );
        IngestionResult result = ingestionGateway.ingest(descriptor);
--- a/src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java
+++ b/src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java
@ -4,15 +4,12 @@ import at.procon.dip.classification.service.DocumentClassificationService;
 import at.procon.dip.classification.spi.DetectionResult;
 import at.procon.dip.domain.access.DocumentAccessContext;
 import at.procon.dip.domain.access.DocumentVisibility;
-import at.procon.dip.domain.document.CanonicalDocumentMetadata;
 import at.procon.dip.domain.document.ContentRole;
 import at.procon.dip.domain.document.DocumentStatus;
 import at.procon.dip.domain.document.StorageType;
 import at.procon.dip.domain.document.entity.Document;
 import at.procon.dip.domain.document.entity.DocumentContent;
 import at.procon.dip.domain.document.entity.DocumentEmbeddingModel;
-import at.procon.dip.domain.document.entity.DocumentSource;
-import at.procon.dip.domain.document.repository.DocumentEmbeddingRepository;
 import at.procon.dip.domain.document.repository.DocumentRepository;
 import at.procon.dip.domain.document.repository.DocumentSourceRepository;
 import at.procon.dip.domain.document.service.DocumentContentService;
@ -29,11 +26,15 @@ import at.procon.dip.extraction.service.DocumentExtractionService;
 import at.procon.dip.extraction.spi.ExtractionRequest;
 import at.procon.dip.extraction.spi.ExtractionResult;
 import at.procon.dip.ingestion.dto.ImportedDocumentResult;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
 import at.procon.dip.ingestion.spi.SourceDescriptor;
 import at.procon.dip.ingestion.util.DocumentImportSupport;
 import at.procon.dip.normalization.service.TextRepresentationBuildService;
 import at.procon.dip.normalization.spi.RepresentationBuildRequest;
 import at.procon.dip.normalization.spi.TextRepresentationDraft;
+import at.procon.dip.processing.service.StructuredDocumentProcessingService;
+import at.procon.dip.processing.spi.DocumentProcessingPolicy;
+import at.procon.dip.processing.spi.StructuredProcessingRequest;
 import at.procon.ted.config.TedProcessorProperties;
 import at.procon.ted.util.HashUtils;
 import java.nio.charset.StandardCharsets;
@ -43,7 +44,6 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.UUID;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Service;
@ -51,7 +51,7 @@ import org.springframework.transaction.annotation.Transactional;
 import org.springframework.util.StringUtils;

 /**
- * Phase 4 generic import pipeline that persists arbitrary document types into the DOC model.
+ * Generic import pipeline that persists arbitrary document types into the DOC model.
 */
@Service
@RequiredArgsConstructor
@ -61,7 +61,6 @@ public class GenericDocumentImportService {
    private final TedProcessorProperties properties;
    private final DocumentRepository documentRepository;
    private final DocumentSourceRepository documentSourceRepository;
-    private final DocumentEmbeddingRepository documentEmbeddingRepository;
    private final DocumentService documentService;
    private final DocumentSourceService documentSourceService;
    private final DocumentContentService documentContentService;
@ -70,6 +69,7 @@ public class GenericDocumentImportService {
    private final DocumentClassificationService classificationService;
    private final DocumentExtractionService extractionService;
    private final TextRepresentationBuildService representationBuildService;
+    private final StructuredDocumentProcessingService structuredProcessingService;

    @Transactional
    public ImportedDocumentResult importDocument(SourceDescriptor sourceDescriptor) {
@ -77,20 +77,20 @@ public class GenericDocumentImportService {
        DetectionResult detection = classificationService.detect(withResolvedMediaType(sourceDescriptor, payload));
        String dedupHash = HashUtils.computeSha256(payload.binaryContent());

-        if (properties.getGenericIngestion().isDeduplicateByContentHash()) {
-            Optional<Document> existing = documentRepository.findByDedupHash(dedupHash);
-            if (existing.isPresent()) {
-                Document document = existing.get();
-                ensureSource(document, sourceDescriptor);
-                List<String> warnings = List.of("Document content hash already imported; linked new source to existing document");
-                return new ImportedDocumentResult(document, detection, warnings, true);
-            }
-        }
-
        DocumentAccessContext accessContext = sourceDescriptor.accessContext() == null
                ? defaultAccessContext()
                : sourceDescriptor.accessContext();

+        if (properties.getGenericIngestion().isDeduplicateByContentHash()) {
+            Optional<Document> existing = resolveDeduplicatedDocument(dedupHash, accessContext);
+            if (existing.isPresent()) {
+                Document document = existing.get();
+                ensureSource(document, sourceDescriptor);
+                List<String> warnings = List.of("Document content hash already imported within the same access scope; linked new source to existing document");
+                return new ImportedDocumentResult(document, detection, warnings, true);
+            }
+        }
+
        Document document = documentService.create(new CreateDocumentCommand(
                accessContext.ownerTenant() == null ? null : accessContext.ownerTenant().tenantKey(),
                accessContext.visibility(),
@ -108,21 +108,63 @@ public class GenericDocumentImportService {
        ensureSource(document, sourceDescriptor);
        documentService.updateStatus(document.getId(), DocumentStatus.CLASSIFIED);

-        DocumentContent originalContent = persistOriginalContent(document, sourceDescriptor, detection, payload, dedupHash);
+        boolean persistOriginalContent = shouldPersistOriginalContent(sourceDescriptor);
+        DocumentContent originalContent = persistOriginalContent
+                ? persistOriginalContent(document, sourceDescriptor, detection, payload, dedupHash)
+                : null;

-        ExtractionResult extractionResult = extractionService.extract(new ExtractionRequest(
-                sourceDescriptor,
-                detection,
-                payload.textContent(),
-                payload.binaryContent()
-        ));
-        List<String> warnings = new ArrayList<>(extractionResult.warnings());
+        List<String> warnings = new ArrayList<>();
+        DocumentProcessingPolicy processingPolicy = structuredProcessingService.resolvePolicy(sourceDescriptor, detection);
+        ExtractionResult extractionResult = emptyExtractionResult();
+        Map<ContentRole, DocumentContent> persistedDerivedContent = new LinkedHashMap<>();

-        Map<ContentRole, DocumentContent> persistedDerivedContent = persistDerivedContent(document, detection, extractionResult, dedupHash);
-        documentService.updateStatus(document.getId(), DocumentStatus.EXTRACTED);
+        if (persistOriginalContent) {
+            if (processingPolicy.runGenericExtraction()) {
+                extractionResult = extractionService.extract(new ExtractionRequest(
+                        sourceDescriptor,
+                        detection,
+                        payload.textContent(),
+                        payload.binaryContent()
+                ));
+                warnings.addAll(extractionResult.warnings());
+                if (processingPolicy.persistExtractedContent()) {
+                    persistedDerivedContent.putAll(persistDerivedContent(document, detection, extractionResult, dedupHash, "generic"));
+                }
+                if (!extractionResult.derivedTextByRole().isEmpty()) {
+                    documentService.updateStatus(document.getId(), DocumentStatus.EXTRACTED);
+                }
+            }

-        var drafts = representationBuildService.build(new RepresentationBuildRequest(sourceDescriptor, detection, extractionResult));
-        persistRepresentationsAndEmbeddings(document, originalContent, persistedDerivedContent, drafts);
+            if (processingPolicy.invokeStructuredProcessor()) {
+                Optional<ExtractionResult> structuredExtractionResult = structuredProcessingService.process(new StructuredProcessingRequest(
+                        document,
+                        originalContent,
+                        sourceDescriptor,
+                        detection,
+                        payload.binaryContent(),
+                        payload.textContent(),
+                        dedupHash
+                ));
+                if (structuredExtractionResult.isPresent()) {
+                    ExtractionResult result = structuredExtractionResult.get();
+                    warnings.addAll(result.warnings());
+                    extractionResult = mergeExtractionResults(extractionResult, result);
+                    if (processingPolicy.persistExtractedContent()) {
+                        persistedDerivedContent.putAll(persistDerivedContent(document, detection, result, dedupHash, "structured"));
+                    }
+                    if (!result.derivedTextByRole().isEmpty()) {
+                        documentService.updateStatus(document.getId(), DocumentStatus.EXTRACTED);
+                    }
+                }
+            }
+
+            if (processingPolicy.runRepresentationBuilders()) {
+                var drafts = representationBuildService.build(new RepresentationBuildRequest(sourceDescriptor, detection, extractionResult));
+                persistRepresentationsAndEmbeddings(document, originalContent, persistedDerivedContent, drafts);
+            }
+        } else {
+            warnings.add("Original content storage disabled for this document; skipped extraction and text-representation processing");
+        }

        Document reloaded = documentService.getRequired(document.getId());
        if (reloaded.getStatus() == DocumentStatus.EXTRACTED) {
@ -130,7 +172,7 @@ public class GenericDocumentImportService {
            reloaded = documentService.getRequired(reloaded.getId());
        }

-        if (!extractionResult.structuredPayloads().isEmpty()) {
+        if (processingPolicy.applyStructuredTitleIfMissing() && !extractionResult.structuredPayloads().isEmpty()) {
            applyStructuredTitleIfMissing(reloaded, extractionResult);
            reloaded = documentService.getRequired(reloaded.getId());
        }
@ -138,6 +180,50 @@ public class GenericDocumentImportService {
        return new ImportedDocumentResult(reloaded, detection, warnings, false);
    }

+    private ExtractionResult mergeExtractionResults(ExtractionResult left, ExtractionResult right) {
+        Map<ContentRole, String> derivedText = new LinkedHashMap<>();
+        if (left != null && left.derivedTextByRole() != null) {
+            derivedText.putAll(left.derivedTextByRole());
+        }
+        if (right != null && right.derivedTextByRole() != null) {
+            derivedText.putAll(right.derivedTextByRole());
+        }
+        List<at.procon.dip.extraction.spi.ExtractedStructuredPayload> payloads = new ArrayList<>();
+        if (left != null && left.structuredPayloads() != null) {
+            payloads.addAll(left.structuredPayloads());
+        }
+        if (right != null && right.structuredPayloads() != null) {
+            payloads.addAll(right.structuredPayloads());
+        }
+        List<String> warnings = new ArrayList<>();
+        if (left != null && left.warnings() != null) {
+            warnings.addAll(left.warnings());
+        }
+        if (right != null && right.warnings() != null) {
+            warnings.addAll(right.warnings());
+        }
+        return new ExtractionResult(derivedText, payloads, warnings);
+    }
+
+    private ExtractionResult emptyExtractionResult() {
+        return new ExtractionResult(java.util.Collections.emptyMap(), java.util.Collections.emptyList(), java.util.Collections.emptyList());
+    }
+
+    private Optional<Document> resolveDeduplicatedDocument(String dedupHash, DocumentAccessContext accessContext) {
+        return documentRepository.findAllByDedupHash(dedupHash).stream()
+                .filter(existing -> sameAccessScope(existing, accessContext))
+                .findFirst();
+    }
+
+    private boolean sameAccessScope(Document existing, DocumentAccessContext accessContext) {
+        if (existing.getVisibility() != accessContext.visibility()) {
+            return false;
+        }
+        String existingTenantKey = existing.getOwnerTenant() == null ? null : existing.getOwnerTenant().getTenantKey();
+        String requestedTenantKey = accessContext.ownerTenant() == null ? null : accessContext.ownerTenant().tenantKey();
+        return java.util.Objects.equals(existingTenantKey, requestedTenantKey);
+    }
+
    private SourceDescriptor withResolvedMediaType(SourceDescriptor sourceDescriptor, ResolvedPayload payload) {
        if (StringUtils.hasText(sourceDescriptor.mediaType())) {
            return sourceDescriptor;
@ -152,6 +238,7 @@ public class GenericDocumentImportService {
                sourceDescriptor.binaryContent(),
                sourceDescriptor.textContent(),
                sourceDescriptor.receivedAt(),
+                sourceDescriptor.originalContentStoragePolicy(),
                sourceDescriptor.attributes()
        );
    }
@ -216,7 +303,7 @@ public class GenericDocumentImportService {
            return sourceDescriptor.fileName();
        }
        if (StringUtils.hasText(payload.textContent())) {
-            for (String line : payload.textContent().split("\\n")) {
+            for (String line : payload.textContent().split("\n")) {
                if (StringUtils.hasText(line)) {
                    return DocumentImportSupport.ellipsize(line.trim(), 240);
                }
@ -244,6 +331,10 @@ public class GenericDocumentImportService {
            return;
        }

+        String importBatchId = sourceDescriptor.attributes() != null && StringUtils.hasText(sourceDescriptor.attributes().get("importBatchId"))
+                ? sourceDescriptor.attributes().get("importBatchId")
+                : properties.getGenericIngestion().getImportBatchId();
+
        documentSourceService.addSource(new AddDocumentSourceCommand(
                document.getId(),
                sourceDescriptor.sourceType(),
@ -251,11 +342,35 @@ public class GenericDocumentImportService {
                sourceDescriptor.sourceUri(),
                sourceDescriptor.fileName(),
                null,
-                properties.getGenericIngestion().getImportBatchId(),
+                importBatchId,
                sourceDescriptor.receivedAt() == null ? OffsetDateTime.now() : sourceDescriptor.receivedAt()
        ));
    }

+    private boolean shouldPersistOriginalContent(SourceDescriptor sourceDescriptor) {
+        if (sourceDescriptor.originalContentStoragePolicy() == OriginalContentStoragePolicy.STORE) {
+            return true;
+        }
+        if (sourceDescriptor.originalContentStoragePolicy() == OriginalContentStoragePolicy.SKIP) {
+            return false;
+        }
+        if (properties.getGenericIngestion().isStoreOriginalContentForWrapperDocuments()) {
+            return true;
+        }
+        return !isWrapperDocument(sourceDescriptor);
+    }
+
+    private boolean isWrapperDocument(SourceDescriptor sourceDescriptor) {
+        if (sourceDescriptor.attributes() == null || sourceDescriptor.attributes().isEmpty()) {
+            return false;
+        }
+        String wrapperFlag = sourceDescriptor.attributes().get("wrapperDocument");
+        if (wrapperFlag == null) {
+            wrapperFlag = sourceDescriptor.attributes().get("containerDocument");
+        }
+        return Boolean.parseBoolean(wrapperFlag);
+    }
+
    private DocumentContent persistOriginalContent(Document document,
                                                   SourceDescriptor sourceDescriptor,
                                                   DetectionResult detection,
@ -287,13 +402,14 @@ public class GenericDocumentImportService {
    private Map<ContentRole, DocumentContent> persistDerivedContent(Document document,
                                                                    DetectionResult detection,
                                                                    ExtractionResult extractionResult,
-                                                                    String baseHash) {
+                                                                    String baseHash,
+                                                                    String hashNamespace) {
        Map<ContentRole, DocumentContent> result = new LinkedHashMap<>();
        extractionResult.derivedTextByRole().forEach((role, text) -> {
            if (!StringUtils.hasText(text)) {
                return;
            }
-            String contentHash = HashUtils.computeSha256(baseHash + ":" + role.name() + ":" + text);
+            String contentHash = HashUtils.computeSha256(baseHash + ":" + hashNamespace + ":" + role.name() + ":" + text);
            DocumentContent content = documentContentService.addContent(new AddDocumentContentCommand(
                    document.getId(),
                    role,
@ -336,16 +452,13 @@ public class GenericDocumentImportService {
            if (!StringUtils.hasText(draft.textBody())) {
                continue;
            }
-            DocumentContent linkedContent = switch (draft.representationType()) {
-                case FULLTEXT, SEMANTIC_TEXT, SUMMARY, TITLE_ABSTRACT, METADATA_ENRICHED, CHUNK ->
-                        derivedContent.getOrDefault(ContentRole.NORMALIZED_TEXT, originalContent);
-            };
+            DocumentContent linkedContent = resolveLinkedContent(originalContent, derivedContent, draft);

            var representation = documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
                    document.getId(),
                    linkedContent == null ? null : linkedContent.getId(),
                    draft.representationType(),
-                    "phase4-generic-builder",
+                    draft.builderKey() == null ? "phase4-generic-builder" : draft.builderKey(),
                    draft.languageCode(),
                    null,
                    draft.chunkIndex(),
@ -362,7 +475,23 @@ public class GenericDocumentImportService {
        documentService.updateStatus(document.getId(), DocumentStatus.REPRESENTED);
    }

+    private DocumentContent resolveLinkedContent(DocumentContent originalContent,
+                                                 Map<ContentRole, DocumentContent> derivedContent,
+                                                 TextRepresentationDraft draft) {
+        ContentRole sourceRole = draft.sourceContentRole();
+        if (sourceRole == null) {
+            sourceRole = ContentRole.NORMALIZED_TEXT;
+        }
+        if (sourceRole == ContentRole.ORIGINAL) {
+            return originalContent;
+        }
+        return derivedContent.getOrDefault(sourceRole, originalContent);
+    }
+
    private boolean shouldQueueEmbedding(TextRepresentationDraft draft) {
+        if (Boolean.FALSE.equals(draft.queueForEmbedding())) {
+            return false;
+        }
        return properties.getGenericIngestion().isVectorizePrimaryRepresentationOnly() ? draft.primary() : true;
    }

--- a/src/main/java/at/procon/dip/ingestion/service/MailMessageExtractionService.java
+++ b/src/main/java/at/procon/dip/ingestion/service/MailMessageExtractionService.java
@ -0,0 +1,122 @@
+package at.procon.dip.ingestion.service;
+
+import jakarta.mail.BodyPart;
+import jakarta.mail.Multipart;
+import jakarta.mail.Part;
+import jakarta.mail.Session;
+import jakarta.mail.internet.MimeMessage;
+import jakarta.mail.internet.MimeUtility;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.time.OffsetDateTime;
+import java.time.ZoneId;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+import lombok.extern.slf4j.Slf4j;
+import org.jsoup.Jsoup;
+import org.springframework.stereotype.Service;
+
+@Service
+@Slf4j
+public class MailMessageExtractionService {
+
+    public ParsedMailMessage parse(byte[] rawMime) {
+        try {
+            Session session = Session.getDefaultInstance(new Properties());
+            MimeMessage message = new MimeMessage(session, new ByteArrayInputStream(rawMime));
+            String subject = message.getSubject();
+            String from = message.getFrom() != null && message.getFrom().length > 0 ? message.getFrom()[0].toString() : null;
+            List<String> recipients = new ArrayList<>();
+            if (message.getAllRecipients() != null) {
+                for (var recipient : message.getAllRecipients()) {
+                    recipients.add(recipient.toString());
+                }
+            }
+            StringBuilder text = new StringBuilder();
+            StringBuilder html = new StringBuilder();
+            List<MailAttachment> attachments = new ArrayList<>();
+            processPart(message, text, html, attachments);
+            String normalizedText = text.length() > 0 ? text.toString().trim() : htmlToText(html.toString());
+            OffsetDateTime receivedAt = message.getReceivedDate() == null ? OffsetDateTime.now()
+                    : message.getReceivedDate().toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime();
+            return new ParsedMailMessage(subject, from, recipients, receivedAt, normalizedText, html.toString(), attachments);
+        } catch (Exception e) {
+            throw new IllegalArgumentException("Failed to parse MIME message", e);
+        }
+    }
+
+    private void processPart(Part part, StringBuilder text, StringBuilder html, List<MailAttachment> attachments) throws Exception {
+        String disposition = part.getDisposition();
+        String contentType = part.getContentType() == null ? "application/octet-stream" : part.getContentType();
+        if (disposition != null && (Part.ATTACHMENT.equalsIgnoreCase(disposition) || Part.INLINE.equalsIgnoreCase(disposition))
+                && part.getFileName() != null) {
+            attachments.add(extractAttachment(part));
+            return;
+        }
+        Object content = part.getContent();
+        if (content instanceof Multipart multipart) {
+            for (int i = 0; i < multipart.getCount(); i++) {
+                BodyPart bodyPart = multipart.getBodyPart(i);
+                processPart(bodyPart, text, html, attachments);
+            }
+        } else if (contentType.toLowerCase().contains("text/plain")) {
+            text.append(content.toString()).append("\n");
+        } else if (contentType.toLowerCase().contains("text/html")) {
+            html.append(content.toString()).append("\n");
+        } else if (part.getFileName() != null) {
+            attachments.add(extractAttachment(part));
+        }
+    }
+
+    private MailAttachment extractAttachment(Part part) throws Exception {
+        String fileName = part.getFileName();
+        if (fileName == null) {
+            fileName = "attachment";
+        }
+        try {
+            fileName = MimeUtility.decodeText(fileName);
+        } catch (Exception ignored) {
+        }
+        String contentType = part.getContentType();
+        byte[] data;
+        try (InputStream in = part.getInputStream(); ByteArrayOutputStream out = new ByteArrayOutputStream()) {
+            in.transferTo(out);
+            data = out.toByteArray();
+        }
+        return new MailAttachment(fileName, contentType, data, data.length, null);
+    }
+
+    private String htmlToText(String html) {
+        if (html == null || html.isBlank()) {
+            return "";
+        }
+        try {
+            return Jsoup.parse(html).text().replaceAll("\s+", " ").trim();
+        } catch (Exception e) {
+            log.debug("Falling back to naive HTML cleanup: {}", e.getMessage());
+            return html.replaceAll("<[^>]+>", " ").replaceAll("\s+", " ").trim();
+        }
+    }
+
+    public String serializeMessage(ParsedMailMessage parsed) {
+        StringBuilder sb = new StringBuilder();
+        if (parsed.subject() != null) sb.append("Subject: ").append(parsed.subject()).append("\n");
+        if (parsed.from() != null) sb.append("From: ").append(parsed.from()).append("\n");
+        if (!parsed.recipients().isEmpty()) sb.append("To: ").append(String.join(", ", parsed.recipients())).append("\n");
+        sb.append("\n");
+        if (parsed.textBody() != null) sb.append(parsed.textBody());
+        return sb.toString().trim();
+    }
+
+    public record ParsedMailMessage(String subject, String from, List<String> recipients, OffsetDateTime receivedAt,
+                                    String textBody, String htmlBody, List<MailAttachment> attachments) {}
+
+    public record MailAttachment(String fileName, String contentType, byte[] data, long sizeBytes, String path) {
+        public String safeTextPreview() {
+            return new String(data, StandardCharsets.UTF_8);
+        }
+    }
+}
--- a/src/main/java/at/procon/dip/ingestion/service/TedPackageExpansionService.java
+++ b/src/main/java/at/procon/dip/ingestion/service/TedPackageExpansionService.java
@ -0,0 +1,88 @@
+package at.procon.dip.ingestion.service;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.springframework.stereotype.Service;
+
+@Service
+@Slf4j
+public class TedPackageExpansionService {
+
+    private static final int MAX_FILES = 10000;
+    private static final long MAX_SINGLE_FILE_SIZE = 20L * 1024 * 1024;
+    private static final long MAX_TOTAL_EXTRACTED_SIZE = 1024L * 1024 * 1024;
+
+    public TedPackageExpansionResult expand(byte[] tarGzBytes) {
+        List<TedPackageEntry> entries = new ArrayList<>();
+        long total = 0;
+        try (TarArchiveInputStream tais = new TarArchiveInputStream(
+                new GzipCompressorInputStream(new ByteArrayInputStream(tarGzBytes)))) {
+            TarArchiveEntry entry;
+            while ((entry = tais.getNextTarEntry()) != null) {
+                if (entry.isDirectory()) {
+                    continue;
+                }
+                if (entries.size() >= MAX_FILES) {
+                    break;
+                }
+                String entryName = entry.getName();
+                if (!entryName.toLowerCase().endsWith(".xml")) {
+                    continue;
+                }
+                if (entryName.contains("..") || entryName.startsWith("/") || entryName.startsWith("\\")) {
+                    log.warn("Skipping suspicious TED package entry {}", entryName);
+                    continue;
+                }
+                ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                byte[] buffer = new byte[8192];
+                long fileSize = 0;
+                int read;
+                while ((read = tais.read(buffer)) > 0) {
+                    fileSize += read;
+                    total += read;
+                    if (fileSize > MAX_SINGLE_FILE_SIZE || total > MAX_TOTAL_EXTRACTED_SIZE) {
+                        throw new IOException("TED package extraction limits exceeded");
+                    }
+                    baos.write(buffer, 0, read);
+                }
+                byte[] data = baos.toByteArray();
+                entries.add(new TedPackageEntry(extractFilename(entryName), entryName, data, data.length, "application/xml"));
+            }
+        } catch (Exception e) {
+            throw new IllegalArgumentException("Failed to expand TED package", e);
+        }
+
+        String manifest = buildManifest(entries);
+        return new TedPackageExpansionResult(entries, manifest);
+    }
+
+    private String buildManifest(List<TedPackageEntry> entries) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("TED package contains ").append(entries.size()).append(" XML notice files\n");
+        for (TedPackageEntry entry : entries) {
+            sb.append("- ").append(entry.archivePath()).append(" (" ).append(entry.sizeBytes()).append(" bytes)\n");
+        }
+        return sb.toString().trim();
+    }
+
+    private String extractFilename(String path) {
+        int idx = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
+        return idx >= 0 ? path.substring(idx + 1) : path;
+    }
+
+    public record TedPackageExpansionResult(List<TedPackageEntry> entries, String manifestText) {}
+
+    public record TedPackageEntry(String fileName, String archivePath, byte[] data, long sizeBytes, String mediaType) {
+        public String textUtf8() {
+            return new String(data, StandardCharsets.UTF_8);
+        }
+    }
+}
--- a/src/main/java/at/procon/dip/ingestion/spi/OriginalContentStoragePolicy.java
+++ b/src/main/java/at/procon/dip/ingestion/spi/OriginalContentStoragePolicy.java
@ -0,0 +1,11 @@
+package at.procon.dip.ingestion.spi;
+
+/**
+ * Controls whether the ORIGINAL raw payload should be persisted for a single imported document.
+ * DEFAULT defers to the global ingestion configuration and wrapper-document heuristics.
+ */
+public enum OriginalContentStoragePolicy {
+    DEFAULT,
+    STORE,
+    SKIP
+}
--- a/src/main/java/at/procon/dip/ingestion/spi/SourceDescriptor.java
+++ b/src/main/java/at/procon/dip/ingestion/spi/SourceDescriptor.java
@ -21,9 +21,16 @@ public record SourceDescriptor(
        byte[] binaryContent,
        String textContent,
        OffsetDateTime receivedAt,
+        OriginalContentStoragePolicy originalContentStoragePolicy,
        Map<String, String> attributes
 ) {

+    public SourceDescriptor {
+        if (originalContentStoragePolicy == null) {
+            originalContentStoragePolicy = OriginalContentStoragePolicy.DEFAULT;
+        }
+    }
+
    public boolean hasInlineBinaryContent() {
        return binaryContent != null && binaryContent.length > 0;
    }
--- a/src/main/java/at/procon/dip/ingestion/util/DocumentImportSupport.java
+++ b/src/main/java/at/procon/dip/ingestion/util/DocumentImportSupport.java
@ -45,7 +45,7 @@ public final class DocumentImportSupport {

    public static DocumentFamily familyFor(DocumentType documentType) {
        return switch (documentType) {
-            case TED_NOTICE -> DocumentFamily.PROCUREMENT;
+            case TED_PACKAGE, TED_NOTICE -> DocumentFamily.PROCUREMENT;
            case EMAIL, MIME_MESSAGE -> DocumentFamily.MAIL;
            case PDF, DOCX, HTML, XML_GENERIC, TEXT, MARKDOWN, ZIP_ARCHIVE, GENERIC_BINARY, UNKNOWN ->
                    DocumentFamily.GENERIC;
--- a/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java
+++ b/src/main/java/at/procon/dip/normalization/impl/DefaultGenericTextRepresentationBuilder.java
@ -12,12 +12,16 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
+import org.springframework.core.annotation.Order;
 import org.springframework.stereotype.Component;
 import org.springframework.util.StringUtils;

@Component
+@Order(100)
 public class DefaultGenericTextRepresentationBuilder implements TextRepresentationBuilder {

+    public static final String BUILDER_KEY = "default-generic-text";
+
    @Override
    public boolean supports(DocumentType documentType) {
        return documentType != DocumentType.TED_NOTICE;
@ -39,25 +43,34 @@ public class DefaultGenericTextRepresentationBuilder implements TextRepresentati
        List<TextRepresentationDraft> drafts = new ArrayList<>();
        drafts.add(new TextRepresentationDraft(
                RepresentationType.FULLTEXT,
+                BUILDER_KEY,
                request.detectionResult().languageCode(),
                baseText,
                false,
-                null
+                null,
+                ContentRole.NORMALIZED_TEXT,
+                Boolean.TRUE
        ));
        drafts.add(new TextRepresentationDraft(
                RepresentationType.SEMANTIC_TEXT,
+                BUILDER_KEY,
                request.detectionResult().languageCode(),
                semantic,
                true,
-                null
+                null,
+                ContentRole.NORMALIZED_TEXT,
+                Boolean.TRUE
        ));
        if (StringUtils.hasText(title)) {
            drafts.add(new TextRepresentationDraft(
                    RepresentationType.TITLE_ABSTRACT,
+                    BUILDER_KEY,
                    request.detectionResult().languageCode(),
                    title + "\n\n" + summary,
                    false,
-                    null
+                    null,
+                    ContentRole.NORMALIZED_TEXT,
+                    Boolean.FALSE
            ));
        }
        return drafts;
--- a/src/main/java/at/procon/dip/normalization/impl/TedStructuredTextRepresentationBuilder.java
+++ b/src/main/java/at/procon/dip/normalization/impl/TedStructuredTextRepresentationBuilder.java
@ -0,0 +1,138 @@
+package at.procon.dip.normalization.impl;
+
+import at.procon.dip.domain.document.ContentRole;
+import at.procon.dip.domain.document.DocumentType;
+import at.procon.dip.domain.document.RepresentationType;
+import at.procon.dip.extraction.spi.ExtractedStructuredPayload;
+import at.procon.dip.ingestion.util.DocumentImportSupport;
+import at.procon.dip.normalization.spi.RepresentationBuildRequest;
+import at.procon.dip.normalization.spi.TextRepresentationBuilder;
+import at.procon.dip.normalization.spi.TextRepresentationDraft;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import org.springframework.core.annotation.Order;
+import org.springframework.stereotype.Component;
+import org.springframework.util.StringUtils;
+
+@Component
+@Order(10)
+public class TedStructuredTextRepresentationBuilder implements TextRepresentationBuilder {
+
+    public static final String BUILDER_KEY = "ted-structured-text";
+
+    @Override
+    public boolean supports(DocumentType documentType) {
+        return documentType == DocumentType.TED_NOTICE;
+    }
+
+    @Override
+    public List<TextRepresentationDraft> build(RepresentationBuildRequest request) {
+        String normalizedText = request.extractionResult().derivedTextByRole().get(ContentRole.NORMALIZED_TEXT);
+        if (!StringUtils.hasText(normalizedText)) {
+            return List.of();
+        }
+
+        Map<String, Object> attributes = request.extractionResult().structuredPayloads().stream()
+                .filter(payload -> Objects.equals(payload.projectionName(), "ted-notice"))
+                .map(ExtractedStructuredPayload::attributes)
+                .filter(Objects::nonNull)
+                .findFirst()
+                .orElse(Map.of());
+
+        String title = asString(attributes.get("title"));
+        String description = asString(attributes.get("description"));
+        String buyerName = asString(attributes.get("buyerName"));
+        String cpvCodes = asString(attributes.get("cpvCodes"));
+        String nutsCodes = asString(attributes.get("nutsCodes"));
+        String publicationId = asString(attributes.get("publicationId"));
+        String semanticText = buildSemanticText(title, description, buyerName, cpvCodes, nutsCodes, publicationId, normalizedText);
+        String summary = DocumentImportSupport.ellipsize(
+                StringUtils.hasText(description) ? description.trim() : normalizedText.replace('\n', ' ').trim(),
+                1200
+        );
+
+        List<TextRepresentationDraft> drafts = new ArrayList<>();
+        drafts.add(new TextRepresentationDraft(
+                RepresentationType.SEMANTIC_TEXT,
+                BUILDER_KEY,
+                request.detectionResult().languageCode(),
+                semanticText,
+                true,
+                null,
+                ContentRole.NORMALIZED_TEXT,
+                Boolean.TRUE
+        ));
+        /*
+        drafts.add(new TextRepresentationDraft(
+                RepresentationType.FULLTEXT,
+                BUILDER_KEY,
+                request.detectionResult().languageCode(),
+                normalizedText,
+                false,
+                null,
+                ContentRole.NORMALIZED_TEXT,
+                Boolean.TRUE
+        ));
+        if (StringUtils.hasText(title)) {
+            drafts.add(new TextRepresentationDraft(
+                    RepresentationType.TITLE_ABSTRACT,
+                    BUILDER_KEY,
+                    request.detectionResult().languageCode(),
+                    title + "\n\n" + summary,
+                    false,
+                    null,
+                    ContentRole.NORMALIZED_TEXT,
+                    Boolean.FALSE
+            ));
+        }
+        drafts.add(new TextRepresentationDraft(
+                RepresentationType.SUMMARY,
+                BUILDER_KEY,
+                request.detectionResult().languageCode(),
+                summary,
+                false,
+                null,
+                ContentRole.NORMALIZED_TEXT,
+                Boolean.FALSE
+        ));
+         */
+        return drafts;
+    }
+
+    private String buildSemanticText(String title,
+                                     String description,
+                                     String buyerName,
+                                     String cpvCodes,
+                                     String nutsCodes,
+                                     String publicationId,
+                                     String normalizedText) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("Document type: TED_NOTICE\n");
+        if (StringUtils.hasText(publicationId)) {
+            sb.append("Publication: ").append(publicationId.trim()).append('\n');
+        }
+        if (StringUtils.hasText(title)) {
+            sb.append("Title: ").append(title.trim()).append("\n\n");
+        }
+        if (StringUtils.hasText(buyerName)) {
+            sb.append("Contracting Authority: ").append(buyerName.trim()).append('\n');
+        }
+        if (StringUtils.hasText(cpvCodes)) {
+            sb.append("CPV Codes: ").append(cpvCodes.trim()).append('\n');
+        }
+        if (StringUtils.hasText(nutsCodes)) {
+            sb.append("NUTS Codes: ").append(nutsCodes.trim()).append('\n');
+        }
+        if (StringUtils.hasText(description)) {
+            sb.append("\nDescription: ").append(description.trim()).append("\n\n");
+        }
+        sb.append(normalizedText.trim());
+        return sb.toString().trim();
+    }
+
+    private String asString(Object value) {
+        return value instanceof String s && StringUtils.hasText(s) ? s : null;
+    }
+}
--- a/src/main/java/at/procon/dip/normalization/service/TextRepresentationBuildService.java
+++ b/src/main/java/at/procon/dip/normalization/service/TextRepresentationBuildService.java
@ -3,8 +3,10 @@ package at.procon.dip.normalization.service;
 import at.procon.dip.normalization.spi.RepresentationBuildRequest;
 import at.procon.dip.normalization.spi.TextRepresentationBuilder;
 import at.procon.dip.normalization.spi.TextRepresentationDraft;
+import java.util.ArrayList;
 import java.util.List;
 import lombok.RequiredArgsConstructor;
+import org.springframework.core.annotation.AnnotationAwareOrderComparator;
 import org.springframework.stereotype.Service;

@Service
@ -14,11 +16,22 @@ public class TextRepresentationBuildService {
    private final List<TextRepresentationBuilder> builders;

    public List<TextRepresentationDraft> build(RepresentationBuildRequest request) {
-        return builders.stream()
+        List<TextRepresentationBuilder> matchingBuilders = builders.stream()
                .filter(builder -> builder.supports(request.detectionResult().documentType()))
-                .findFirst()
-                .orElseThrow(() -> new IllegalStateException(
-                        "No text representation builder registered for " + request.detectionResult().documentType()))
-                .build(request);
+                .sorted(AnnotationAwareOrderComparator.INSTANCE)
+                .toList();
+        if (matchingBuilders.isEmpty()) {
+            throw new IllegalStateException(
+                    "No text representation builder registered for " + request.detectionResult().documentType());
+        }
+
+        List<TextRepresentationDraft> result = new ArrayList<>();
+        for (TextRepresentationBuilder builder : matchingBuilders) {
+            List<TextRepresentationDraft> drafts = builder.build(request);
+            if (drafts != null && !drafts.isEmpty()) {
+                result.addAll(drafts);
+            }
+        }
+        return result;
    }
 }
--- a/src/main/java/at/procon/dip/normalization/spi/TextRepresentationDraft.java
+++ b/src/main/java/at/procon/dip/normalization/spi/TextRepresentationDraft.java
@ -1,5 +1,6 @@
 package at.procon.dip.normalization.spi;

+import at.procon.dip.domain.document.ContentRole;
 import at.procon.dip.domain.document.RepresentationType;

 /**
@ -7,9 +8,20 @@ import at.procon.dip.domain.document.RepresentationType;
 */
 public record TextRepresentationDraft(
        RepresentationType representationType,
+        String builderKey,
        String languageCode,
        String textBody,
        boolean primary,
-        Integer chunkIndex
+        Integer chunkIndex,
+        ContentRole sourceContentRole,
+        Boolean queueForEmbedding
 ) {
+
+    public TextRepresentationDraft(RepresentationType representationType,
+                                   String languageCode,
+                                   String textBody,
+                                   boolean primary,
+                                   Integer chunkIndex) {
+        this(representationType, null, languageCode, textBody, primary, chunkIndex, ContentRole.NORMALIZED_TEXT, null);
+    }
 }
--- a/src/main/java/at/procon/dip/processing/impl/TedStructuredDocumentProcessor.java
+++ b/src/main/java/at/procon/dip/processing/impl/TedStructuredDocumentProcessor.java
@ -0,0 +1,110 @@
+package at.procon.dip.processing.impl;
+
+import at.procon.dip.classification.spi.DetectionResult;
+import at.procon.dip.domain.document.ContentRole;
+import at.procon.dip.domain.document.DocumentFamily;
+import at.procon.dip.domain.document.DocumentStatus;
+import at.procon.dip.domain.document.DocumentType;
+import at.procon.dip.extraction.spi.ExtractedStructuredPayload;
+import at.procon.dip.extraction.spi.ExtractionResult;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
+import at.procon.dip.processing.spi.DocumentProcessingPolicy;
+import at.procon.dip.processing.spi.StructuredDocumentProcessor;
+import at.procon.dip.processing.spi.StructuredProcessingRequest;
+import at.procon.dip.domain.document.service.DocumentService;
+import at.procon.dip.domain.ted.service.TedNoticeProjectionService;
+import at.procon.ted.model.entity.ProcurementDocument;
+import at.procon.ted.service.XmlParserService;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+import org.springframework.util.StringUtils;
+
+@Component
+@RequiredArgsConstructor
+@Slf4j
+public class TedStructuredDocumentProcessor implements StructuredDocumentProcessor {
+
+    private final XmlParserService xmlParserService;
+    private final DocumentService documentService;
+    private final TedNoticeProjectionService tedNoticeProjectionService;
+
+    @Override
+    public boolean supports(SourceDescriptor sourceDescriptor, DetectionResult detectionResult) {
+        return detectionResult.documentType() == DocumentType.TED_NOTICE;
+    }
+
+    @Override
+    public DocumentProcessingPolicy processingPolicy(SourceDescriptor sourceDescriptor, DetectionResult detectionResult) {
+        return DocumentProcessingPolicy.replaceGenericTextProcessing();
+    }
+
+    @Override
+    public ExtractionResult process(StructuredProcessingRequest request) {
+        String xml = request.textContent();
+        if (!StringUtils.hasText(xml) && request.binaryContent() != null) {
+            xml = new String(request.binaryContent(), java.nio.charset.StandardCharsets.UTF_8);
+        }
+        if (!StringUtils.hasText(xml)) {
+            return new ExtractionResult(Map.of(), List.of(), List.of("TED structured processor received no XML payload"));
+        }
+
+        ProcurementDocument tedDocument = xmlParserService.parseDocument(xml);
+        tedDocument.setDocumentHash(request.dedupHash());
+        tedDocument.setXmlDocument(xml);
+        tedDocument.setSourceFilename(request.sourceDescriptor().fileName());
+        tedDocument.setSourcePath(request.sourceDescriptor().sourceUri());
+        tedDocument.setFileSizeBytes(request.binaryContent() == null ? null : (long) request.binaryContent().length);
+
+        var canonical = request.document();
+        canonical.setDocumentType(DocumentType.TED_NOTICE);
+        canonical.setDocumentFamily(DocumentFamily.PROCUREMENT);
+        canonical.setStatus(DocumentStatus.CLASSIFIED);
+        canonical.setTitle(tedDocument.getProjectTitle());
+        canonical.setSummary(tedDocument.getProjectDescription());
+        canonical.setLanguageCode(tedDocument.getLanguageCode());
+        canonical.setMimeType(request.detectionResult().mimeType() == null ? "application/xml" : request.detectionResult().mimeType());
+        if (StringUtils.hasText(tedDocument.getPublicationId())) {
+            canonical.setBusinessKey("TED_NOTICE:" + tedDocument.getPublicationId());
+        } else if (StringUtils.hasText(tedDocument.getNoticeId())) {
+            canonical.setBusinessKey("TED_NOTICE:" + tedDocument.getNoticeId());
+        }
+        documentService.save(canonical);
+
+        tedNoticeProjectionService.registerOrRefreshProjection(tedDocument, canonical.getId());
+
+        Map<String, Object> payload = new LinkedHashMap<>();
+        if (StringUtils.hasText(tedDocument.getProjectTitle())) {
+            payload.put("title", tedDocument.getProjectTitle());
+        }
+        if (StringUtils.hasText(tedDocument.getProjectDescription())) {
+            payload.put("description", tedDocument.getProjectDescription());
+        }
+        if (StringUtils.hasText(tedDocument.getBuyerName())) {
+            payload.put("buyerName", tedDocument.getBuyerName());
+        }
+        if (tedDocument.getCpvCodes() != null && tedDocument.getCpvCodes().length > 0) {
+            payload.put("cpvCodes", String.join(", ", tedDocument.getCpvCodes()));
+        }
+        if (tedDocument.getNutsCodes() != null && tedDocument.getNutsCodes().length > 0) {
+            payload.put("nutsCodes", String.join(", ", tedDocument.getNutsCodes()));
+        }
+        payload.put("lotCount", tedDocument.getLots() == null ? 0 : tedDocument.getLots().size());
+        payload.put("noticeId", tedDocument.getNoticeId());
+        payload.put("publicationId", tedDocument.getPublicationId());
+
+        Map<ContentRole, String> derivedText = new LinkedHashMap<>();
+        if (StringUtils.hasText(tedDocument.getTextContent())) {
+            derivedText.put(ContentRole.NORMALIZED_TEXT, tedDocument.getTextContent());
+        }
+
+        return new ExtractionResult(
+                derivedText,
+                List.of(new ExtractedStructuredPayload("ted-notice", payload)),
+                List.of()
+        );
+    }
+}
--- a/src/main/java/at/procon/dip/processing/service/StructuredDocumentProcessingService.java
+++ b/src/main/java/at/procon/dip/processing/service/StructuredDocumentProcessingService.java
@ -0,0 +1,36 @@
+package at.procon.dip.processing.service;
+
+import at.procon.dip.classification.spi.DetectionResult;
+import at.procon.dip.processing.spi.DocumentProcessingPolicy;
+import at.procon.dip.processing.spi.StructuredDocumentProcessor;
+import at.procon.dip.processing.spi.StructuredProcessingRequest;
+import at.procon.dip.extraction.spi.ExtractionResult;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
+import java.util.List;
+import java.util.Optional;
+import lombok.RequiredArgsConstructor;
+import org.springframework.stereotype.Service;
+
+@Service
+@RequiredArgsConstructor
+public class StructuredDocumentProcessingService {
+
+    private final List<StructuredDocumentProcessor> processors;
+
+    public Optional<StructuredDocumentProcessor> resolve(SourceDescriptor sourceDescriptor, DetectionResult detectionResult) {
+        return processors.stream()
+                .filter(processor -> processor.supports(sourceDescriptor, detectionResult))
+                .findFirst();
+    }
+
+    public DocumentProcessingPolicy resolvePolicy(SourceDescriptor sourceDescriptor, DetectionResult detectionResult) {
+        return resolve(sourceDescriptor, detectionResult)
+                .map(processor -> processor.processingPolicy(sourceDescriptor, detectionResult))
+                .orElse(DocumentProcessingPolicy.genericDefault());
+    }
+
+    public Optional<ExtractionResult> process(StructuredProcessingRequest request) {
+        return resolve(request.sourceDescriptor(), request.detectionResult())
+                .map(processor -> processor.process(request));
+    }
+}
--- a/src/main/java/at/procon/dip/processing/spi/DocumentProcessingPolicy.java
+++ b/src/main/java/at/procon/dip/processing/spi/DocumentProcessingPolicy.java
@ -0,0 +1,22 @@
+package at.procon.dip.processing.spi;
+
+/**
+ * Controls which generic pipeline stages should run for a document and whether
+ * a structured processor should be invoked.
+ */
+public record DocumentProcessingPolicy(
+        boolean runGenericExtraction,
+        boolean persistExtractedContent,
+        boolean runRepresentationBuilders,
+        boolean invokeStructuredProcessor,
+        boolean applyStructuredTitleIfMissing
+) {
+
+    public static DocumentProcessingPolicy genericDefault() {
+        return new DocumentProcessingPolicy(true, true, true, true, true);
+    }
+
+    public static DocumentProcessingPolicy replaceGenericTextProcessing() {
+        return new DocumentProcessingPolicy(false, true, true, true, true);
+    }
+}
--- a/src/main/java/at/procon/dip/processing/spi/StructuredDocumentProcessor.java
+++ b/src/main/java/at/procon/dip/processing/spi/StructuredDocumentProcessor.java
@ -0,0 +1,19 @@
+package at.procon.dip.processing.spi;
+
+import at.procon.dip.classification.spi.DetectionResult;
+import at.procon.dip.extraction.spi.ExtractionResult;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
+
+/**
+ * Optional type-specific enrichment layer on top of the canonical DOC import.
+ */
+public interface StructuredDocumentProcessor {
+
+    boolean supports(SourceDescriptor sourceDescriptor, DetectionResult detectionResult);
+
+    default DocumentProcessingPolicy processingPolicy(SourceDescriptor sourceDescriptor, DetectionResult detectionResult) {
+        return DocumentProcessingPolicy.replaceGenericTextProcessing();
+    }
+
+    ExtractionResult process(StructuredProcessingRequest request);
+}
--- a/src/main/java/at/procon/dip/processing/spi/StructuredProcessingRequest.java
+++ b/src/main/java/at/procon/dip/processing/spi/StructuredProcessingRequest.java
@ -0,0 +1,20 @@
+package at.procon.dip.processing.spi;
+
+import at.procon.dip.classification.spi.DetectionResult;
+import at.procon.dip.domain.document.entity.Document;
+import at.procon.dip.domain.document.entity.DocumentContent;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
+
+/**
+ * Canonical import context handed to a structured document processor.
+ */
+public record StructuredProcessingRequest(
+        Document document,
+        DocumentContent originalContent,
+        SourceDescriptor sourceDescriptor,
+        DetectionResult detectionResult,
+        byte[] binaryContent,
+        String textContent,
+        String dedupHash
+) {
+}
--- a/src/main/java/at/procon/ted/camel/MailRoute.java
+++ b/src/main/java/at/procon/ted/camel/MailRoute.java
@ -1,6 +1,10 @@
 package at.procon.ted.camel;

+import at.procon.dip.domain.document.SourceType;
 import at.procon.ted.config.TedProcessorProperties;
+import at.procon.dip.ingestion.service.DocumentIngestionGateway;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
 import at.procon.ted.service.attachment.AttachmentExtractor;
 import at.procon.ted.service.attachment.AttachmentProcessingService;
 import jakarta.mail.BodyPart;
@ -14,16 +18,18 @@ import lombok.extern.slf4j.Slf4j;
 import org.apache.camel.Exchange;
 import org.apache.camel.LoggingLevel;
 import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mail.MailMessage;
 import org.jsoup.Jsoup;
 import org.springframework.stereotype.Component;

 import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
+import java.time.OffsetDateTime;
+import java.time.ZoneId;
+import java.util.*;

 /**
 * Apache Camel route for IMAP mail processing.
@ -51,6 +57,7 @@ public class MailRoute extends RouteBuilder {

    private final TedProcessorProperties properties;
    private final AttachmentProcessingService attachmentProcessingService;
+    private final DocumentIngestionGateway documentIngestionGateway;

    @Override
    public void configure() throws Exception {
@ -105,7 +112,14 @@ public class MailRoute extends RouteBuilder {
        from("direct:mime")
                .routeId(ROUTE_ID_MIME)
                .process(exchange -> {
-                    Message mailMessage = exchange.getIn().getBody(Message.class);
+                    Message mailMessage = null;
+                    MailMessage mailMessage_ = exchange.getIn().getBody(MailMessage.class);
+                    if(mailMessage_ != null) {
+                        mailMessage = mailMessage_.getMessage();
+                    }
+                    else {
+                        mailMessage = exchange.getIn().getBody(Message.class);
+                    }

                    if (mailMessage == null) {
                        log.warn("Received null mail message, skipping");
@ -147,6 +161,41 @@ public class MailRoute extends RouteBuilder {

                    log.info("MIME decoded: subject='{}', textLength={}, htmlLength={}, attachments={}",
                            subject, finalTextContent.length(), htmlContent.length(), attachments.size());
+
+                    if (properties.getGenericIngestion().isEnabled() && properties.getGenericIngestion().isMailAdapterEnabled()) {
+                        try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+                            mailMessage.writeTo(baos);
+                            String sourceIdentifier = mailMessage.getHeader("Message-ID") != null ?
+                                    Arrays.stream(mailMessage.getHeader("Message-ID")).findFirst().orElse(null) : null;
+                            if (sourceIdentifier == null || sourceIdentifier.isBlank()) {
+                                sourceIdentifier = UUID.randomUUID().toString();
+                            }
+                            var result = documentIngestionGateway.ingest(new SourceDescriptor(
+                                    null,
+                                    SourceType.MAIL,
+                                    sourceIdentifier,
+                                    null,
+                                    subject != null ? subject.replaceAll("[^A-Za-z0-9._-]", "_") + ".eml" : "mail-message.eml",
+                                    "message/rfc822",
+                                    baos.toByteArray(),
+                                    null,
+                                    exchange.getIn().getHeader("mailReceivedDate", Date.class) == null
+                                            ? OffsetDateTime.now()
+                                            : exchange.getIn().getHeader("mailReceivedDate", Date.class).toInstant()
+                                                    .atZone(ZoneId.systemDefault()).toOffsetDateTime(),
+                                    OriginalContentStoragePolicy.DEFAULT,
+                                    Map.of(
+                                            "subject", subject != null ? subject : "",
+                                            "from", from != null ? from : ""
+                                    )
+                            ));
+                            if (!result.warnings().isEmpty()) {
+                                log.info("Mail adapter imported MIME message with {} warnings", result.warnings().size());
+                            }
+                        } catch (Exception e) {
+                            log.warn("Phase 4.1 mail adapter import failed for subject '{}': {}", subject, e.getMessage());
+                        }
+                    }
                })
                // Queue attachments for async processing
                .choice()
--- a/src/main/java/at/procon/ted/camel/TedPackageDownloadCamelRoute.java
+++ b/src/main/java/at/procon/ted/camel/TedPackageDownloadCamelRoute.java
@ -1,5 +1,9 @@
 package at.procon.ted.camel;

+import at.procon.dip.ingestion.service.DocumentIngestionGateway;
+import at.procon.dip.ingestion.spi.IngestionResult;
+import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
+import at.procon.dip.ingestion.spi.SourceDescriptor;
 import at.procon.ted.config.TedProcessorProperties;
 import at.procon.ted.model.entity.TedDailyPackage;
 import at.procon.ted.repository.TedDailyPackageRepository;
@ -53,6 +57,7 @@ public class TedPackageDownloadCamelRoute extends RouteBuilder {
    private final TedProcessorProperties properties;
    private final TedDailyPackageRepository packageRepository;
    private final TedPackageDownloadService downloadService;
+    private final DocumentIngestionGateway documentIngestionGateway;
    private final BatchDocumentProcessingService batchProcessingService;

    /**
@ -146,9 +151,21 @@ public class TedPackageDownloadCamelRoute extends RouteBuilder {
                        .process(this::markPackageDuplicate)
                    .otherwise()
                        .process(this::saveDownloadedPackage)
-                        .to("direct:extract-tar-gz")
+                        .choice()
+                            .when(header("skipLegacyXmlProcessing").isEqualTo(true))
+                                .log(LoggingLevel.INFO, "Package ${header.packageId}: generic ingestion gateway-only mode active, skipping legacy XML batch persistence")
+                                .to("direct:complete-package-after-gateway")
+                            .otherwise()
+                                .to("direct:extract-tar-gz")
+                        .endChoice()
                .end();

+        from("direct:complete-package-after-gateway")
+                .routeId("ted-package-gateway-only-completer")
+                .setHeader("processingStartTime", constant(System.currentTimeMillis()))
+                .process(this::markPackageCompleted)
+                .process(this::logPackageStatistics);
+
        // tar.gz Extraction Route
        from("direct:extract-tar-gz")
                .routeId(ROUTE_ID_EXTRACTOR)
@ -214,7 +231,7 @@ public class TedPackageDownloadCamelRoute extends RouteBuilder {
        long runningCount = downloadingCount + processingCount;

        exchange.getIn().setHeader("runningCount", runningCount);
-        exchange.getIn().setHeader("tooManyRunning", runningCount >= 2);
+        exchange.getIn().setHeader("tooManyRunning", runningCount >= 1);

        if (runningCount > 0) {
            log.info("Currently {} packages in progress ({} downloading, {} processing)",
@ -278,21 +295,9 @@ public class TedPackageDownloadCamelRoute extends RouteBuilder {
        Integer serialNumber = exchange.getIn().getHeader("serialNumber", Integer.class);
        String downloadUrl = exchange.getIn().getHeader("downloadUrl", String.class);

-        Optional<TedDailyPackage> existing = packageRepository.findByPackageIdentifier(packageId);
-        if (existing.isPresent()) {
-            TedDailyPackage pkg = existing.get();
-            if (pkg.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND) {
-                log.info("Retrying existing NOT_FOUND package in Camel route: {}", packageId);
-                pkg.setDownloadUrl(downloadUrl);
-                pkg.setErrorMessage(null);
-                pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.DOWNLOADING);
-                pkg = packageRepository.save(pkg);
-                exchange.getIn().setHeader("packageDbId", pkg.getId());
-                return;
-            }
-
-            log.debug("Package {} already exists in DB with status {}", packageId, pkg.getDownloadStatus());
-            exchange.getIn().setHeader("packageDbId", pkg.getId());
+        // Check if already exists
+        if (packageRepository.existsByPackageIdentifier(packageId)) {
+            log.debug("Package {} already exists in DB", packageId);
            return;
        }

@ -372,6 +377,50 @@ public class TedPackageDownloadCamelRoute extends RouteBuilder {
        exchange.getIn().setHeader("downloadPath", downloadPath.toString());
        exchange.getIn().setHeader("deleteAfterExtraction",
            properties.getDownload().isDeleteAfterExtraction());
+
+        exchange.getIn().setHeader("skipLegacyXmlProcessing", false);
+
+        if (properties.getGenericIngestion().isEnabled() && properties.getGenericIngestion().isTedPackageAdapterEnabled()) {
+            try {
+                IngestionResult ingestionResult = documentIngestionGateway.ingest(new SourceDescriptor(
+                        null,
+                        at.procon.dip.domain.document.SourceType.TED_PACKAGE,
+                        packageId,
+                        downloadPath.toString(),
+                        packageId + ".tar.gz",
+                        "application/gzip",
+                        body,
+                        null,
+                        OffsetDateTime.now(),
+                        OriginalContentStoragePolicy.DEFAULT,
+                        java.util.Map.of(
+                                "packageId", packageId,
+                                "title", packageId + ".tar.gz"
+                        )
+                ));
+
+                int importedChildCount = Math.max(0, ingestionResult.documents().size() - 1);
+                exchange.getIn().setHeader("gatewayImportedChildCount", importedChildCount);
+                exchange.getIn().setHeader("gatewayImportWarnings", ingestionResult.warnings().size());
+
+                if (properties.getGenericIngestion().isGatewayOnlyForTedPackages()) {
+                    packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
+                        pkg.setXmlFileCount(importedChildCount);
+                        pkg.setProcessedCount(importedChildCount);
+                        pkg.setFailedCount(0);
+                        packageRepository.save(pkg);
+                    });
+
+                    if (properties.getDownload().isDeleteAfterExtraction()) {
+                        Files.deleteIfExists(downloadPath);
+                    }
+
+                    exchange.getIn().setHeader("skipLegacyXmlProcessing", true);
+                }
+            } catch (Exception e) {
+                log.warn("Phase 4.1 TED package adapter import failed for {}: {}", packageId, e.getMessage());
+            }
+        }
    }

    /**
--- a/src/main/java/at/procon/ted/config/TedProcessorProperties.java
+++ b/src/main/java/at/procon/ted/config/TedProcessorProperties.java
@ -510,7 +510,7 @@ public class TedProcessorProperties {
        /**
         * Input directory for the generic filesystem importer.
         */
-        private String inputDirectory = "D:/ted.europe/generic-input";
+        private String inputDirectory = "/ted.europe/generic-input";

        /**
         * Regular-expression file pattern used by the Camel file route.
@ -570,6 +570,14 @@ public class TedProcessorProperties {
         */
        private boolean deduplicateByContentHash = true;

+        /**
+         * Persist ORIGINAL content rows for wrapper/container documents that primarily exist
+         * to group or reference child documents (for example TED packages or expanded ZIP wrappers).
+         * When disabled, wrappers are still classified, extracted and represented, but the raw
+         * ORIGINAL content payload is not stored in DOC.doc_content.
+         */
+        private boolean storeOriginalContentForWrapperDocuments = true;
+
        /**
         * Queue only the primary text representation for embedding.
         */
@ -580,6 +588,50 @@ public class TedProcessorProperties {
         */
        @NotBlank
        private String importBatchId = "phase4-generic";
+
+        /**
+         * Enable the Phase 4.1 TED package adapter built on top of the generic DOC ingestion SPI.
+         */
+        private boolean tedPackageAdapterEnabled = true;
+
+        /**
+         * Enable the Phase 4.1 mail/document adapter built on top of the generic DOC ingestion SPI.
+         */
+        private boolean mailAdapterEnabled = false;
+
+        /**
+         * Optional dedicated owner tenant key for imported mail messages and attachments.
+         * Falls back to defaultOwnerTenantKey when not configured.
+         */
+        private String mailDefaultOwnerTenantKey;
+
+        /**
+         * Default visibility for imported mail messages and attachments.
+         */
+        private at.procon.dip.domain.access.DocumentVisibility mailDefaultVisibility = at.procon.dip.domain.access.DocumentVisibility.TENANT;
+
+        /**
+         * Expand ZIP attachments recursively through the mail adapter.
+         */
+        private boolean expandMailZipAttachments = true;
+
+        /**
+         * Import batch identifier for TED package roots and extracted TED child documents.
+         */
+        @NotBlank
+        private String tedPackageImportBatchId = "phase41-ted-package";
+
+        /**
+         * When true, TED packages are persisted only through the generic ingestion gateway
+         * and the legacy XML batch persistence path is skipped.
+         */
+        private boolean gatewayOnlyForTedPackages = false;
+
+        /**
+         * Import batch identifier for imported mail root messages and child attachments.
+         */
+        @NotBlank
+        private String mailImportBatchId = "phase41-mail";
    }

 }
--- a/src/main/resources/application.yml
+++ b/src/main/resources/application.yml
@ -39,7 +39,7 @@ spring:
        order_updates: true

  flyway:
-    enabled: true
+    enabled: false
    locations: classpath:db/migration
    baseline-on-migrate: true
    create-schemas: true
@ -128,7 +128,9 @@ ted:
  # TED Daily Package Download configuration
  download:
    # Enable/disable automatic package download
-    enabled: false
+    enabled: true
+    # User service-based camel route
+    use-service-based: false
    # Base URL for TED Daily Packages
    base-url: https://ted.europa.eu/packages/daily/
    # Download directory for tar.gz files
@ -136,11 +138,11 @@ ted:
    # Extract directory for XML files
    extract-directory: /ted.europe/extracted
    # Start year for downloads
-    start-year: 2023
+    start-year: 2026
    # Max consecutive 404 errors before stopping
    max-consecutive-404: 4
    # Polling interval (milliseconds) - 2 minutes
-    poll-interval: 120000
+    poll-interval: 3600000
    # Retry interval for tail NOT_FOUND packages - 6 hours
    not-found-retry-interval: 21600000
    # Grace period after year end before a previous-year tail 404 is treated as final
@ -150,7 +152,7 @@ ted:
    # Download timeout (milliseconds) - 5 minutes
    download-timeout: 300000
    # Max concurrent downloads
-    max-concurrent-downloads: 2
+    max-concurrent-downloads: 1
    # Delay between downloads (milliseconds) for rate limiting - 5 seconds
    delay-between-downloads: 3000
    # Delete tar.gz after extraction
@ -207,13 +209,13 @@ ted:
  # Phase 4 generic ingestion configuration
  generic-ingestion:
    # Master switch for arbitrary document ingestion into the DOC model
-    enabled: false
+    enabled: true
    # Enable file-system polling for non-TED documents
    file-system-enabled: false
    # Allow REST/API upload endpoints for arbitrary documents
    rest-upload-enabled: true
    # Input directory for the generic Camel file route
-    input-directory: D:/ted.europe/generic-input
+    input-directory: /ted.europe/generic-input
    # Regex for files accepted by the generic file route
    file-pattern: .*\.(pdf|txt|html|htm|xml|md|markdown|csv|json|yaml|yml)$
    # Move successfully processed files here
@ -236,10 +238,29 @@ ted:
    max-binary-bytes-in-db: 5242880
    # Deduplicate by content hash and attach additional sources to the same canonical document
    deduplicate-by-content-hash: true
+    # Persist ORIGINAL content rows for wrapper/container documents such as TED packages or ZIP wrappers
+    store-original-content-for-wrapper-documents: true
    # Queue only the primary text representation for vectorization
    vectorize-primary-representation-only: true
    # Import batch marker written to DOC.doc_source.import_batch_id
    import-batch-id: phase4-generic
+    # Enable Phase 4.1 TED package adapter on top of the generic DOC ingestion SPI
+    ted-package-adapter-enabled: true
+    # Enable Phase 4.1 mail/document adapter on top of the generic DOC ingestion SPI
+    mail-adapter-enabled: false
+    # Optional dedicated mail owner tenant, falls back to default-owner-tenant-key
+    mail-default-owner-tenant-key:
+    # Visibility for imported mail messages and attachments
+    mail-default-visibility: TENANT
+    # Expand ZIP attachments recursively through the mail adapter
+    expand-mail-zip-attachments: true
+    # Import batch marker for TED package roots and children
+    ted-package-import-batch-id: phase41-ted-package
+    # When true, TED package documents are stored only through the generic ingestion gateway
+    # and the legacy XML batch processing path is skipped
+    gateway-only-for-ted-packages: true
+    # Import batch marker for mail roots and attachments
+    mail-import-batch-id: phase41-mail

  # Solution Brief processing configuration
  solution-brief:
--- a/src/main/resources/db/migration/V8__doc_phase4_1_expand_document_and_source_types.sql
+++ b/src/main/resources/db/migration/V8__doc_phase4_1_expand_document_and_source_types.sql
@ -0,0 +1,80 @@
+-- Phase 4.1 enum/check-constraint expansion for newly introduced generic document/source types.
+-- Supports both:
+-- 1) PostgreSQL ENUM-backed columns created by Flyway
+-- 2) legacy VARCHAR + CHECK constraint variants that may exist in local/dev databases
+
+DO $$
+BEGIN
+    IF EXISTS (
+        SELECT 1
+        FROM pg_type t
+        JOIN pg_namespace n ON n.oid = t.typnamespace
+        WHERE n.nspname = 'doc'
+          AND t.typname = 'doc_document_type'
+    ) THEN
+        ALTER TYPE DOC.doc_document_type ADD VALUE IF NOT EXISTS 'TED_PACKAGE';
+    END IF;
+END
+$$;
+
+DO $$
+BEGIN
+    IF EXISTS (
+        SELECT 1
+        FROM pg_type t
+        JOIN pg_namespace n ON n.oid = t.typnamespace
+        WHERE n.nspname = 'doc'
+          AND t.typname = 'doc_source_type'
+    ) THEN
+        ALTER TYPE DOC.doc_source_type ADD VALUE IF NOT EXISTS 'PACKAGE_CHILD';
+    END IF;
+END
+$$;
+
+DO $$
+BEGIN
+    IF EXISTS (
+        SELECT 1
+        FROM pg_constraint c
+        JOIN pg_class r ON r.oid = c.conrelid
+        JOIN pg_namespace n ON n.oid = r.relnamespace
+        WHERE n.nspname = 'doc'
+          AND r.relname = 'doc_document'
+          AND c.conname = 'doc_document_document_type_check'
+    ) THEN
+        ALTER TABLE DOC.doc_document DROP CONSTRAINT doc_document_document_type_check;
+        ALTER TABLE DOC.doc_document
+            ADD CONSTRAINT doc_document_document_type_check
+            CHECK (
+                document_type IN (
+                    'TED_PACKAGE', 'TED_NOTICE', 'EMAIL', 'MIME_MESSAGE', 'PDF', 'DOCX', 'HTML',
+                    'XML_GENERIC', 'TEXT', 'MARKDOWN', 'ZIP_ARCHIVE', 'GENERIC_BINARY', 'UNKNOWN'
+                )
+            );
+    END IF;
+END
+$$;
+
+DO $$
+BEGIN
+    IF EXISTS (
+        SELECT 1
+        FROM pg_constraint c
+        JOIN pg_class r ON r.oid = c.conrelid
+        JOIN pg_namespace n ON n.oid = r.relnamespace
+        WHERE n.nspname = 'doc'
+          AND r.relname = 'doc_source'
+          AND c.conname = 'doc_source_source_type_check'
+    ) THEN
+        ALTER TABLE DOC.doc_source DROP CONSTRAINT doc_source_source_type_check;
+        ALTER TABLE DOC.doc_source
+            ADD CONSTRAINT doc_source_source_type_check
+            CHECK (
+                source_type IN (
+                    'TED_PACKAGE', 'PACKAGE_CHILD', 'MAIL', 'FILE_SYSTEM', 'REST_UPLOAD',
+                    'MANUAL_UPLOAD', 'ZIP_CHILD', 'API', 'MIGRATION'
+                )
+            );
+    END IF;
+END
+$$;