package at.procon.dip.ingestion.adapter; import at.procon.dip.classification.spi.DetectionResult; import at.procon.dip.domain.access.DocumentAccessContext; import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentStatus; import at.procon.dip.domain.document.DocumentType; import at.procon.dip.domain.document.RelationType; import at.procon.dip.domain.document.SourceType; import at.procon.dip.domain.document.entity.Document; import at.procon.dip.domain.document.service.DocumentRelationService; import at.procon.dip.domain.document.repository.DocumentSourceRepository; import at.procon.dip.domain.document.service.command.CreateDocumentRelationCommand; import at.procon.dip.ingestion.config.DipIngestionProperties; import at.procon.dip.ingestion.dto.ImportedDocumentResult; import at.procon.dip.ingestion.service.GenericDocumentImportService; import at.procon.dip.ingestion.service.MailMessageExtractionService; import at.procon.dip.ingestion.service.MailMetadataPersistenceService; import at.procon.dip.ingestion.mail.MailImportIdentityResolver; import at.procon.dip.ingestion.spi.IngestionResult; import at.procon.dip.ingestion.spi.SourceDescriptor; import at.procon.ted.service.attachment.ZipExtractionService; import java.nio.file.Files; import java.nio.file.Path; import java.time.OffsetDateTime; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.UUID; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.times; import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.verify; 
import static org.mockito.Mockito.when;

/**
 * Exercises {@link MailDocumentIngestionAdapter} with a MIME message loaded from the
 * filesystem ({@code src/test/resources/mail/sample-message.eml}).
 *
 * <p>The extraction service and the identity resolver are real instances; the import
 * service, relation service, ZIP extraction service, mail metadata persistence service
 * and document source repository are Mockito mocks. Every {@link SourceDescriptor}
 * handed to the mocked import service is recorded so the test can inspect what the
 * adapter produced for the root mail and for each attachment.
 */
@ExtendWith(MockitoExtension.class)
class MailDocumentIngestionAdapterFileSystemTest {

    /** Media type used for the root mail descriptor and recognised by {@link #inferType}. */
    private static final String MAIL_MEDIA_TYPE = "message/rfc822";

    @Mock
    private GenericDocumentImportService importService;

    @Mock
    private DocumentRelationService relationService;

    @Mock
    private ZipExtractionService zipExtractionService;

    @Mock
    private MailMetadataPersistenceService mailMetadataPersistenceService;

    @Mock
    private DocumentSourceRepository documentSourceRepository;

    private MailDocumentIngestionAdapter adapter;

    /** Descriptors passed to {@code importService.importDocument}, in call order. */
    private final List<SourceDescriptor> importedDescriptors = new ArrayList<>();

    /**
     * Builds the adapter under test with mail ingestion enabled for tenant {@code tenant-a}
     * and wires the mock behaviour shared by the tests in this class.
     */
    @BeforeEach
    void setUp() {
        var properties = new DipIngestionProperties();
        properties.setEnabled(true);
        properties.setMailAdapterEnabled(true);
        properties.setMailImportBatchId("test-mail-batch");
        properties.setDefaultOwnerTenantKey("tenant-a");
        properties.setMailDefaultVisibility(DocumentVisibility.TENANT);

        MailMessageExtractionService extractionService = new MailMessageExtractionService();
        adapter = new MailDocumentIngestionAdapter(
                properties,
                importService,
                extractionService,
                relationService,
                zipExtractionService,
                new MailImportIdentityResolver(),
                mailMetadataPersistenceService,
                documentSourceRepository
        );

        // No attachment is treated as a ZIP archive in this test.
        when(zipExtractionService.canHandle(any(), any())).thenReturn(false);
        // Lenient stubbing: Mockito will not report this stub as unnecessary if the
        // adapter never performs the lookup.
        lenient().when(documentSourceRepository.findBySourceTypeAndExternalSourceId(any(), any()))
                .thenReturn(java.util.Optional.empty());
        when(relationService.ensureRelation(any())).thenReturn(null);
        // Record each descriptor and answer with a synthetic import result derived from it.
        when(importService.importDocument(any())).thenAnswer(invocation -> {
            SourceDescriptor descriptor = invocation.getArgument(0);
            importedDescriptors.add(descriptor);
            return new ImportedDocumentResult(
                    buildDocumentFor(descriptor),
                    new DetectionResult(
                            inferType(descriptor),
                            inferFamily(descriptor),
                            descriptor.mediaType(),
                            "en",
                            Map.of()),
                    List.of(),
                    false
            );
        });
    }

    /**
     * Ingests the sample {@code .eml} file and verifies that the root mail plus the
     * {@code notes.txt} and {@code legacy.xls} attachments are imported, that the text
     * attachment carries text content while the binary one does not, and that both
     * attachments are linked with an {@link RelationType#ATTACHMENT_OF} relation.
     *
     * @throws Exception if the sample file cannot be read
     */
    @Test
    @DisplayName("Should ingest filesystem-loaded mail message with text and binary attachments")
    void shouldIngestFileSystemLoadedMailMessage() throws Exception {
        // Resolved relative to the working directory of the test run.
        Path emlPath = Path.of("src", "test", "resources", "mail", "sample-message.eml");
        assertTrue(Files.exists(emlPath), "sample .eml test file must exist");
        byte[] mimeBytes = Files.readAllBytes(emlPath);

        SourceDescriptor sourceDescriptor = new SourceDescriptor(
                null,
                SourceType.MAIL,
                "fs-mail-001",
                emlPath.toAbsolutePath().toUri().toString(),
                emlPath.getFileName().toString(),
                MAIL_MEDIA_TYPE,
                mimeBytes,
                null,
                OffsetDateTime.parse("2026-03-18T15:27:59+01:00"),
                null,
                Map.of("source", "filesystem-test")
        );

        assertTrue(adapter.supports(sourceDescriptor));
        IngestionResult result = adapter.ingest(sourceDescriptor);

        assertEquals(3, result.documents().size(), "expected root mail document plus 2 attachment documents");
        assertTrue(result.warnings().isEmpty(), "mail import should not create warnings for the sample message");
        assertEquals(3, importedDescriptors.size(), "root + notes.txt + legacy.xls should be imported");

        // The first import call is expected to be the root mail message.
        SourceDescriptor root = importedDescriptors.get(0);
        assertEquals(MAIL_MEDIA_TYPE, root.mediaType());
        assertEquals("sample-message.eml", root.fileName());
        assertNotNull(root.textContent());
        assertTrue(root.textContent().contains("Subject: Sample mail with filesystem-loaded attachments"));
        assertTrue(root.textContent().contains("Hello from the filesystem-backed sample message."));
        assertEquals(DocumentVisibility.TENANT, root.accessContext().visibility());
        assertNotNull(root.accessContext().ownerTenant());
        assertEquals("tenant-a", root.accessContext().ownerTenant().tenantKey());

        SourceDescriptor textAttachment = importedDescriptorNamed("notes.txt");
        assertEquals(SourceType.MAIL_ATTACHMENT, textAttachment.sourceType());
        assertEquals("text/plain", textAttachment.mediaType());
        assertNotNull(textAttachment.textContent(), "plain text attachment should expose preview text");
        assertTrue(textAttachment.textContent().contains("attachment notes"));

        SourceDescriptor binaryAttachment = importedDescriptorNamed("legacy.xls");
        assertEquals(SourceType.MAIL_ATTACHMENT, binaryAttachment.sourceType());
        assertNull(binaryAttachment.textContent(), "binary old Excel attachment must not be passed as text content");
        assertEquals("application/vnd.ms-excel", binaryAttachment.mediaType());
        assertNotNull(binaryAttachment.binaryContent());
        assertTrue(binaryAttachment.binaryContent().length > 0);

        // One relation per attachment, each of type ATTACHMENT_OF.
        ArgumentCaptor<CreateDocumentRelationCommand> relationCaptor =
                ArgumentCaptor.forClass(CreateDocumentRelationCommand.class);
        verify(relationService, times(2)).ensureRelation(relationCaptor.capture());
        assertTrue(relationCaptor.getAllValues().stream()
                .allMatch(cmd -> cmd.relationType() == RelationType.ATTACHMENT_OF));
    }

    /**
     * Returns the first recorded descriptor whose file name equals {@code fileName}.
     *
     * @param fileName exact file name to look for
     * @return the matching descriptor
     * @throws AssertionError if no descriptor with that file name was imported
     */
    private SourceDescriptor importedDescriptorNamed(String fileName) {
        return importedDescriptors.stream()
                .filter(d -> fileName.equals(d.fileName()))
                .findFirst()
                .orElseThrow(() -> new AssertionError("no imported descriptor named " + fileName));
    }

    /**
     * Builds the synthetic {@link Document} returned by the mocked import service.
     *
     * <p>The id is a name-based UUID over {@code sourceIdentifier:fileName}, so the same
     * descriptor always yields the same id. Visibility falls back to
     * {@link DocumentVisibility#PUBLIC} when the descriptor has no access context.
     *
     * @param descriptor descriptor the adapter passed to the import service
     * @return a document in status {@link DocumentStatus#RECEIVED} mirroring the descriptor
     */
    private Document buildDocumentFor(SourceDescriptor descriptor) {
        String identity = descriptor.sourceIdentifier() + ":" + descriptor.fileName();
        DocumentVisibility visibility = descriptor.accessContext() == null
                ? DocumentVisibility.PUBLIC
                : descriptor.accessContext().visibility();
        return Document.builder()
                // Explicit charset keeps the derived id independent of the platform default.
                .id(UUID.nameUUIDFromBytes(identity.getBytes(java.nio.charset.StandardCharsets.UTF_8)))
                .visibility(visibility)
                .documentType(inferType(descriptor))
                .documentFamily(inferFamily(descriptor))
                .status(DocumentStatus.RECEIVED)
                .title(descriptor.fileName())
                .mimeType(descriptor.mediaType())
                .dedupHash(Integer.toHexString((descriptor.sourceIdentifier() + descriptor.fileName()).hashCode()))
                .build();
    }

    /**
     * Derives a document type for the synthetic import result.
     *
     * @param descriptor descriptor being imported
     * @return {@link DocumentType#EMAIL} for a {@code MAIL} source with media type
     *         {@code message/rfc822}, {@link DocumentType#TEXT} for a {@code .txt} file name
     *         (case-insensitive), otherwise {@link DocumentType#GENERIC_BINARY}
     */
    private DocumentType inferType(SourceDescriptor descriptor) {
        if (descriptor.sourceType() == SourceType.MAIL && MAIL_MEDIA_TYPE.equals(descriptor.mediaType())) {
            return DocumentType.EMAIL;
        }
        // Locale.ROOT avoids locale-specific case mappings when comparing the extension.
        String fileName = descriptor.fileName() == null
                ? ""
                : descriptor.fileName().toLowerCase(java.util.Locale.ROOT);
        if (fileName.endsWith(".txt")) {
            return DocumentType.TEXT;
        }
        return DocumentType.GENERIC_BINARY;
    }

    /**
     * Derives a document family for the synthetic import result.
     *
     * @param descriptor descriptor being imported
     * @return {@link DocumentFamily#MAIL} for a {@code MAIL} source, otherwise
     *         {@link DocumentFamily#GENERIC}
     */
    private DocumentFamily inferFamily(SourceDescriptor descriptor) {
        return descriptor.sourceType() == SourceType.MAIL ? DocumentFamily.MAIL : DocumentFamily.GENERIC;
    }
}