You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

194 lines
8.9 KiB
Java

package at.procon.dip.ingestion.adapter;
import at.procon.dip.classification.spi.DetectionResult;
import at.procon.dip.domain.access.DocumentAccessContext;
import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentStatus;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.RelationType;
import at.procon.dip.domain.document.SourceType;
import at.procon.dip.domain.document.entity.Document;
import at.procon.dip.domain.document.service.DocumentRelationService;
import at.procon.dip.domain.document.repository.DocumentSourceRepository;
import at.procon.dip.domain.document.service.command.CreateDocumentRelationCommand;
import at.procon.dip.ingestion.config.DipIngestionProperties;
import at.procon.dip.ingestion.dto.ImportedDocumentResult;
import at.procon.dip.ingestion.service.GenericDocumentImportService;
import at.procon.dip.ingestion.service.MailMessageExtractionService;
import at.procon.dip.ingestion.service.MailMetadataPersistenceService;
import at.procon.dip.ingestion.mail.MailImportIdentityResolver;
import at.procon.dip.ingestion.spi.IngestionResult;
import at.procon.dip.ingestion.spi.SourceDescriptor;
import at.procon.ted.service.attachment.ZipExtractionService;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.lenient;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
 * Exercises {@link MailDocumentIngestionAdapter} with a MIME message that is read from the
 * test resources directory on the file system.
 *
 * <p>The adapter is wired with a real {@link MailMessageExtractionService} and a real
 * {@link MailImportIdentityResolver}; every other collaborator is a Mockito mock. The mocked
 * {@link GenericDocumentImportService} records each {@link SourceDescriptor} it is asked to
 * import so the test can inspect exactly what the adapter handed over.
 */
@ExtendWith(MockitoExtension.class)
class MailDocumentIngestionAdapterFileSystemTest {

    /** Media type used both for the incoming descriptor and to recognise the root mail document. */
    private static final String RFC822_MEDIA_TYPE = "message/rfc822";

    @Mock
    private GenericDocumentImportService importService;

    @Mock
    private DocumentRelationService relationService;

    @Mock
    private ZipExtractionService zipExtractionService;

    @Mock
    private MailMetadataPersistenceService mailMetadataPersistenceService;

    @Mock
    private DocumentSourceRepository documentSourceRepository;

    private MailDocumentIngestionAdapter adapter;

    /** Descriptors passed to {@code importService.importDocument}, in invocation order. */
    private final List<SourceDescriptor> importedDescriptors = new ArrayList<>();

    /**
     * Builds the adapter under test and installs the mock behaviour shared by all tests.
     */
    @BeforeEach
    void setUp() {
        var properties = new DipIngestionProperties();
        properties.setEnabled(true);
        properties.setMailAdapterEnabled(true);
        properties.setMailImportBatchId("test-mail-batch");
        properties.setDefaultOwnerTenantKey("tenant-a");
        properties.setMailDefaultVisibility(DocumentVisibility.TENANT);

        MailMessageExtractionService extractionService = new MailMessageExtractionService();
        adapter = new MailDocumentIngestionAdapter(
                properties,
                importService,
                extractionService,
                relationService,
                zipExtractionService,
                new MailImportIdentityResolver(),
                mailMetadataPersistenceService,
                documentSourceRepository
        );

        when(zipExtractionService.canHandle(any(), any())).thenReturn(false);
        // Only this stub is lenient: it may legitimately go unused without failing the test.
        lenient().when(documentSourceRepository.findBySourceTypeAndExternalSourceId(any(), any()))
                .thenReturn(java.util.Optional.empty());
        when(relationService.ensureRelation(any())).thenReturn(null);

        // Record every descriptor and answer with a synthetic import result derived from it.
        when(importService.importDocument(any())).thenAnswer(invocation -> {
            SourceDescriptor descriptor = invocation.getArgument(0);
            importedDescriptors.add(descriptor);
            return new ImportedDocumentResult(
                    buildDocumentFor(descriptor),
                    new DetectionResult(
                            inferType(descriptor),
                            inferFamily(descriptor),
                            descriptor.mediaType(),
                            "en",
                            Map.of()),
                    List.of(),
                    false
            );
        });
    }

    /**
     * Ingests {@code src/test/resources/mail/sample-message.eml} and asserts that the adapter
     * imports three descriptors (the root mail, {@code notes.txt} and {@code legacy.xls}),
     * produces no warnings, and requests two {@link RelationType#ATTACHMENT_OF} relations.
     *
     * @throws Exception if the sample file cannot be read
     */
    @Test
    @DisplayName("Should ingest filesystem-loaded mail message with text and binary attachments")
    void shouldIngestFileSystemLoadedMailMessage() throws Exception {
        // Resolved against the working directory of the test run.
        Path emlPath = Path.of("src", "test", "resources", "mail", "sample-message.eml");
        assertTrue(Files.exists(emlPath), "sample .eml test file must exist");
        byte[] mimeBytes = Files.readAllBytes(emlPath);

        SourceDescriptor sourceDescriptor = new SourceDescriptor(
                null,
                SourceType.MAIL,
                "fs-mail-001",
                emlPath.toAbsolutePath().toUri().toString(),
                emlPath.getFileName().toString(),
                RFC822_MEDIA_TYPE,
                mimeBytes,
                null,
                OffsetDateTime.parse("2026-03-18T15:27:59+01:00"),
                null,
                Map.of("source", "filesystem-test")
        );

        assertTrue(adapter.supports(sourceDescriptor));
        IngestionResult result = adapter.ingest(sourceDescriptor);

        assertEquals(3, result.documents().size(), "expected root mail document plus 2 attachment documents");
        assertTrue(result.warnings().isEmpty(), "mail import should not create warnings for the sample message");
        assertEquals(3, importedDescriptors.size(), "root + notes.txt + legacy.xls should be imported");

        // The first recorded import is expected to be the root mail document.
        SourceDescriptor root = importedDescriptors.get(0);
        assertEquals(RFC822_MEDIA_TYPE, root.mediaType());
        assertEquals("sample-message.eml", root.fileName());
        assertNotNull(root.textContent());
        assertTrue(root.textContent().contains("Subject: Sample mail with filesystem-loaded attachments"));
        assertTrue(root.textContent().contains("Hello from the filesystem-backed sample message."));
        assertEquals(DocumentVisibility.TENANT, root.accessContext().visibility());
        assertNotNull(root.accessContext().ownerTenant());
        assertEquals("tenant-a", root.accessContext().ownerTenant().tenantKey());

        SourceDescriptor textAttachment = importedDescriptorNamed("notes.txt");
        assertEquals(SourceType.MAIL_ATTACHMENT, textAttachment.sourceType());
        assertEquals("text/plain", textAttachment.mediaType());
        assertNotNull(textAttachment.textContent(), "plain text attachment should expose preview text");
        assertTrue(textAttachment.textContent().contains("attachment notes"));

        SourceDescriptor binaryAttachment = importedDescriptorNamed("legacy.xls");
        assertEquals(SourceType.MAIL_ATTACHMENT, binaryAttachment.sourceType());
        assertNull(binaryAttachment.textContent(), "binary old Excel attachment must not be passed as text content");
        assertEquals("application/vnd.ms-excel", binaryAttachment.mediaType());
        assertNotNull(binaryAttachment.binaryContent());
        assertTrue(binaryAttachment.binaryContent().length > 0);

        ArgumentCaptor<CreateDocumentRelationCommand> relationCaptor =
                ArgumentCaptor.forClass(CreateDocumentRelationCommand.class);
        verify(relationService, times(2)).ensureRelation(relationCaptor.capture());
        assertTrue(relationCaptor.getAllValues().stream()
                .allMatch(cmd -> cmd.relationType() == RelationType.ATTACHMENT_OF));
    }

    /**
     * Returns the first recorded descriptor whose file name equals {@code fileName}.
     *
     * @param fileName exact file name to look for
     * @return the matching descriptor
     * @throws java.util.NoSuchElementException if no recorded descriptor has that file name
     */
    private SourceDescriptor importedDescriptorNamed(String fileName) {
        return importedDescriptors.stream()
                .filter(d -> fileName.equals(d.fileName()))
                .findFirst()
                .orElseThrow(() -> new java.util.NoSuchElementException(
                        "no imported descriptor with file name " + fileName));
    }

    /**
     * Builds the synthetic {@link Document} returned by the mocked import service.
     *
     * <p>The id is a name-based UUID over {@code sourceIdentifier:fileName}, so the same
     * descriptor always yields the same id. Visibility falls back to
     * {@link DocumentVisibility#PUBLIC} when the descriptor carries no access context.
     *
     * @param descriptor descriptor handed to the import service
     * @return a document in status {@link DocumentStatus#RECEIVED} mirroring the descriptor
     */
    private Document buildDocumentFor(SourceDescriptor descriptor) {
        String idSeed = descriptor.sourceIdentifier() + ":" + descriptor.fileName();
        DocumentVisibility visibility = descriptor.accessContext() == null
                ? DocumentVisibility.PUBLIC
                : descriptor.accessContext().visibility();
        return Document.builder()
                // Explicit charset keeps the generated id independent of the platform default.
                .id(UUID.nameUUIDFromBytes(idSeed.getBytes(java.nio.charset.StandardCharsets.UTF_8)))
                .visibility(visibility)
                .documentType(inferType(descriptor))
                .documentFamily(inferFamily(descriptor))
                .status(DocumentStatus.RECEIVED)
                .title(descriptor.fileName())
                .mimeType(descriptor.mediaType())
                .dedupHash(Integer.toHexString((descriptor.sourceIdentifier() + descriptor.fileName()).hashCode()))
                .build();
    }

    /**
     * Derives a document type for the synthetic detection result.
     *
     * @param descriptor descriptor handed to the import service
     * @return {@link DocumentType#EMAIL} for a {@link SourceType#MAIL} descriptor with the RFC 822
     *     media type, {@link DocumentType#TEXT} for file names ending in {@code .txt}
     *     (case-insensitive), otherwise {@link DocumentType#GENERIC_BINARY}
     */
    private DocumentType inferType(SourceDescriptor descriptor) {
        if (descriptor.sourceType() == SourceType.MAIL && RFC822_MEDIA_TYPE.equals(descriptor.mediaType())) {
            return DocumentType.EMAIL;
        }
        // Locale.ROOT keeps the extension check independent of the JVM's default locale.
        String fileName = descriptor.fileName() == null
                ? ""
                : descriptor.fileName().toLowerCase(java.util.Locale.ROOT);
        if (fileName.endsWith(".txt")) {
            return DocumentType.TEXT;
        }
        return DocumentType.GENERIC_BINARY;
    }

    /**
     * Derives a document family for the synthetic detection result.
     *
     * @param descriptor descriptor handed to the import service
     * @return {@link DocumentFamily#MAIL} for {@link SourceType#MAIL} descriptors, otherwise
     *     {@link DocumentFamily#GENERIC}
     */
    private DocumentFamily inferFamily(SourceDescriptor descriptor) {
        return descriptor.sourceType() == SourceType.MAIL ? DocumentFamily.MAIL : DocumentFamily.GENERIC;
    }
}