You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
194 lines
8.9 KiB
Java
194 lines
8.9 KiB
Java
package at.procon.dip.ingestion.adapter;
|
|
|
|
import at.procon.dip.classification.spi.DetectionResult;
|
|
import at.procon.dip.domain.access.DocumentAccessContext;
|
|
import at.procon.dip.domain.access.DocumentVisibility;
|
|
import at.procon.dip.domain.document.DocumentFamily;
|
|
import at.procon.dip.domain.document.DocumentStatus;
|
|
import at.procon.dip.domain.document.DocumentType;
|
|
import at.procon.dip.domain.document.RelationType;
|
|
import at.procon.dip.domain.document.SourceType;
|
|
import at.procon.dip.domain.document.entity.Document;
|
|
import at.procon.dip.domain.document.service.DocumentRelationService;
|
|
import at.procon.dip.domain.document.repository.DocumentSourceRepository;
|
|
import at.procon.dip.domain.document.service.command.CreateDocumentRelationCommand;
|
|
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
|
import at.procon.dip.ingestion.dto.ImportedDocumentResult;
|
|
import at.procon.dip.ingestion.service.GenericDocumentImportService;
|
|
import at.procon.dip.ingestion.service.MailMessageExtractionService;
|
|
import at.procon.dip.ingestion.service.MailMetadataPersistenceService;
|
|
import at.procon.dip.ingestion.mail.MailImportIdentityResolver;
|
|
import at.procon.dip.ingestion.spi.IngestionResult;
|
|
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
|
import at.procon.ted.service.attachment.ZipExtractionService;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.time.OffsetDateTime;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.UUID;
|
|
import org.junit.jupiter.api.BeforeEach;
|
|
import org.junit.jupiter.api.DisplayName;
|
|
import org.junit.jupiter.api.Test;
|
|
import org.junit.jupiter.api.extension.ExtendWith;
|
|
import org.mockito.ArgumentCaptor;
|
|
import org.mockito.Mock;
|
|
import org.mockito.junit.jupiter.MockitoExtension;
|
|
|
|
import static org.junit.jupiter.api.Assertions.*;
|
|
import static org.mockito.ArgumentMatchers.any;
|
|
import static org.mockito.Mockito.times;
|
|
import static org.mockito.Mockito.lenient;
|
|
import static org.mockito.Mockito.verify;
|
|
import static org.mockito.Mockito.when;
|
|
|
|
@ExtendWith(MockitoExtension.class)
|
|
class MailDocumentIngestionAdapterFileSystemTest {
|
|
|
|
@Mock
|
|
private GenericDocumentImportService importService;
|
|
|
|
@Mock
|
|
private DocumentRelationService relationService;
|
|
|
|
@Mock
|
|
private ZipExtractionService zipExtractionService;
|
|
|
|
@Mock
|
|
private MailMetadataPersistenceService mailMetadataPersistenceService;
|
|
|
|
@Mock
|
|
private DocumentSourceRepository documentSourceRepository;
|
|
|
|
private MailDocumentIngestionAdapter adapter;
|
|
private final List<SourceDescriptor> importedDescriptors = new ArrayList<>();
|
|
|
|
@BeforeEach
|
|
void setUp() {
|
|
var properties = new DipIngestionProperties();
|
|
properties.setEnabled(true);
|
|
properties.setMailAdapterEnabled(true);
|
|
properties.setMailImportBatchId("test-mail-batch");
|
|
properties.setDefaultOwnerTenantKey("tenant-a");
|
|
properties.setMailDefaultVisibility(DocumentVisibility.TENANT);
|
|
|
|
MailMessageExtractionService extractionService = new MailMessageExtractionService();
|
|
adapter = new MailDocumentIngestionAdapter(
|
|
properties,
|
|
importService,
|
|
extractionService,
|
|
relationService,
|
|
zipExtractionService,
|
|
new MailImportIdentityResolver(),
|
|
mailMetadataPersistenceService,
|
|
documentSourceRepository
|
|
);
|
|
|
|
when(zipExtractionService.canHandle(any(), any())).thenReturn(false);
|
|
lenient().when(documentSourceRepository.findBySourceTypeAndExternalSourceId(any(), any())).thenReturn(java.util.Optional.empty());
|
|
when(relationService.ensureRelation(any())).thenReturn(null);
|
|
when(importService.importDocument(any())).thenAnswer(invocation -> {
|
|
SourceDescriptor descriptor = invocation.getArgument(0);
|
|
importedDescriptors.add(descriptor);
|
|
return new ImportedDocumentResult(
|
|
buildDocumentFor(descriptor),
|
|
new DetectionResult(inferType(descriptor), inferFamily(descriptor), descriptor.mediaType(), "en", Map.of()),
|
|
List.of(),
|
|
false
|
|
);
|
|
});
|
|
}
|
|
|
|
@Test
|
|
@DisplayName("Should ingest filesystem-loaded mail message with text and binary attachments")
|
|
void shouldIngestFileSystemLoadedMailMessage() throws Exception {
|
|
Path emlPath = Path.of("src", "test", "resources", "mail", "sample-message.eml");
|
|
assertTrue(Files.exists(emlPath), "sample .eml test file must exist");
|
|
byte[] mimeBytes = Files.readAllBytes(emlPath);
|
|
|
|
SourceDescriptor sourceDescriptor = new SourceDescriptor(
|
|
null,
|
|
SourceType.MAIL,
|
|
"fs-mail-001",
|
|
emlPath.toAbsolutePath().toUri().toString(),
|
|
emlPath.getFileName().toString(),
|
|
"message/rfc822",
|
|
mimeBytes,
|
|
null,
|
|
OffsetDateTime.parse("2026-03-18T15:27:59+01:00"),
|
|
null,
|
|
Map.of("source", "filesystem-test")
|
|
);
|
|
|
|
assertTrue(adapter.supports(sourceDescriptor));
|
|
|
|
IngestionResult result = adapter.ingest(sourceDescriptor);
|
|
|
|
assertEquals(3, result.documents().size(), "expected root mail document plus 2 attachment documents");
|
|
assertTrue(result.warnings().isEmpty(), "mail import should not create warnings for the sample message");
|
|
assertEquals(3, importedDescriptors.size(), "root + notes.txt + legacy.xls should be imported");
|
|
|
|
SourceDescriptor root = importedDescriptors.get(0);
|
|
assertEquals("message/rfc822", root.mediaType());
|
|
assertEquals("sample-message.eml", root.fileName());
|
|
assertNotNull(root.textContent());
|
|
assertTrue(root.textContent().contains("Subject: Sample mail with filesystem-loaded attachments"));
|
|
assertTrue(root.textContent().contains("Hello from the filesystem-backed sample message."));
|
|
assertEquals(DocumentVisibility.TENANT, root.accessContext().visibility());
|
|
assertNotNull(root.accessContext().ownerTenant());
|
|
assertEquals("tenant-a", root.accessContext().ownerTenant().tenantKey());
|
|
|
|
SourceDescriptor textAttachment = importedDescriptors.stream()
|
|
.filter(d -> "notes.txt".equals(d.fileName()))
|
|
.findFirst()
|
|
.orElseThrow();
|
|
assertEquals(SourceType.MAIL_ATTACHMENT, textAttachment.sourceType());
|
|
assertEquals("text/plain", textAttachment.mediaType());
|
|
assertNotNull(textAttachment.textContent(), "plain text attachment should expose preview text");
|
|
assertTrue(textAttachment.textContent().contains("attachment notes"));
|
|
|
|
SourceDescriptor binaryAttachment = importedDescriptors.stream()
|
|
.filter(d -> "legacy.xls".equals(d.fileName()))
|
|
.findFirst()
|
|
.orElseThrow();
|
|
assertEquals(SourceType.MAIL_ATTACHMENT, binaryAttachment.sourceType());
|
|
assertNull(binaryAttachment.textContent(), "binary old Excel attachment must not be passed as text content");
|
|
assertEquals("application/vnd.ms-excel", binaryAttachment.mediaType());
|
|
assertNotNull(binaryAttachment.binaryContent());
|
|
assertTrue(binaryAttachment.binaryContent().length > 0);
|
|
|
|
ArgumentCaptor<CreateDocumentRelationCommand> relationCaptor = ArgumentCaptor.forClass(CreateDocumentRelationCommand.class);
|
|
verify(relationService, times(2)).ensureRelation(relationCaptor.capture());
|
|
assertTrue(relationCaptor.getAllValues().stream().allMatch(cmd -> cmd.relationType() == RelationType.ATTACHMENT_OF));
|
|
}
|
|
|
|
private Document buildDocumentFor(SourceDescriptor descriptor) {
|
|
return Document.builder()
|
|
.id(UUID.nameUUIDFromBytes((descriptor.sourceIdentifier() + ":" + descriptor.fileName()).getBytes()))
|
|
.visibility(descriptor.accessContext() == null ? DocumentVisibility.PUBLIC : descriptor.accessContext().visibility())
|
|
.documentType(inferType(descriptor))
|
|
.documentFamily(inferFamily(descriptor))
|
|
.status(DocumentStatus.RECEIVED)
|
|
.title(descriptor.fileName())
|
|
.mimeType(descriptor.mediaType())
|
|
.dedupHash(Integer.toHexString((descriptor.sourceIdentifier() + descriptor.fileName()).hashCode()))
|
|
.build();
|
|
}
|
|
|
|
private DocumentType inferType(SourceDescriptor descriptor) {
|
|
if (descriptor.sourceType() == SourceType.MAIL && "message/rfc822".equals(descriptor.mediaType())) {
|
|
return DocumentType.EMAIL;
|
|
}
|
|
String fileName = descriptor.fileName() == null ? "" : descriptor.fileName().toLowerCase();
|
|
if (fileName.endsWith(".txt")) {
|
|
return DocumentType.TEXT;
|
|
}
|
|
return DocumentType.GENERIC_BINARY;
|
|
}
|
|
|
|
private DocumentFamily inferFamily(SourceDescriptor descriptor) {
|
|
return descriptor.sourceType() == SourceType.MAIL ? DocumentFamily.MAIL : DocumentFamily.GENERIC;
|
|
}
|
|
}
|