structured email storage and processing
parent
3284205a9e
commit
8fddc2a429
@ -0,0 +1,114 @@
|
|||||||
|
# Mail Processing Stabilization Phase — Step 1
|
||||||
|
|
||||||
|
This step implements the first practical slice of the mail-processing stabilization work:
|
||||||
|
|
||||||
|
- generic mail-provider contract
|
||||||
|
- provider-aware source identifiers for idempotent import
|
||||||
|
- typed mail metadata persistence
|
||||||
|
- attachment occurrence tracking
|
||||||
|
- current Camel/IMAP route adapted to the generic provider contract
|
||||||
|
|
||||||
|
## Included scope
|
||||||
|
|
||||||
|
### 1. Generic mail provider contract
|
||||||
|
Added a generic abstraction so the ingestion pipeline does not depend on IMAP-specific semantics:
|
||||||
|
|
||||||
|
- `MailProviderType`
|
||||||
|
- `MailProviderEnvelope`
|
||||||
|
- `GenericMailProviderEnvelope`
|
||||||
|
- `MailProviderEnvelopeAttributes`
|
||||||
|
|
||||||
|
Current implementation uses `GenericMailProviderEnvelope` for the existing Camel IMAP route.
|
||||||
|
Future providers such as POP3, EWS, Microsoft Graph, Gmail API, or replay/file sources can use the same contract.
|
||||||
|
|
||||||
|
### 2. Provider-aware idempotency foundation
|
||||||
|
Added `MailImportIdentityResolver` to derive stable source identifiers for:
|
||||||
|
|
||||||
|
- root mail message
|
||||||
|
- attachment occurrences
|
||||||
|
|
||||||
|
Priority for root message identity:
|
||||||
|
1. provider message key
|
||||||
|
2. `Message-ID`
|
||||||
|
3. raw MIME hash
|
||||||
|
|
||||||
|
This allows the import path to remain restart-safe and replay-safe even when content-hash-only deduplication is insufficient.
|
||||||
|
|
||||||
|
### 3. Generic source-id idempotency in document import
|
||||||
|
`GenericDocumentImportService` now checks for an existing `DOC.doc_source` row using:
|
||||||
|
|
||||||
|
- `source_type`
|
||||||
|
- `external_source_id`
|
||||||
|
|
||||||
|
before content-hash deduplication.
|
||||||
|
|
||||||
|
This makes source-identifier idempotency reusable beyond mail as well.
|
||||||
|
|
||||||
|
### 4. Typed mail metadata persistence
|
||||||
|
Added new DOC metadata tables/entities:
|
||||||
|
|
||||||
|
- `DOC.doc_mail_message`
|
||||||
|
- `DOC.doc_mail_recipient`
|
||||||
|
- `DOC.doc_mail_attachment`
|
||||||
|
|
||||||
|
These persist:
|
||||||
|
- provider/account/folder/message/thread keys
|
||||||
|
- `Message-ID`, `In-Reply-To`, `References`
|
||||||
|
- normalized subject
|
||||||
|
- sender/recipients
|
||||||
|
- attachment occurrence metadata
|
||||||
|
- part path / archive path / disposition / content-id
|
||||||
|
|
||||||
|
### 5. Attachment source typing
|
||||||
|
Attachments imported from mail now use:
|
||||||
|
- `SourceType.MAIL_ATTACHMENT`
|
||||||
|
|
||||||
|
instead of the generic `MAIL` source type.
|
||||||
|
|
||||||
|
### 6. Camel IMAP route integration
|
||||||
|
The existing Camel mail route now emits generic provider metadata into `SourceDescriptor.attributes()` using the new provider contract.
|
||||||
|
|
||||||
|
## Not yet included
|
||||||
|
|
||||||
|
The following are intentionally left for the next step:
|
||||||
|
|
||||||
|
- replay/reprocess workflows
|
||||||
|
- import/reprocess run tracking tables
|
||||||
|
- failed attachment retry services
|
||||||
|
- thread-aware search/reporting
|
||||||
|
- admin/ops visibility endpoints or Camel admin routes
|
||||||
|
|
||||||
|
## Main implementation files
|
||||||
|
|
||||||
|
### New files
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/mail/MailProviderType.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/mail/MailProviderEnvelope.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/mail/GenericMailProviderEnvelope.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/mail/MailProviderEnvelopeAttributes.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/mail/MailImportIdentityResolver.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/entity/DocumentMailMessage.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/entity/DocumentMailRecipient.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/entity/DocumentMailAttachment.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/entity/MailRecipientType.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/repository/DocumentMailMessageRepository.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/repository/DocumentMailRecipientRepository.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/repository/DocumentMailAttachmentRepository.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/service/MailMetadataPersistenceService.java`
|
||||||
|
- `src/main/resources/db/migration/V23__doc_mail_processing_stabilization_step1.sql`
|
||||||
|
|
||||||
|
### Modified files
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/SourceType.java`
|
||||||
|
- `src/main/java/at/procon/dip/domain/document/repository/DocumentSourceRepository.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/service/GenericDocumentImportService.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/service/MailMessageExtractionService.java`
|
||||||
|
- `src/main/java/at/procon/dip/ingestion/adapter/MailDocumentIngestionAdapter.java`
|
||||||
|
- `src/main/java/at/procon/ted/camel/MailRoute.java`
|
||||||
|
|
||||||
|
## Recommended next step
|
||||||
|
|
||||||
|
Proceed with **Step 2** of the Mail Processing Stabilization Phase:
|
||||||
|
|
||||||
|
- replay/reprocess services
|
||||||
|
- failed attachment retry flow
|
||||||
|
- import/reprocess run tracking
|
||||||
|
- reporting / operational visibility
|
||||||
@ -0,0 +1,94 @@
|
|||||||
|
package at.procon.dip.domain.document.entity;
|
||||||
|
|
||||||
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.Index;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.PreUpdate;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(schema = SchemaNames.DOC, name = "doc_mail_attachment", indexes = {
|
||||||
|
@Index(name = "idx_doc_mail_attachment_mail", columnList = "mail_document_id"),
|
||||||
|
@Index(name = "idx_doc_mail_attachment_document", columnList = "attachment_document_id"),
|
||||||
|
@Index(name = "idx_doc_mail_attachment_part_path", columnList = "part_path"),
|
||||||
|
@Index(name = "idx_doc_mail_attachment_attachment_index", columnList = "attachment_index")
|
||||||
|
})
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class DocumentMailAttachment {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.UUID)
|
||||||
|
private UUID id;
|
||||||
|
|
||||||
|
@Column(name = "mail_document_id", nullable = false)
|
||||||
|
private UUID mailDocumentId;
|
||||||
|
|
||||||
|
@Column(name = "attachment_document_id", nullable = false)
|
||||||
|
private UUID attachmentDocumentId;
|
||||||
|
|
||||||
|
@Column(name = "disposition", length = 32)
|
||||||
|
private String disposition;
|
||||||
|
|
||||||
|
@Column(name = "content_id", length = 500)
|
||||||
|
private String contentId;
|
||||||
|
|
||||||
|
@Column(name = "filename", length = 1000)
|
||||||
|
private String filename;
|
||||||
|
|
||||||
|
@Column(name = "mime_type", length = 255)
|
||||||
|
private String mimeType;
|
||||||
|
|
||||||
|
@Column(name = "size_bytes")
|
||||||
|
private Long sizeBytes;
|
||||||
|
|
||||||
|
@Column(name = "attachment_index")
|
||||||
|
private Integer attachmentIndex;
|
||||||
|
|
||||||
|
@Column(name = "part_path", length = 500)
|
||||||
|
private String partPath;
|
||||||
|
|
||||||
|
@Column(name = "path_in_archive", columnDefinition = "TEXT")
|
||||||
|
private String pathInArchive;
|
||||||
|
|
||||||
|
@Column(name = "extraction_status", nullable = false, length = 32)
|
||||||
|
@Builder.Default
|
||||||
|
private String extractionStatus = "IMPORTED";
|
||||||
|
|
||||||
|
@Column(name = "error_message", columnDefinition = "TEXT")
|
||||||
|
private String errorMessage;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "created_at", nullable = false, updatable = false)
|
||||||
|
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "updated_at", nullable = false)
|
||||||
|
private OffsetDateTime updatedAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
createdAt = OffsetDateTime.now();
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
|
||||||
|
@PreUpdate
|
||||||
|
protected void onUpdate() {
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,127 @@
|
|||||||
|
package at.procon.dip.domain.document.entity;
|
||||||
|
|
||||||
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
|
import at.procon.dip.ingestion.mail.MailProviderType;
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.FetchType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.Index;
|
||||||
|
import jakarta.persistence.JoinColumn;
|
||||||
|
import jakarta.persistence.OneToOne;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.PreUpdate;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(schema = SchemaNames.DOC, name = "doc_mail_message", indexes = {
|
||||||
|
@Index(name = "idx_doc_mail_message_source_id", columnList = "source_id"),
|
||||||
|
@Index(name = "idx_doc_mail_message_message_id", columnList = "message_id"),
|
||||||
|
@Index(name = "idx_doc_mail_message_thread_key", columnList = "thread_key"),
|
||||||
|
@Index(name = "idx_doc_mail_message_provider_identity", columnList = "provider_type, account_key, folder_key, provider_message_key")
|
||||||
|
})
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class DocumentMailMessage {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@Column(name = "document_id", nullable = false)
|
||||||
|
private UUID documentId;
|
||||||
|
|
||||||
|
@OneToOne(fetch = FetchType.LAZY, optional = false)
|
||||||
|
@JoinColumn(name = "document_id", nullable = false, insertable = false, updatable = false)
|
||||||
|
private Document document;
|
||||||
|
|
||||||
|
@Column(name = "source_id")
|
||||||
|
private UUID sourceId;
|
||||||
|
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
@Column(name = "provider_type", nullable = false, length = 64)
|
||||||
|
@Builder.Default
|
||||||
|
private MailProviderType providerType = MailProviderType.GENERIC;
|
||||||
|
|
||||||
|
@Column(name = "account_key", length = 255)
|
||||||
|
private String accountKey;
|
||||||
|
|
||||||
|
@Column(name = "folder_key", length = 255)
|
||||||
|
private String folderKey;
|
||||||
|
|
||||||
|
@Column(name = "provider_message_key", length = 500)
|
||||||
|
private String providerMessageKey;
|
||||||
|
|
||||||
|
@Column(name = "provider_thread_key", length = 500)
|
||||||
|
private String providerThreadKey;
|
||||||
|
|
||||||
|
@Column(name = "message_id", length = 1000)
|
||||||
|
private String messageId;
|
||||||
|
|
||||||
|
@Column(name = "in_reply_to", length = 1000)
|
||||||
|
private String inReplyTo;
|
||||||
|
|
||||||
|
@Column(name = "references_header", columnDefinition = "TEXT")
|
||||||
|
private String referencesHeader;
|
||||||
|
|
||||||
|
@Column(name = "thread_key", length = 1000)
|
||||||
|
private String threadKey;
|
||||||
|
|
||||||
|
@Column(name = "subject", length = 1000)
|
||||||
|
private String subject;
|
||||||
|
|
||||||
|
@Column(name = "normalized_subject", length = 1000)
|
||||||
|
private String normalizedSubject;
|
||||||
|
|
||||||
|
@Column(name = "from_display_name", length = 500)
|
||||||
|
private String fromDisplayName;
|
||||||
|
|
||||||
|
@Column(name = "from_email", length = 500)
|
||||||
|
private String fromEmail;
|
||||||
|
|
||||||
|
@Column(name = "from_raw", length = 1000)
|
||||||
|
private String fromRaw;
|
||||||
|
|
||||||
|
@Column(name = "reply_to_raw", columnDefinition = "TEXT")
|
||||||
|
private String replyToRaw;
|
||||||
|
|
||||||
|
@Column(name = "sent_at")
|
||||||
|
private OffsetDateTime sentAt;
|
||||||
|
|
||||||
|
@Column(name = "received_at")
|
||||||
|
private OffsetDateTime receivedAt;
|
||||||
|
|
||||||
|
@Column(name = "raw_message_hash", length = 64)
|
||||||
|
private String rawMessageHash;
|
||||||
|
|
||||||
|
@Column(name = "raw_header_hash", length = 64)
|
||||||
|
private String rawHeaderHash;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "created_at", nullable = false, updatable = false)
|
||||||
|
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "updated_at", nullable = false)
|
||||||
|
private OffsetDateTime updatedAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
createdAt = OffsetDateTime.now();
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
|
||||||
|
@PreUpdate
|
||||||
|
protected void onUpdate() {
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,67 @@
|
|||||||
|
package at.procon.dip.domain.document.entity;
|
||||||
|
|
||||||
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.Index;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(schema = SchemaNames.DOC, name = "doc_mail_recipient", indexes = {
|
||||||
|
@Index(name = "idx_doc_mail_recipient_mail", columnList = "mail_document_id"),
|
||||||
|
@Index(name = "idx_doc_mail_recipient_type", columnList = "recipient_type"),
|
||||||
|
@Index(name = "idx_doc_mail_recipient_email", columnList = "email_address")
|
||||||
|
})
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class DocumentMailRecipient {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.UUID)
|
||||||
|
private UUID id;
|
||||||
|
|
||||||
|
@Column(name = "mail_document_id", nullable = false)
|
||||||
|
private UUID mailDocumentId;
|
||||||
|
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
@Column(name = "recipient_type", nullable = false, length = 16)
|
||||||
|
private MailRecipientType recipientType;
|
||||||
|
|
||||||
|
@Column(name = "display_name", length = 500)
|
||||||
|
private String displayName;
|
||||||
|
|
||||||
|
@Column(name = "email_address", length = 500)
|
||||||
|
private String emailAddress;
|
||||||
|
|
||||||
|
@Column(name = "raw_value", columnDefinition = "TEXT")
|
||||||
|
private String rawValue;
|
||||||
|
|
||||||
|
@Column(name = "sort_order", nullable = false)
|
||||||
|
@Builder.Default
|
||||||
|
private int sortOrder = 0;
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "created_at", nullable = false, updatable = false)
|
||||||
|
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
createdAt = OffsetDateTime.now();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.domain.document.entity;
|
||||||
|
|
||||||
|
/**
 * Recipient categories of a mail message.
 *
 * <p>The recipient entity persists this enum by constant name, so renaming a constant
 * requires a data migration.
 */
public enum MailRecipientType {
    TO, CC, BCC;
}
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
package at.procon.dip.domain.document.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentMailAttachment;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
|
public interface DocumentMailAttachmentRepository extends JpaRepository<DocumentMailAttachment, UUID> {
|
||||||
|
|
||||||
|
List<DocumentMailAttachment> findByMailDocumentId(UUID mailDocumentId);
|
||||||
|
|
||||||
|
Optional<DocumentMailAttachment> findByMailDocumentIdAndAttachmentIndex(UUID mailDocumentId, Integer attachmentIndex);
|
||||||
|
|
||||||
|
Optional<DocumentMailAttachment> findByMailDocumentIdAndPartPath(UUID mailDocumentId, String partPath);
|
||||||
|
}
|
||||||
@ -0,0 +1,17 @@
|
|||||||
|
package at.procon.dip.domain.document.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentMailMessage;
|
||||||
|
import at.procon.dip.ingestion.mail.MailProviderType;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
|
public interface DocumentMailMessageRepository extends JpaRepository<DocumentMailMessage, UUID> {
|
||||||
|
|
||||||
|
Optional<DocumentMailMessage> findByProviderTypeAndAccountKeyAndFolderKeyAndProviderMessageKey(
|
||||||
|
MailProviderType providerType,
|
||||||
|
String accountKey,
|
||||||
|
String folderKey,
|
||||||
|
String providerMessageKey
|
||||||
|
);
|
||||||
|
}
|
||||||
@ -0,0 +1,13 @@
|
|||||||
|
package at.procon.dip.domain.document.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentMailRecipient;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
|
public interface DocumentMailRecipientRepository extends JpaRepository<DocumentMailRecipient, UUID> {
|
||||||
|
|
||||||
|
List<DocumentMailRecipient> findByMailDocumentId(UUID mailDocumentId);
|
||||||
|
|
||||||
|
void deleteByMailDocumentId(UUID mailDocumentId);
|
||||||
|
}
|
||||||
@ -0,0 +1,25 @@
|
|||||||
|
package at.procon.dip.embedding.config;
|
||||||
|
|
||||||
|
import java.util.concurrent.Executor;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
public class EmbeddingJobProcessingConfiguration {
|
||||||
|
|
||||||
|
@Bean(name = "embeddingJobProcessingExecutor")
|
||||||
|
public Executor embeddingJobProcessingExecutor(EmbeddingProperties properties) {
|
||||||
|
int parallelBatchCount = Math.max(1, properties.getJobs().getParallelBatchCount());
|
||||||
|
|
||||||
|
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
|
||||||
|
executor.setThreadNamePrefix("embedding-job-");
|
||||||
|
executor.setCorePoolSize(parallelBatchCount);
|
||||||
|
executor.setMaxPoolSize(parallelBatchCount);
|
||||||
|
executor.setQueueCapacity(parallelBatchCount);
|
||||||
|
executor.setWaitForTasksToCompleteOnShutdown(true);
|
||||||
|
executor.setAwaitTerminationSeconds(30);
|
||||||
|
executor.initialize();
|
||||||
|
return executor;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,52 @@
|
|||||||
|
package at.procon.dip.embedding.service;
|
||||||
|
|
||||||
|
import at.procon.dip.embedding.job.service.EmbeddingJobService;
|
||||||
|
import at.procon.dip.embedding.model.EmbeddingProviderResult;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Propagation;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class EmbeddingJobExecutionPersistenceService {
|
||||||
|
|
||||||
|
private final EmbeddingPersistenceService embeddingPersistenceService;
|
||||||
|
private final EmbeddingJobService jobService;
|
||||||
|
|
||||||
|
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||||
|
public UUID startProcessing(UUID representationId, String modelKey) {
|
||||||
|
var embedding = embeddingPersistenceService.ensurePending(representationId, modelKey);
|
||||||
|
embeddingPersistenceService.markProcessing(embedding.getId());
|
||||||
|
return embedding.getId();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||||
|
public void completeJob(UUID embeddingId,
|
||||||
|
EmbeddingProviderResult result,
|
||||||
|
UUID jobId,
|
||||||
|
String providerRequestId) {
|
||||||
|
embeddingPersistenceService.saveCompleted(embeddingId, result);
|
||||||
|
jobService.markCompleted(jobId, providerRequestId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||||
|
public void completeJob(UUID embeddingId,
|
||||||
|
float[] vector,
|
||||||
|
Integer tokenCount,
|
||||||
|
UUID jobId,
|
||||||
|
String providerRequestId) {
|
||||||
|
embeddingPersistenceService.saveCompleted(embeddingId, vector, tokenCount);
|
||||||
|
jobService.markCompleted(jobId, providerRequestId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(propagation = Propagation.REQUIRES_NEW)
|
||||||
|
public void failJob(UUID embeddingId,
|
||||||
|
UUID jobId,
|
||||||
|
String errorMessage,
|
||||||
|
boolean retryable) {
|
||||||
|
embeddingPersistenceService.markFailed(embeddingId, errorMessage);
|
||||||
|
jobService.markFailed(jobId, errorMessage, retryable);
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,207 @@
|
|||||||
|
package at.procon.dip.ingestion.camel;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.SourceType;
|
||||||
|
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
||||||
|
import at.procon.dip.ingestion.mail.MailServerEndpointUriFactory;
|
||||||
|
import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
|
||||||
|
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
||||||
|
import at.procon.dip.ingestion.service.DocumentIngestionGateway;
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
|
import jakarta.mail.Message;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.time.ZoneId;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.camel.Exchange;
|
||||||
|
import org.apache.camel.LoggingLevel;
|
||||||
|
import org.apache.camel.builder.RouteBuilder;
|
||||||
|
import org.apache.camel.component.mail.MailMessage;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class GenericMailIngestionRoute extends RouteBuilder {
|
||||||
|
|
||||||
|
static final String ROUTE_ID_MAIL_CONSUMER = "dip-mail-consumer";
|
||||||
|
static final String ROUTE_ID_MAIL_INGEST = "dip-mail-ingest";
|
||||||
|
|
||||||
|
private final DipIngestionProperties properties;
|
||||||
|
private final DocumentIngestionGateway ingestionGateway;
|
||||||
|
private final MailServerEndpointUriFactory endpointUriFactory;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void configure() {
|
||||||
|
if (!properties.isEnabled() || !properties.isMailAdapterEnabled() || !properties.getMailRoute().isEnabled()) {
|
||||||
|
log.info("NEW mail ingestion Camel route disabled");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String consumerUri = endpointUriFactory.buildConsumerUri(properties.getMailRoute());
|
||||||
|
log.info("Configuring NEW mail ingestion route (protocol={}, host={}, folder={}, user={})",
|
||||||
|
properties.getMailRoute().getProtocol(),
|
||||||
|
properties.getMailRoute().getHost(),
|
||||||
|
properties.getMailRoute().getFolderName(),
|
||||||
|
properties.getMailRoute().getUsername());
|
||||||
|
|
||||||
|
errorHandler(deadLetterChannel("direct:dip-mail-error")
|
||||||
|
.maximumRedeliveries(3)
|
||||||
|
.redeliveryDelay(5000)
|
||||||
|
.retryAttemptedLogLevel(LoggingLevel.WARN)
|
||||||
|
.logStackTrace(true));
|
||||||
|
|
||||||
|
from("direct:dip-mail-error")
|
||||||
|
.routeId("dip-mail-error")
|
||||||
|
.process(exchange -> {
|
||||||
|
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
|
||||||
|
String subject = exchange.getIn().getHeader("mailSubject", String.class);
|
||||||
|
if (exception != null) {
|
||||||
|
log.error("NEW mail ingestion failed for subject '{}': {}", subject, exception.getMessage(), exception);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
from(consumerUri)
|
||||||
|
.routeId(ROUTE_ID_MAIL_CONSUMER)
|
||||||
|
.to("direct:dip-mail-ingest");
|
||||||
|
|
||||||
|
from("direct:dip-mail-ingest")
|
||||||
|
.routeId(ROUTE_ID_MAIL_INGEST)
|
||||||
|
.process(this::ingestMailExchange)
|
||||||
|
.log(LoggingLevel.INFO, "Imported mail message into NEW ingestion pipeline: ${header.mailSubject}");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ingestMailExchange(Exchange exchange) throws Exception {
|
||||||
|
Message mailMessage = resolveMessage(exchange);
|
||||||
|
if (mailMessage == null) {
|
||||||
|
log.warn("Received null mail message, skipping NEW mail ingestion");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String subject = mailMessage.getSubject();
|
||||||
|
String from = mailMessage.getFrom() != null && mailMessage.getFrom().length > 0
|
||||||
|
? mailMessage.getFrom()[0].toString() : null;
|
||||||
|
String messageId = firstHeader(mailMessage, "Message-ID");
|
||||||
|
String providerMessageKey = firstNonBlankHeader(exchange, "CamelMailUid", "uid", "mailUid", "MailUid");
|
||||||
|
String providerThreadKey = firstNonBlankHeader(exchange, "CamelMailThreadId", "mailThreadId", "MailThreadId");
|
||||||
|
|
||||||
|
exchange.getIn().setHeader("mailSubject", subject);
|
||||||
|
|
||||||
|
byte[] rawMime;
|
||||||
|
try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
|
||||||
|
mailMessage.writeTo(baos);
|
||||||
|
rawMime = baos.toByteArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, String> attributes = new LinkedHashMap<>();
|
||||||
|
putIfHasText(attributes, "subject", subject);
|
||||||
|
putIfHasText(attributes, "from", from);
|
||||||
|
putIfHasText(attributes, "providerType", properties.getMailRoute().getProtocol().name());
|
||||||
|
putIfHasText(attributes, "providerProtocol", properties.getMailRoute().getProtocol().camelScheme());
|
||||||
|
putIfHasText(attributes, "providerAccountKey", accountKey());
|
||||||
|
putIfHasText(attributes, "providerFolderKey", properties.getMailRoute().getFolderName());
|
||||||
|
putIfHasText(attributes, "providerMessageKey", providerMessageKey);
|
||||||
|
putIfHasText(attributes, "providerThreadKey", providerThreadKey);
|
||||||
|
putIfHasText(attributes, "messageId", messageId);
|
||||||
|
|
||||||
|
SourceDescriptor descriptor = new SourceDescriptor(
|
||||||
|
null,
|
||||||
|
SourceType.MAIL,
|
||||||
|
buildSourceIdentifier(providerMessageKey, messageId),
|
||||||
|
buildSourceUri(providerMessageKey),
|
||||||
|
fallbackMailFileName(subject),
|
||||||
|
"message/rfc822",
|
||||||
|
rawMime,
|
||||||
|
null,
|
||||||
|
mailMessage.getReceivedDate() == null
|
||||||
|
? OffsetDateTime.now()
|
||||||
|
: mailMessage.getReceivedDate().toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime(),
|
||||||
|
OriginalContentStoragePolicy.DEFAULT,
|
||||||
|
attributes
|
||||||
|
);
|
||||||
|
ingestionGateway.ingest(descriptor);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Message resolveMessage(Exchange exchange) {
|
||||||
|
MailMessage camelMailMessage = exchange.getIn().getBody(MailMessage.class);
|
||||||
|
if (camelMailMessage != null) {
|
||||||
|
return camelMailMessage.getMessage();
|
||||||
|
}
|
||||||
|
return exchange.getIn().getBody(Message.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildSourceIdentifier(String providerMessageKey, String messageId) {
|
||||||
|
if (StringUtils.hasText(providerMessageKey)) {
|
||||||
|
return properties.getMailRoute().getProtocol().name() + ":" + accountKey() + ":"
|
||||||
|
+ defaultIfBlank(properties.getMailRoute().getFolderName(), "INBOX") + ":" + providerMessageKey;
|
||||||
|
}
|
||||||
|
if (StringUtils.hasText(messageId)) {
|
||||||
|
return messageId;
|
||||||
|
}
|
||||||
|
return UUID.randomUUID().toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildSourceUri(String providerMessageKey) {
|
||||||
|
StringBuilder uri = new StringBuilder();
|
||||||
|
uri.append(properties.getMailRoute().getProtocol().camelScheme())
|
||||||
|
.append("://")
|
||||||
|
.append(properties.getMailRoute().getHost())
|
||||||
|
.append("/")
|
||||||
|
.append(defaultIfBlank(properties.getMailRoute().getFolderName(), "INBOX"));
|
||||||
|
if (StringUtils.hasText(providerMessageKey)) {
|
||||||
|
uri.append("#").append(providerMessageKey);
|
||||||
|
}
|
||||||
|
return uri.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String fallbackMailFileName(String subject) {
|
||||||
|
String safeSubject = !StringUtils.hasText(subject) ? "mail-message" : subject.replaceAll("[^A-Za-z0-9._-]", "_");
|
||||||
|
return safeSubject + ".eml";
|
||||||
|
}
|
||||||
|
|
||||||
|
private String firstHeader(Message message, String name) {
|
||||||
|
try {
|
||||||
|
String[] values = message.getHeader(name);
|
||||||
|
return values != null && values.length > 0 ? values[0] : null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String firstNonBlankHeader(Exchange exchange, String... names) {
|
||||||
|
for (String name : names) {
|
||||||
|
String value = exchange.getIn().getHeader(name, String.class);
|
||||||
|
if (StringUtils.hasText(value)) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
Object objectValue = exchange.getIn().getHeader(name);
|
||||||
|
if (objectValue != null) {
|
||||||
|
value = String.valueOf(objectValue);
|
||||||
|
if (StringUtils.hasText(value)) {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String accountKey() {
|
||||||
|
return defaultIfBlank(properties.getMailRoute().getAccountKey(), properties.getMailRoute().getUsername());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void putIfHasText(Map<String, String> attributes, String key, String value) {
|
||||||
|
if (StringUtils.hasText(value)) {
|
||||||
|
attributes.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String defaultIfBlank(String value, String defaultValue) {
|
||||||
|
return StringUtils.hasText(value) ? value : defaultValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,58 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import org.springframework.util.StringUtils;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class CamelMailServerEndpointUriFactory implements MailServerEndpointUriFactory {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String buildConsumerUri(DipIngestionProperties.MailRouteProperties properties) {
|
||||||
|
if (properties == null) {
|
||||||
|
throw new IllegalArgumentException("Mail route properties must not be null");
|
||||||
|
}
|
||||||
|
if (!StringUtils.hasText(properties.getHost())) {
|
||||||
|
throw new IllegalArgumentException("dip.ingestion.mail-route.host must not be blank when the mail route is enabled");
|
||||||
|
}
|
||||||
|
if (!StringUtils.hasText(properties.getUsername())) {
|
||||||
|
throw new IllegalArgumentException("dip.ingestion.mail-route.username must not be blank when the mail route is enabled");
|
||||||
|
}
|
||||||
|
if (properties.getProtocol() == null) {
|
||||||
|
throw new IllegalArgumentException("dip.ingestion.mail-route.protocol must not be null");
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder uri = new StringBuilder();
|
||||||
|
uri.append(properties.getProtocol().camelScheme()).append("://");
|
||||||
|
uri.append(properties.getHost());
|
||||||
|
if (properties.getPort() != null) {
|
||||||
|
uri.append(":").append(properties.getPort());
|
||||||
|
}
|
||||||
|
uri.append("?username=").append(encode(properties.getUsername()));
|
||||||
|
uri.append("&password=").append(encode(properties.getPassword()));
|
||||||
|
uri.append("&folderName=").append(encode(defaultIfBlank(properties.getFolderName(), "INBOX")));
|
||||||
|
uri.append("&delete=").append(properties.isDelete());
|
||||||
|
uri.append("&peek=").append(properties.isPeek());
|
||||||
|
uri.append("&unseen=").append(properties.isUnseen());
|
||||||
|
uri.append("&delay=").append(properties.getDelay());
|
||||||
|
uri.append("&maxMessagesPerPoll=").append(properties.getMaxMessagesPerPoll());
|
||||||
|
uri.append("&fetchSize=").append(properties.getFetchSize());
|
||||||
|
uri.append("&closeFolder=").append(properties.isCloseFolder());
|
||||||
|
uri.append("&debugMode=").append(properties.isDebugMode());
|
||||||
|
uri.append("&connectionTimeout=").append(properties.getConnectionTimeout());
|
||||||
|
return uri.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String encode(String value) {
|
||||||
|
if (value == null) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return URLEncoder.encode(value, StandardCharsets.UTF_8);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String defaultIfBlank(String value, String defaultValue) {
|
||||||
|
return StringUtils.hasText(value) ? value : defaultValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default implementation of the generic provider metadata contract.
|
||||||
|
*/
|
||||||
|
public record GenericMailProviderEnvelope(
|
||||||
|
MailProviderType providerType,
|
||||||
|
String accountKey,
|
||||||
|
String folderKey,
|
||||||
|
String providerMessageKey,
|
||||||
|
String providerThreadKey,
|
||||||
|
Map<String, String> providerAttributes
|
||||||
|
) implements MailProviderEnvelope {
|
||||||
|
|
||||||
|
public GenericMailProviderEnvelope {
|
||||||
|
providerType = providerType == null ? MailProviderType.GENERIC : providerType;
|
||||||
|
providerAttributes = providerAttributes == null ? Map.of() : Map.copyOf(new LinkedHashMap<>(providerAttributes));
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,52 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import at.procon.dip.ingestion.service.MailMessageExtractionService;
|
||||||
|
import at.procon.ted.util.HashUtils;
|
||||||
|
import java.util.Locale;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class MailImportIdentityResolver {
|
||||||
|
|
||||||
|
public String resolveRootSourceIdentifier(MailMessageExtractionService.ParsedMailMessage parsed,
|
||||||
|
MailProviderEnvelope envelope,
|
||||||
|
byte[] rawMime) {
|
||||||
|
if (envelope != null && hasText(envelope.providerMessageKey())) {
|
||||||
|
return "mail:" + envelope.providerType().name().toLowerCase(Locale.ROOT)
|
||||||
|
+ ":" + normalizeIdentityFragment(envelope.accountKey())
|
||||||
|
+ ":" + normalizeIdentityFragment(envelope.folderKey())
|
||||||
|
+ ":" + normalizeIdentityFragment(envelope.providerMessageKey());
|
||||||
|
}
|
||||||
|
if (hasText(parsed.messageId())) {
|
||||||
|
return "mail:message-id:" + normalizeIdentityFragment(parsed.messageId());
|
||||||
|
}
|
||||||
|
return "mail:raw:" + HashUtils.computeSha256(rawMime);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String resolveAttachmentSourceIdentifier(String rootSourceIdentifier,
|
||||||
|
MailMessageExtractionService.MailAttachment attachment) {
|
||||||
|
if (hasText(attachment.partPath())) {
|
||||||
|
return rootSourceIdentifier + ":part:" + normalizeIdentityFragment(attachment.partPath());
|
||||||
|
}
|
||||||
|
if (attachment.attachmentIndex() != null) {
|
||||||
|
return rootSourceIdentifier + ":attachment:" + attachment.attachmentIndex();
|
||||||
|
}
|
||||||
|
return rootSourceIdentifier + ":attachment:"
|
||||||
|
+ normalizeIdentityFragment(attachment.fileName())
|
||||||
|
+ ":" + HashUtils.computeSha256(attachment.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
private String normalizeIdentityFragment(String raw) {
|
||||||
|
if (raw == null || raw.isBlank()) {
|
||||||
|
return "_";
|
||||||
|
}
|
||||||
|
return raw.trim()
|
||||||
|
.replaceAll("^<|>$", "")
|
||||||
|
.replaceAll("\\s+", "_")
|
||||||
|
.replaceAll("[^A-Za-z0-9._:@/+\\-]", "_");
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasText(String value) {
|
||||||
|
return value != null && !value.isBlank();
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,22 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generic provider metadata for a mail message fetched from an upstream mail system.
|
||||||
|
* Implementations may be backed by IMAP, EWS, Graph, Gmail API, filesystem replay, etc.
|
||||||
|
*/
|
||||||
|
public interface MailProviderEnvelope {
|
||||||
|
|
||||||
|
MailProviderType providerType();
|
||||||
|
|
||||||
|
String accountKey();
|
||||||
|
|
||||||
|
String folderKey();
|
||||||
|
|
||||||
|
String providerMessageKey();
|
||||||
|
|
||||||
|
String providerThreadKey();
|
||||||
|
|
||||||
|
Map<String, String> providerAttributes();
|
||||||
|
}
|
||||||
@ -0,0 +1,81 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes/decodes generic mail provider metadata into SourceDescriptor attributes.
|
||||||
|
*/
|
||||||
|
public final class MailProviderEnvelopeAttributes {
|
||||||
|
|
||||||
|
public static final String KEY_PROVIDER_TYPE = "mail.providerType";
|
||||||
|
public static final String KEY_ACCOUNT_KEY = "mail.accountKey";
|
||||||
|
public static final String KEY_FOLDER_KEY = "mail.folderKey";
|
||||||
|
public static final String KEY_PROVIDER_MESSAGE_KEY = "mail.providerMessageKey";
|
||||||
|
public static final String KEY_PROVIDER_THREAD_KEY = "mail.providerThreadKey";
|
||||||
|
public static final String KEY_PREFIX_PROVIDER_ATTRIBUTE = "mail.providerAttribute.";
|
||||||
|
|
||||||
|
private MailProviderEnvelopeAttributes() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public static GenericMailProviderEnvelope fromAttributes(Map<String, String> attributes) {
|
||||||
|
if (attributes == null || attributes.isEmpty()) {
|
||||||
|
return new GenericMailProviderEnvelope(MailProviderType.GENERIC, null, null, null, null, Map.of());
|
||||||
|
}
|
||||||
|
MailProviderType providerType = parseProviderType(attributes.get(KEY_PROVIDER_TYPE));
|
||||||
|
Map<String, String> providerAttributes = new LinkedHashMap<>();
|
||||||
|
for (Map.Entry<String, String> entry : attributes.entrySet()) {
|
||||||
|
if (entry.getKey() != null && entry.getKey().startsWith(KEY_PREFIX_PROVIDER_ATTRIBUTE)) {
|
||||||
|
providerAttributes.put(entry.getKey().substring(KEY_PREFIX_PROVIDER_ATTRIBUTE.length()), entry.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new GenericMailProviderEnvelope(
|
||||||
|
providerType,
|
||||||
|
emptyToNull(attributes.get(KEY_ACCOUNT_KEY)),
|
||||||
|
emptyToNull(attributes.get(KEY_FOLDER_KEY)),
|
||||||
|
emptyToNull(attributes.get(KEY_PROVIDER_MESSAGE_KEY)),
|
||||||
|
emptyToNull(attributes.get(KEY_PROVIDER_THREAD_KEY)),
|
||||||
|
providerAttributes
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Map<String, String> merge(Map<String, String> baseAttributes, MailProviderEnvelope envelope) {
|
||||||
|
Map<String, String> merged = new LinkedHashMap<>();
|
||||||
|
if (baseAttributes != null && !baseAttributes.isEmpty()) {
|
||||||
|
merged.putAll(baseAttributes);
|
||||||
|
}
|
||||||
|
if (envelope == null) {
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
merged.put(KEY_PROVIDER_TYPE, envelope.providerType().name());
|
||||||
|
putIfHasText(merged, KEY_ACCOUNT_KEY, envelope.accountKey());
|
||||||
|
putIfHasText(merged, KEY_FOLDER_KEY, envelope.folderKey());
|
||||||
|
putIfHasText(merged, KEY_PROVIDER_MESSAGE_KEY, envelope.providerMessageKey());
|
||||||
|
putIfHasText(merged, KEY_PROVIDER_THREAD_KEY, envelope.providerThreadKey());
|
||||||
|
if (envelope.providerAttributes() != null) {
|
||||||
|
envelope.providerAttributes().forEach((key, value) -> putIfHasText(merged, KEY_PREFIX_PROVIDER_ATTRIBUTE + key, value));
|
||||||
|
}
|
||||||
|
return merged;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static MailProviderType parseProviderType(String raw) {
|
||||||
|
if (raw == null || raw.isBlank()) {
|
||||||
|
return MailProviderType.GENERIC;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return MailProviderType.valueOf(raw.trim().toUpperCase());
|
||||||
|
} catch (IllegalArgumentException ex) {
|
||||||
|
return MailProviderType.GENERIC;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void putIfHasText(Map<String, String> target, String key, String value) {
|
||||||
|
if (value != null && !value.isBlank()) {
|
||||||
|
target.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String emptyToNull(String value) {
|
||||||
|
return value == null || value.isBlank() ? null : value;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,15 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
/**
 * Logical provider/source type for imported mail messages.
 * The import pipeline should depend on this generic designation instead of
 * binding itself to IMAP-specific semantics.
 */
public enum MailProviderType {
    IMAP,
    POP3,
    EWS,
    MICROSOFT_GRAPH,
    GMAIL_API,
    /** Catch-all type for sources that do not match one of the specific providers. */
    GENERIC
}
|
||||||
@ -0,0 +1,7 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
||||||
|
|
||||||
|
public interface MailServerEndpointUriFactory {
|
||||||
|
String buildConsumerUri(DipIngestionProperties.MailRouteProperties properties);
|
||||||
|
}
|
||||||
@ -0,0 +1,18 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
/**
 * Mail server protocols, each paired with the URI scheme used for its Camel endpoint.
 */
public enum MailServerProtocol {
    IMAP("imap"),
    IMAPS("imaps"),
    POP3("pop3"),
    POP3S("pop3s");

    private final String camelScheme;

    MailServerProtocol(String scheme) {
        camelScheme = scheme;
    }

    /** Returns the URI scheme associated with this protocol, e.g. {@code imaps}. */
    public String camelScheme() {
        return this.camelScheme;
    }
}
|
||||||
@ -0,0 +1,137 @@
|
|||||||
|
package at.procon.dip.ingestion.service;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentMailAttachment;
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentMailMessage;
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentMailRecipient;
|
||||||
|
import at.procon.dip.domain.document.repository.DocumentMailAttachmentRepository;
|
||||||
|
import at.procon.dip.domain.document.repository.DocumentMailMessageRepository;
|
||||||
|
import at.procon.dip.domain.document.repository.DocumentMailRecipientRepository;
|
||||||
|
import at.procon.dip.ingestion.mail.MailProviderEnvelope;
|
||||||
|
import at.procon.ted.util.HashUtils;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Transactional
|
||||||
|
public class MailMetadataPersistenceService {
|
||||||
|
|
||||||
|
private final DocumentMailMessageRepository mailMessageRepository;
|
||||||
|
private final DocumentMailRecipientRepository recipientRepository;
|
||||||
|
private final DocumentMailAttachmentRepository attachmentRepository;
|
||||||
|
|
||||||
|
public void upsertMessageMetadata(UUID documentId,
|
||||||
|
UUID sourceId,
|
||||||
|
MailMessageExtractionService.ParsedMailMessage parsed,
|
||||||
|
MailProviderEnvelope envelope,
|
||||||
|
String rawMessageHash) {
|
||||||
|
DocumentMailMessage entity = mailMessageRepository.findById(documentId)
|
||||||
|
.orElse(DocumentMailMessage.builder().documentId(documentId).build());
|
||||||
|
entity.setSourceId(sourceId);
|
||||||
|
entity.setProviderType(envelope == null ? at.procon.dip.ingestion.mail.MailProviderType.GENERIC : envelope.providerType());
|
||||||
|
entity.setAccountKey(envelope == null ? null : envelope.accountKey());
|
||||||
|
entity.setFolderKey(envelope == null ? null : envelope.folderKey());
|
||||||
|
entity.setProviderMessageKey(envelope == null ? null : envelope.providerMessageKey());
|
||||||
|
entity.setProviderThreadKey(envelope == null ? null : envelope.providerThreadKey());
|
||||||
|
entity.setMessageId(parsed.messageId());
|
||||||
|
entity.setInReplyTo(parsed.inReplyTo());
|
||||||
|
entity.setReferencesHeader(parsed.referencesHeader());
|
||||||
|
entity.setThreadKey(resolveThreadKey(parsed, envelope));
|
||||||
|
entity.setSubject(parsed.subject());
|
||||||
|
entity.setNormalizedSubject(parsed.normalizedSubject());
|
||||||
|
entity.setFromDisplayName(parsed.fromDisplayName());
|
||||||
|
entity.setFromEmail(parsed.fromEmail());
|
||||||
|
entity.setFromRaw(parsed.fromRaw());
|
||||||
|
entity.setReplyToRaw(parsed.replyToRaw());
|
||||||
|
entity.setSentAt(parsed.sentAt());
|
||||||
|
entity.setReceivedAt(parsed.receivedAt());
|
||||||
|
entity.setRawMessageHash(rawMessageHash);
|
||||||
|
entity.setRawHeaderHash(HashUtils.computeSha256(buildHeaderFingerprint(parsed)));
|
||||||
|
mailMessageRepository.save(entity);
|
||||||
|
|
||||||
|
recipientRepository.deleteByMailDocumentId(documentId);
|
||||||
|
List<DocumentMailRecipient> recipients = new ArrayList<>();
|
||||||
|
for (MailMessageExtractionService.MailRecipient recipient : parsed.recipients()) {
|
||||||
|
recipients.add(DocumentMailRecipient.builder()
|
||||||
|
.mailDocumentId(documentId)
|
||||||
|
.recipientType(recipient.recipientType())
|
||||||
|
.displayName(recipient.displayName())
|
||||||
|
.emailAddress(recipient.emailAddress())
|
||||||
|
.rawValue(recipient.rawValue())
|
||||||
|
.sortOrder(recipient.sortOrder())
|
||||||
|
.build());
|
||||||
|
}
|
||||||
|
recipientRepository.saveAll(recipients);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void upsertAttachmentMetadata(UUID mailDocumentId,
|
||||||
|
UUID attachmentDocumentId,
|
||||||
|
MailMessageExtractionService.MailAttachment attachment,
|
||||||
|
String extractionStatus,
|
||||||
|
String errorMessage) {
|
||||||
|
Optional<DocumentMailAttachment> existing = Optional.empty();
|
||||||
|
if (attachment.attachmentIndex() != null) {
|
||||||
|
existing = attachmentRepository.findByMailDocumentIdAndAttachmentIndex(mailDocumentId, attachment.attachmentIndex());
|
||||||
|
}
|
||||||
|
if (existing.isEmpty() && attachment.partPath() != null && !attachment.partPath().isBlank()) {
|
||||||
|
existing = attachmentRepository.findByMailDocumentIdAndPartPath(mailDocumentId, attachment.partPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
DocumentMailAttachment entity = existing.orElse(DocumentMailAttachment.builder()
|
||||||
|
.mailDocumentId(mailDocumentId)
|
||||||
|
.attachmentDocumentId(attachmentDocumentId)
|
||||||
|
.build());
|
||||||
|
entity.setAttachmentDocumentId(attachmentDocumentId);
|
||||||
|
entity.setDisposition(attachment.disposition());
|
||||||
|
entity.setContentId(attachment.contentId());
|
||||||
|
entity.setFilename(attachment.fileName());
|
||||||
|
entity.setMimeType(attachment.contentType());
|
||||||
|
entity.setSizeBytes(attachment.sizeBytes());
|
||||||
|
entity.setAttachmentIndex(attachment.attachmentIndex());
|
||||||
|
entity.setPartPath(attachment.partPath());
|
||||||
|
entity.setPathInArchive(attachment.path());
|
||||||
|
entity.setExtractionStatus(extractionStatus == null || extractionStatus.isBlank() ? "IMPORTED" : extractionStatus);
|
||||||
|
entity.setErrorMessage(errorMessage);
|
||||||
|
attachmentRepository.save(entity);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String resolveThreadKey(MailMessageExtractionService.ParsedMailMessage parsed, MailProviderEnvelope envelope) {
|
||||||
|
if (envelope != null && envelope.providerThreadKey() != null && !envelope.providerThreadKey().isBlank()) {
|
||||||
|
return envelope.providerThreadKey();
|
||||||
|
}
|
||||||
|
if (parsed.referencesHeader() != null && !parsed.referencesHeader().isBlank()) {
|
||||||
|
String[] refs = parsed.referencesHeader().trim().split("\\s+");
|
||||||
|
return refs.length == 0 ? parsed.referencesHeader() : refs[0];
|
||||||
|
}
|
||||||
|
if (parsed.inReplyTo() != null && !parsed.inReplyTo().isBlank()) {
|
||||||
|
return parsed.inReplyTo();
|
||||||
|
}
|
||||||
|
if (parsed.messageId() != null && !parsed.messageId().isBlank()) {
|
||||||
|
return parsed.messageId();
|
||||||
|
}
|
||||||
|
return parsed.normalizedSubject();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String buildHeaderFingerprint(MailMessageExtractionService.ParsedMailMessage parsed) {
|
||||||
|
return String.join("|",
|
||||||
|
safe(parsed.subject()),
|
||||||
|
safe(parsed.fromRaw()),
|
||||||
|
safe(parsed.replyToRaw()),
|
||||||
|
safe(parsed.messageId()),
|
||||||
|
safe(parsed.inReplyTo()),
|
||||||
|
safe(parsed.referencesHeader()),
|
||||||
|
safe(parsed.sentAt() == null ? null : parsed.sentAt().toString()),
|
||||||
|
safe(parsed.receivedAt() == null ? null : parsed.receivedAt().toString())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String safe(String value) {
|
||||||
|
return value == null ? "" : value;
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,90 @@
|
|||||||
|
-- Typed mail metadata: message rows, recipients and attachment occurrences.
SET search_path TO DOC, TED, public;

-- One row per imported mail document, keyed by the document id.
CREATE TABLE IF NOT EXISTS DOC.doc_mail_message (
    document_id          uuid PRIMARY KEY REFERENCES DOC.doc_document(id) ON DELETE CASCADE,
    source_id            uuid REFERENCES DOC.doc_source(id) ON DELETE SET NULL,
    provider_type        varchar(64) NOT NULL,
    account_key          varchar(255),
    folder_key           varchar(255),
    provider_message_key varchar(500),
    provider_thread_key  varchar(500),
    message_id           varchar(1000),
    in_reply_to          varchar(1000),
    references_header    text,
    thread_key           varchar(1000),
    subject              varchar(1000),
    normalized_subject   varchar(1000),
    from_display_name    varchar(500),
    from_email           varchar(500),
    from_raw             varchar(1000),
    reply_to_raw         text,
    sent_at              timestamptz,
    received_at          timestamptz,
    raw_message_hash     varchar(64),
    raw_header_hash      varchar(64),
    created_at           timestamptz NOT NULL DEFAULT now(),
    updated_at           timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_doc_mail_message_source_id
    ON DOC.doc_mail_message (source_id);
CREATE INDEX IF NOT EXISTS idx_doc_mail_message_message_id
    ON DOC.doc_mail_message (message_id);
CREATE INDEX IF NOT EXISTS idx_doc_mail_message_thread_key
    ON DOC.doc_mail_message (thread_key);
CREATE INDEX IF NOT EXISTS idx_doc_mail_message_provider_identity
    ON DOC.doc_mail_message (provider_type, account_key, folder_key, provider_message_key);
-- Provider identity must be unique whenever a provider message key is present.
-- NOTE(review): account_key and folder_key are nullable; if this runs on PostgreSQL,
-- NULLs count as distinct in a unique index by default, so rows with a NULL
-- account_key or folder_key would not be deduplicated here -- confirm this is intended.
CREATE UNIQUE INDEX IF NOT EXISTS uq_doc_mail_message_provider_identity
    ON DOC.doc_mail_message (provider_type, account_key, folder_key, provider_message_key)
    WHERE provider_message_key IS NOT NULL;

-- Recipients of a mail message; removed together with the message row.
CREATE TABLE IF NOT EXISTS DOC.doc_mail_recipient (
    id               uuid PRIMARY KEY,
    mail_document_id uuid NOT NULL REFERENCES DOC.doc_mail_message(document_id) ON DELETE CASCADE,
    recipient_type   varchar(16) NOT NULL,
    display_name     varchar(500),
    email_address    varchar(500),
    raw_value        text,
    sort_order       integer NOT NULL DEFAULT 0,
    created_at       timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_doc_mail_recipient_mail
    ON DOC.doc_mail_recipient (mail_document_id);
CREATE INDEX IF NOT EXISTS idx_doc_mail_recipient_type
    ON DOC.doc_mail_recipient (recipient_type);
CREATE INDEX IF NOT EXISTS idx_doc_mail_recipient_email
    ON DOC.doc_mail_recipient (email_address);

-- Attachment occurrences: links a mail message to the document created for each attachment.
CREATE TABLE IF NOT EXISTS DOC.doc_mail_attachment (
    id                     uuid PRIMARY KEY,
    mail_document_id       uuid NOT NULL REFERENCES DOC.doc_mail_message(document_id) ON DELETE CASCADE,
    attachment_document_id uuid NOT NULL REFERENCES DOC.doc_document(id) ON DELETE CASCADE,
    disposition            varchar(32),
    content_id             varchar(500),
    filename               varchar(1000),
    mime_type              varchar(255),
    size_bytes             bigint,
    attachment_index       integer,
    part_path              varchar(500),
    path_in_archive        text,
    extraction_status      varchar(32) NOT NULL DEFAULT 'IMPORTED',
    error_message          text,
    created_at             timestamptz NOT NULL DEFAULT now(),
    updated_at             timestamptz NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_doc_mail_attachment_mail
    ON DOC.doc_mail_attachment (mail_document_id);
CREATE INDEX IF NOT EXISTS idx_doc_mail_attachment_document
    ON DOC.doc_mail_attachment (attachment_document_id);
CREATE INDEX IF NOT EXISTS idx_doc_mail_attachment_part_path
    ON DOC.doc_mail_attachment (part_path);
CREATE INDEX IF NOT EXISTS idx_doc_mail_attachment_attachment_index
    ON DOC.doc_mail_attachment (attachment_index);
-- At most one occurrence per mail for a given attachment index / part path.
CREATE UNIQUE INDEX IF NOT EXISTS uq_doc_mail_attachment_mail_index
    ON DOC.doc_mail_attachment (mail_document_id, attachment_index)
    WHERE attachment_index IS NOT NULL;
CREATE UNIQUE INDEX IF NOT EXISTS uq_doc_mail_attachment_mail_part
    ON DOC.doc_mail_attachment (mail_document_id, part_path)
    WHERE part_path IS NOT NULL;
|
||||||
@ -0,0 +1,48 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
|
||||||
|
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
class CamelMailServerEndpointUriFactoryTest {
|
||||||
|
|
||||||
|
private final CamelMailServerEndpointUriFactory factory = new CamelMailServerEndpointUriFactory();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void shouldBuildImapsConsumerUri() {
|
||||||
|
DipIngestionProperties.MailRouteProperties properties = new DipIngestionProperties.MailRouteProperties();
|
||||||
|
properties.setProtocol(MailServerProtocol.IMAPS);
|
||||||
|
properties.setHost("mail.example.org");
|
||||||
|
properties.setPort(993);
|
||||||
|
properties.setUsername("user@example.org");
|
||||||
|
properties.setPassword("p@ss word");
|
||||||
|
properties.setFolderName("INBOX/Orders");
|
||||||
|
properties.setDelete(false);
|
||||||
|
properties.setPeek(true);
|
||||||
|
properties.setUnseen(true);
|
||||||
|
properties.setDelay(15000);
|
||||||
|
properties.setMaxMessagesPerPoll(25);
|
||||||
|
|
||||||
|
String uri = factory.buildConsumerUri(properties);
|
||||||
|
|
||||||
|
assertThat(uri).startsWith("imaps://mail.example.org:993?");
|
||||||
|
assertThat(uri).contains("username=user%40example.org");
|
||||||
|
assertThat(uri).contains("password=p%40ss+word");
|
||||||
|
assertThat(uri).contains("folderName=INBOX%2FOrders");
|
||||||
|
assertThat(uri).contains("peek=true");
|
||||||
|
assertThat(uri).contains("unseen=true");
|
||||||
|
assertThat(uri).contains("maxMessagesPerPoll=25");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void shouldFailWhenRequiredValuesAreMissing() {
|
||||||
|
DipIngestionProperties.MailRouteProperties properties = new DipIngestionProperties.MailRouteProperties();
|
||||||
|
properties.setProtocol(MailServerProtocol.IMAPS);
|
||||||
|
|
||||||
|
assertThatThrownBy(() -> factory.buildConsumerUri(properties))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessageContaining("host");
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,60 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.MailRecipientType;
|
||||||
|
import at.procon.dip.ingestion.service.MailMessageExtractionService;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class MailImportIdentityResolverTest {
|
||||||
|
|
||||||
|
private final MailImportIdentityResolver resolver = new MailImportIdentityResolver();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void shouldPreferProviderMessageKeyForRootIdentity() {
|
||||||
|
var parsed = new MailMessageExtractionService.ParsedMailMessage(
|
||||||
|
"Subject",
|
||||||
|
"Subject",
|
||||||
|
"Sender",
|
||||||
|
"sender@example.com",
|
||||||
|
"Sender <sender@example.com>",
|
||||||
|
null,
|
||||||
|
"<message-id@example.com>",
|
||||||
|
null,
|
||||||
|
null,
|
||||||
|
OffsetDateTime.now(),
|
||||||
|
OffsetDateTime.now(),
|
||||||
|
"body",
|
||||||
|
"",
|
||||||
|
List.of(new MailMessageExtractionService.MailRecipient(MailRecipientType.TO, null, "to@example.com", "to@example.com", 0)),
|
||||||
|
List.of()
|
||||||
|
);
|
||||||
|
var envelope = new GenericMailProviderEnvelope(MailProviderType.IMAP, "account-a", "Inbox", "uid-77", null, Map.of());
|
||||||
|
|
||||||
|
String identity = resolver.resolveRootSourceIdentifier(parsed, envelope, "raw".getBytes());
|
||||||
|
|
||||||
|
assertThat(identity).contains("imap").contains("account-a").contains("Inbox").contains("uid-77");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void shouldUsePartPathForAttachmentIdentity() {
|
||||||
|
var attachment = new MailMessageExtractionService.MailAttachment(
|
||||||
|
"offer.pdf",
|
||||||
|
"application/pdf",
|
||||||
|
new byte[]{1, 2, 3},
|
||||||
|
3l,
|
||||||
|
null,
|
||||||
|
"0.1.2",
|
||||||
|
"ATTACHMENT",
|
||||||
|
null,
|
||||||
|
1
|
||||||
|
);
|
||||||
|
|
||||||
|
String identity = resolver.resolveAttachmentSourceIdentifier("mail:imap:account:Inbox:uid-77", attachment);
|
||||||
|
|
||||||
|
assertThat(identity).endsWith(":part:0.1.2");
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -0,0 +1,31 @@
|
|||||||
|
package at.procon.dip.ingestion.mail;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
|
||||||
|
class MailProviderEnvelopeAttributesTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void shouldRoundTripGenericProviderEnvelopeThroughAttributes() {
|
||||||
|
GenericMailProviderEnvelope envelope = new GenericMailProviderEnvelope(
|
||||||
|
MailProviderType.MICROSOFT_GRAPH,
|
||||||
|
"mailbox-a",
|
||||||
|
"Inbox/Subfolder",
|
||||||
|
"provider-message-42",
|
||||||
|
"provider-thread-7",
|
||||||
|
Map.of("tenant", "demo")
|
||||||
|
);
|
||||||
|
|
||||||
|
Map<String, String> attributes = MailProviderEnvelopeAttributes.merge(Map.of("subject", "Hello"), envelope);
|
||||||
|
GenericMailProviderEnvelope restored = MailProviderEnvelopeAttributes.fromAttributes(attributes);
|
||||||
|
|
||||||
|
assertThat(restored.providerType()).isEqualTo(MailProviderType.MICROSOFT_GRAPH);
|
||||||
|
assertThat(restored.accountKey()).isEqualTo("mailbox-a");
|
||||||
|
assertThat(restored.folderKey()).isEqualTo("Inbox/Subfolder");
|
||||||
|
assertThat(restored.providerMessageKey()).isEqualTo("provider-message-42");
|
||||||
|
assertThat(restored.providerThreadKey()).isEqualTo("provider-thread-7");
|
||||||
|
assertThat(restored.providerAttributes()).containsEntry("tenant", "demo");
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue