introduced document attributes and text import rest api

This commit is contained in:
trifonovt 2026-04-21 14:07:16 +02:00
parent 1500e84757
commit 1cd8ebe066
15 changed files with 941 additions and 12 deletions

View File

@ -0,0 +1,10 @@
package at.procon.dip.domain.document;
/**
 * Value types that a document attribute catalog entry can declare.
 *
 * <p>The constant names are persisted as strings on the catalog entry
 * ({@code attribute_value_type}), so renaming a constant changes stored data.
 */
public enum DocumentAttributeValueType {
// Free text value.
STRING,
// Whole number (handled as Long by the attribute service).
INTEGER,
// Decimal number (handled as BigDecimal by the attribute service).
NUMBER,
// Calendar date without time (LocalDate).
DATE,
// Date and time with offset (OffsetDateTime).
DATETIME,
// true / false.
BOOLEAN
}

View File

@ -0,0 +1,80 @@
package at.procon.dip.domain.document.entity;
import at.procon.dip.architecture.SchemaNames;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.FetchType;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.Index;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * JPA entity for a single typed attribute value attached to a document.
 *
 * <p>Mapped to table {@code doc_document_attribute}. The row references the owning
 * {@link Document} and the catalog entry ({@link DocumentAttributeName}) and offers one nullable
 * value column per supported value type.
 */
@Entity
@Table(schema = SchemaNames.DOC, name = "doc_document_attribute", indexes = {
@Index(name = "idx_doc_doc_attr_document", columnList = "document_id"),
@Index(name = "idx_doc_doc_attr_name", columnList = "attribute_name_id"),
@Index(name = "idx_doc_doc_attr_value_hash", columnList = "attribute_value_hash"),
// Unique per document, catalog entry and value hash: the same value cannot be stored twice.
@Index(name = "idx_doc_doc_attr_doc_name_hash", columnList = "document_id, attribute_name_id, attribute_value_hash", unique = true)
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DocumentAttribute {
// Primary key, generated as a UUID by the persistence provider.
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
// Owning document; mandatory, loaded lazily.
@ManyToOne(fetch = FetchType.LAZY, optional = false)
@JoinColumn(name = "document_id", nullable = false)
private Document document;
// Catalog entry (name, context, declared type) this value belongs to; mandatory, loaded lazily.
@ManyToOne(fetch = FetchType.LAZY, optional = false)
@JoinColumn(name = "attribute_name_id", nullable = false)
private DocumentAttributeName attributeName;
// Typed value columns; all are nullable at the mapping level.
@Column(name = "string_value", columnDefinition = "TEXT")
private String stringValue;
@Column(name = "integer_value")
private Long integerValue;
@Column(name = "number_value", columnDefinition = "NUMERIC")
private BigDecimal numberValue;
@Column(name = "date_value")
private LocalDate dateValue;
@Column(name = "datetime_value")
private OffsetDateTime datetimeValue;
@Column(name = "boolean_value")
private Boolean booleanValue;
// Hash of the value, part of the unique index above.
// NOTE(review): length 64 presumably matches a hex-encoded SHA-256 digest - confirm against the writer.
@Column(name = "attribute_value_hash", nullable = false, length = 64)
private String attributeValueHash;
// Creation timestamp; the builder default is replaced in onCreate() right before the insert.
@Builder.Default
@Column(name = "created_at", nullable = false, updatable = false)
private OffsetDateTime createdAt = OffsetDateTime.now();
/**
 * Lifecycle callback that stamps {@code createdAt} with the current time before the entity
 * is first persisted.
 */
@PrePersist
protected void onCreate() {
createdAt = OffsetDateTime.now();
}
}

View File

@ -0,0 +1,72 @@
package at.procon.dip.domain.document.entity;
import at.procon.dip.architecture.SchemaNames;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.Index;
import jakarta.persistence.PrePersist;
import jakarta.persistence.PreUpdate;
import jakarta.persistence.Table;
import java.time.OffsetDateTime;
import java.util.UUID;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * JPA entity for one entry of the document attribute catalog.
 *
 * <p>Mapped to table {@code doc_attribute_name}. An entry is identified by its normalized name
 * together with its context (enforced by a unique index) and declares the value type that
 * attribute values of this entry use.
 */
@Entity
@Table(schema = SchemaNames.DOC, name = "doc_attribute_name", indexes = {
    @Index(name = "idx_doc_attr_name_name", columnList = "attribute_name"),
    @Index(name = "idx_doc_attr_name_context", columnList = "attribute_context"),
    // One catalog entry per (normalized name, context) pair.
    @Index(name = "idx_doc_attr_name_normalized_ctx", columnList = "normalized_name, attribute_context", unique = true)
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class DocumentAttributeName {

    /** Primary key, generated as a UUID by the persistence provider. */
    @Id
    @GeneratedValue(strategy = GenerationType.UUID)
    private UUID id;

    /** Attribute name as supplied when the entry was created. */
    @Column(name = "attribute_name", nullable = false, length = 255)
    private String attributeName;

    /** Normalized form of the name, used for lookups and uniqueness. */
    @Column(name = "normalized_name", nullable = false, length = 255)
    private String normalizedName;

    /** Context (namespace) of the name; part of the unique key. */
    @Column(name = "attribute_context", nullable = false, length = 100)
    private String attributeContext;

    /** Declared value type, persisted by enum constant name. */
    @Enumerated(EnumType.STRING)
    @Column(name = "attribute_value_type", nullable = false, length = 32)
    private DocumentAttributeValueType attributeValueType;

    /** Creation timestamp; never updated after the insert. */
    @Builder.Default
    @Column(name = "created_at", nullable = false, updatable = false)
    private OffsetDateTime createdAt = OffsetDateTime.now();

    /** Timestamp of the last modification. */
    @Builder.Default
    @Column(name = "updated_at", nullable = false)
    private OffsetDateTime updatedAt = OffsetDateTime.now();

    /**
     * Lifecycle callback run before the first insert.
     *
     * <p>Both timestamps are taken from a single clock reading so that a freshly created row
     * has {@code createdAt} equal to {@code updatedAt}.
     */
    @PrePersist
    protected void onCreate() {
        OffsetDateTime now = OffsetDateTime.now();
        createdAt = now;
        updatedAt = now;
    }

    /** Lifecycle callback run before every update; refreshes {@code updatedAt}. */
    @PreUpdate
    protected void onUpdate() {
        updatedAt = OffsetDateTime.now();
    }
}

View File

@ -0,0 +1,11 @@
package at.procon.dip.domain.document.repository;
import at.procon.dip.domain.document.entity.DocumentAttributeName;
import java.util.Optional;
import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Spring Data JPA repository for {@link DocumentAttributeName} catalog entries.
 */
public interface DocumentAttributeNameRepository extends JpaRepository<DocumentAttributeName, UUID> {
/**
 * Finds the catalog entry with the given normalized name and context.
 * The query is derived from the method name, so the name must stay in sync with the entity
 * properties {@code normalizedName} and {@code attributeContext}.
 *
 * @param normalizedName normalized attribute name
 * @param attributeContext attribute context
 * @return the matching entry, or an empty Optional when none exists
 */
Optional<DocumentAttributeName> findByNormalizedNameAndAttributeContext(String normalizedName, String attributeContext);
}

View File

@ -0,0 +1,13 @@
package at.procon.dip.domain.document.repository;
import at.procon.dip.domain.document.entity.DocumentAttribute;
import at.procon.dip.domain.document.entity.DocumentAttributeName;
import org.springframework.data.jpa.repository.JpaRepository;
import java.util.Optional;
import java.util.UUID;
/**
 * Spring Data JPA repository for {@link DocumentAttribute} values.
 */
public interface DocumentAttributeRepository extends JpaRepository<DocumentAttribute, UUID> {
/**
 * Checks whether a value with the given hash is already stored for the document and catalog entry.
 * The query is derived from the method name ({@code document.id}, {@code attributeName.id},
 * {@code attributeValueHash}).
 *
 * @param documentId id of the owning document
 * @param attributeNameId id of the catalog entry
 * @param attributeValueHash hash of the attribute value
 * @return {@code true} when such a row exists
 */
boolean existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(UUID documentId, UUID attributeNameId, String attributeValueHash);
}

View File

@ -0,0 +1,315 @@
package at.procon.dip.domain.document.service;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.domain.document.entity.Document;
import at.procon.dip.domain.document.entity.DocumentAttribute;
import at.procon.dip.domain.document.entity.DocumentAttributeName;
import at.procon.dip.domain.document.repository.DocumentAttributeNameRepository;
import at.procon.dip.domain.document.repository.DocumentAttributeRepository;
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
import at.procon.ted.util.HashUtils;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;
@Service
@RequiredArgsConstructor
@Transactional
public class DocumentAttributeService {
public static final String GLOBAL_CONTEXT = "GLOBAL";
private final DocumentService documentService;
private final DocumentAttributeNameRepository attributeNameRepository;
private final DocumentAttributeRepository attributeRepository;
/**
 * Attaches the given attributes to a document.
 *
 * <p>Every request entry is resolved to a typed value, matched against (or registered in) the
 * attribute catalog by normalized name and context, and saved unless the same catalog entry /
 * value hash pair already occurred earlier in this request or is already stored for the
 * document. Entries that resolve to nothing (null entry, blank name, no value) are skipped.
 *
 * @param documentId id of the document that receives the attributes
 * @param attributes requested name/value pairs; {@code null} or empty makes this a no-op
 * @throws IllegalArgumentException if an entry is invalid or declares a type that differs
 *         from the type registered for its catalog entry
 */
public void addAttributes(UUID documentId, List<GenericNameValuePairRequest> attributes) {
    if (attributes == null || attributes.isEmpty()) {
        return;
    }
    Document document = documentService.getRequired(documentId);
    Set<String> seenInRequest = new HashSet<>();
    Map<String, DocumentAttributeName> catalogEntriesByKey = new HashMap<>();
    for (GenericNameValuePairRequest attribute : attributes) {
        ResolvedAttributeInput resolved = resolveInput(attribute);
        if (resolved == null) {
            continue;
        }
        // Length-prefix the name: with a plain "name|context" key, a '|' inside a name or
        // context could make two different (name, context) pairs share one cache slot and
        // silently reuse the wrong catalog entry.
        String catalogKey = resolved.normalizedName().length() + ":" + resolved.normalizedName()
                + "|" + resolved.context();
        DocumentAttributeName catalogEntry = catalogEntriesByKey.computeIfAbsent(catalogKey, ignored ->
                resolveOrCreateCatalogEntry(resolved));
        if (catalogEntry.getAttributeValueType() != resolved.type()) {
            throw new IllegalArgumentException(
                    "Attribute '" + resolved.name() + "' in context '" + resolved.context() + "' is already registered as type "
                            + catalogEntry.getAttributeValueType() + " but request uses " + resolved.type()
            );
        }
        String attributeValueHash = HashUtils.computeSha256(resolved.canonicalValue());
        // Skip duplicates inside the same request before asking the database.
        String requestKey = catalogEntry.getId() + ":" + attributeValueHash;
        if (!seenInRequest.add(requestKey)) {
            continue;
        }
        boolean alreadyAssigned = attributeRepository.existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(
                document.getId(),
                catalogEntry.getId(),
                attributeValueHash
        );
        if (alreadyAssigned) {
            continue;
        }
        DocumentAttribute entity = DocumentAttribute.builder()
                .document(document)
                .attributeName(catalogEntry)
                .attributeValueHash(attributeValueHash)
                .build();
        applyTypedValue(entity, resolved);
        attributeRepository.save(entity);
    }
}
/**
 * Returns the catalog entry for the resolved name and context, registering a new entry with
 * the resolved type when none exists yet.
 *
 * @param resolved resolved request entry
 * @return the existing or newly saved catalog entry
 */
private DocumentAttributeName resolveOrCreateCatalogEntry(ResolvedAttributeInput resolved) {
    DocumentAttributeName known = attributeNameRepository
            .findByNormalizedNameAndAttributeContext(resolved.normalizedName(), resolved.context())
            .orElse(null);
    if (known != null) {
        return known;
    }
    DocumentAttributeName fresh = DocumentAttributeName.builder()
            .attributeName(resolved.name())
            .normalizedName(resolved.normalizedName())
            .attributeContext(resolved.context())
            .attributeValueType(resolved.type())
            .build();
    return attributeNameRepository.save(fresh);
}
/**
 * Copies the resolved value into the entity field that corresponds to the resolved type.
 *
 * @param entity attribute entity to populate
 * @param resolved resolved request entry carrying the typed value
 */
private void applyTypedValue(DocumentAttribute entity, ResolvedAttributeInput resolved) {
    DocumentAttributeValueType valueType = resolved.type();
    switch (valueType) {
        case STRING:
            entity.setStringValue(resolved.stringValue());
            break;
        case INTEGER:
            entity.setIntegerValue(resolved.integerValue());
            break;
        case NUMBER:
            entity.setNumberValue(resolved.numberValue());
            break;
        case DATE:
            entity.setDateValue(resolved.dateValue());
            break;
        case DATETIME:
            entity.setDatetimeValue(resolved.datetimeValue());
            break;
        case BOOLEAN:
            entity.setBooleanValue(resolved.booleanValue());
            break;
    }
}
/**
 * Validates one request entry and converts it into a typed, normalized value.
 *
 * <p>Without a declared type, the type is inferred from the single populated value field; the
 * legacy {@code value} field is treated as a string. With a declared type, the matching typed
 * field is used and the legacy {@code value} is parsed as a fallback when that field is absent.
 *
 * @param attribute request entry, may be {@code null}
 * @return the resolved input, or {@code null} when the entry is {@code null}, has a blank
 *         name, or (without a declared type) carries no value
 * @throws IllegalArgumentException if the entry mixes value fields, lacks a value for its
 *         declared type, or its legacy value cannot be parsed as the declared type
 */
private ResolvedAttributeInput resolveInput(GenericNameValuePairRequest attribute) {
    if (attribute == null || !StringUtils.hasText(attribute.name())) {
        return null;
    }
    String name = attribute.name().trim();
    String normalizedName = normalizeName(name);
    String context = normalizeContext(attribute.context());
    int explicitTypedValues = countExplicitTypedValues(attribute);
    String legacyValue = StringUtils.hasText(attribute.value()) ? attribute.value().trim() : null;
    String explicitStringValue = StringUtils.hasText(attribute.stringValue()) ? attribute.stringValue().trim() : null;
    DocumentAttributeValueType requestedType = attribute.type();

    if (requestedType == null) {
        if (legacyValue != null && explicitTypedValues > 0) {
            throw new IllegalArgumentException("Attribute '" + name + "' uses both legacy and typed value fields");
        }
        if (explicitTypedValues > 1) {
            throw new IllegalArgumentException("Attribute '" + name + "' defines multiple typed values without declaring a type");
        }
        if (legacyValue != null) {
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.STRING, legacyValue, null, null, null, null, null, legacyValue
            );
        }
        // At most one typed field is populated from here on, so the first one found decides the type.
        if (explicitStringValue != null) {
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.STRING, explicitStringValue, null, null, null, null, null, explicitStringValue
            );
        }
        if (attribute.integerValue() != null) {
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.INTEGER, null, attribute.integerValue(), null, null, null, null, attribute.integerValue().toString()
            );
        }
        if (attribute.numberValue() != null) {
            BigDecimal normalized = normalizeNumber(attribute.numberValue());
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.NUMBER, null, null, normalized, null, null, null, normalized.toPlainString()
            );
        }
        if (attribute.dateValue() != null) {
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.DATE, null, null, null, attribute.dateValue(), null, null, attribute.dateValue().toString()
            );
        }
        if (attribute.datetimeValue() != null) {
            OffsetDateTime normalized = normalizeDateTime(attribute.datetimeValue());
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.DATETIME, null, null, null, null, normalized, null, normalized.toString()
            );
        }
        if (attribute.booleanValue() != null) {
            return new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.BOOLEAN, null, null, null, null, null, attribute.booleanValue(), attribute.booleanValue().toString()
            );
        }
        return null;
    }

    return switch (requestedType) {
        case STRING -> {
            // Both candidates are already trimmed and are null when blank.
            String value = explicitStringValue != null ? explicitStringValue : legacyValue;
            if (value == null || hasOtherTypedValues(attribute, DocumentAttributeValueType.STRING)) {
                throw new IllegalArgumentException("String attribute '" + name + "' requires only string or value input");
            }
            yield new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.STRING, value, null, null, null, null, null, value
            );
        }
        case INTEGER -> {
            Long value = attribute.integerValue();
            if (value == null && legacyValue != null) {
                try {
                    value = Long.parseLong(legacyValue);
                } catch (NumberFormatException e) {
                    throw unparsableLegacyValue("Integer", name, legacyValue, e);
                }
            }
            if (value == null || hasOtherTypedValues(attribute, DocumentAttributeValueType.INTEGER)) {
                throw new IllegalArgumentException("Integer attribute '" + name + "' requires only integer input");
            }
            yield new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.INTEGER, null, value, null, null, null, null, value.toString()
            );
        }
        case NUMBER -> {
            BigDecimal value = attribute.numberValue();
            if (value == null && legacyValue != null) {
                try {
                    value = new BigDecimal(legacyValue);
                } catch (NumberFormatException e) {
                    throw unparsableLegacyValue("Number", name, legacyValue, e);
                }
            }
            if (value == null || hasOtherTypedValues(attribute, DocumentAttributeValueType.NUMBER)) {
                throw new IllegalArgumentException("Number attribute '" + name + "' requires only number input");
            }
            value = normalizeNumber(value);
            yield new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.NUMBER, null, null, value, null, null, null, value.toPlainString()
            );
        }
        case DATE -> {
            LocalDate value = attribute.dateValue();
            if (value == null && legacyValue != null) {
                try {
                    value = LocalDate.parse(legacyValue);
                } catch (java.time.format.DateTimeParseException e) {
                    // Not an IllegalArgumentException by itself; translate so that all
                    // validation failures of this method share one exception type.
                    throw unparsableLegacyValue("Date", name, legacyValue, e);
                }
            }
            if (value == null || hasOtherTypedValues(attribute, DocumentAttributeValueType.DATE)) {
                throw new IllegalArgumentException("Date attribute '" + name + "' requires only date input");
            }
            yield new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.DATE, null, null, null, value, null, null, value.toString()
            );
        }
        case DATETIME -> {
            OffsetDateTime value = attribute.datetimeValue();
            if (value == null && legacyValue != null) {
                try {
                    value = OffsetDateTime.parse(legacyValue);
                } catch (java.time.format.DateTimeParseException e) {
                    throw unparsableLegacyValue("Datetime", name, legacyValue, e);
                }
            }
            if (value == null || hasOtherTypedValues(attribute, DocumentAttributeValueType.DATETIME)) {
                throw new IllegalArgumentException("Datetime attribute '" + name + "' requires only datetime input");
            }
            value = normalizeDateTime(value);
            yield new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.DATETIME, null, null, null, null, value, null, value.toString()
            );
        }
        case BOOLEAN -> {
            Boolean value = attribute.booleanValue();
            if (value == null && legacyValue != null) {
                if ("true".equalsIgnoreCase(legacyValue) || "false".equalsIgnoreCase(legacyValue)) {
                    value = Boolean.parseBoolean(legacyValue);
                } else {
                    throw new IllegalArgumentException("Boolean attribute '" + name + "' requires true or false");
                }
            }
            if (value == null || hasOtherTypedValues(attribute, DocumentAttributeValueType.BOOLEAN)) {
                throw new IllegalArgumentException("Boolean attribute '" + name + "' requires only boolean input");
            }
            yield new ResolvedAttributeInput(
                    name, normalizedName, context, DocumentAttributeValueType.BOOLEAN, null, null, null, null, null, value, value.toString()
            );
        }
    };
}

/**
 * Builds the exception reported when a legacy {@code value} cannot be parsed as the declared type.
 *
 * @param typeLabel capitalized type label used at the start of the message
 * @param attributeName attribute name as given in the request
 * @param rawValue the legacy value that failed to parse
 * @param cause the original parsing failure, kept as the cause
 * @return the exception to throw
 */
private static IllegalArgumentException unparsableLegacyValue(
        String typeLabel, String attributeName, String rawValue, RuntimeException cause) {
    return new IllegalArgumentException(
            typeLabel + " attribute '" + attributeName + "' has an unparsable value '" + rawValue + "'", cause);
}
/**
 * Counts how many of the typed value fields of the request entry are populated.
 * The string field counts only when it contains text; the legacy {@code value} field is not counted.
 *
 * @param attribute request entry
 * @return number of populated typed fields (0 to 6)
 */
private int countExplicitTypedValues(GenericNameValuePairRequest attribute) {
    int populated = StringUtils.hasText(attribute.stringValue()) ? 1 : 0;
    Object[] nullableFields = {
            attribute.integerValue(),
            attribute.numberValue(),
            attribute.dateValue(),
            attribute.datetimeValue(),
            attribute.booleanValue()
    };
    for (Object field : nullableFields) {
        if (field != null) {
            populated++;
        }
    }
    return populated;
}
/**
 * Tells whether the request entry populates any typed value field other than the one that
 * belongs to the expected type.
 *
 * @param attribute request entry
 * @param expectedType type whose own field is ignored
 * @return {@code true} when a field of a different type is populated
 */
private boolean hasOtherTypedValues(GenericNameValuePairRequest attribute, DocumentAttributeValueType expectedType) {
    boolean hasString = StringUtils.hasText(attribute.stringValue());
    boolean hasInteger = attribute.integerValue() != null;
    boolean hasNumber = attribute.numberValue() != null;
    boolean hasDate = attribute.dateValue() != null;
    boolean hasDatetime = attribute.datetimeValue() != null;
    boolean hasBoolean = attribute.booleanValue() != null;
    return switch (expectedType) {
        case STRING -> hasInteger || hasNumber || hasDate || hasDatetime || hasBoolean;
        case INTEGER -> hasString || hasNumber || hasDate || hasDatetime || hasBoolean;
        case NUMBER -> hasString || hasInteger || hasDate || hasDatetime || hasBoolean;
        case DATE -> hasString || hasInteger || hasNumber || hasDatetime || hasBoolean;
        case DATETIME -> hasString || hasInteger || hasNumber || hasDate || hasBoolean;
        case BOOLEAN -> hasString || hasInteger || hasNumber || hasDate || hasDatetime;
    };
}
/**
 * Brings a number into its canonical form by removing trailing zeros, so that numerically
 * equal inputs such as 1.0 and 1.00 yield the same canonical value.
 *
 * @param value number to normalize, must not be {@code null}
 * @return the number without trailing zeros
 */
private BigDecimal normalizeNumber(BigDecimal value) {
    BigDecimal withoutTrailingZeros = value.stripTrailingZeros();
    return withoutTrailingZeros;
}
/**
 * Brings a date-time into its canonical form by dropping the sub-second part.
 *
 * <p>NOTE(review): the offset is kept as supplied, so the same instant expressed with two
 * different offsets yields two different canonical values - confirm this is intended.
 *
 * @param value date-time to normalize, must not be {@code null}
 * @return the date-time with nanoseconds set to zero
 */
private OffsetDateTime normalizeDateTime(OffsetDateTime value) {
    OffsetDateTime secondPrecision = value.withNano(0);
    return secondPrecision;
}
/**
 * Produces the lookup form of an attribute name: trimmed and lower-cased with the root locale.
 *
 * @param value attribute name, must not be {@code null}
 * @return normalized name
 */
private String normalizeName(String value) {
    String trimmed = value.trim();
    return trimmed.toLowerCase(Locale.ROOT);
}
/**
 * Produces the lookup form of an attribute context: trimmed and upper-cased with the root
 * locale, falling back to {@link #GLOBAL_CONTEXT} when no context text is given.
 *
 * @param value context from the request, may be {@code null} or blank
 * @return normalized context, never {@code null}
 */
private String normalizeContext(String value) {
    if (!StringUtils.hasText(value)) {
        return GLOBAL_CONTEXT;
    }
    String trimmed = value.trim();
    return trimmed.toUpperCase(Locale.ROOT);
}
/**
 * Result of validating one request entry: the catalog key, the resolved type and the value.
 * Only the value component that matches {@code type} is set by the resolver; the others are null.
 *
 * @param name attribute name as given in the request, trimmed
 * @param normalizedName normalized name used for the catalog lookup
 * @param context normalized context used for the catalog lookup
 * @param type resolved value type
 * @param stringValue value when {@code type} is STRING
 * @param integerValue value when {@code type} is INTEGER
 * @param numberValue value when {@code type} is NUMBER
 * @param dateValue value when {@code type} is DATE
 * @param datetimeValue value when {@code type} is DATETIME
 * @param booleanValue value when {@code type} is BOOLEAN
 * @param canonicalValue string form of the value that is hashed for duplicate detection
 */
private record ResolvedAttributeInput(
String name,
String normalizedName,
String context,
DocumentAttributeValueType type,
String stringValue,
Long integerValue,
BigDecimal numberValue,
LocalDate dateValue,
OffsetDateTime datetimeValue,
Boolean booleanValue,
String canonicalValue
) {
}
}

View File

@ -25,7 +25,7 @@ public interface EmbeddingJobRepository extends JpaRepository<EmbeddingJob, UUID
FROM DOC.doc_embedding_job j FROM DOC.doc_embedding_job j
WHERE j.status IN ('PENDING', 'RETRY_SCHEDULED') WHERE j.status IN ('PENDING', 'RETRY_SCHEDULED')
AND (j.next_retry_at IS NULL OR j.next_retry_at <= :now) AND (j.next_retry_at IS NULL OR j.next_retry_at <= :now)
ORDER BY j.priority DESC, j.created_at ASC ORDER BY j.priority DESC, j.created_at DESC
FOR UPDATE SKIP LOCKED FOR UPDATE SKIP LOCKED
LIMIT :limit LIMIT :limit
""", nativeQuery = true) """, nativeQuery = true)

View File

@ -4,16 +4,20 @@ import at.procon.dip.domain.access.DocumentAccessContext;
import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.CanonicalDocumentMetadata; import at.procon.dip.domain.document.CanonicalDocumentMetadata;
import at.procon.dip.domain.document.SourceType; import at.procon.dip.domain.document.SourceType;
import at.procon.dip.domain.document.service.DocumentAttributeService;
import at.procon.dip.domain.tenant.TenantRef; import at.procon.dip.domain.tenant.TenantRef;
import at.procon.dip.ingestion.config.DipIngestionProperties;
import at.procon.dip.ingestion.dto.GenericImportResponse; import at.procon.dip.ingestion.dto.GenericImportResponse;
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
import at.procon.dip.ingestion.dto.GenericTextImportRequest; import at.procon.dip.ingestion.dto.GenericTextImportRequest;
import at.procon.dip.ingestion.service.DocumentIngestionGateway; import at.procon.dip.ingestion.service.DocumentIngestionGateway;
import at.procon.dip.ingestion.spi.IngestionResult; import at.procon.dip.ingestion.spi.IngestionResult;
import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy; import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
import at.procon.dip.ingestion.spi.SourceDescriptor; import at.procon.dip.ingestion.spi.SourceDescriptor;
import at.procon.dip.ingestion.config.DipIngestionProperties;
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
import at.procon.dip.runtime.config.RuntimeMode; import at.procon.dip.runtime.config.RuntimeMode;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.Map; import java.util.Map;
@ -36,6 +40,7 @@ public class GenericDocumentImportController {
private final DipIngestionProperties properties; private final DipIngestionProperties properties;
private final DocumentIngestionGateway ingestionGateway; private final DocumentIngestionGateway ingestionGateway;
private final DocumentAttributeService documentAttributeService;
@PostMapping(path = "/upload", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) @PostMapping(path = "/upload", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public ResponseEntity<GenericImportResponse> upload( public ResponseEntity<GenericImportResponse> upload(
@ -76,13 +81,7 @@ public class GenericDocumentImportController {
@PostMapping(path = "/text", consumes = MediaType.APPLICATION_JSON_VALUE) @PostMapping(path = "/text", consumes = MediaType.APPLICATION_JSON_VALUE)
public ResponseEntity<GenericImportResponse> importText(@RequestBody GenericTextImportRequest request) { public ResponseEntity<GenericImportResponse> importText(@RequestBody GenericTextImportRequest request) {
ensureRestUploadEnabled(); ensureRestUploadEnabled();
Map<String, String> attributes = new LinkedHashMap<>(); Map<String, String> attributes = buildDescriptorAttributes(request);
if (StringUtils.hasText(request.languageCode())) {
attributes.put("languageCode", request.languageCode());
}
if (StringUtils.hasText(request.title())) {
attributes.put("title", request.title());
}
SourceDescriptor descriptor = new SourceDescriptor( SourceDescriptor descriptor = new SourceDescriptor(
buildAccessContext(request.ownerTenantKey(), request.visibility()), buildAccessContext(request.ownerTenantKey(), request.visibility()),
@ -91,14 +90,70 @@ public class GenericDocumentImportController {
null, null,
request.fileName(), request.fileName(),
request.mediaType(), request.mediaType(),
request.text() == null ? null : request.text().getBytes(java.nio.charset.StandardCharsets.UTF_8), request.text() == null ? null : request.text().getBytes(StandardCharsets.UTF_8),
request.text(), request.text(),
OffsetDateTime.now(), OffsetDateTime.now(),
OriginalContentStoragePolicy.DEFAULT, OriginalContentStoragePolicy.DEFAULT,
attributes attributes
); );
IngestionResult result = ingestionGateway.ingest(descriptor); IngestionResult result = ingestionGateway.ingest(descriptor);
return ResponseEntity.ok(toResponse(result)); GenericImportResponse response = toResponse(result);
if (response.documentId() != null) {
documentAttributeService.addAttributes(response.documentId(), request.attributes());
}
return ResponseEntity.ok(response);
}
/**
 * Builds the string attribute map passed to the source descriptor of a text import.
 *
 * <p>Request attributes in the global context that have a name and a renderable value are
 * added first; {@code languageCode} and {@code title} from the request are put afterwards and
 * therefore replace request attributes using those keys.
 *
 * @param request text import request
 * @return insertion-ordered attribute map, possibly empty
 */
private Map<String, String> buildDescriptorAttributes(GenericTextImportRequest request) {
    Map<String, String> descriptorAttributes = new LinkedHashMap<>();
    if (request.attributes() != null) {
        for (GenericNameValuePairRequest candidate : request.attributes()) {
            boolean eligible = candidate != null
                    && StringUtils.hasText(candidate.name())
                    && isGlobalContext(candidate.context());
            if (eligible) {
                String text = renderAsDescriptorValue(candidate);
                if (StringUtils.hasText(text)) {
                    descriptorAttributes.put(candidate.name().trim(), text);
                }
            }
        }
    }
    String languageCode = request.languageCode();
    if (StringUtils.hasText(languageCode)) {
        descriptorAttributes.put("languageCode", languageCode);
    }
    String title = request.title();
    if (StringUtils.hasText(title)) {
        descriptorAttributes.put("title", title);
    }
    return descriptorAttributes;
}
/**
 * Renders the value of a request attribute as plain text for the source descriptor.
 *
 * <p>The first populated field wins, checked in this order: legacy value, string, integer,
 * number (trailing zeros removed, plain notation), date, date-time (without sub-seconds),
 * boolean. The declared type of the attribute is not consulted.
 *
 * @param attribute request attribute, must not be {@code null}
 * @return the rendered value, or {@code null} when no field is populated
 */
private String renderAsDescriptorValue(GenericNameValuePairRequest attribute) {
    String rendered = null;
    if (StringUtils.hasText(attribute.value())) {
        rendered = attribute.value().trim();
    } else if (StringUtils.hasText(attribute.stringValue())) {
        rendered = attribute.stringValue().trim();
    } else if (attribute.integerValue() != null) {
        rendered = attribute.integerValue().toString();
    } else if (attribute.numberValue() != null) {
        rendered = attribute.numberValue().stripTrailingZeros().toPlainString();
    } else if (attribute.dateValue() != null) {
        rendered = attribute.dateValue().toString();
    } else if (attribute.datetimeValue() != null) {
        rendered = attribute.datetimeValue().withNano(0).toString();
    } else if (attribute.booleanValue() != null) {
        rendered = attribute.booleanValue().toString();
    }
    return rendered;
}
private boolean isGlobalContext(String context) {
return !StringUtils.hasText(context) || DocumentAttributeService.GLOBAL_CONTEXT.equalsIgnoreCase(context.trim());
} }
private void ensureRestUploadEnabled() { private void ensureRestUploadEnabled() {

View File

@ -0,0 +1,20 @@
package at.procon.dip.ingestion.dto;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
/**
 * Request DTO for one document attribute of an import request.
 *
 * @param name attribute name
 * @param context optional context (namespace) of the name
 * @param type optional declared value type
 * @param value untyped (legacy) value given as text
 * @param stringValue typed string value
 * @param integerValue typed integer value
 * @param numberValue typed decimal value
 * @param dateValue typed date value
 * @param datetimeValue typed date-time value
 * @param booleanValue typed boolean value
 */
public record GenericNameValuePairRequest(
String name,
String context,
DocumentAttributeValueType type,
String value,
String stringValue,
Long integerValue,
BigDecimal numberValue,
LocalDate dateValue,
OffsetDateTime datetimeValue,
Boolean booleanValue
) {
}

View File

@ -1,6 +1,7 @@
package at.procon.dip.ingestion.dto; package at.procon.dip.ingestion.dto;
import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.access.DocumentVisibility;
import java.util.List;
public record GenericTextImportRequest( public record GenericTextImportRequest(
String text, String text,
@ -10,6 +11,7 @@ public record GenericTextImportRequest(
DocumentVisibility visibility, DocumentVisibility visibility,
String languageCode, String languageCode,
String title, String title,
String sourceIdentifier String sourceIdentifier,
List<GenericNameValuePairRequest> attributes
) { ) {
} }

View File

@ -88,3 +88,27 @@ CREATE UNIQUE INDEX IF NOT EXISTS uq_doc_mail_attachment_mail_index
CREATE UNIQUE INDEX IF NOT EXISTS uq_doc_mail_attachment_mail_part CREATE UNIQUE INDEX IF NOT EXISTS uq_doc_mail_attachment_mail_part
ON DOC.doc_mail_attachment (mail_document_id, part_path) ON DOC.doc_mail_attachment (mail_document_id, part_path)
WHERE part_path IS NOT NULL; WHERE part_path IS NOT NULL;
-- Recreate the source_type CHECK constraint on DOC.doc_source with the source types listed below.
-- The block does nothing when no constraint named doc_source_source_type_check exists on that table.
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM pg_constraint c
JOIN pg_class r ON r.oid = c.conrelid
JOIN pg_namespace n ON n.oid = r.relnamespace
WHERE n.nspname = 'doc'
AND r.relname = 'doc_source'
AND c.conname = 'doc_source_source_type_check'
) THEN
ALTER TABLE DOC.doc_source DROP CONSTRAINT doc_source_source_type_check;
ALTER TABLE DOC.doc_source
ADD CONSTRAINT doc_source_source_type_check
CHECK (
source_type IN (
'TED_PACKAGE', 'PACKAGE_CHILD', 'MAIL', 'MAIL_ATTACHMENT', 'FILE_SYSTEM', 'REST_UPLOAD',
'MANUAL_UPLOAD', 'ZIP_CHILD', 'API', 'MIGRATION'
)
);
END IF;
END
$$;

View File

@ -0,0 +1,64 @@
-- Upgrade text-import attribute catalog to typed values with optional context.
-- Step 1: add the context column to the catalog, default existing rows to GLOBAL, then make it mandatory.
ALTER TABLE DOC.doc_attribute_name
ADD COLUMN IF NOT EXISTS attribute_context VARCHAR(100);
UPDATE DOC.doc_attribute_name
SET attribute_context = 'GLOBAL'
WHERE attribute_context IS NULL OR BTRIM(attribute_context) = '';
ALTER TABLE DOC.doc_attribute_name
ALTER COLUMN attribute_context SET NOT NULL;
-- Step 2: add the declared value type to the catalog, default existing rows to STRING, then make it mandatory.
ALTER TABLE DOC.doc_attribute_name
ADD COLUMN IF NOT EXISTS attribute_value_type VARCHAR(32);
UPDATE DOC.doc_attribute_name
SET attribute_value_type = 'STRING'
WHERE attribute_value_type IS NULL OR BTRIM(attribute_value_type) = '';
ALTER TABLE DOC.doc_attribute_name
ALTER COLUMN attribute_value_type SET NOT NULL;
-- Step 3: catalog uniqueness now covers (normalized_name, attribute_context) instead of the name alone.
DROP INDEX IF EXISTS DOC.idx_doc_attr_name_normalized;
CREATE UNIQUE INDEX IF NOT EXISTS idx_doc_attr_name_normalized_ctx
ON DOC.doc_attribute_name(normalized_name, attribute_context);
CREATE INDEX IF NOT EXISTS idx_doc_attr_name_context
ON DOC.doc_attribute_name(attribute_context);
-- Step 4: move the untyped attribute_value into string_value and add the other typed columns.
ALTER TABLE DOC.doc_document_attribute
ADD COLUMN IF NOT EXISTS string_value TEXT;
-- NOTE(review): this UPDATE reads attribute_value, which is dropped further down; a second run of
-- this script would fail here even though the other statements use IF [NOT] EXISTS - confirm the
-- script is executed only once.
UPDATE DOC.doc_document_attribute
SET string_value = attribute_value
WHERE string_value IS NULL AND attribute_value IS NOT NULL;
ALTER TABLE DOC.doc_document_attribute
ADD COLUMN IF NOT EXISTS number_value NUMERIC;
ALTER TABLE DOC.doc_document_attribute
ADD COLUMN IF NOT EXISTS date_value DATE;
ALTER TABLE DOC.doc_document_attribute
ADD COLUMN IF NOT EXISTS boolean_value BOOLEAN;
ALTER TABLE DOC.doc_document_attribute
DROP COLUMN IF EXISTS attribute_value;
-- Step 5: every row must carry exactly one typed value.
-- NOTE(review): rows whose attribute_value was NULL end up with no typed value and would violate
-- this constraint - verify that no such rows exist before running.
ALTER TABLE DOC.doc_document_attribute
DROP CONSTRAINT IF EXISTS chk_doc_document_attribute_single_typed_value;
ALTER TABLE DOC.doc_document_attribute
ADD CONSTRAINT chk_doc_document_attribute_single_typed_value CHECK (
(CASE WHEN string_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN number_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN date_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN boolean_value IS NOT NULL THEN 1 ELSE 0 END) = 1
);
-- Step 6: column documentation.
COMMENT ON COLUMN DOC.doc_attribute_name.attribute_context IS 'Optional namespace/context for avoiding name conflicts. GLOBAL is the default context.';
COMMENT ON COLUMN DOC.doc_attribute_name.attribute_value_type IS 'Declared type of the attribute value for this catalog entry.';
COMMENT ON COLUMN DOC.doc_document_attribute.string_value IS 'String representation when the catalog entry type is STRING.';
COMMENT ON COLUMN DOC.doc_document_attribute.number_value IS 'Numeric representation when the catalog entry type is NUMBER.';
COMMENT ON COLUMN DOC.doc_document_attribute.date_value IS 'Date representation when the catalog entry type is DATE.';
COMMENT ON COLUMN DOC.doc_document_attribute.boolean_value IS 'Boolean representation when the catalog entry type is BOOLEAN.';

View File

@ -0,0 +1,23 @@
-- Extend typed text-import attributes with INTEGER and DATETIME support.
-- Step 1: add the two new typed value columns.
ALTER TABLE DOC.doc_document_attribute
ADD COLUMN IF NOT EXISTS integer_value BIGINT;
ALTER TABLE DOC.doc_document_attribute
ADD COLUMN IF NOT EXISTS datetime_value TIMESTAMP WITH TIME ZONE;
-- Step 2: replace the single-typed-value constraint so that it also counts the new columns;
-- exactly one of the six typed columns must be non-null per row.
ALTER TABLE DOC.doc_document_attribute
DROP CONSTRAINT IF EXISTS chk_doc_document_attribute_single_typed_value;
ALTER TABLE DOC.doc_document_attribute
ADD CONSTRAINT chk_doc_document_attribute_single_typed_value CHECK (
(CASE WHEN string_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN integer_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN number_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN date_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN datetime_value IS NOT NULL THEN 1 ELSE 0 END) +
(CASE WHEN boolean_value IS NOT NULL THEN 1 ELSE 0 END) = 1
);
-- Step 3: column documentation.
COMMENT ON COLUMN DOC.doc_document_attribute.integer_value IS 'Integer representation when the catalog entry type is INTEGER.';
COMMENT ON COLUMN DOC.doc_document_attribute.datetime_value IS 'Date-time representation when the catalog entry type is DATETIME.';

View File

@ -0,0 +1,125 @@
package at.procon.dip.domain.document.service;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.domain.document.entity.Document;
import at.procon.dip.domain.document.entity.DocumentAttribute;
import at.procon.dip.domain.document.entity.DocumentAttributeName;
import at.procon.dip.domain.document.repository.DocumentAttributeNameRepository;
import at.procon.dip.domain.document.repository.DocumentAttributeRepository;
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Unit tests for {@code DocumentAttributeService.addAttributes}, executed against Mockito mocks
 * of the document service and of both attribute repositories.
 */
@ExtendWith(MockitoExtension.class)
class DocumentAttributeServiceTest {

    @Mock
    private DocumentService documentService;

    @Mock
    private DocumentAttributeNameRepository attributeNameRepository;

    @Mock
    private DocumentAttributeRepository attributeRepository;

    private DocumentAttributeService service;

    @BeforeEach
    void setUp() {
        service = new DocumentAttributeService(documentService, attributeNameRepository, attributeRepository);
    }

    /**
     * Eight requests are submitted: one per value type, "Status" in two different contexts, and a
     * final repeat of the first GLOBAL status request. The test expects seven catalog entries and
     * seven attribute rows to be saved.
     */
    @Test
    void shouldCreateTypedCatalogEntriesIncludingIntegerAndDatetimeAndUseContextToAvoidConflicts() {
        UUID documentId = UUID.randomUUID();
        Document document = Document.builder().id(documentId).build();
        when(documentService.getRequired(documentId)).thenReturn(document);

        // Every catalog lookup reports "not found".
        for (String normalizedName : List.of(
                "status", "estimatedvalue", "publisheddate", "framework", "version", "lastsyncedat")) {
            stubUnknownCatalogEntry(normalizedName, "GLOBAL");
        }
        stubUnknownCatalogEntry("status", "TED");

        // Saving a catalog entry hands the same instance back with a generated id.
        when(attributeNameRepository.save(any(DocumentAttributeName.class))).thenAnswer(invocation -> {
            DocumentAttributeName persisted = invocation.getArgument(0);
            persisted.setId(UUID.randomUUID());
            return persisted;
        });
        when(attributeRepository.existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(any(), any(), any()))
                .thenReturn(false);

        OffsetDateTime syncedAt = OffsetDateTime.parse("2026-04-21T11:05:00+02:00");
        List<GenericNameValuePairRequest> requests = List.of(
                new GenericNameValuePairRequest("Status", null, DocumentAttributeValueType.STRING, null, "published", null, null, null, null, null),
                new GenericNameValuePairRequest("Status", "ted", DocumentAttributeValueType.STRING, null, "closed", null, null, null, null, null),
                new GenericNameValuePairRequest("estimatedValue", null, DocumentAttributeValueType.NUMBER, null, null, null, new BigDecimal("125000.50"), null, null, null),
                new GenericNameValuePairRequest("publishedDate", null, DocumentAttributeValueType.DATE, null, null, null, null, LocalDate.of(2026, 4, 21), null, null),
                new GenericNameValuePairRequest("framework", null, DocumentAttributeValueType.BOOLEAN, null, null, null, null, null, null, true),
                new GenericNameValuePairRequest("version", null, DocumentAttributeValueType.INTEGER, null, null, 7L, null, null, null, null),
                new GenericNameValuePairRequest("lastSyncedAt", null, DocumentAttributeValueType.DATETIME, null, null, null, null, null, syncedAt, null),
                new GenericNameValuePairRequest("status", null, DocumentAttributeValueType.STRING, null, "published", null, null, null, null, null)
        );

        service.addAttributes(documentId, requests);

        ArgumentCaptor<DocumentAttribute> attributeCaptor = ArgumentCaptor.forClass(DocumentAttribute.class);
        verify(attributeNameRepository, times(7)).save(any(DocumentAttributeName.class));
        verify(attributeRepository, times(7)).save(attributeCaptor.capture());

        List<DocumentAttribute> persistedAttributes = attributeCaptor.getAllValues();
        assertThat(persistedAttributes).hasSize(7);

        // Same normalized name, two contexts, two independent values.
        assertThat(persistedAttributes)
                .anySatisfy(candidate -> assertStringAttribute(candidate, "GLOBAL", "status", "published"));
        assertThat(persistedAttributes)
                .anySatisfy(candidate -> assertStringAttribute(candidate, "TED", "status", "closed"));

        // One saved row per remaining value type.
        assertThat(persistedAttributes).anySatisfy(candidate -> {
            assertThat(candidate.getNumberValue()).isEqualByComparingTo("125000.5");
        });
        assertThat(persistedAttributes).anySatisfy(candidate -> {
            assertThat(candidate.getDateValue()).isEqualTo(LocalDate.of(2026, 4, 21));
        });
        assertThat(persistedAttributes).anySatisfy(candidate -> {
            assertThat(candidate.getBooleanValue()).isTrue();
        });
        assertThat(persistedAttributes).anySatisfy(candidate -> {
            assertThat(candidate.getIntegerValue()).isEqualTo(7L);
        });
        assertThat(persistedAttributes).anySatisfy(candidate -> {
            assertThat(candidate.getDatetimeValue()).isEqualTo(syncedAt.withNano(0));
        });
    }

    /**
     * A catalog entry "status" of type STRING already exists in context "TED"; submitting the same
     * name and context as INTEGER must fail and must not save any attribute.
     */
    @Test
    void shouldRejectTypeConflictForExistingCatalogEntryInSameContext() {
        UUID documentId = UUID.randomUUID();
        Document document = Document.builder().id(documentId).build();
        when(documentService.getRequired(documentId)).thenReturn(document);

        DocumentAttributeName registeredEntry = DocumentAttributeName.builder()
                .id(UUID.randomUUID())
                .attributeName("status")
                .normalizedName("status")
                .attributeContext("TED")
                .attributeValueType(DocumentAttributeValueType.STRING)
                .build();
        when(attributeNameRepository.findByNormalizedNameAndAttributeContext("status", "TED"))
                .thenReturn(Optional.of(registeredEntry));

        assertThatThrownBy(() -> service.addAttributes(documentId, List.of(
                new GenericNameValuePairRequest("status", "ted", DocumentAttributeValueType.INTEGER, null, null, 1L, null, null, null, null)
        )))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("already registered as type STRING");

        verify(attributeRepository, times(0)).save(any());
    }

    /** Stubs the catalog lookup for the given normalized name and context to return no entry. */
    private void stubUnknownCatalogEntry(String normalizedName, String context) {
        when(attributeNameRepository.findByNormalizedNameAndAttributeContext(normalizedName, context))
                .thenReturn(Optional.empty());
    }

    /** Asserts context, normalized name and string value of a saved attribute. */
    private static void assertStringAttribute(
            DocumentAttribute attribute, String context, String normalizedName, String stringValue) {
        assertThat(attribute.getAttributeName().getAttributeContext()).isEqualTo(context);
        assertThat(attribute.getAttributeName().getNormalizedName()).isEqualTo(normalizedName);
        assertThat(attribute.getStringValue()).isEqualTo(stringValue);
    }
}

View File

@ -0,0 +1,115 @@
package at.procon.dip.ingestion.controller;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import at.procon.dip.domain.access.DocumentAccessContext;
import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.CanonicalDocumentMetadata;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentStatus;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.service.DocumentAttributeService;
import at.procon.dip.ingestion.config.DipIngestionProperties;
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
import at.procon.dip.ingestion.dto.GenericTextImportRequest;
import at.procon.dip.ingestion.service.DocumentIngestionGateway;
import at.procon.dip.ingestion.spi.IngestionResult;
import at.procon.dip.ingestion.spi.SourceDescriptor;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.UUID;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Unit test for {@code GenericDocumentImportController.importText}, with the ingestion gateway
 * and the attribute service replaced by Mockito mocks.
 */
@ExtendWith(MockitoExtension.class)
class GenericDocumentImportControllerTest {

    @Mock
    private DocumentIngestionGateway ingestionGateway;

    @Mock
    private DocumentAttributeService documentAttributeService;

    private GenericDocumentImportController controller;

    @BeforeEach
    void setUp() {
        // Ingestion and REST upload are switched on; documents default to PUBLIC visibility.
        DipIngestionProperties ingestionProperties = new DipIngestionProperties();
        ingestionProperties.setEnabled(true);
        ingestionProperties.setRestUploadEnabled(true);
        ingestionProperties.setDefaultVisibility(DocumentVisibility.PUBLIC);
        controller = new GenericDocumentImportController(ingestionProperties, ingestionGateway, documentAttributeService);
    }

    /**
     * Imports a text with eight name/value pairs, one of them in context "ted". Expects the seven
     * pairs without a context as string attributes on the source descriptor, no "status" key
     * there, and the full pair list handed to the attribute service.
     */
    @Test
    void shouldPassGlobalTypedAttributesIntoIngestionAndPersistAllAttributes() {
        UUID documentId = UUID.randomUUID();
        OffsetDateTime syncedAt = OffsetDateTime.parse("2026-04-21T11:05:00+02:00");

        List<GenericNameValuePairRequest> pairs = List.of(
                new GenericNameValuePairRequest("title", null, DocumentAttributeValueType.STRING, null, "Text title from pair", null, null, null, null, null),
                new GenericNameValuePairRequest("languageCode", null, DocumentAttributeValueType.STRING, null, "de", null, null, null, null, null),
                new GenericNameValuePairRequest("country", null, null, "AT", null, null, null, null, null, null),
                new GenericNameValuePairRequest("estimatedValue", null, DocumentAttributeValueType.NUMBER, null, null, null, new BigDecimal("125000.50"), null, null, null),
                new GenericNameValuePairRequest("publishedDate", null, DocumentAttributeValueType.DATE, null, null, null, null, LocalDate.of(2026, 4, 21), null, null),
                new GenericNameValuePairRequest("version", null, DocumentAttributeValueType.INTEGER, null, null, 7L, null, null, null, null),
                new GenericNameValuePairRequest("lastSyncedAt", null, DocumentAttributeValueType.DATETIME, null, null, null, null, null, syncedAt, null),
                new GenericNameValuePairRequest("status", "ted", DocumentAttributeValueType.STRING, null, "closed", null, null, null, null, null)
        );
        GenericTextImportRequest importRequest = new GenericTextImportRequest(
                "Hello world", "sample.txt", "text/plain", null, null, null, null, "source-1", pairs);

        // The gateway answers with a single canonical document carrying the generated id.
        CanonicalDocumentMetadata ingestedDocument = new CanonicalDocumentMetadata(
                documentId,
                new DocumentAccessContext(null, DocumentVisibility.PUBLIC),
                DocumentType.TEXT,
                DocumentFamily.GENERIC,
                DocumentStatus.REPRESENTED,
                "Text title from pair",
                "de",
                "text/plain",
                null,
                OffsetDateTime.now(),
                OffsetDateTime.now()
        );
        when(ingestionGateway.ingest(any(SourceDescriptor.class)))
                .thenReturn(new IngestionResult(List.of(ingestedDocument), List.of()));

        var response = controller.importText(importRequest);

        ArgumentCaptor<SourceDescriptor> sourceCaptor = ArgumentCaptor.forClass(SourceDescriptor.class);
        verify(ingestionGateway).ingest(sourceCaptor.capture());
        SourceDescriptor submitted = sourceCaptor.getValue();

        assertThat(submitted.textContent()).isEqualTo("Hello world");
        // Pairs without a context arrive as strings; the "ted"-scoped status must be absent.
        assertThat(submitted.attributes())
                .containsEntry("title", "Text title from pair")
                .containsEntry("languageCode", "de")
                .containsEntry("country", "AT")
                .containsEntry("estimatedValue", "125000.5")
                .containsEntry("publishedDate", "2026-04-21")
                .containsEntry("version", "7")
                .containsEntry("lastSyncedAt", "2026-04-21T11:05+02:00")
                .doesNotContainKey("status");

        // The unfiltered pair list goes to the attribute service.
        verify(documentAttributeService).addAttributes(documentId, pairs);

        assertThat(response.getBody()).isNotNull();
        assertThat(response.getBody().documentId()).isEqualTo(documentId);
    }
}