text import improvements

This commit is contained in:
trifonovt 2026-04-22 23:59:46 +02:00
parent 66fb266dec
commit a501176c83
20 changed files with 710 additions and 60 deletions

View File

@ -0,0 +1,23 @@
package at.procon.dip.domain.document.dto;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
 * Response DTO for a single attribute attached to a document.
 *
 * <p>Carries the attribute's naming metadata plus one slot per supported value
 * type. NOTE(review): presumably only the slot matching {@code type} is populated;
 * confirm against the attribute entity.
 *
 * @param id identifier of the attribute entry
 * @param name attribute name
 * @param normalizedName normalized form of the attribute name
 * @param context context the attribute name belongs to
 * @param type value type declared for the attribute name
 * @param stringValue value slot for string attributes
 * @param integerValue value slot for integer attributes
 * @param numberValue value slot for decimal number attributes
 * @param dateValue value slot for date attributes
 * @param datetimeValue value slot for date-time attributes
 * @param booleanValue value slot for boolean attributes
 * @param createdAt creation timestamp of the attribute entry
 */
public record GenericDocumentAttributeResponse(
UUID id,
String name,
String normalizedName,
String context,
DocumentAttributeValueType type,
String stringValue,
Long integerValue,
BigDecimal numberValue,
LocalDate dateValue,
OffsetDateTime datetimeValue,
Boolean booleanValue,
OffsetDateTime createdAt
) {
}

View File

@ -0,0 +1,30 @@
package at.procon.dip.domain.document.dto;
import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentStatus;
import at.procon.dip.domain.document.DocumentType;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.UUID;
/**
 * Response DTO with the full detail view of a document: its core fields plus the
 * attached attributes, sources and text representations.
 *
 * @param id document identifier
 * @param ownerTenantKey tenant key of the owning tenant; {@code null} when the document has no owner tenant
 * @param visibility document visibility
 * @param documentType document type
 * @param documentFamily document family
 * @param status document status
 * @param title document title
 * @param summary document summary
 * @param languageCode language code of the document
 * @param mimeType MIME type of the document
 * @param businessKey business key of the document
 * @param dedupHash deduplication hash of the document
 * @param createdAt creation timestamp of the document
 * @param updatedAt last-update timestamp of the document
 * @param attributes attributes attached to the document
 * @param sources sources the document is linked to
 * @param representations text representations of the document
 */
public record GenericDocumentDetailResponse(
UUID id,
String ownerTenantKey,
DocumentVisibility visibility,
DocumentType documentType,
DocumentFamily documentFamily,
DocumentStatus status,
String title,
String summary,
String languageCode,
String mimeType,
String businessKey,
String dedupHash,
OffsetDateTime createdAt,
OffsetDateTime updatedAt,
List<GenericDocumentAttributeResponse> attributes,
List<GenericDocumentSourceResponse> sources,
List<GenericDocumentRepresentationResponse> representations
) {
}

View File

@ -0,0 +1,22 @@
package at.procon.dip.domain.document.dto;
import at.procon.dip.domain.document.RepresentationType;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
 * Response DTO for one text representation of a document.
 *
 * <p>The full text body is not exposed; only a short preview is returned.
 *
 * @param id identifier of the representation
 * @param representationType kind of representation
 * @param builderKey key of the builder associated with the representation
 * @param languageCode language code of the representation
 * @param tokenCount token count recorded for the representation
 * @param charCount character count recorded for the representation
 * @param chunkIndex chunk index recorded for the representation
 * @param chunkStartOffset chunk start offset recorded for the representation
 * @param chunkEndOffset chunk end offset recorded for the representation
 * @param primaryRepresentation whether this is flagged as the primary representation
 * @param contentId identifier of the linked content; {@code null} when no content is linked
 * @param textPreview leading part of the text body (at most 400 characters); {@code null} when there is no text body
 * @param createdAt creation timestamp of the representation
 */
public record GenericDocumentRepresentationResponse(
UUID id,
RepresentationType representationType,
String builderKey,
String languageCode,
Integer tokenCount,
Integer charCount,
Integer chunkIndex,
Integer chunkStartOffset,
Integer chunkEndOffset,
boolean primaryRepresentation,
UUID contentId,
String textPreview,
OffsetDateTime createdAt
) {
}

View File

@ -0,0 +1,18 @@
package at.procon.dip.domain.document.dto;
import at.procon.dip.domain.document.SourceType;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
 * Response DTO for one source a document is linked to.
 *
 * @param id identifier of the source entry
 * @param sourceType type of the source
 * @param externalSourceId identifier of the source in the external system
 * @param sourceUri URI of the source
 * @param sourceFilename file name of the source
 * @param parentSourceId identifier of the parent source entry
 * @param importBatchId identifier of the import batch
 * @param receivedAt timestamp at which the source was received
 * @param createdAt creation timestamp of the source entry
 */
public record GenericDocumentSourceResponse(
UUID id,
SourceType sourceType,
String externalSourceId,
String sourceUri,
String sourceFilename,
UUID parentSourceId,
String importBatchId,
OffsetDateTime receivedAt,
OffsetDateTime createdAt
) {
}

View File

@ -1,13 +1,13 @@
package at.procon.dip.domain.document.repository; package at.procon.dip.domain.document.repository;
import at.procon.dip.domain.document.entity.DocumentAttribute; import at.procon.dip.domain.document.entity.DocumentAttribute;
import at.procon.dip.domain.document.entity.DocumentAttributeName; import java.util.List;
import org.springframework.data.jpa.repository.JpaRepository;
import java.util.Optional;
import java.util.UUID; import java.util.UUID;
import org.springframework.data.jpa.repository.JpaRepository;
public interface DocumentAttributeRepository extends JpaRepository<DocumentAttribute, UUID> { public interface DocumentAttributeRepository extends JpaRepository<DocumentAttribute, UUID> {
boolean existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(UUID documentId, UUID attributeNameId, String attributeValueHash); boolean existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(UUID documentId, UUID attributeNameId, String attributeValueHash);
List<DocumentAttribute> findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(UUID documentId);
} }

View File

@ -0,0 +1,104 @@
package at.procon.dip.domain.document.service;
import at.procon.dip.domain.document.dto.GenericDocumentAttributeResponse;
import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse;
import at.procon.dip.domain.document.dto.GenericDocumentRepresentationResponse;
import at.procon.dip.domain.document.dto.GenericDocumentSourceResponse;
import at.procon.dip.domain.document.entity.Document;
import at.procon.dip.domain.document.repository.DocumentAttributeRepository;
import at.procon.dip.domain.document.repository.DocumentRepository;
import at.procon.dip.domain.document.repository.DocumentSourceRepository;
import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository;
import java.util.Optional;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
/**
 * Read-only service that assembles the detail view of a single document from the
 * document itself and its attributes, sources and text representations.
 */
@Service
@RequiredArgsConstructor
@Transactional(readOnly = true)
public class GenericDocumentReadService {

    /** Upper bound, in UTF-16 code units, of the text preview returned per representation. */
    private static final int TEXT_PREVIEW_MAX_LENGTH = 400;

    private final DocumentRepository documentRepository;
    private final DocumentAttributeRepository documentAttributeRepository;
    private final DocumentSourceRepository documentSourceRepository;
    private final DocumentTextRepresentationRepository documentTextRepresentationRepository;

    /**
     * Looks up a document and builds its detail response.
     *
     * @param documentId identifier of the document to load
     * @return the detail response, or an empty {@link Optional} when no document has that id
     */
    public Optional<GenericDocumentDetailResponse> findById(UUID documentId) {
        return documentRepository.findById(documentId).map(this::toResponse);
    }

    /** Maps the document's own fields and loads its three child collections. */
    private GenericDocumentDetailResponse toResponse(Document document) {
        UUID documentId = document.getId();
        return new GenericDocumentDetailResponse(
                documentId,
                document.getOwnerTenant() == null ? null : document.getOwnerTenant().getTenantKey(),
                document.getVisibility(),
                document.getDocumentType(),
                document.getDocumentFamily(),
                document.getStatus(),
                document.getTitle(),
                document.getSummary(),
                document.getLanguageCode(),
                document.getMimeType(),
                document.getBusinessKey(),
                document.getDedupHash(),
                document.getCreatedAt(),
                document.getUpdatedAt(),
                loadAttributes(documentId),
                loadSources(documentId),
                loadRepresentations(documentId)
        );
    }

    /** Loads the document's attributes, ordered by attribute context and then attribute name. */
    private java.util.List<GenericDocumentAttributeResponse> loadAttributes(UUID documentId) {
        return documentAttributeRepository
                .findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(documentId)
                .stream()
                .map(attribute -> new GenericDocumentAttributeResponse(
                        attribute.getId(),
                        attribute.getAttributeName().getAttributeName(),
                        attribute.getAttributeName().getNormalizedName(),
                        attribute.getAttributeName().getAttributeContext(),
                        attribute.getAttributeName().getAttributeValueType(),
                        attribute.getStringValue(),
                        attribute.getIntegerValue(),
                        attribute.getNumberValue(),
                        attribute.getDateValue(),
                        attribute.getDatetimeValue(),
                        attribute.getBooleanValue(),
                        attribute.getCreatedAt()
                ))
                .toList();
    }

    /** Loads the sources linked to the document. */
    private java.util.List<GenericDocumentSourceResponse> loadSources(UUID documentId) {
        return documentSourceRepository.findByDocument_Id(documentId).stream()
                .map(source -> new GenericDocumentSourceResponse(
                        source.getId(),
                        source.getSourceType(),
                        source.getExternalSourceId(),
                        source.getSourceUri(),
                        source.getSourceFilename(),
                        source.getParentSourceId(),
                        source.getImportBatchId(),
                        source.getReceivedAt(),
                        source.getCreatedAt()
                ))
                .toList();
    }

    /** Loads the document's text representations, exposing only a preview of each text body. */
    private java.util.List<GenericDocumentRepresentationResponse> loadRepresentations(UUID documentId) {
        return documentTextRepresentationRepository.findByDocument_Id(documentId).stream()
                .map(representation -> new GenericDocumentRepresentationResponse(
                        representation.getId(),
                        representation.getRepresentationType(),
                        representation.getBuilderKey(),
                        representation.getLanguageCode(),
                        representation.getTokenCount(),
                        representation.getCharCount(),
                        representation.getChunkIndex(),
                        representation.getChunkStartOffset(),
                        representation.getChunkEndOffset(),
                        representation.isPrimaryRepresentation(),
                        representation.getContent() == null ? null : representation.getContent().getId(),
                        preview(representation.getTextBody()),
                        representation.getCreatedAt()
                ))
                .toList();
    }

    /**
     * Returns the leading part of a text body, at most {@link #TEXT_PREVIEW_MAX_LENGTH}
     * UTF-16 code units long.
     *
     * @param textBody full text, may be {@code null}
     * @return {@code null} for {@code null} input, the text itself when it is short enough,
     *         otherwise its truncated prefix
     */
    private String preview(String textBody) {
        if (textBody == null || textBody.length() <= TEXT_PREVIEW_MAX_LENGTH) {
            return textBody;
        }
        int end = TEXT_PREVIEW_MAX_LENGTH;
        // Do not cut between the two halves of a surrogate pair: a dangling high
        // surrogate is not a valid character and breaks when the preview is encoded.
        if (Character.isHighSurrogate(textBody.charAt(end - 1))
                && Character.isLowSurrogate(textBody.charAt(end))) {
            end--;
        }
        return textBody.substring(0, end);
    }
}

View File

@ -0,0 +1,29 @@
package at.procon.dip.domain.document.web;
import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse;
import at.procon.dip.domain.document.service.GenericDocumentReadService;
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
import at.procon.dip.runtime.config.RuntimeMode;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
/**
 * REST endpoint exposing the detail view of a single document under
 * {@code /v1/dip/documents}.
 */
@RestController
@RequestMapping("/v1/dip/documents")
@RequiredArgsConstructor
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
public class GenericDocumentController {

    private final GenericDocumentReadService documentReadService;

    /**
     * Returns the detail view of the document with the given id.
     *
     * @param id document identifier taken from the request path
     * @return an OK response carrying the document detail, or a not-found response
     *         without body when the read service finds no such document
     */
    @GetMapping("/{id}")
    public ResponseEntity<GenericDocumentDetailResponse> getDocument(@PathVariable UUID id) {
        var lookup = documentReadService.findById(id);
        if (lookup.isEmpty()) {
            return ResponseEntity.notFound().build();
        }
        return ResponseEntity.ok(lookup.get());
    }
}

View File

@ -8,7 +8,7 @@ import at.procon.dip.domain.document.service.DocumentAttributeService;
import at.procon.dip.domain.tenant.TenantRef; import at.procon.dip.domain.tenant.TenantRef;
import at.procon.dip.ingestion.config.DipIngestionProperties; import at.procon.dip.ingestion.config.DipIngestionProperties;
import at.procon.dip.ingestion.dto.GenericImportResponse; import at.procon.dip.ingestion.dto.GenericImportResponse;
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest; import at.procon.dip.ingestion.dto.GenericIngestionHintsRequest;
import at.procon.dip.ingestion.dto.GenericTextImportRequest; import at.procon.dip.ingestion.dto.GenericTextImportRequest;
import at.procon.dip.ingestion.service.DocumentIngestionGateway; import at.procon.dip.ingestion.service.DocumentIngestionGateway;
import at.procon.dip.ingestion.spi.IngestionResult; import at.procon.dip.ingestion.spi.IngestionResult;
@ -16,7 +16,6 @@ import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
import at.procon.dip.ingestion.spi.SourceDescriptor; import at.procon.dip.ingestion.spi.SourceDescriptor;
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
import at.procon.dip.runtime.config.RuntimeMode; import at.procon.dip.runtime.config.RuntimeMode;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
@ -106,54 +105,34 @@ public class GenericDocumentImportController {
private Map<String, String> buildDescriptorAttributes(GenericTextImportRequest request) { private Map<String, String> buildDescriptorAttributes(GenericTextImportRequest request) {
Map<String, String> attributes = new LinkedHashMap<>(); Map<String, String> attributes = new LinkedHashMap<>();
if (request.attributes() != null) {
for (GenericNameValuePairRequest attribute : request.attributes()) {
if (attribute == null || !StringUtils.hasText(attribute.name()) || !isGlobalContext(attribute.context())) {
continue;
}
String renderedValue = renderAsDescriptorValue(attribute);
if (StringUtils.hasText(renderedValue)) {
attributes.put(attribute.name().trim(), renderedValue);
}
}
}
if (StringUtils.hasText(request.languageCode())) { if (StringUtils.hasText(request.languageCode())) {
attributes.put("languageCode", request.languageCode()); attributes.put("languageCode", request.languageCode());
} }
if (StringUtils.hasText(request.title())) { if (StringUtils.hasText(request.title())) {
attributes.put("title", request.title()); attributes.put("title", request.title());
} }
GenericIngestionHintsRequest hints = request.ingestionHints();
if (hints != null) {
putIfHasText(attributes, "documentTypeHint", hints.documentTypeHint());
putIfHasText(attributes, "embeddingPolicyKey", hints.embeddingPolicyKey());
putIfHasText(attributes, "embeddingPolicyHint", hints.embeddingPolicyHint());
putIfHasText(attributes, "importBatchId", hints.importBatchId());
putIfNotNull(attributes, "wrapperDocument", hints.wrapperDocument());
putIfNotNull(attributes, "containerDocument", hints.containerDocument());
}
return attributes; return attributes;
} }
private String renderAsDescriptorValue(GenericNameValuePairRequest attribute) { private void putIfHasText(Map<String, String> attributes, String key, String value) {
if (StringUtils.hasText(attribute.value())) { if (StringUtils.hasText(value)) {
return attribute.value().trim(); attributes.put(key, value.trim());
} }
if (StringUtils.hasText(attribute.stringValue())) {
return attribute.stringValue().trim();
}
if (attribute.integerValue() != null) {
return attribute.integerValue().toString();
}
if (attribute.numberValue() != null) {
BigDecimal number = attribute.numberValue().stripTrailingZeros();
return number.toPlainString();
}
if (attribute.dateValue() != null) {
return attribute.dateValue().toString();
}
if (attribute.datetimeValue() != null) {
return attribute.datetimeValue().withNano(0).toString();
}
if (attribute.booleanValue() != null) {
return attribute.booleanValue().toString();
}
return null;
} }
private boolean isGlobalContext(String context) { private void putIfNotNull(Map<String, String> attributes, String key, Boolean value) {
return !StringUtils.hasText(context) || DocumentAttributeService.GLOBAL_CONTEXT.equalsIgnoreCase(context.trim()); if (value != null) {
attributes.put(key, value.toString());
}
} }
private void ensureRestUploadEnabled() { private void ensureRestUploadEnabled() {

View File

@ -0,0 +1,11 @@
package at.procon.dip.ingestion.dto;
/**
 * Optional ingestion hints that can accompany a generic import request.
 *
 * <p>All components are nullable; an absent hint is simply left unset.
 *
 * @param documentTypeHint hint for the document type
 * @param embeddingPolicyKey key of the embedding policy
 * @param embeddingPolicyHint hint for the embedding policy
 * @param importBatchId identifier of the import batch
 * @param wrapperDocument wrapper-document flag
 * @param containerDocument container-document flag
 */
public record GenericIngestionHintsRequest(
String documentTypeHint,
String embeddingPolicyKey,
String embeddingPolicyHint,
String importBatchId,
Boolean wrapperDocument,
Boolean containerDocument
) {
}

View File

@ -12,6 +12,7 @@ public record GenericTextImportRequest(
String languageCode, String languageCode,
String title, String title,
String sourceIdentifier, String sourceIdentifier,
List<GenericNameValuePairRequest> attributes List<GenericNameValuePairRequest> attributes,
GenericIngestionHintsRequest ingestionHints
) { ) {
} }

View File

@ -0,0 +1,19 @@
package at.procon.dip.ingestion.service;
import java.util.Map;
/**
 * Keys of internal ingestion attributes plus a helper for reading boolean flags
 * from a string-to-string attribute map. Not instantiable.
 */
public final class IngestionInternalAttributes {

    /** Attribute key {@code "_deferLexicalIndex"}. */
    public static final String DEFER_LEXICAL_INDEX = "_deferLexicalIndex";

    private IngestionInternalAttributes() {
        // static members only
    }

    /**
     * Tells whether the attribute stored under {@code key} represents {@code true}.
     *
     * @param attributes attribute map, may be {@code null} or empty
     * @param key attribute key to look up
     * @return {@code true} only when the map holds a value for {@code key} that equals
     *         {@code "true"} ignoring case; {@code false} in every other situation
     */
    public static boolean isTruthy(Map<String, String> attributes, String key) {
        boolean hasEntries = attributes != null && !attributes.isEmpty();
        // Boolean.parseBoolean maps null (missing key) to false, so no extra null check is needed.
        return hasEntries && Boolean.parseBoolean(attributes.get(key));
    }
}

View File

@ -0,0 +1,13 @@
package at.procon.dip.ingestion.service;
import at.procon.dip.domain.document.SourceType;
import org.springframework.http.HttpStatus;
import org.springframework.web.bind.annotation.ResponseStatus;
/**
 * Signals that a source identifier of a given source type is already linked to a
 * different payload.
 *
 * <p>Annotated with {@code @ResponseStatus(HttpStatus.CONFLICT)}, so Spring MVC is
 * expected to answer with HTTP 409 when this exception leaves a request handler.
 */
@ResponseStatus(HttpStatus.CONFLICT)
public class SourceIdentifierConflictException extends RuntimeException {
/**
 * Builds the exception with a message naming the conflicting identifier and source type.
 *
 * @param sourceType source type the identifier belongs to
 * @param sourceIdentifier identifier that is already linked to another payload
 */
public SourceIdentifierConflictException(SourceType sourceType, String sourceIdentifier) {
super("Source identifier '" + sourceIdentifier + "' for source type '" + sourceType + "' is already linked to a different payload");
}
}
}

View File

@ -0,0 +1,11 @@
package at.procon.dip.search.dto;
/**
 * Comparison operators available in document attribute search filters.
 *
 * <p>The SQL filter builder ({@code SearchSqlFilterSupport}) accepts only {@code EQ}
 * and {@code NE} as value comparisons for string and boolean attributes; the ordering
 * operators are rejected for those types.
 */
public enum DocumentAttributeFilterOperator {
/** Attribute value equals the filter value (SQL {@code =}). */
EQ,
/** Negated equality; rendered by the SQL filter builder as {@code NOT EXISTS} around an equality match. */
NE,
/** Attribute value is greater than the filter value (SQL {@code >}). */
GT,
/** Attribute value is greater than or equal to the filter value (SQL {@code >=}). */
GTE,
/** Attribute value is less than the filter value (SQL {@code <}). */
LT,
/** Attribute value is less than or equal to the filter value (SQL {@code <=}). */
LTE,
/** The attribute is present on the document; no value comparison is made. */
EXISTS
}

View File

@ -0,0 +1,28 @@
package at.procon.dip.search.dto;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One attribute condition of a search request: which attribute to look at, how to
 * compare it, and the typed value to compare against.
 *
 * <p>Exactly how the fields are interpreted is decided by the SQL filter builder
 * ({@code SearchSqlFilterSupport}); the field comments below summarize that handling.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class DocumentAttributeFilterRequest {
/** Attribute name; trimmed and lower-cased before matching. Filters with a blank name are skipped. */
private String name;
/** Optional attribute context; when it has text it is trimmed and upper-cased, otherwise no context restriction applies. */
private String context;
/** Optional value type; when {@code null} it is inferred from the first populated value field (string, integer, number, date, datetime, boolean). */
private DocumentAttributeValueType type;
/** Comparison operator; defaults to {@code EQ}, and a {@code null} operator is treated as {@code EQ} as well. */
@Builder.Default
private DocumentAttributeFilterOperator operator = DocumentAttributeFilterOperator.EQ;
/** Comparison value for string attributes. */
private String stringValue;
/** Comparison value for integer attributes. */
private Long integerValue;
/** Comparison value for decimal number attributes. */
private BigDecimal numberValue;
/** Comparison value for date attributes. */
private LocalDate dateValue;
/** Comparison value for date-time attributes. */
private OffsetDateTime datetimeValue;
/** Comparison value for boolean attributes. */
private Boolean booleanValue;
}

View File

@ -31,6 +31,7 @@ public class SearchRequest {
private Set<RepresentationType> representationTypes; private Set<RepresentationType> representationTypes;
private OffsetDateTime createdFrom; private OffsetDateTime createdFrom;
private OffsetDateTime createdTo; private OffsetDateTime createdTo;
private java.util.List<DocumentAttributeFilterRequest> attributeFilters;
private Integer page; private Integer page;
private Integer size; private Integer size;

View File

@ -1,17 +1,25 @@
package at.procon.dip.search.repository; package at.procon.dip.search.repository;
import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentType; import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.RepresentationType; import at.procon.dip.domain.document.RepresentationType;
import at.procon.dip.search.api.SearchExecutionContext; import at.procon.dip.search.api.SearchExecutionContext;
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
import at.procon.dip.search.dto.SearchRepresentationSelectionMode; import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.util.CollectionUtils; import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
final class SearchSqlFilterSupport { final class SearchSqlFilterSupport {
@ -90,6 +98,144 @@ final class SearchSqlFilterSupport {
sql.append(" AND ").append(documentAlias).append(".id IN (:candidateDocumentIds)"); sql.append(" AND ").append(documentAlias).append(".id IN (:candidateDocumentIds)");
params.addValue("candidateDocumentIds", context.getScope().candidateDocumentIds()); params.addValue("candidateDocumentIds", context.getScope().candidateDocumentIds());
} }
appendAttributeFilters(sql, params, context, documentAlias);
}
/**
 * Appends one correlated {@code EXISTS} (or {@code NOT EXISTS} for {@code NE})
 * sub-query per usable attribute filter of the request.
 *
 * <p>Filters that are {@code null} or have a blank name are skipped and do not consume
 * an alias/parameter index. A missing operator is treated as {@code EQ}.
 *
 * @param sql SQL text being built; extended in place
 * @param params named parameters; receives the name, context and value bindings
 * @param context execution context providing the search request
 * @param documentAlias alias of the document table in the outer query
 * @throws IllegalArgumentException when a filter needs a value but carries none, or uses
 *         an operator its value type does not support
 */
private static void appendAttributeFilters(StringBuilder sql,
                                           MapSqlParameterSource params,
                                           SearchExecutionContext context,
                                           String documentAlias) {
    List<DocumentAttributeFilterRequest> requested = context.getRequest().getAttributeFilters();
    if (CollectionUtils.isEmpty(requested)) {
        return;
    }

    int position = 0;
    for (DocumentAttributeFilterRequest candidate : requested) {
        boolean usable = candidate != null && StringUtils.hasText(candidate.getName());
        if (!usable) {
            continue;
        }

        String alias = "af" + position;
        String nameParam = "attributeName" + position;
        String contextParam = "attributeContext" + position;
        String valueParam = "attributeValue" + position;

        sql.append(" AND ");
        DocumentAttributeFilterOperator requestedOperator = candidate.getOperator();
        DocumentAttributeFilterOperator operator = requestedOperator != null
                ? requestedOperator
                : DocumentAttributeFilterOperator.EQ;
        boolean negated = operator == DocumentAttributeFilterOperator.NE;
        if (negated) {
            sql.append("NOT ");
        }

        sql.append("EXISTS (SELECT 1 FROM doc.doc_document_attribute ").append(alias);
        sql.append(" JOIN doc.doc_attribute_name ").append(alias).append("n ON ");
        sql.append(alias).append("n.id = ").append(alias).append(".attribute_name_id");
        sql.append(" WHERE ").append(alias).append(".document_id = ").append(documentAlias).append(".id");
        sql.append(" AND ").append(alias).append("n.normalized_name = :").append(nameParam);
        params.addValue(nameParam, normalizeName(candidate.getName()));

        String attributeContext = candidate.getContext();
        if (StringUtils.hasText(attributeContext)) {
            sql.append(" AND ").append(alias).append("n.attribute_context = :").append(contextParam);
            params.addValue(contextParam, attributeContext.trim().toUpperCase(Locale.ROOT));
        }

        // EXISTS needs no value comparison; NE is an equality match inside NOT EXISTS.
        if (operator != DocumentAttributeFilterOperator.EXISTS) {
            DocumentAttributeFilterOperator comparison = negated
                    ? DocumentAttributeFilterOperator.EQ
                    : operator;
            appendAttributeValuePredicate(sql, params, alias, valueParam, candidate, comparison);
        }

        sql.append(")");
        position++;
    }
}
/**
 * Appends the value comparison ({@code AND <column> <op> :<param>}) of one attribute
 * filter and binds the comparison value.
 *
 * @param sql SQL text being built; extended in place
 * @param params named parameters; receives the value binding
 * @param filterAlias alias of the attribute table inside the sub-query
 * @param valueParam name of the parameter holding the comparison value
 * @param filter filter supplying the value type and value
 * @param operator comparison to render; must be one of EQ, GT, GTE, LT, LTE
 * @throws IllegalArgumentException when the filter has no usable value, when
 *         {@code operator} is NE or EXISTS, or when an ordering operator is used with
 *         a string or boolean value
 */
private static void appendAttributeValuePredicate(StringBuilder sql,
                                                  MapSqlParameterSource params,
                                                  String filterAlias,
                                                  String valueParam,
                                                  DocumentAttributeFilterRequest filter,
                                                  DocumentAttributeFilterOperator operator) {
    ResolvedAttributeFilterValue resolved = resolveFilterValue(filter);
    DocumentAttributeValueType valueType = resolved.type();

    String valueColumn = switch (valueType) {
        case STRING -> filterAlias + ".string_value";
        case INTEGER -> filterAlias + ".integer_value";
        case NUMBER -> filterAlias + ".number_value";
        case DATE -> filterAlias + ".date_value";
        case DATETIME -> filterAlias + ".datetime_value";
        case BOOLEAN -> filterAlias + ".boolean_value";
    };

    String comparison = switch (operator) {
        case NE, EXISTS -> throw new IllegalArgumentException("Unsupported attribute operator in value predicate: " + operator);
        case EQ -> "=";
        case GT -> ">";
        case GTE -> ">=";
        case LT -> "<";
        case LTE -> "<=";
    };

    // Strings and booleans can only be tested for equality, not ordered.
    boolean equalityOnly = valueType == DocumentAttributeValueType.STRING
            || valueType == DocumentAttributeValueType.BOOLEAN;
    if (equalityOnly && operator != DocumentAttributeFilterOperator.EQ) {
        throw new IllegalArgumentException("Only EQ/NE operators are supported for " + valueType + " attribute filters");
    }

    sql.append(" AND " + valueColumn + " " + comparison + " :" + valueParam);
    params.addValue(valueParam, resolved.value());
}
/**
 * Determines the value type of a filter and extracts the matching, validated value.
 *
 * <p>An explicitly set type wins. Otherwise the type is inferred from the first
 * populated value field, checked in the order string, integer, number, date,
 * datetime, boolean.
 *
 * @param filter filter to inspect
 * @return the effective type together with its comparison value
 * @throws IllegalArgumentException when no type is set and no value field is populated,
 *         or when the value field belonging to the effective type is missing
 */
private static ResolvedAttributeFilterValue resolveFilterValue(DocumentAttributeFilterRequest filter) {
    DocumentAttributeValueType effectiveType = filter.getType();
    if (effectiveType == null) {
        effectiveType = StringUtils.hasText(filter.getStringValue()) ? DocumentAttributeValueType.STRING
                : filter.getIntegerValue() != null ? DocumentAttributeValueType.INTEGER
                : filter.getNumberValue() != null ? DocumentAttributeValueType.NUMBER
                : filter.getDateValue() != null ? DocumentAttributeValueType.DATE
                : filter.getDatetimeValue() != null ? DocumentAttributeValueType.DATETIME
                : filter.getBooleanValue() != null ? DocumentAttributeValueType.BOOLEAN
                : null;
        if (effectiveType == null) {
            throw new IllegalArgumentException("Attribute filter requires a typed value unless operator is EXISTS");
        }
    }

    String filterName = filter.getName();
    Object comparisonValue = switch (effectiveType) {
        case STRING -> requireText(filter.getStringValue(), filterName, "string");
        case INTEGER -> requireValue(filter.getIntegerValue(), filterName, "integer");
        case NUMBER -> requireNumber(filter.getNumberValue(), filterName);
        case DATE -> requireValue(filter.getDateValue(), filterName, "date");
        case DATETIME -> requireValue(filter.getDatetimeValue(), filterName, "datetime");
        case BOOLEAN -> requireValue(filter.getBooleanValue(), filterName, "boolean");
    };
    return new ResolvedAttributeFilterValue(effectiveType, comparisonValue);
}
/**
 * Returns the trimmed text of a filter value, rejecting {@code null} or blank input.
 *
 * @param value text supplied by the filter
 * @param name filter name, used in the error message
 * @param kind value kind, used in the error message
 * @return {@code value} without leading and trailing whitespace
 * @throws IllegalArgumentException when {@code value} has no text
 */
private static String requireText(String value, String name, String kind) {
    if (StringUtils.hasText(value)) {
        return value.trim();
    }
    throw new IllegalArgumentException("Attribute filter '" + name + "' requires a " + kind + " value");
}
/**
 * Returns the numeric filter value with trailing zeros stripped, rejecting {@code null}.
 *
 * @param value number supplied by the filter
 * @param name filter name, used in the error message
 * @return {@code value.stripTrailingZeros()}
 * @throws IllegalArgumentException when {@code value} is {@code null}
 */
private static BigDecimal requireNumber(BigDecimal value, String name) {
    if (value != null) {
        return value.stripTrailingZeros();
    }
    throw new IllegalArgumentException("Attribute filter '" + name + "' requires a number value");
}
/**
 * Returns the given filter value unchanged, rejecting {@code null}.
 *
 * @param value value supplied by the filter
 * @param name filter name, used in the error message
 * @param kind value kind, used in the error message
 * @param <T> type of the value
 * @return {@code value}
 * @throws IllegalArgumentException when {@code value} is {@code null}
 */
private static <T> T requireValue(T value, String name, String kind) {
    if (value != null) {
        return value;
    }
    throw new IllegalArgumentException("Attribute filter '" + name + "' requires a " + kind + " value");
}
/**
 * Normalizes an attribute name for matching: strips leading/trailing whitespace and
 * lower-cases it with {@link Locale#ROOT} so the result does not depend on the default locale.
 * Callers must pass a non-null name.
 */
private static String normalizeName(String name) {
return name.trim().toLowerCase(Locale.ROOT);
} }
private static <T> Set<T> firstNonEmpty(Set<T> primary, Set<T> fallback) { private static <T> Set<T> firstNonEmpty(Set<T> primary, Set<T> fallback) {
@ -99,4 +245,7 @@ final class SearchSqlFilterSupport {
private static List<String> enumNames(Collection<? extends Enum<?>> values) { private static List<String> enumNames(Collection<? extends Enum<?>> values) {
return values.stream().map(Enum::name).collect(Collectors.toList()); return values.stream().map(Enum::name).collect(Collectors.toList());
} }
private record ResolvedAttributeFilterValue(DocumentAttributeValueType type, Object value) {
}
} }

View File

@ -0,0 +1,107 @@
package at.procon.dip.domain.document.service;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentStatus;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.RepresentationType;
import at.procon.dip.domain.document.SourceType;
import at.procon.dip.domain.document.entity.Document;
import at.procon.dip.domain.document.entity.DocumentAttribute;
import at.procon.dip.domain.document.entity.DocumentAttributeName;
import at.procon.dip.domain.document.entity.DocumentSource;
import at.procon.dip.domain.document.entity.DocumentTextRepresentation;
import at.procon.dip.domain.document.repository.DocumentAttributeRepository;
import at.procon.dip.domain.document.repository.DocumentRepository;
import at.procon.dip.domain.document.repository.DocumentSourceRepository;
import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Unit tests for {@link GenericDocumentReadService} with all repositories mocked.
 */
@ExtendWith(MockitoExtension.class)
class GenericDocumentReadServiceTest {

    @Mock private DocumentRepository documentRepository;
    @Mock private DocumentAttributeRepository documentAttributeRepository;
    @Mock private DocumentSourceRepository documentSourceRepository;
    @Mock private DocumentTextRepresentationRepository documentTextRepresentationRepository;

    private GenericDocumentReadService service;

    @BeforeEach
    void setUp() {
        service = new GenericDocumentReadService(documentRepository, documentAttributeRepository, documentSourceRepository, documentTextRepresentationRepository);
    }

    @Test
    void shouldReturnDocumentDetailWithAttributesSourcesAndRepresentations() {
        UUID documentId = UUID.randomUUID();
        Document document = sampleDocument(documentId);

        when(documentRepository.findById(documentId)).thenReturn(Optional.of(document));
        when(documentAttributeRepository.findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(documentId))
                .thenReturn(List.of(DocumentAttribute.builder()
                        .id(UUID.randomUUID())
                        .document(document)
                        .attributeName(DocumentAttributeName.builder()
                                .id(UUID.randomUUID())
                                .attributeName("status")
                                .normalizedName("status")
                                .attributeContext("TED")
                                .attributeValueType(DocumentAttributeValueType.STRING)
                                .build())
                        .stringValue("closed")
                        .createdAt(OffsetDateTime.now())
                        .attributeValueHash("x")
                        .build()));
        when(documentSourceRepository.findByDocument_Id(documentId))
                .thenReturn(List.of(DocumentSource.builder()
                        .id(UUID.randomUUID())
                        .document(document)
                        .sourceType(SourceType.API)
                        .externalSourceId("ext-1")
                        .sourceFilename("sample.txt")
                        .receivedAt(OffsetDateTime.now())
                        .build()));
        when(documentTextRepresentationRepository.findByDocument_Id(documentId))
                .thenReturn(List.of(textRepresentation(document, "hello world")));

        var response = service.findById(documentId);

        assertThat(response).isPresent();
        assertThat(response.get().id()).isEqualTo(documentId);
        assertThat(response.get().attributes()).hasSize(1);
        assertThat(response.get().attributes().getFirst().context()).isEqualTo("TED");
        assertThat(response.get().sources()).hasSize(1);
        assertThat(response.get().sources().getFirst().externalSourceId()).isEqualTo("ext-1");
        assertThat(response.get().representations()).hasSize(1);
        assertThat(response.get().representations().getFirst().textPreview()).isEqualTo("hello world");
    }

    @Test
    void shouldReturnEmptyWhenDocumentDoesNotExist() {
        UUID documentId = UUID.randomUUID();
        when(documentRepository.findById(documentId)).thenReturn(Optional.empty());

        assertThat(service.findById(documentId)).isEmpty();
    }

    @Test
    void shouldTruncateTextPreviewTo400Characters() {
        UUID documentId = UUID.randomUUID();
        Document document = sampleDocument(documentId);

        when(documentRepository.findById(documentId)).thenReturn(Optional.of(document));
        // Attribute and source repositories stay unstubbed: Mockito answers with empty collections.
        when(documentTextRepresentationRepository.findByDocument_Id(documentId))
                .thenReturn(List.of(textRepresentation(document, "x".repeat(500))));

        var response = service.findById(documentId);

        assertThat(response).isPresent();
        assertThat(response.get().representations()).hasSize(1);
        assertThat(response.get().representations().getFirst().textPreview()).isEqualTo("x".repeat(400));
    }

    /** Builds the document fixture shared by the tests. */
    private static Document sampleDocument(UUID documentId) {
        return Document.builder()
                .id(documentId)
                .visibility(DocumentVisibility.PUBLIC)
                .documentType(DocumentType.TEXT)
                .documentFamily(DocumentFamily.GENERIC)
                .status(DocumentStatus.REPRESENTED)
                .title("Doc")
                .languageCode("de")
                .mimeType("text/plain")
                .businessKey("API:ext-1")
                .dedupHash("abc")
                .build();
    }

    /** Builds a primary semantic-text representation with the given text body. */
    private static DocumentTextRepresentation textRepresentation(Document document, String textBody) {
        return DocumentTextRepresentation.builder()
                .id(UUID.randomUUID())
                .document(document)
                .representationType(RepresentationType.SEMANTIC_TEXT)
                .primaryRepresentation(true)
                .textBody(textBody)
                .build();
    }
}

View File

@ -0,0 +1,44 @@
package at.procon.dip.domain.document.web;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;
import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentStatus;
import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse;
import at.procon.dip.domain.document.service.GenericDocumentReadService;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
/**
 * Unit tests for {@link GenericDocumentController} with the read service mocked.
 */
@ExtendWith(MockitoExtension.class)
class GenericDocumentControllerTest {

    @Mock private GenericDocumentReadService readService;

    private GenericDocumentController controller;

    @BeforeEach
    void setUp() {
        controller = new GenericDocumentController(readService);
    }

    @Test
    void shouldReturnDocumentWhenFound() {
        UUID id = UUID.randomUUID();
        when(readService.findById(id)).thenReturn(Optional.of(new GenericDocumentDetailResponse(
                id, null, null, DocumentType.TEXT, DocumentFamily.GENERIC, DocumentStatus.REPRESENTED,
                "Doc", null, null, "text/plain", "API:1", "hash", null, null, List.of(), List.of(), List.of()
        )));

        var response = controller.getDocument(id);

        assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
        assertThat(response.getBody()).isNotNull();
        assertThat(response.getBody().id()).isEqualTo(id);
    }

    @Test
    void shouldReturnNotFoundWhenDocumentIsMissing() {
        UUID id = UUID.randomUUID();
        when(readService.findById(id)).thenReturn(Optional.empty());

        var response = controller.getDocument(id);

        // The missing-document branch must answer 404 without a body.
        assertThat(response.getStatusCode().value()).isEqualTo(404);
        assertThat(response.getBody()).isNull();
    }
}

View File

@ -7,18 +7,19 @@ import static org.mockito.Mockito.when;
import at.procon.dip.domain.access.DocumentAccessContext; import at.procon.dip.domain.access.DocumentAccessContext;
import at.procon.dip.domain.access.DocumentVisibility; import at.procon.dip.domain.access.DocumentVisibility;
import at.procon.dip.domain.document.CanonicalDocumentMetadata;
import at.procon.dip.domain.document.DocumentAttributeValueType; import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentFamily;
import at.procon.dip.domain.document.DocumentStatus; import at.procon.dip.domain.document.DocumentStatus;
import at.procon.dip.domain.document.DocumentType; import at.procon.dip.domain.document.DocumentType;
import at.procon.dip.domain.document.service.DocumentAttributeService; import at.procon.dip.domain.document.service.DocumentAttributeService;
import at.procon.dip.ingestion.config.DipIngestionProperties; import at.procon.dip.ingestion.config.DipIngestionProperties;
import at.procon.dip.ingestion.dto.GenericIngestionHintsRequest;
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest; import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
import at.procon.dip.ingestion.dto.GenericTextImportRequest; import at.procon.dip.ingestion.dto.GenericTextImportRequest;
import at.procon.dip.ingestion.service.DocumentIngestionGateway; import at.procon.dip.ingestion.service.DocumentIngestionGateway;
import at.procon.dip.ingestion.spi.IngestionResult; import at.procon.dip.ingestion.spi.IngestionResult;
import at.procon.dip.ingestion.spi.SourceDescriptor; import at.procon.dip.ingestion.spi.SourceDescriptor;
import at.procon.dip.domain.document.CanonicalDocumentMetadata;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
@ -46,18 +47,17 @@ class GenericDocumentImportControllerTest {
DipIngestionProperties properties = new DipIngestionProperties(); DipIngestionProperties properties = new DipIngestionProperties();
properties.setEnabled(true); properties.setEnabled(true);
properties.setRestUploadEnabled(true); properties.setRestUploadEnabled(true);
properties.setDefaultVisibility(DocumentVisibility.PUBLIC);
controller = new GenericDocumentImportController(properties, ingestionGateway, documentAttributeService); controller = new GenericDocumentImportController(properties, ingestionGateway, documentAttributeService);
} }
@Test @Test
void shouldPassGlobalTypedAttributesIntoIngestionAndPersistAllAttributes() { void shouldPersistAttributesButUseOnlyTopLevelFieldsAndIngestionHintsForDescriptorAttributes() {
UUID documentId = UUID.randomUUID(); UUID documentId = UUID.randomUUID();
OffsetDateTime syncedAt = OffsetDateTime.parse("2026-04-21T11:05:00+02:00"); OffsetDateTime syncedAt = OffsetDateTime.parse("2026-04-21T11:05:00+02:00");
List<GenericNameValuePairRequest> pairs = List.of( List<GenericNameValuePairRequest> pairs = List.of(
new GenericNameValuePairRequest("title", null, DocumentAttributeValueType.STRING, null, "Text title from pair", null, null, null, null, null), new GenericNameValuePairRequest("title", null, DocumentAttributeValueType.STRING, null, "Text title from pair", null, null, null, null, null),
new GenericNameValuePairRequest("languageCode", null, DocumentAttributeValueType.STRING, null, "de", null, null, null, null, null), new GenericNameValuePairRequest("languageCode", null, DocumentAttributeValueType.STRING, null, "de", null, null, null, null, null),
new GenericNameValuePairRequest("country", null, null, "AT", null, null, null, null, null, null), new GenericNameValuePairRequest("country", null, DocumentAttributeValueType.STRING, null, "AT", null, null, null, null, null),
new GenericNameValuePairRequest("estimatedValue", null, DocumentAttributeValueType.NUMBER, null, null, null, new BigDecimal("125000.50"), null, null, null), new GenericNameValuePairRequest("estimatedValue", null, DocumentAttributeValueType.NUMBER, null, null, null, new BigDecimal("125000.50"), null, null, null),
new GenericNameValuePairRequest("publishedDate", null, DocumentAttributeValueType.DATE, null, null, null, null, LocalDate.of(2026, 4, 21), null, null), new GenericNameValuePairRequest("publishedDate", null, DocumentAttributeValueType.DATE, null, null, null, null, LocalDate.of(2026, 4, 21), null, null),
new GenericNameValuePairRequest("version", null, DocumentAttributeValueType.INTEGER, null, null, 7L, null, null, null, null), new GenericNameValuePairRequest("version", null, DocumentAttributeValueType.INTEGER, null, null, 7L, null, null, null, null),
@ -70,10 +70,11 @@ class GenericDocumentImportControllerTest {
"text/plain", "text/plain",
null, null,
null, null,
null, "fr",
null, "Top level title",
"source-1", "source-1",
pairs pairs,
new GenericIngestionHintsRequest("TEXT", "policy-key", "policy-hint", "batch-1", true, false)
); );
when(ingestionGateway.ingest(any(SourceDescriptor.class))).thenReturn(new IngestionResult( when(ingestionGateway.ingest(any(SourceDescriptor.class))).thenReturn(new IngestionResult(
@ -83,8 +84,8 @@ class GenericDocumentImportControllerTest {
DocumentType.TEXT, DocumentType.TEXT,
DocumentFamily.GENERIC, DocumentFamily.GENERIC,
DocumentStatus.REPRESENTED, DocumentStatus.REPRESENTED,
"Text title from pair", "Top level title",
"de", "fr",
"text/plain", "text/plain",
null, null,
OffsetDateTime.now(), OffsetDateTime.now(),
@ -99,14 +100,17 @@ class GenericDocumentImportControllerTest {
verify(ingestionGateway).ingest(descriptorCaptor.capture()); verify(ingestionGateway).ingest(descriptorCaptor.capture());
SourceDescriptor descriptor = descriptorCaptor.getValue(); SourceDescriptor descriptor = descriptorCaptor.getValue();
assertThat(descriptor.textContent()).isEqualTo("Hello world"); assertThat(descriptor.textContent()).isEqualTo("Hello world");
assertThat(descriptor.attributes()).containsEntry("title", "Text title from pair"); assertThat(descriptor.attributes()).containsEntry("title", "Top level title");
assertThat(descriptor.attributes()).containsEntry("languageCode", "de"); assertThat(descriptor.attributes()).containsEntry("languageCode", "fr");
assertThat(descriptor.attributes()).containsEntry("country", "AT"); assertThat(descriptor.attributes()).containsEntry("documentTypeHint", "TEXT");
assertThat(descriptor.attributes()).containsEntry("estimatedValue", "125000.5"); assertThat(descriptor.attributes()).containsEntry("embeddingPolicyKey", "policy-key");
assertThat(descriptor.attributes()).containsEntry("publishedDate", "2026-04-21"); assertThat(descriptor.attributes()).containsEntry("embeddingPolicyHint", "policy-hint");
assertThat(descriptor.attributes()).containsEntry("version", "7"); assertThat(descriptor.attributes()).containsEntry("importBatchId", "batch-1");
assertThat(descriptor.attributes()).containsEntry("lastSyncedAt", "2026-04-21T11:05+02:00"); assertThat(descriptor.attributes()).containsEntry("wrapperDocument", "true");
assertThat(descriptor.attributes()).doesNotContainKey("status"); assertThat(descriptor.attributes()).containsEntry("containerDocument", "false");
assertThat(descriptor.attributes()).doesNotContainKeys(
"country", "estimatedValue", "publishedDate", "version", "lastSyncedAt", "status"
);
verify(documentAttributeService).addAttributes(documentId, pairs); verify(documentAttributeService).addAttributes(documentId, pairs);
assertThat(response.getBody()).isNotNull(); assertThat(response.getBody()).isNotNull();

View File

@ -0,0 +1,57 @@
package at.procon.dip.search.repository;
import static org.assertj.core.api.Assertions.assertThat;
import at.procon.dip.domain.document.DocumentAttributeValueType;
import at.procon.dip.search.api.SearchExecutionContext;
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
import at.procon.dip.search.dto.SearchRequest;
import at.procon.dip.search.spi.SearchDocumentScope;
import org.junit.jupiter.api.Test;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
/**
 * Checks the SQL text and named parameters that
 * {@code SearchSqlFilterSupport.appendCommonFilters} contributes when a search
 * request carries document attribute filters.
 */
class SearchSqlFilterSupportTest {

    /**
     * Sends two attribute filters through {@code appendCommonFilters}: a STRING/EQ filter
     * on "status" with the context "ted", and an INTEGER/GTE filter on "version" without
     * a context. The rendered SQL must reference the attribute table and the
     * index-suffixed placeholders, and the parameter source must hold the matching
     * values, with the context bound as "TED".
     */
    @Test
    void shouldAppendAttributeFilterSql() {
        // First filter: string equality, scoped by an attribute context.
        DocumentAttributeFilterRequest statusFilter = DocumentAttributeFilterRequest.builder()
                .name("status")
                .context("ted")
                .type(DocumentAttributeValueType.STRING)
                .operator(DocumentAttributeFilterOperator.EQ)
                .stringValue("closed")
                .build();

        // Second filter: integer lower bound, no context given.
        DocumentAttributeFilterRequest versionFilter = DocumentAttributeFilterRequest.builder()
                .name("version")
                .type(DocumentAttributeValueType.INTEGER)
                .operator(DocumentAttributeFilterOperator.GTE)
                .integerValue(3L)
                .build();

        SearchRequest searchRequest = SearchRequest.builder()
                .queryText("kitchen")
                .attributeFilters(java.util.List.of(statusFilter, versionFilter))
                .build();

        SearchDocumentScope emptyScope =
                new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null);

        SearchExecutionContext executionContext = SearchExecutionContext.builder()
                .request(searchRequest)
                .scope(emptyScope)
                .page(0)
                .size(20)
                .build();

        StringBuilder query = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
        MapSqlParameterSource boundParams = new MapSqlParameterSource();

        SearchSqlFilterSupport.appendCommonFilters(query, boundParams, executionContext, "d", "dtr", false);

        // SQL text: attribute table plus one placeholder per filter index.
        assertThat(query.toString())
                .contains("doc.doc_document_attribute")
                .contains("attribute_context = :attributeContext0")
                .contains("integer_value >= :attributeValue1");

        // Bound values for filter #0 ("status").
        assertThat(boundParams.getValue("attributeName0")).isEqualTo("status");
        assertThat(boundParams.getValue("attributeContext0")).isEqualTo("TED");
        assertThat(boundParams.getValue("attributeValue0")).isEqualTo("closed");

        // Bound values for filter #1 ("version").
        assertThat(boundParams.getValue("attributeName1")).isEqualTo("version");
        assertThat(boundParams.getValue("attributeValue1")).isEqualTo(3L);
    }
}