diff --git a/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentAttributeResponse.java b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentAttributeResponse.java new file mode 100644 index 0000000..d0ec04e --- /dev/null +++ b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentAttributeResponse.java @@ -0,0 +1,23 @@ +package at.procon.dip.domain.document.dto; + +import at.procon.dip.domain.document.DocumentAttributeValueType; +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.OffsetDateTime; +import java.util.UUID; + +public record GenericDocumentAttributeResponse( + UUID id, + String name, + String normalizedName, + String context, + DocumentAttributeValueType type, + String stringValue, + Long integerValue, + BigDecimal numberValue, + LocalDate dateValue, + OffsetDateTime datetimeValue, + Boolean booleanValue, + OffsetDateTime createdAt +) { +} diff --git a/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentDetailResponse.java b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentDetailResponse.java new file mode 100644 index 0000000..4c6e84f --- /dev/null +++ b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentDetailResponse.java @@ -0,0 +1,30 @@ +package at.procon.dip.domain.document.dto; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentStatus; +import at.procon.dip.domain.document.DocumentType; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.UUID; + +public record GenericDocumentDetailResponse( + UUID id, + String ownerTenantKey, + DocumentVisibility visibility, + DocumentType documentType, + DocumentFamily documentFamily, + DocumentStatus status, + String title, + String summary, + String languageCode, + String mimeType, + String businessKey, + String dedupHash, + OffsetDateTime createdAt, + OffsetDateTime updatedAt, + List 
attributes, + List sources, + List representations +) { +} diff --git a/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentRepresentationResponse.java b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentRepresentationResponse.java new file mode 100644 index 0000000..0eea5a2 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentRepresentationResponse.java @@ -0,0 +1,22 @@ +package at.procon.dip.domain.document.dto; + +import at.procon.dip.domain.document.RepresentationType; +import java.time.OffsetDateTime; +import java.util.UUID; + +public record GenericDocumentRepresentationResponse( + UUID id, + RepresentationType representationType, + String builderKey, + String languageCode, + Integer tokenCount, + Integer charCount, + Integer chunkIndex, + Integer chunkStartOffset, + Integer chunkEndOffset, + boolean primaryRepresentation, + UUID contentId, + String textPreview, + OffsetDateTime createdAt +) { +} diff --git a/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentSourceResponse.java b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentSourceResponse.java new file mode 100644 index 0000000..96fed70 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/document/dto/GenericDocumentSourceResponse.java @@ -0,0 +1,18 @@ +package at.procon.dip.domain.document.dto; + +import at.procon.dip.domain.document.SourceType; +import java.time.OffsetDateTime; +import java.util.UUID; + +public record GenericDocumentSourceResponse( + UUID id, + SourceType sourceType, + String externalSourceId, + String sourceUri, + String sourceFilename, + UUID parentSourceId, + String importBatchId, + OffsetDateTime receivedAt, + OffsetDateTime createdAt +) { +} diff --git a/src/main/java/at/procon/dip/domain/document/repository/DocumentAttributeRepository.java b/src/main/java/at/procon/dip/domain/document/repository/DocumentAttributeRepository.java index 4d5c734..ef18ce5 100644 --- 
a/src/main/java/at/procon/dip/domain/document/repository/DocumentAttributeRepository.java +++ b/src/main/java/at/procon/dip/domain/document/repository/DocumentAttributeRepository.java @@ -1,13 +1,13 @@ package at.procon.dip.domain.document.repository; import at.procon.dip.domain.document.entity.DocumentAttribute; -import at.procon.dip.domain.document.entity.DocumentAttributeName; -import org.springframework.data.jpa.repository.JpaRepository; - -import java.util.Optional; +import java.util.List; import java.util.UUID; +import org.springframework.data.jpa.repository.JpaRepository; public interface DocumentAttributeRepository extends JpaRepository { boolean existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(UUID documentId, UUID attributeNameId, String attributeValueHash); + + List findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(UUID documentId); } diff --git a/src/main/java/at/procon/dip/domain/document/service/GenericDocumentReadService.java b/src/main/java/at/procon/dip/domain/document/service/GenericDocumentReadService.java new file mode 100644 index 0000000..7165058 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/document/service/GenericDocumentReadService.java @@ -0,0 +1,104 @@ +package at.procon.dip.domain.document.service; + +import at.procon.dip.domain.document.dto.GenericDocumentAttributeResponse; +import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse; +import at.procon.dip.domain.document.dto.GenericDocumentRepresentationResponse; +import at.procon.dip.domain.document.dto.GenericDocumentSourceResponse; +import at.procon.dip.domain.document.entity.Document; +import at.procon.dip.domain.document.repository.DocumentAttributeRepository; +import at.procon.dip.domain.document.repository.DocumentRepository; +import at.procon.dip.domain.document.repository.DocumentSourceRepository; +import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; +import 
java.util.Optional; +import java.util.UUID; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +@Service +@RequiredArgsConstructor +@Transactional(readOnly = true) +public class GenericDocumentReadService { + + private final DocumentRepository documentRepository; + private final DocumentAttributeRepository documentAttributeRepository; + private final DocumentSourceRepository documentSourceRepository; + private final DocumentTextRepresentationRepository documentTextRepresentationRepository; + + public Optional findById(UUID documentId) { + return documentRepository.findById(documentId).map(this::toResponse); + } + + private GenericDocumentDetailResponse toResponse(Document document) { + return new GenericDocumentDetailResponse( + document.getId(), + document.getOwnerTenant() == null ? null : document.getOwnerTenant().getTenantKey(), + document.getVisibility(), + document.getDocumentType(), + document.getDocumentFamily(), + document.getStatus(), + document.getTitle(), + document.getSummary(), + document.getLanguageCode(), + document.getMimeType(), + document.getBusinessKey(), + document.getDedupHash(), + document.getCreatedAt(), + document.getUpdatedAt(), + documentAttributeRepository.findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(document.getId()) + .stream() + .map(attribute -> new GenericDocumentAttributeResponse( + attribute.getId(), + attribute.getAttributeName().getAttributeName(), + attribute.getAttributeName().getNormalizedName(), + attribute.getAttributeName().getAttributeContext(), + attribute.getAttributeName().getAttributeValueType(), + attribute.getStringValue(), + attribute.getIntegerValue(), + attribute.getNumberValue(), + attribute.getDateValue(), + attribute.getDatetimeValue(), + attribute.getBooleanValue(), + attribute.getCreatedAt() + )) + .toList(), + 
documentSourceRepository.findByDocument_Id(document.getId()).stream() + .map(source -> new GenericDocumentSourceResponse( + source.getId(), + source.getSourceType(), + source.getExternalSourceId(), + source.getSourceUri(), + source.getSourceFilename(), + source.getParentSourceId(), + source.getImportBatchId(), + source.getReceivedAt(), + source.getCreatedAt() + )) + .toList(), + documentTextRepresentationRepository.findByDocument_Id(document.getId()).stream() + .map(representation -> new GenericDocumentRepresentationResponse( + representation.getId(), + representation.getRepresentationType(), + representation.getBuilderKey(), + representation.getLanguageCode(), + representation.getTokenCount(), + representation.getCharCount(), + representation.getChunkIndex(), + representation.getChunkStartOffset(), + representation.getChunkEndOffset(), + representation.isPrimaryRepresentation(), + representation.getContent() == null ? null : representation.getContent().getId(), + preview(representation.getTextBody()), + representation.getCreatedAt() + )) + .toList() + ); + } + + private String preview(String textBody) { + if (textBody == null) { + return null; + } + return textBody.length() <= 400 ? 
textBody : textBody.substring(0, 400); + } +} diff --git a/src/main/java/at/procon/dip/domain/document/web/GenericDocumentController.java b/src/main/java/at/procon/dip/domain/document/web/GenericDocumentController.java new file mode 100644 index 0000000..6f1ad69 --- /dev/null +++ b/src/main/java/at/procon/dip/domain/document/web/GenericDocumentController.java @@ -0,0 +1,29 @@ +package at.procon.dip.domain.document.web; + +import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse; +import at.procon.dip.domain.document.service.GenericDocumentReadService; +import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; +import at.procon.dip.runtime.config.RuntimeMode; +import java.util.UUID; +import lombok.RequiredArgsConstructor; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/v1/dip/documents") +@RequiredArgsConstructor +@ConditionalOnRuntimeMode(RuntimeMode.NEW) +public class GenericDocumentController { + + private final GenericDocumentReadService documentReadService; + + @GetMapping("/{id}") + public ResponseEntity getDocument(@PathVariable UUID id) { + return documentReadService.findById(id) + .map(ResponseEntity::ok) + .orElse(ResponseEntity.notFound().build()); + } +} diff --git a/src/main/java/at/procon/dip/ingestion/controller/GenericDocumentImportController.java b/src/main/java/at/procon/dip/ingestion/controller/GenericDocumentImportController.java index b8fe902..fd88dfd 100644 --- a/src/main/java/at/procon/dip/ingestion/controller/GenericDocumentImportController.java +++ b/src/main/java/at/procon/dip/ingestion/controller/GenericDocumentImportController.java @@ -8,7 +8,7 @@ import at.procon.dip.domain.document.service.DocumentAttributeService; import 
at.procon.dip.domain.tenant.TenantRef; import at.procon.dip.ingestion.config.DipIngestionProperties; import at.procon.dip.ingestion.dto.GenericImportResponse; -import at.procon.dip.ingestion.dto.GenericNameValuePairRequest; +import at.procon.dip.ingestion.dto.GenericIngestionHintsRequest; import at.procon.dip.ingestion.dto.GenericTextImportRequest; import at.procon.dip.ingestion.service.DocumentIngestionGateway; import at.procon.dip.ingestion.spi.IngestionResult; @@ -16,7 +16,6 @@ import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy; import at.procon.dip.ingestion.spi.SourceDescriptor; import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode; import at.procon.dip.runtime.config.RuntimeMode; -import java.math.BigDecimal; import java.nio.charset.StandardCharsets; import java.time.OffsetDateTime; import java.util.LinkedHashMap; @@ -106,54 +105,34 @@ public class GenericDocumentImportController { private Map buildDescriptorAttributes(GenericTextImportRequest request) { Map attributes = new LinkedHashMap<>(); - if (request.attributes() != null) { - for (GenericNameValuePairRequest attribute : request.attributes()) { - if (attribute == null || !StringUtils.hasText(attribute.name()) || !isGlobalContext(attribute.context())) { - continue; - } - String renderedValue = renderAsDescriptorValue(attribute); - if (StringUtils.hasText(renderedValue)) { - attributes.put(attribute.name().trim(), renderedValue); - } - } - } if (StringUtils.hasText(request.languageCode())) { attributes.put("languageCode", request.languageCode()); } if (StringUtils.hasText(request.title())) { attributes.put("title", request.title()); } + GenericIngestionHintsRequest hints = request.ingestionHints(); + if (hints != null) { + putIfHasText(attributes, "documentTypeHint", hints.documentTypeHint()); + putIfHasText(attributes, "embeddingPolicyKey", hints.embeddingPolicyKey()); + putIfHasText(attributes, "embeddingPolicyHint", hints.embeddingPolicyHint()); + putIfHasText(attributes, 
"importBatchId", hints.importBatchId()); + putIfNotNull(attributes, "wrapperDocument", hints.wrapperDocument()); + putIfNotNull(attributes, "containerDocument", hints.containerDocument()); + } return attributes; } - private String renderAsDescriptorValue(GenericNameValuePairRequest attribute) { - if (StringUtils.hasText(attribute.value())) { - return attribute.value().trim(); + private void putIfHasText(Map attributes, String key, String value) { + if (StringUtils.hasText(value)) { + attributes.put(key, value.trim()); } - if (StringUtils.hasText(attribute.stringValue())) { - return attribute.stringValue().trim(); - } - if (attribute.integerValue() != null) { - return attribute.integerValue().toString(); - } - if (attribute.numberValue() != null) { - BigDecimal number = attribute.numberValue().stripTrailingZeros(); - return number.toPlainString(); - } - if (attribute.dateValue() != null) { - return attribute.dateValue().toString(); - } - if (attribute.datetimeValue() != null) { - return attribute.datetimeValue().withNano(0).toString(); - } - if (attribute.booleanValue() != null) { - return attribute.booleanValue().toString(); - } - return null; } - private boolean isGlobalContext(String context) { - return !StringUtils.hasText(context) || DocumentAttributeService.GLOBAL_CONTEXT.equalsIgnoreCase(context.trim()); + private void putIfNotNull(Map attributes, String key, Boolean value) { + if (value != null) { + attributes.put(key, value.toString()); + } } private void ensureRestUploadEnabled() { diff --git a/src/main/java/at/procon/dip/ingestion/dto/GenericIngestionHintsRequest.java b/src/main/java/at/procon/dip/ingestion/dto/GenericIngestionHintsRequest.java new file mode 100644 index 0000000..2fba163 --- /dev/null +++ b/src/main/java/at/procon/dip/ingestion/dto/GenericIngestionHintsRequest.java @@ -0,0 +1,11 @@ +package at.procon.dip.ingestion.dto; + +public record GenericIngestionHintsRequest( + String documentTypeHint, + String embeddingPolicyKey, + String 
/**
 * Keys of internal ingestion attributes and a helper for reading boolean-valued ones
 * from a string attribute map. Not instantiable.
 */
public final class IngestionInternalAttributes {

    /** Attribute key {@code "_deferLexicalIndex"}. */
    public static final String DEFER_LEXICAL_INDEX = "_deferLexicalIndex";

    private IngestionInternalAttributes() {
    }

    /**
     * Tells whether the attribute stored under {@code key} is the string {@code "true"},
     * compared case-insensitively as by {@link Boolean#parseBoolean(String)}.
     *
     * @param attributes attribute map, may be {@code null} or empty
     * @param key attribute key to look up
     * @return {@code true} only if the map holds a value equal to "true" (ignoring case)
     *         for {@code key}; {@code false} for a missing map, missing key, {@code null}
     *         value or any other text
     */
    public static boolean isTruthy(Map<String, String> attributes, String key) {
        if (attributes == null || attributes.isEmpty()) {
            return false;
        }
        // parseBoolean already yields false for null, so no separate null check is needed.
        return Boolean.parseBoolean(attributes.get(key));
    }
}
/**
 * Thrown when a source identifier of a given source type is already linked to a different
 * payload. Annotated with {@code @ResponseStatus(HttpStatus.CONFLICT)}.
 */
@ResponseStatus(HttpStatus.CONFLICT)
public class SourceIdentifierConflictException extends RuntimeException {

    /**
     * @param sourceType source type the identifier belongs to; rendered into the message
     * @param sourceIdentifier the conflicting identifier; rendered into the message
     */
    public SourceIdentifierConflictException(SourceType sourceType, String sourceIdentifier) {
        super("Source identifier '" + sourceIdentifier + "' for source type '" + sourceType + "' is already linked to a different payload");
    }
}

/**
 * Comparison operators available for document attribute filters in search requests.
 */
public enum DocumentAttributeFilterOperator {
    // Equality; the default operator of DocumentAttributeFilterRequest.
    EQ,
    // Negated equality.
    NE,
    // Strictly greater than.
    GT,
    // Greater than or equal.
    GTE,
    // Strictly less than.
    LT,
    // Less than or equal.
    LTE,
    // Presence check; NOTE(review): presumably needs no value — confirm against the SQL builder.
    EXISTS
}

/**
 * One attribute filter of a search request: the attribute to match (name, optional
 * context, optional explicit type), the operator, and one value slot per value type.
 * Accessors, builder and constructors are generated by Lombok.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class DocumentAttributeFilterRequest {
    // Attribute name to filter on.
    private String name;
    // Optional attribute context.
    private String context;
    // Optional explicit value type.
    private DocumentAttributeValueType type;
    // Defaults to EQ when built through the Lombok builder without an explicit operator.
    @Builder.Default
    private DocumentAttributeFilterOperator operator = DocumentAttributeFilterOperator.EQ;
    // Typed value slots; NOTE(review): presumably exactly one is expected to be set — confirm.
    private String stringValue;
    private Long integerValue;
    private BigDecimal numberValue;
    private LocalDate dateValue;
    private OffsetDateTime datetimeValue;
    private Boolean booleanValue;
}
+} diff --git a/src/main/java/at/procon/dip/search/dto/SearchRequest.java b/src/main/java/at/procon/dip/search/dto/SearchRequest.java index 0e0278e..45e2ac0 100644 --- a/src/main/java/at/procon/dip/search/dto/SearchRequest.java +++ b/src/main/java/at/procon/dip/search/dto/SearchRequest.java @@ -31,6 +31,7 @@ public class SearchRequest { private Set representationTypes; private OffsetDateTime createdFrom; private OffsetDateTime createdTo; + private java.util.List attributeFilters; private Integer page; private Integer size; diff --git a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java index b9c8788..9a7862f 100644 --- a/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java +++ b/src/main/java/at/procon/dip/search/repository/SearchSqlFilterSupport.java @@ -1,17 +1,25 @@ package at.procon.dip.search.repository; import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentAttributeValueType; import at.procon.dip.domain.document.DocumentFamily; import at.procon.dip.domain.document.DocumentType; import at.procon.dip.domain.document.RepresentationType; import at.procon.dip.search.api.SearchExecutionContext; +import at.procon.dip.search.dto.DocumentAttributeFilterOperator; +import at.procon.dip.search.dto.DocumentAttributeFilterRequest; import at.procon.dip.search.dto.SearchRepresentationSelectionMode; +import java.math.BigDecimal; +import java.time.LocalDate; +import java.time.OffsetDateTime; import java.util.Collection; import java.util.List; +import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; import org.springframework.jdbc.core.namedparam.MapSqlParameterSource; import org.springframework.util.CollectionUtils; +import org.springframework.util.StringUtils; final class SearchSqlFilterSupport { @@ -90,6 +98,144 @@ final class SearchSqlFilterSupport { sql.append(" AND 
").append(documentAlias).append(".id IN (:candidateDocumentIds)"); params.addValue("candidateDocumentIds", context.getScope().candidateDocumentIds()); } + + appendAttributeFilters(sql, params, context, documentAlias); + } + + private static void appendAttributeFilters(StringBuilder sql, + MapSqlParameterSource params, + SearchExecutionContext context, + String documentAlias) { + List filters = context.getRequest().getAttributeFilters(); + if (CollectionUtils.isEmpty(filters)) { + return; + } + int index = 0; + for (DocumentAttributeFilterRequest filter : filters) { + if (filter == null || !StringUtils.hasText(filter.getName())) { + continue; + } + String filterAlias = "af" + index; + String nameParam = "attributeName" + index; + String contextParam = "attributeContext" + index; + String valueParam = "attributeValue" + index; + + sql.append(" AND "); + DocumentAttributeFilterOperator operator = filter.getOperator() == null + ? DocumentAttributeFilterOperator.EQ + : filter.getOperator(); + if (operator == DocumentAttributeFilterOperator.NE) { + sql.append("NOT "); + } + sql.append("EXISTS (SELECT 1 FROM doc.doc_document_attribute ").append(filterAlias) + .append(" JOIN doc.doc_attribute_name ").append(filterAlias).append("n ON ") + .append(filterAlias).append("n.id = ").append(filterAlias).append(".attribute_name_id") + .append(" WHERE ").append(filterAlias).append(".document_id = ").append(documentAlias).append(".id") + .append(" AND ").append(filterAlias).append("n.normalized_name = :").append(nameParam); + params.addValue(nameParam, normalizeName(filter.getName())); + + if (StringUtils.hasText(filter.getContext())) { + sql.append(" AND ").append(filterAlias).append("n.attribute_context = :").append(contextParam); + params.addValue(contextParam, filter.getContext().trim().toUpperCase(Locale.ROOT)); + } + + if (operator != DocumentAttributeFilterOperator.EXISTS && operator != DocumentAttributeFilterOperator.NE) { + appendAttributeValuePredicate(sql, params, 
filterAlias, valueParam, filter, operator); + } else if (operator == DocumentAttributeFilterOperator.NE) { + appendAttributeValuePredicate(sql, params, filterAlias, valueParam, filter, DocumentAttributeFilterOperator.EQ); + } + sql.append(")"); + index++; + } + } + + private static void appendAttributeValuePredicate(StringBuilder sql, + MapSqlParameterSource params, + String filterAlias, + String valueParam, + DocumentAttributeFilterRequest filter, + DocumentAttributeFilterOperator operator) { + ResolvedAttributeFilterValue resolved = resolveFilterValue(filter); + String column = switch (resolved.type()) { + case STRING -> filterAlias + ".string_value"; + case INTEGER -> filterAlias + ".integer_value"; + case NUMBER -> filterAlias + ".number_value"; + case DATE -> filterAlias + ".date_value"; + case DATETIME -> filterAlias + ".datetime_value"; + case BOOLEAN -> filterAlias + ".boolean_value"; + }; + String sqlOperator = switch (operator) { + case EQ -> "="; + case GT -> ">"; + case GTE -> ">="; + case LT -> "<"; + case LTE -> "<="; + case NE, EXISTS -> throw new IllegalArgumentException("Unsupported attribute operator in value predicate: " + operator); + }; + + if ((resolved.type() == DocumentAttributeValueType.STRING || resolved.type() == DocumentAttributeValueType.BOOLEAN) + && operator != DocumentAttributeFilterOperator.EQ) { + throw new IllegalArgumentException("Only EQ/NE operators are supported for " + resolved.type() + " attribute filters"); + } + + sql.append(" AND ").append(column).append(" ").append(sqlOperator).append(" :").append(valueParam); + params.addValue(valueParam, resolved.value()); + } + + private static ResolvedAttributeFilterValue resolveFilterValue(DocumentAttributeFilterRequest filter) { + DocumentAttributeValueType type = filter.getType(); + if (type == null) { + if (StringUtils.hasText(filter.getStringValue())) { + type = DocumentAttributeValueType.STRING; + } else if (filter.getIntegerValue() != null) { + type = 
DocumentAttributeValueType.INTEGER; + } else if (filter.getNumberValue() != null) { + type = DocumentAttributeValueType.NUMBER; + } else if (filter.getDateValue() != null) { + type = DocumentAttributeValueType.DATE; + } else if (filter.getDatetimeValue() != null) { + type = DocumentAttributeValueType.DATETIME; + } else if (filter.getBooleanValue() != null) { + type = DocumentAttributeValueType.BOOLEAN; + } else { + throw new IllegalArgumentException("Attribute filter requires a typed value unless operator is EXISTS"); + } + } + + Object value = switch (type) { + case STRING -> requireText(filter.getStringValue(), filter.getName(), "string"); + case INTEGER -> requireValue(filter.getIntegerValue(), filter.getName(), "integer"); + case NUMBER -> requireNumber(filter.getNumberValue(), filter.getName()); + case DATE -> requireValue(filter.getDateValue(), filter.getName(), "date"); + case DATETIME -> requireValue(filter.getDatetimeValue(), filter.getName(), "datetime"); + case BOOLEAN -> requireValue(filter.getBooleanValue(), filter.getName(), "boolean"); + }; + return new ResolvedAttributeFilterValue(type, value); + } + + private static String requireText(String value, String name, String kind) { + if (!StringUtils.hasText(value)) { + throw new IllegalArgumentException("Attribute filter '" + name + "' requires a " + kind + " value"); + } + return value.trim(); + } + + private static BigDecimal requireNumber(BigDecimal value, String name) { + if (value == null) { + throw new IllegalArgumentException("Attribute filter '" + name + "' requires a number value"); + } + return value.stripTrailingZeros(); + } + + private static T requireValue(T value, String name, String kind) { + if (value == null) { + throw new IllegalArgumentException("Attribute filter '" + name + "' requires a " + kind + " value"); + } + return value; + } + + private static String normalizeName(String name) { + return name.trim().toLowerCase(Locale.ROOT); } private static Set firstNonEmpty(Set primary, Set 
fallback) { @@ -99,4 +245,7 @@ final class SearchSqlFilterSupport { private static List enumNames(Collection> values) { return values.stream().map(Enum::name).collect(Collectors.toList()); } + + private record ResolvedAttributeFilterValue(DocumentAttributeValueType type, Object value) { + } } diff --git a/src/test/java/at/procon/dip/domain/document/service/GenericDocumentReadServiceTest.java b/src/test/java/at/procon/dip/domain/document/service/GenericDocumentReadServiceTest.java new file mode 100644 index 0000000..24eeb18 --- /dev/null +++ b/src/test/java/at/procon/dip/domain/document/service/GenericDocumentReadServiceTest.java @@ -0,0 +1,107 @@ +package at.procon.dip.domain.document.service; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +import at.procon.dip.domain.access.DocumentVisibility; +import at.procon.dip.domain.document.DocumentAttributeValueType; +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentStatus; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.RepresentationType; +import at.procon.dip.domain.document.SourceType; +import at.procon.dip.domain.document.entity.Document; +import at.procon.dip.domain.document.entity.DocumentAttribute; +import at.procon.dip.domain.document.entity.DocumentAttributeName; +import at.procon.dip.domain.document.entity.DocumentSource; +import at.procon.dip.domain.document.entity.DocumentTextRepresentation; +import at.procon.dip.domain.document.repository.DocumentAttributeRepository; +import at.procon.dip.domain.document.repository.DocumentRepository; +import at.procon.dip.domain.document.repository.DocumentSourceRepository; +import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import 
org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +@ExtendWith(MockitoExtension.class) +class GenericDocumentReadServiceTest { + + @Mock private DocumentRepository documentRepository; + @Mock private DocumentAttributeRepository documentAttributeRepository; + @Mock private DocumentSourceRepository documentSourceRepository; + @Mock private DocumentTextRepresentationRepository documentTextRepresentationRepository; + + private GenericDocumentReadService service; + + @BeforeEach + void setUp() { + service = new GenericDocumentReadService(documentRepository, documentAttributeRepository, documentSourceRepository, documentTextRepresentationRepository); + } + + @Test + void shouldReturnDocumentDetailWithAttributesSourcesAndRepresentations() { + UUID documentId = UUID.randomUUID(); + Document document = Document.builder() + .id(documentId) + .visibility(DocumentVisibility.PUBLIC) + .documentType(DocumentType.TEXT) + .documentFamily(DocumentFamily.GENERIC) + .status(DocumentStatus.REPRESENTED) + .title("Doc") + .languageCode("de") + .mimeType("text/plain") + .businessKey("API:ext-1") + .dedupHash("abc") + .build(); + when(documentRepository.findById(documentId)).thenReturn(Optional.of(document)); + when(documentAttributeRepository.findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(documentId)) + .thenReturn(List.of(DocumentAttribute.builder() + .id(UUID.randomUUID()) + .document(document) + .attributeName(DocumentAttributeName.builder() + .id(UUID.randomUUID()) + .attributeName("status") + .normalizedName("status") + .attributeContext("TED") + .attributeValueType(DocumentAttributeValueType.STRING) + .build()) + .stringValue("closed") + .createdAt(OffsetDateTime.now()) + .attributeValueHash("x") + .build())); + when(documentSourceRepository.findByDocument_Id(documentId)) + .thenReturn(List.of(DocumentSource.builder() + 
.id(UUID.randomUUID()) + .document(document) + .sourceType(SourceType.API) + .externalSourceId("ext-1") + .sourceFilename("sample.txt") + .receivedAt(OffsetDateTime.now()) + .build())); + when(documentTextRepresentationRepository.findByDocument_Id(documentId)) + .thenReturn(List.of(DocumentTextRepresentation.builder() + .id(UUID.randomUUID()) + .document(document) + .representationType(RepresentationType.SEMANTIC_TEXT) + .primaryRepresentation(true) + .textBody("hello world") + .build())); + + var response = service.findById(documentId); + + assertThat(response).isPresent(); + assertThat(response.get().id()).isEqualTo(documentId); + assertThat(response.get().attributes()).hasSize(1); + assertThat(response.get().attributes().getFirst().context()).isEqualTo("TED"); + assertThat(response.get().sources()).hasSize(1); + assertThat(response.get().sources().getFirst().externalSourceId()).isEqualTo("ext-1"); + assertThat(response.get().representations()).hasSize(1); + assertThat(response.get().representations().getFirst().textPreview()).isEqualTo("hello world"); + } +} diff --git a/src/test/java/at/procon/dip/domain/document/web/GenericDocumentControllerTest.java b/src/test/java/at/procon/dip/domain/document/web/GenericDocumentControllerTest.java new file mode 100644 index 0000000..c7d3d3a --- /dev/null +++ b/src/test/java/at/procon/dip/domain/document/web/GenericDocumentControllerTest.java @@ -0,0 +1,44 @@ +package at.procon.dip.domain.document.web; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.when; + +import at.procon.dip.domain.document.DocumentFamily; +import at.procon.dip.domain.document.DocumentStatus; +import at.procon.dip.domain.document.DocumentType; +import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse; +import at.procon.dip.domain.document.service.GenericDocumentReadService; +import java.util.List; +import java.util.Optional; +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; 
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+/** Unit test that calls GenericDocumentController.getDocument directly with a mocked GenericDocumentReadService. */
+@ExtendWith(MockitoExtension.class)
+class GenericDocumentControllerTest {
+
+    @Mock private GenericDocumentReadService readService;
+    private GenericDocumentController controller;
+    /** Instantiates the controller with the mocked read service before each test. */
+    @BeforeEach
+    void setUp() {
+        controller = new GenericDocumentController(readService);
+    }
+    /** When the read service returns a document, getDocument must answer with a 2xx status and a body carrying the same id. */
+    @Test
+    void shouldReturnDocumentWhenFound() {
+        UUID id = UUID.randomUUID();
+        when(readService.findById(id)).thenReturn(Optional.of(new GenericDocumentDetailResponse(
+                id, null, null, DocumentType.TEXT, DocumentFamily.GENERIC, DocumentStatus.REPRESENTED, // id, ownerTenantKey, visibility, documentType, documentFamily, status
+                "Doc", null, null, "text/plain", "API:1", "hash", null, null, List.of(), List.of(), List.of() // title, summary, languageCode, mimeType, businessKey, dedupHash, createdAt, updatedAt, attributes, sources, representations
+        )));
+        // Act and assert: the controller method is invoked directly on the instance built in setUp().
+        var response = controller.getDocument(id);
+        assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
+        assertThat(response.getBody()).isNotNull();
+        assertThat(response.getBody().id()).isEqualTo(id);
+    }
+}
diff --git a/src/test/java/at/procon/dip/ingestion/controller/GenericDocumentImportControllerTest.java b/src/test/java/at/procon/dip/ingestion/controller/GenericDocumentImportControllerTest.java
index 75f2fbb..40ce4d6 100644
--- a/src/test/java/at/procon/dip/ingestion/controller/GenericDocumentImportControllerTest.java
+++ b/src/test/java/at/procon/dip/ingestion/controller/GenericDocumentImportControllerTest.java
@@ -7,18 +7,19 @@ import static org.mockito.Mockito.when;
 
 import at.procon.dip.domain.access.DocumentAccessContext;
 import at.procon.dip.domain.access.DocumentVisibility;
-import at.procon.dip.domain.document.CanonicalDocumentMetadata;
 import at.procon.dip.domain.document.DocumentAttributeValueType;
 import at.procon.dip.domain.document.DocumentFamily;
 import at.procon.dip.domain.document.DocumentStatus;
 import at.procon.dip.domain.document.DocumentType;
 import at.procon.dip.domain.document.service.DocumentAttributeService;
 import
at.procon.dip.ingestion.config.DipIngestionProperties; +import at.procon.dip.ingestion.dto.GenericIngestionHintsRequest; import at.procon.dip.ingestion.dto.GenericNameValuePairRequest; import at.procon.dip.ingestion.dto.GenericTextImportRequest; import at.procon.dip.ingestion.service.DocumentIngestionGateway; import at.procon.dip.ingestion.spi.IngestionResult; import at.procon.dip.ingestion.spi.SourceDescriptor; +import at.procon.dip.domain.document.CanonicalDocumentMetadata; import java.math.BigDecimal; import java.time.LocalDate; import java.time.OffsetDateTime; @@ -46,18 +47,17 @@ class GenericDocumentImportControllerTest { DipIngestionProperties properties = new DipIngestionProperties(); properties.setEnabled(true); properties.setRestUploadEnabled(true); - properties.setDefaultVisibility(DocumentVisibility.PUBLIC); controller = new GenericDocumentImportController(properties, ingestionGateway, documentAttributeService); } @Test - void shouldPassGlobalTypedAttributesIntoIngestionAndPersistAllAttributes() { + void shouldPersistAttributesButUseOnlyTopLevelFieldsAndIngestionHintsForDescriptorAttributes() { UUID documentId = UUID.randomUUID(); OffsetDateTime syncedAt = OffsetDateTime.parse("2026-04-21T11:05:00+02:00"); List pairs = List.of( new GenericNameValuePairRequest("title", null, DocumentAttributeValueType.STRING, null, "Text title from pair", null, null, null, null, null), new GenericNameValuePairRequest("languageCode", null, DocumentAttributeValueType.STRING, null, "de", null, null, null, null, null), - new GenericNameValuePairRequest("country", null, null, "AT", null, null, null, null, null, null), + new GenericNameValuePairRequest("country", null, DocumentAttributeValueType.STRING, null, "AT", null, null, null, null, null), new GenericNameValuePairRequest("estimatedValue", null, DocumentAttributeValueType.NUMBER, null, null, null, new BigDecimal("125000.50"), null, null, null), new GenericNameValuePairRequest("publishedDate", null, 
DocumentAttributeValueType.DATE, null, null, null, null, LocalDate.of(2026, 4, 21), null, null), new GenericNameValuePairRequest("version", null, DocumentAttributeValueType.INTEGER, null, null, 7L, null, null, null, null), @@ -70,10 +70,11 @@ class GenericDocumentImportControllerTest { "text/plain", null, null, - null, - null, + "fr", + "Top level title", "source-1", - pairs + pairs, + new GenericIngestionHintsRequest("TEXT", "policy-key", "policy-hint", "batch-1", true, false) ); when(ingestionGateway.ingest(any(SourceDescriptor.class))).thenReturn(new IngestionResult( @@ -83,8 +84,8 @@ class GenericDocumentImportControllerTest { DocumentType.TEXT, DocumentFamily.GENERIC, DocumentStatus.REPRESENTED, - "Text title from pair", - "de", + "Top level title", + "fr", "text/plain", null, OffsetDateTime.now(), @@ -99,14 +100,17 @@ class GenericDocumentImportControllerTest { verify(ingestionGateway).ingest(descriptorCaptor.capture()); SourceDescriptor descriptor = descriptorCaptor.getValue(); assertThat(descriptor.textContent()).isEqualTo("Hello world"); - assertThat(descriptor.attributes()).containsEntry("title", "Text title from pair"); - assertThat(descriptor.attributes()).containsEntry("languageCode", "de"); - assertThat(descriptor.attributes()).containsEntry("country", "AT"); - assertThat(descriptor.attributes()).containsEntry("estimatedValue", "125000.5"); - assertThat(descriptor.attributes()).containsEntry("publishedDate", "2026-04-21"); - assertThat(descriptor.attributes()).containsEntry("version", "7"); - assertThat(descriptor.attributes()).containsEntry("lastSyncedAt", "2026-04-21T11:05+02:00"); - assertThat(descriptor.attributes()).doesNotContainKey("status"); + assertThat(descriptor.attributes()).containsEntry("title", "Top level title"); + assertThat(descriptor.attributes()).containsEntry("languageCode", "fr"); + assertThat(descriptor.attributes()).containsEntry("documentTypeHint", "TEXT"); + 
assertThat(descriptor.attributes()).containsEntry("embeddingPolicyKey", "policy-key");
+        assertThat(descriptor.attributes()).containsEntry("embeddingPolicyHint", "policy-hint");
+        assertThat(descriptor.attributes()).containsEntry("importBatchId", "batch-1");
+        assertThat(descriptor.attributes()).containsEntry("wrapperDocument", "true");
+        assertThat(descriptor.attributes()).containsEntry("containerDocument", "false");
+        assertThat(descriptor.attributes()).doesNotContainKeys(
+                "country", "estimatedValue", "publishedDate", "version", "lastSyncedAt", "status"
+        );
 
         verify(documentAttributeService).addAttributes(documentId, pairs);
         assertThat(response.getBody()).isNotNull();
diff --git a/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java b/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java
new file mode 100644
index 0000000..ad5cfcc
--- /dev/null
+++ b/src/test/java/at/procon/dip/search/repository/SearchSqlFilterSupportTest.java
@@ -0,0 +1,57 @@
+package at.procon.dip.search.repository;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import at.procon.dip.domain.document.DocumentAttributeValueType;
+import at.procon.dip.search.api.SearchExecutionContext;
+import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
+import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
+import at.procon.dip.search.dto.SearchRequest;
+import at.procon.dip.search.spi.SearchDocumentScope;
+import org.junit.jupiter.api.Test;
+import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
+/** Checks the SQL text and named parameters that SearchSqlFilterSupport.appendCommonFilters produces for attribute filters. */
+class SearchSqlFilterSupportTest {
+    /** Two attribute filters (STRING EQ with a context, INTEGER GTE without one) must yield an attribute-table clause and indexed parameters. */
+    @Test
+    void shouldAppendAttributeFilterSql() {
+        SearchRequest request = SearchRequest.builder()
+                .queryText("kitchen")
+                .attributeFilters(java.util.List.of(
+                        DocumentAttributeFilterRequest.builder()
+                                .name("status")
+                                .context("ted")
+                                .type(DocumentAttributeValueType.STRING)
+                                .operator(DocumentAttributeFilterOperator.EQ)
+                                .stringValue("closed")
+                                .build(),
+                        DocumentAttributeFilterRequest.builder()
+                                .name("version")
+                                .type(DocumentAttributeValueType.INTEGER)
+                                .operator(DocumentAttributeFilterOperator.GTE)
+                                .integerValue(3L)
+                                .build()
+                ))
+                .build();
+        SearchExecutionContext context = SearchExecutionContext.builder()
+                .request(request)
+                .scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null))
+                .page(0)
+                .size(20)
+                .build();
+        // Base statement; its table aliases d and dtr are the same strings passed to appendCommonFilters below.
+        StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
+        MapSqlParameterSource params = new MapSqlParameterSource();
+        SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false);
+        // Assert on the rendered SQL fragments and on the bound values; parameter names end in the filter's list index (0, 1).
+        String rendered = sql.toString();
+        assertThat(rendered).contains("doc.doc_document_attribute");
+        assertThat(rendered).contains("attribute_context = :attributeContext0");
+        assertThat(rendered).contains("integer_value >= :attributeValue1");
+        assertThat(params.getValue("attributeName0")).isEqualTo("status");
+        assertThat(params.getValue("attributeContext0")).isEqualTo("TED"); // the filter context was supplied as "ted"; the bound value is expected in upper case
+        assertThat(params.getValue("attributeValue0")).isEqualTo("closed");
+        assertThat(params.getValue("attributeName1")).isEqualTo("version");
+        assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
+    }
+}