text import improvements
This commit is contained in:
parent
66fb266dec
commit
a501176c83
|
|
@ -0,0 +1,23 @@
|
|||
package at.procon.dip.domain.document.dto;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDate;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.UUID;
|
||||
|
||||
public record GenericDocumentAttributeResponse(
|
||||
UUID id,
|
||||
String name,
|
||||
String normalizedName,
|
||||
String context,
|
||||
DocumentAttributeValueType type,
|
||||
String stringValue,
|
||||
Long integerValue,
|
||||
BigDecimal numberValue,
|
||||
LocalDate dateValue,
|
||||
OffsetDateTime datetimeValue,
|
||||
Boolean booleanValue,
|
||||
OffsetDateTime createdAt
|
||||
) {
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
package at.procon.dip.domain.document.dto;
|
||||
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
public record GenericDocumentDetailResponse(
|
||||
UUID id,
|
||||
String ownerTenantKey,
|
||||
DocumentVisibility visibility,
|
||||
DocumentType documentType,
|
||||
DocumentFamily documentFamily,
|
||||
DocumentStatus status,
|
||||
String title,
|
||||
String summary,
|
||||
String languageCode,
|
||||
String mimeType,
|
||||
String businessKey,
|
||||
String dedupHash,
|
||||
OffsetDateTime createdAt,
|
||||
OffsetDateTime updatedAt,
|
||||
List<GenericDocumentAttributeResponse> attributes,
|
||||
List<GenericDocumentSourceResponse> sources,
|
||||
List<GenericDocumentRepresentationResponse> representations
|
||||
) {
|
||||
}
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
package at.procon.dip.domain.document.dto;
|
||||
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.UUID;
|
||||
|
||||
public record GenericDocumentRepresentationResponse(
|
||||
UUID id,
|
||||
RepresentationType representationType,
|
||||
String builderKey,
|
||||
String languageCode,
|
||||
Integer tokenCount,
|
||||
Integer charCount,
|
||||
Integer chunkIndex,
|
||||
Integer chunkStartOffset,
|
||||
Integer chunkEndOffset,
|
||||
boolean primaryRepresentation,
|
||||
UUID contentId,
|
||||
String textPreview,
|
||||
OffsetDateTime createdAt
|
||||
) {
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
package at.procon.dip.domain.document.dto;
|
||||
|
||||
import at.procon.dip.domain.document.SourceType;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.UUID;
|
||||
|
||||
public record GenericDocumentSourceResponse(
|
||||
UUID id,
|
||||
SourceType sourceType,
|
||||
String externalSourceId,
|
||||
String sourceUri,
|
||||
String sourceFilename,
|
||||
UUID parentSourceId,
|
||||
String importBatchId,
|
||||
OffsetDateTime receivedAt,
|
||||
OffsetDateTime createdAt
|
||||
) {
|
||||
}
|
||||
|
|
@ -1,13 +1,13 @@
|
|||
package at.procon.dip.domain.document.repository;
|
||||
|
||||
import at.procon.dip.domain.document.entity.DocumentAttribute;
|
||||
import at.procon.dip.domain.document.entity.DocumentAttributeName;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
|
||||
import java.util.Optional;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
|
||||
public interface DocumentAttributeRepository extends JpaRepository<DocumentAttribute, UUID> {
|
||||
|
||||
boolean existsByDocument_IdAndAttributeName_IdAndAttributeValueHash(UUID documentId, UUID attributeNameId, String attributeValueHash);
|
||||
|
||||
List<DocumentAttribute> findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(UUID documentId);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,104 @@
|
|||
package at.procon.dip.domain.document.service;
|
||||
|
||||
import at.procon.dip.domain.document.dto.GenericDocumentAttributeResponse;
|
||||
import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse;
|
||||
import at.procon.dip.domain.document.dto.GenericDocumentRepresentationResponse;
|
||||
import at.procon.dip.domain.document.dto.GenericDocumentSourceResponse;
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.domain.document.repository.DocumentAttributeRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentSourceRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Transactional(readOnly = true)
|
||||
public class GenericDocumentReadService {
|
||||
|
||||
private final DocumentRepository documentRepository;
|
||||
private final DocumentAttributeRepository documentAttributeRepository;
|
||||
private final DocumentSourceRepository documentSourceRepository;
|
||||
private final DocumentTextRepresentationRepository documentTextRepresentationRepository;
|
||||
|
||||
public Optional<GenericDocumentDetailResponse> findById(UUID documentId) {
|
||||
return documentRepository.findById(documentId).map(this::toResponse);
|
||||
}
|
||||
|
||||
private GenericDocumentDetailResponse toResponse(Document document) {
|
||||
return new GenericDocumentDetailResponse(
|
||||
document.getId(),
|
||||
document.getOwnerTenant() == null ? null : document.getOwnerTenant().getTenantKey(),
|
||||
document.getVisibility(),
|
||||
document.getDocumentType(),
|
||||
document.getDocumentFamily(),
|
||||
document.getStatus(),
|
||||
document.getTitle(),
|
||||
document.getSummary(),
|
||||
document.getLanguageCode(),
|
||||
document.getMimeType(),
|
||||
document.getBusinessKey(),
|
||||
document.getDedupHash(),
|
||||
document.getCreatedAt(),
|
||||
document.getUpdatedAt(),
|
||||
documentAttributeRepository.findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(document.getId())
|
||||
.stream()
|
||||
.map(attribute -> new GenericDocumentAttributeResponse(
|
||||
attribute.getId(),
|
||||
attribute.getAttributeName().getAttributeName(),
|
||||
attribute.getAttributeName().getNormalizedName(),
|
||||
attribute.getAttributeName().getAttributeContext(),
|
||||
attribute.getAttributeName().getAttributeValueType(),
|
||||
attribute.getStringValue(),
|
||||
attribute.getIntegerValue(),
|
||||
attribute.getNumberValue(),
|
||||
attribute.getDateValue(),
|
||||
attribute.getDatetimeValue(),
|
||||
attribute.getBooleanValue(),
|
||||
attribute.getCreatedAt()
|
||||
))
|
||||
.toList(),
|
||||
documentSourceRepository.findByDocument_Id(document.getId()).stream()
|
||||
.map(source -> new GenericDocumentSourceResponse(
|
||||
source.getId(),
|
||||
source.getSourceType(),
|
||||
source.getExternalSourceId(),
|
||||
source.getSourceUri(),
|
||||
source.getSourceFilename(),
|
||||
source.getParentSourceId(),
|
||||
source.getImportBatchId(),
|
||||
source.getReceivedAt(),
|
||||
source.getCreatedAt()
|
||||
))
|
||||
.toList(),
|
||||
documentTextRepresentationRepository.findByDocument_Id(document.getId()).stream()
|
||||
.map(representation -> new GenericDocumentRepresentationResponse(
|
||||
representation.getId(),
|
||||
representation.getRepresentationType(),
|
||||
representation.getBuilderKey(),
|
||||
representation.getLanguageCode(),
|
||||
representation.getTokenCount(),
|
||||
representation.getCharCount(),
|
||||
representation.getChunkIndex(),
|
||||
representation.getChunkStartOffset(),
|
||||
representation.getChunkEndOffset(),
|
||||
representation.isPrimaryRepresentation(),
|
||||
representation.getContent() == null ? null : representation.getContent().getId(),
|
||||
preview(representation.getTextBody()),
|
||||
representation.getCreatedAt()
|
||||
))
|
||||
.toList()
|
||||
);
|
||||
}
|
||||
|
||||
private String preview(String textBody) {
|
||||
if (textBody == null) {
|
||||
return null;
|
||||
}
|
||||
return textBody.length() <= 400 ? textBody : textBody.substring(0, 400);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
package at.procon.dip.domain.document.web;
|
||||
|
||||
import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse;
|
||||
import at.procon.dip.domain.document.service.GenericDocumentReadService;
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import java.util.UUID;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/v1/dip/documents")
|
||||
@RequiredArgsConstructor
|
||||
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||
public class GenericDocumentController {
|
||||
|
||||
private final GenericDocumentReadService documentReadService;
|
||||
|
||||
@GetMapping("/{id}")
|
||||
public ResponseEntity<GenericDocumentDetailResponse> getDocument(@PathVariable UUID id) {
|
||||
return documentReadService.findById(id)
|
||||
.map(ResponseEntity::ok)
|
||||
.orElse(ResponseEntity.notFound().build());
|
||||
}
|
||||
}
|
||||
|
|
@ -8,7 +8,7 @@ import at.procon.dip.domain.document.service.DocumentAttributeService;
|
|||
import at.procon.dip.domain.tenant.TenantRef;
|
||||
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
||||
import at.procon.dip.ingestion.dto.GenericImportResponse;
|
||||
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
|
||||
import at.procon.dip.ingestion.dto.GenericIngestionHintsRequest;
|
||||
import at.procon.dip.ingestion.dto.GenericTextImportRequest;
|
||||
import at.procon.dip.ingestion.service.DocumentIngestionGateway;
|
||||
import at.procon.dip.ingestion.spi.IngestionResult;
|
||||
|
|
@ -16,7 +16,6 @@ import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
|
|||
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
||||
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||
import at.procon.dip.runtime.config.RuntimeMode;
|
||||
import java.math.BigDecimal;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.LinkedHashMap;
|
||||
|
|
@ -106,54 +105,34 @@ public class GenericDocumentImportController {
|
|||
|
||||
private Map<String, String> buildDescriptorAttributes(GenericTextImportRequest request) {
|
||||
Map<String, String> attributes = new LinkedHashMap<>();
|
||||
if (request.attributes() != null) {
|
||||
for (GenericNameValuePairRequest attribute : request.attributes()) {
|
||||
if (attribute == null || !StringUtils.hasText(attribute.name()) || !isGlobalContext(attribute.context())) {
|
||||
continue;
|
||||
}
|
||||
String renderedValue = renderAsDescriptorValue(attribute);
|
||||
if (StringUtils.hasText(renderedValue)) {
|
||||
attributes.put(attribute.name().trim(), renderedValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (StringUtils.hasText(request.languageCode())) {
|
||||
attributes.put("languageCode", request.languageCode());
|
||||
}
|
||||
if (StringUtils.hasText(request.title())) {
|
||||
attributes.put("title", request.title());
|
||||
}
|
||||
GenericIngestionHintsRequest hints = request.ingestionHints();
|
||||
if (hints != null) {
|
||||
putIfHasText(attributes, "documentTypeHint", hints.documentTypeHint());
|
||||
putIfHasText(attributes, "embeddingPolicyKey", hints.embeddingPolicyKey());
|
||||
putIfHasText(attributes, "embeddingPolicyHint", hints.embeddingPolicyHint());
|
||||
putIfHasText(attributes, "importBatchId", hints.importBatchId());
|
||||
putIfNotNull(attributes, "wrapperDocument", hints.wrapperDocument());
|
||||
putIfNotNull(attributes, "containerDocument", hints.containerDocument());
|
||||
}
|
||||
return attributes;
|
||||
}
|
||||
|
||||
private String renderAsDescriptorValue(GenericNameValuePairRequest attribute) {
|
||||
if (StringUtils.hasText(attribute.value())) {
|
||||
return attribute.value().trim();
|
||||
private void putIfHasText(Map<String, String> attributes, String key, String value) {
|
||||
if (StringUtils.hasText(value)) {
|
||||
attributes.put(key, value.trim());
|
||||
}
|
||||
if (StringUtils.hasText(attribute.stringValue())) {
|
||||
return attribute.stringValue().trim();
|
||||
}
|
||||
if (attribute.integerValue() != null) {
|
||||
return attribute.integerValue().toString();
|
||||
}
|
||||
if (attribute.numberValue() != null) {
|
||||
BigDecimal number = attribute.numberValue().stripTrailingZeros();
|
||||
return number.toPlainString();
|
||||
}
|
||||
if (attribute.dateValue() != null) {
|
||||
return attribute.dateValue().toString();
|
||||
}
|
||||
if (attribute.datetimeValue() != null) {
|
||||
return attribute.datetimeValue().withNano(0).toString();
|
||||
}
|
||||
if (attribute.booleanValue() != null) {
|
||||
return attribute.booleanValue().toString();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean isGlobalContext(String context) {
|
||||
return !StringUtils.hasText(context) || DocumentAttributeService.GLOBAL_CONTEXT.equalsIgnoreCase(context.trim());
|
||||
private void putIfNotNull(Map<String, String> attributes, String key, Boolean value) {
|
||||
if (value != null) {
|
||||
attributes.put(key, value.toString());
|
||||
}
|
||||
}
|
||||
|
||||
private void ensureRestUploadEnabled() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
package at.procon.dip.ingestion.dto;
|
||||
|
||||
/**
 * Optional ingestion hints that can accompany an import request. Every component may be
 * {@code null}.
 *
 * @param documentTypeHint    hint for the document type
 * @param embeddingPolicyKey  key of an embedding policy
 * @param embeddingPolicyHint hint for the embedding policy
 * @param importBatchId       identifier of the import batch
 * @param wrapperDocument     wrapper-document flag; {@code null} when not specified
 * @param containerDocument   container-document flag; {@code null} when not specified
 */
public record GenericIngestionHintsRequest(
        String documentTypeHint,
        String embeddingPolicyKey,
        String embeddingPolicyHint,
        String importBatchId,
        Boolean wrapperDocument,
        Boolean containerDocument
) {
}
|
||||
|
|
@ -12,6 +12,7 @@ public record GenericTextImportRequest(
|
|||
String languageCode,
|
||||
String title,
|
||||
String sourceIdentifier,
|
||||
List<GenericNameValuePairRequest> attributes
|
||||
List<GenericNameValuePairRequest> attributes,
|
||||
GenericIngestionHintsRequest ingestionHints
|
||||
) {
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
package at.procon.dip.ingestion.service;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Keys and helpers for internal attributes passed along with an ingestion as a
 * string-to-string map. Not instantiable.
 */
public final class IngestionInternalAttributes {

    /**
     * Attribute key {@code "_deferLexicalIndex"}.
     * NOTE(review): presumably asks the pipeline to defer lexical indexing; confirm with its consumers.
     */
    public static final String DEFER_LEXICAL_INDEX = "_deferLexicalIndex";

    private IngestionInternalAttributes() {
    }

    /**
     * Tells whether the attribute stored under {@code key} parses as boolean {@code true}.
     *
     * Parsing follows {@link Boolean#parseBoolean(String)}: only the text "true",
     * compared case-insensitively and without trimming, counts as truthy.
     *
     * @param attributes attribute map; {@code null} or empty yields {@code false}
     * @param key        key to look up
     * @return {@code true} only when the key is present and its value parses as {@code true}
     */
    public static boolean isTruthy(Map<String, String> attributes, String key) {
        boolean hasEntries = attributes != null && !attributes.isEmpty();
        // Boolean.parseBoolean(null) is false, so a missing key needs no separate check.
        return hasEntries && Boolean.parseBoolean(attributes.get(key));
    }
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
package at.procon.dip.ingestion.service;
|
||||
|
||||
import at.procon.dip.domain.document.SourceType;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||
|
||||
@ResponseStatus(HttpStatus.CONFLICT)
|
||||
public class SourceIdentifierConflictException extends RuntimeException {
|
||||
|
||||
public SourceIdentifierConflictException(SourceType sourceType, String sourceIdentifier) {
|
||||
super("Source identifier '" + sourceIdentifier + "' for source type '" + sourceType + "' is already linked to a different payload");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
package at.procon.dip.search.dto;
|
||||
|
||||
/**
 * Comparison operators available to document attribute filters in search requests.
 */
public enum DocumentAttributeFilterOperator {
    /** Equal to the filter value. */
    EQ,
    /** Not equal to the filter value. */
    NE,
    /** Greater than the filter value. */
    GT,
    /** Greater than or equal to the filter value. */
    GTE,
    /** Less than the filter value. */
    LT,
    /** Less than or equal to the filter value. */
    LTE,
    /** The attribute merely has to exist; no value is compared. */
    EXISTS
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package at.procon.dip.search.dto;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDate;
|
||||
import java.time.OffsetDateTime;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class DocumentAttributeFilterRequest {
|
||||
private String name;
|
||||
private String context;
|
||||
private DocumentAttributeValueType type;
|
||||
@Builder.Default
|
||||
private DocumentAttributeFilterOperator operator = DocumentAttributeFilterOperator.EQ;
|
||||
private String stringValue;
|
||||
private Long integerValue;
|
||||
private BigDecimal numberValue;
|
||||
private LocalDate dateValue;
|
||||
private OffsetDateTime datetimeValue;
|
||||
private Boolean booleanValue;
|
||||
}
|
||||
|
|
@ -31,6 +31,7 @@ public class SearchRequest {
|
|||
private Set<RepresentationType> representationTypes;
|
||||
private OffsetDateTime createdFrom;
|
||||
private OffsetDateTime createdTo;
|
||||
private java.util.List<DocumentAttributeFilterRequest> attributeFilters;
|
||||
|
||||
private Integer page;
|
||||
private Integer size;
|
||||
|
|
|
|||
|
|
@ -1,17 +1,25 @@
|
|||
package at.procon.dip.search.repository;
|
||||
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.search.api.SearchExecutionContext;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
||||
import at.procon.dip.search.dto.SearchRepresentationSelectionMode;
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDate;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
final class SearchSqlFilterSupport {
|
||||
|
||||
|
|
@ -90,6 +98,144 @@ final class SearchSqlFilterSupport {
|
|||
sql.append(" AND ").append(documentAlias).append(".id IN (:candidateDocumentIds)");
|
||||
params.addValue("candidateDocumentIds", context.getScope().candidateDocumentIds());
|
||||
}
|
||||
|
||||
appendAttributeFilters(sql, params, context, documentAlias);
|
||||
}
|
||||
|
||||
private static void appendAttributeFilters(StringBuilder sql,
|
||||
MapSqlParameterSource params,
|
||||
SearchExecutionContext context,
|
||||
String documentAlias) {
|
||||
List<DocumentAttributeFilterRequest> filters = context.getRequest().getAttributeFilters();
|
||||
if (CollectionUtils.isEmpty(filters)) {
|
||||
return;
|
||||
}
|
||||
int index = 0;
|
||||
for (DocumentAttributeFilterRequest filter : filters) {
|
||||
if (filter == null || !StringUtils.hasText(filter.getName())) {
|
||||
continue;
|
||||
}
|
||||
String filterAlias = "af" + index;
|
||||
String nameParam = "attributeName" + index;
|
||||
String contextParam = "attributeContext" + index;
|
||||
String valueParam = "attributeValue" + index;
|
||||
|
||||
sql.append(" AND ");
|
||||
DocumentAttributeFilterOperator operator = filter.getOperator() == null
|
||||
? DocumentAttributeFilterOperator.EQ
|
||||
: filter.getOperator();
|
||||
if (operator == DocumentAttributeFilterOperator.NE) {
|
||||
sql.append("NOT ");
|
||||
}
|
||||
sql.append("EXISTS (SELECT 1 FROM doc.doc_document_attribute ").append(filterAlias)
|
||||
.append(" JOIN doc.doc_attribute_name ").append(filterAlias).append("n ON ")
|
||||
.append(filterAlias).append("n.id = ").append(filterAlias).append(".attribute_name_id")
|
||||
.append(" WHERE ").append(filterAlias).append(".document_id = ").append(documentAlias).append(".id")
|
||||
.append(" AND ").append(filterAlias).append("n.normalized_name = :").append(nameParam);
|
||||
params.addValue(nameParam, normalizeName(filter.getName()));
|
||||
|
||||
if (StringUtils.hasText(filter.getContext())) {
|
||||
sql.append(" AND ").append(filterAlias).append("n.attribute_context = :").append(contextParam);
|
||||
params.addValue(contextParam, filter.getContext().trim().toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
if (operator != DocumentAttributeFilterOperator.EXISTS && operator != DocumentAttributeFilterOperator.NE) {
|
||||
appendAttributeValuePredicate(sql, params, filterAlias, valueParam, filter, operator);
|
||||
} else if (operator == DocumentAttributeFilterOperator.NE) {
|
||||
appendAttributeValuePredicate(sql, params, filterAlias, valueParam, filter, DocumentAttributeFilterOperator.EQ);
|
||||
}
|
||||
sql.append(")");
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
private static void appendAttributeValuePredicate(StringBuilder sql,
|
||||
MapSqlParameterSource params,
|
||||
String filterAlias,
|
||||
String valueParam,
|
||||
DocumentAttributeFilterRequest filter,
|
||||
DocumentAttributeFilterOperator operator) {
|
||||
ResolvedAttributeFilterValue resolved = resolveFilterValue(filter);
|
||||
String column = switch (resolved.type()) {
|
||||
case STRING -> filterAlias + ".string_value";
|
||||
case INTEGER -> filterAlias + ".integer_value";
|
||||
case NUMBER -> filterAlias + ".number_value";
|
||||
case DATE -> filterAlias + ".date_value";
|
||||
case DATETIME -> filterAlias + ".datetime_value";
|
||||
case BOOLEAN -> filterAlias + ".boolean_value";
|
||||
};
|
||||
String sqlOperator = switch (operator) {
|
||||
case EQ -> "=";
|
||||
case GT -> ">";
|
||||
case GTE -> ">=";
|
||||
case LT -> "<";
|
||||
case LTE -> "<=";
|
||||
case NE, EXISTS -> throw new IllegalArgumentException("Unsupported attribute operator in value predicate: " + operator);
|
||||
};
|
||||
|
||||
if ((resolved.type() == DocumentAttributeValueType.STRING || resolved.type() == DocumentAttributeValueType.BOOLEAN)
|
||||
&& operator != DocumentAttributeFilterOperator.EQ) {
|
||||
throw new IllegalArgumentException("Only EQ/NE operators are supported for " + resolved.type() + " attribute filters");
|
||||
}
|
||||
|
||||
sql.append(" AND ").append(column).append(" ").append(sqlOperator).append(" :").append(valueParam);
|
||||
params.addValue(valueParam, resolved.value());
|
||||
}
|
||||
|
||||
private static ResolvedAttributeFilterValue resolveFilterValue(DocumentAttributeFilterRequest filter) {
|
||||
DocumentAttributeValueType type = filter.getType();
|
||||
if (type == null) {
|
||||
if (StringUtils.hasText(filter.getStringValue())) {
|
||||
type = DocumentAttributeValueType.STRING;
|
||||
} else if (filter.getIntegerValue() != null) {
|
||||
type = DocumentAttributeValueType.INTEGER;
|
||||
} else if (filter.getNumberValue() != null) {
|
||||
type = DocumentAttributeValueType.NUMBER;
|
||||
} else if (filter.getDateValue() != null) {
|
||||
type = DocumentAttributeValueType.DATE;
|
||||
} else if (filter.getDatetimeValue() != null) {
|
||||
type = DocumentAttributeValueType.DATETIME;
|
||||
} else if (filter.getBooleanValue() != null) {
|
||||
type = DocumentAttributeValueType.BOOLEAN;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Attribute filter requires a typed value unless operator is EXISTS");
|
||||
}
|
||||
}
|
||||
|
||||
Object value = switch (type) {
|
||||
case STRING -> requireText(filter.getStringValue(), filter.getName(), "string");
|
||||
case INTEGER -> requireValue(filter.getIntegerValue(), filter.getName(), "integer");
|
||||
case NUMBER -> requireNumber(filter.getNumberValue(), filter.getName());
|
||||
case DATE -> requireValue(filter.getDateValue(), filter.getName(), "date");
|
||||
case DATETIME -> requireValue(filter.getDatetimeValue(), filter.getName(), "datetime");
|
||||
case BOOLEAN -> requireValue(filter.getBooleanValue(), filter.getName(), "boolean");
|
||||
};
|
||||
return new ResolvedAttributeFilterValue(type, value);
|
||||
}
|
||||
|
||||
private static String requireText(String value, String name, String kind) {
|
||||
if (!StringUtils.hasText(value)) {
|
||||
throw new IllegalArgumentException("Attribute filter '" + name + "' requires a " + kind + " value");
|
||||
}
|
||||
return value.trim();
|
||||
}
|
||||
|
||||
private static BigDecimal requireNumber(BigDecimal value, String name) {
|
||||
if (value == null) {
|
||||
throw new IllegalArgumentException("Attribute filter '" + name + "' requires a number value");
|
||||
}
|
||||
return value.stripTrailingZeros();
|
||||
}
|
||||
|
||||
private static <T> T requireValue(T value, String name, String kind) {
|
||||
if (value == null) {
|
||||
throw new IllegalArgumentException("Attribute filter '" + name + "' requires a " + kind + " value");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
private static String normalizeName(String name) {
|
||||
return name.trim().toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
private static <T> Set<T> firstNonEmpty(Set<T> primary, Set<T> fallback) {
|
||||
|
|
@ -99,4 +245,7 @@ final class SearchSqlFilterSupport {
|
|||
private static List<String> enumNames(Collection<? extends Enum<?>> values) {
|
||||
return values.stream().map(Enum::name).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private record ResolvedAttributeFilterValue(DocumentAttributeValueType type, Object value) {
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,107 @@
|
|||
package at.procon.dip.domain.document.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.domain.document.SourceType;
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.domain.document.entity.DocumentAttribute;
|
||||
import at.procon.dip.domain.document.entity.DocumentAttributeName;
|
||||
import at.procon.dip.domain.document.entity.DocumentSource;
|
||||
import at.procon.dip.domain.document.entity.DocumentTextRepresentation;
|
||||
import at.procon.dip.domain.document.repository.DocumentAttributeRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentSourceRepository;
|
||||
import at.procon.dip.domain.document.repository.DocumentTextRepresentationRepository;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
@ExtendWith(MockitoExtension.class)
|
||||
class GenericDocumentReadServiceTest {
|
||||
|
||||
@Mock private DocumentRepository documentRepository;
|
||||
@Mock private DocumentAttributeRepository documentAttributeRepository;
|
||||
@Mock private DocumentSourceRepository documentSourceRepository;
|
||||
@Mock private DocumentTextRepresentationRepository documentTextRepresentationRepository;
|
||||
|
||||
private GenericDocumentReadService service;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
service = new GenericDocumentReadService(documentRepository, documentAttributeRepository, documentSourceRepository, documentTextRepresentationRepository);
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldReturnDocumentDetailWithAttributesSourcesAndRepresentations() {
|
||||
UUID documentId = UUID.randomUUID();
|
||||
Document document = Document.builder()
|
||||
.id(documentId)
|
||||
.visibility(DocumentVisibility.PUBLIC)
|
||||
.documentType(DocumentType.TEXT)
|
||||
.documentFamily(DocumentFamily.GENERIC)
|
||||
.status(DocumentStatus.REPRESENTED)
|
||||
.title("Doc")
|
||||
.languageCode("de")
|
||||
.mimeType("text/plain")
|
||||
.businessKey("API:ext-1")
|
||||
.dedupHash("abc")
|
||||
.build();
|
||||
when(documentRepository.findById(documentId)).thenReturn(Optional.of(document));
|
||||
when(documentAttributeRepository.findByDocument_IdOrderByAttributeName_AttributeContextAscAttributeName_AttributeNameAsc(documentId))
|
||||
.thenReturn(List.of(DocumentAttribute.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.document(document)
|
||||
.attributeName(DocumentAttributeName.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.attributeName("status")
|
||||
.normalizedName("status")
|
||||
.attributeContext("TED")
|
||||
.attributeValueType(DocumentAttributeValueType.STRING)
|
||||
.build())
|
||||
.stringValue("closed")
|
||||
.createdAt(OffsetDateTime.now())
|
||||
.attributeValueHash("x")
|
||||
.build()));
|
||||
when(documentSourceRepository.findByDocument_Id(documentId))
|
||||
.thenReturn(List.of(DocumentSource.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.document(document)
|
||||
.sourceType(SourceType.API)
|
||||
.externalSourceId("ext-1")
|
||||
.sourceFilename("sample.txt")
|
||||
.receivedAt(OffsetDateTime.now())
|
||||
.build()));
|
||||
when(documentTextRepresentationRepository.findByDocument_Id(documentId))
|
||||
.thenReturn(List.of(DocumentTextRepresentation.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.document(document)
|
||||
.representationType(RepresentationType.SEMANTIC_TEXT)
|
||||
.primaryRepresentation(true)
|
||||
.textBody("hello world")
|
||||
.build()));
|
||||
|
||||
var response = service.findById(documentId);
|
||||
|
||||
assertThat(response).isPresent();
|
||||
assertThat(response.get().id()).isEqualTo(documentId);
|
||||
assertThat(response.get().attributes()).hasSize(1);
|
||||
assertThat(response.get().attributes().getFirst().context()).isEqualTo("TED");
|
||||
assertThat(response.get().sources()).hasSize(1);
|
||||
assertThat(response.get().sources().getFirst().externalSourceId()).isEqualTo("ext-1");
|
||||
assertThat(response.get().representations()).hasSize(1);
|
||||
assertThat(response.get().representations().getFirst().textPreview()).isEqualTo("hello world");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
package at.procon.dip.domain.document.web;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.dto.GenericDocumentDetailResponse;
|
||||
import at.procon.dip.domain.document.service.GenericDocumentReadService;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.extension.ExtendWith;
|
||||
import org.mockito.Mock;
|
||||
import org.mockito.junit.jupiter.MockitoExtension;
|
||||
|
||||
/**
 * Unit test for {@code GenericDocumentController}.
 *
 * <p>The controller is constructed by hand around a Mockito mock of
 * {@code GenericDocumentReadService}; the test calls the controller method
 * directly and inspects the returned response object.
 */
@ExtendWith(MockitoExtension.class)
|
||||
class GenericDocumentControllerTest {
|
||||
|
||||
// Mocked read service; stubbed per test to control what the controller sees.
@Mock private GenericDocumentReadService readService;
|
||||
// Controller under test, rebuilt before every test in setUp().
private GenericDocumentController controller;
|
||||
|
||||
/** Creates a fresh controller wired to the mocked read service. */
@BeforeEach
|
||||
void setUp() {
|
||||
controller = new GenericDocumentController(readService);
|
||||
}
|
||||
|
||||
/**
 * When the read service returns a document for the requested id, the controller
 * is expected to answer with a 2xx status and a non-null body carrying that id.
 */
@Test
|
||||
void shouldReturnDocumentWhenFound() {
|
||||
UUID id = UUID.randomUUID();
|
||||
// Stub the service with a minimal detail response. Positional record arguments:
// id, ownerTenantKey, visibility, documentType, documentFamily, status,
// title, summary, languageCode, mimeType, businessKey, dedupHash,
// createdAt, updatedAt, attributes, sources, representations.
when(readService.findById(id)).thenReturn(Optional.of(new GenericDocumentDetailResponse(
|
||||
id, null, null, DocumentType.TEXT, DocumentFamily.GENERIC, DocumentStatus.REPRESENTED,
|
||||
"Doc", null, null, "text/plain", "API:1", "hash", null, null, List.of(), List.of(), List.of()
|
||||
)));
|
||||
|
||||
var response = controller.getDocument(id);
|
||||
// Only the success status family and the id round-trip are verified here;
// the remaining response fields are not asserted.
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
assertThat(response.getBody().id()).isEqualTo(id);
|
||||
}
|
||||
}
|
||||
|
|
@ -7,18 +7,19 @@ import static org.mockito.Mockito.when;
|
|||
|
||||
import at.procon.dip.domain.access.DocumentAccessContext;
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.CanonicalDocumentMetadata;
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.service.DocumentAttributeService;
|
||||
import at.procon.dip.ingestion.config.DipIngestionProperties;
|
||||
import at.procon.dip.ingestion.dto.GenericIngestionHintsRequest;
|
||||
import at.procon.dip.ingestion.dto.GenericNameValuePairRequest;
|
||||
import at.procon.dip.ingestion.dto.GenericTextImportRequest;
|
||||
import at.procon.dip.ingestion.service.DocumentIngestionGateway;
|
||||
import at.procon.dip.ingestion.spi.IngestionResult;
|
||||
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
||||
import at.procon.dip.domain.document.CanonicalDocumentMetadata;
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDate;
|
||||
import java.time.OffsetDateTime;
|
||||
|
|
@ -46,18 +47,17 @@ class GenericDocumentImportControllerTest {
|
|||
DipIngestionProperties properties = new DipIngestionProperties();
|
||||
properties.setEnabled(true);
|
||||
properties.setRestUploadEnabled(true);
|
||||
properties.setDefaultVisibility(DocumentVisibility.PUBLIC);
|
||||
controller = new GenericDocumentImportController(properties, ingestionGateway, documentAttributeService);
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldPassGlobalTypedAttributesIntoIngestionAndPersistAllAttributes() {
|
||||
void shouldPersistAttributesButUseOnlyTopLevelFieldsAndIngestionHintsForDescriptorAttributes() {
|
||||
UUID documentId = UUID.randomUUID();
|
||||
OffsetDateTime syncedAt = OffsetDateTime.parse("2026-04-21T11:05:00+02:00");
|
||||
List<GenericNameValuePairRequest> pairs = List.of(
|
||||
new GenericNameValuePairRequest("title", null, DocumentAttributeValueType.STRING, null, "Text title from pair", null, null, null, null, null),
|
||||
new GenericNameValuePairRequest("languageCode", null, DocumentAttributeValueType.STRING, null, "de", null, null, null, null, null),
|
||||
new GenericNameValuePairRequest("country", null, null, "AT", null, null, null, null, null, null),
|
||||
new GenericNameValuePairRequest("country", null, DocumentAttributeValueType.STRING, null, "AT", null, null, null, null, null),
|
||||
new GenericNameValuePairRequest("estimatedValue", null, DocumentAttributeValueType.NUMBER, null, null, null, new BigDecimal("125000.50"), null, null, null),
|
||||
new GenericNameValuePairRequest("publishedDate", null, DocumentAttributeValueType.DATE, null, null, null, null, LocalDate.of(2026, 4, 21), null, null),
|
||||
new GenericNameValuePairRequest("version", null, DocumentAttributeValueType.INTEGER, null, null, 7L, null, null, null, null),
|
||||
|
|
@ -70,10 +70,11 @@ class GenericDocumentImportControllerTest {
|
|||
"text/plain",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
"fr",
|
||||
"Top level title",
|
||||
"source-1",
|
||||
pairs
|
||||
pairs,
|
||||
new GenericIngestionHintsRequest("TEXT", "policy-key", "policy-hint", "batch-1", true, false)
|
||||
);
|
||||
|
||||
when(ingestionGateway.ingest(any(SourceDescriptor.class))).thenReturn(new IngestionResult(
|
||||
|
|
@ -83,8 +84,8 @@ class GenericDocumentImportControllerTest {
|
|||
DocumentType.TEXT,
|
||||
DocumentFamily.GENERIC,
|
||||
DocumentStatus.REPRESENTED,
|
||||
"Text title from pair",
|
||||
"de",
|
||||
"Top level title",
|
||||
"fr",
|
||||
"text/plain",
|
||||
null,
|
||||
OffsetDateTime.now(),
|
||||
|
|
@ -99,14 +100,17 @@ class GenericDocumentImportControllerTest {
|
|||
verify(ingestionGateway).ingest(descriptorCaptor.capture());
|
||||
SourceDescriptor descriptor = descriptorCaptor.getValue();
|
||||
assertThat(descriptor.textContent()).isEqualTo("Hello world");
|
||||
assertThat(descriptor.attributes()).containsEntry("title", "Text title from pair");
|
||||
assertThat(descriptor.attributes()).containsEntry("languageCode", "de");
|
||||
assertThat(descriptor.attributes()).containsEntry("country", "AT");
|
||||
assertThat(descriptor.attributes()).containsEntry("estimatedValue", "125000.5");
|
||||
assertThat(descriptor.attributes()).containsEntry("publishedDate", "2026-04-21");
|
||||
assertThat(descriptor.attributes()).containsEntry("version", "7");
|
||||
assertThat(descriptor.attributes()).containsEntry("lastSyncedAt", "2026-04-21T11:05+02:00");
|
||||
assertThat(descriptor.attributes()).doesNotContainKey("status");
|
||||
assertThat(descriptor.attributes()).containsEntry("title", "Top level title");
|
||||
assertThat(descriptor.attributes()).containsEntry("languageCode", "fr");
|
||||
assertThat(descriptor.attributes()).containsEntry("documentTypeHint", "TEXT");
|
||||
assertThat(descriptor.attributes()).containsEntry("embeddingPolicyKey", "policy-key");
|
||||
assertThat(descriptor.attributes()).containsEntry("embeddingPolicyHint", "policy-hint");
|
||||
assertThat(descriptor.attributes()).containsEntry("importBatchId", "batch-1");
|
||||
assertThat(descriptor.attributes()).containsEntry("wrapperDocument", "true");
|
||||
assertThat(descriptor.attributes()).containsEntry("containerDocument", "false");
|
||||
assertThat(descriptor.attributes()).doesNotContainKeys(
|
||||
"country", "estimatedValue", "publishedDate", "version", "lastSyncedAt", "status"
|
||||
);
|
||||
|
||||
verify(documentAttributeService).addAttributes(documentId, pairs);
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
package at.procon.dip.search.repository;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import at.procon.dip.domain.document.DocumentAttributeValueType;
|
||||
import at.procon.dip.search.api.SearchExecutionContext;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterOperator;
|
||||
import at.procon.dip.search.dto.DocumentAttributeFilterRequest;
|
||||
import at.procon.dip.search.dto.SearchRequest;
|
||||
import at.procon.dip.search.spi.SearchDocumentScope;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
|
||||
|
||||
/**
 * Test for {@code SearchSqlFilterSupport.appendCommonFilters} covering document
 * attribute filters: it checks both the SQL fragment appended to the query and
 * the named parameters bound into the {@code MapSqlParameterSource}.
 */
class SearchSqlFilterSupportTest {
|
||||
|
||||
/**
 * Builds a request with two attribute filters (a STRING equality filter with a
 * context and an INTEGER greater-or-equal filter without one) and asserts the
 * generated SQL and bound parameter values.
 */
@Test
|
||||
void shouldAppendAttributeFilterSql() {
|
||||
SearchRequest request = SearchRequest.builder()
|
||||
.queryText("kitchen")
|
||||
.attributeFilters(java.util.List.of(
|
||||
// Filter #0: string equality on "status"; context is given in lower case on purpose
// (the assertions below expect it to be bound as "TED").
DocumentAttributeFilterRequest.builder()
|
||||
.name("status")
|
||||
.context("ted")
|
||||
.type(DocumentAttributeValueType.STRING)
|
||||
.operator(DocumentAttributeFilterOperator.EQ)
|
||||
.stringValue("closed")
|
||||
.build(),
|
||||
// Filter #1: integer comparison on "version"; no context is set for this filter.
DocumentAttributeFilterRequest.builder()
|
||||
.name("version")
|
||||
.type(DocumentAttributeValueType.INTEGER)
|
||||
.operator(DocumentAttributeFilterOperator.GTE)
|
||||
.integerValue(3L)
|
||||
.build()
|
||||
))
|
||||
.build();
|
||||
SearchExecutionContext context = SearchExecutionContext.builder()
|
||||
.request(request)
|
||||
// NOTE(review): scope is built from an empty set and five nulls, presumably meaning
// "no scope restriction" - confirm against SearchDocumentScope.
.scope(new SearchDocumentScope(java.util.Set.of(), null, null, null, null, null))
|
||||
.page(0)
|
||||
.size(20)
|
||||
.build();
|
||||
|
||||
// Base query that the filter support appends to; "d" and "dtr" are the aliases used in it.
StringBuilder sql = new StringBuilder("SELECT 1 FROM doc.doc_document d JOIN doc.doc_text_representation dtr ON dtr.document_id = d.id WHERE 1=1");
|
||||
MapSqlParameterSource params = new MapSqlParameterSource();
|
||||
// NOTE(review): the meaning of the trailing boolean argument is not visible here - confirm
// against SearchSqlFilterSupport.appendCommonFilters.
SearchSqlFilterSupport.appendCommonFilters(sql, params, context, "d", "dtr", false);
|
||||
|
||||
String rendered = sql.toString();
|
||||
// The appended SQL must reference the attribute table and use parameters
// suffixed with the filter's position in the list (0 and 1).
assertThat(rendered).contains("doc.doc_document_attribute");
|
||||
assertThat(rendered).contains("attribute_context = :attributeContext0");
|
||||
assertThat(rendered).contains("integer_value >= :attributeValue1");
|
||||
assertThat(params.getValue("attributeName0")).isEqualTo("status");
|
||||
// Context was supplied as "ted" but is expected to be bound upper-cased.
assertThat(params.getValue("attributeContext0")).isEqualTo("TED");
|
||||
assertThat(params.getValue("attributeValue0")).isEqualTo("closed");
|
||||
assertThat(params.getValue("attributeName1")).isEqualTo("version");
|
||||
assertThat(params.getValue("attributeValue1")).isEqualTo(3L);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue