package at.procon.ted.controller;

import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
import at.procon.dip.runtime.config.RuntimeMode;
import at.procon.ted.service.SimilaritySearchService;
import at.procon.ted.service.SimilaritySearchService.SimilaritySearchResponse;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Content;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.util.Locale;

/**
 * REST Controller for similarity search on TED procurement documents.
 * Provides endpoints for searching similar documents using text or PDF input.
 *
 * <p>Both endpoints delegate the actual search to {@link SimilaritySearchService} and only
 * translate input validation failures and service exceptions into HTTP status codes.
 * Error responses carry no body.
 *
 * @author Martin.Schweitzer@procon.co.at and claude.ai
 */
@RestController
@RequestMapping("/similarity")
@RequiredArgsConstructor
@Slf4j
@ConditionalOnRuntimeMode(RuntimeMode.LEGACY)
@Tag(name = "Similarity Search", description = "Vector-based semantic similarity search on TED procurement documents")
public class SimilaritySearchController {

    private final SimilaritySearchService similaritySearchService;

    /**
     * Search for similar documents using text query.
     *
     * <p>Status mapping: a {@code null} or blank text yields 400, an
     * {@link IllegalStateException} from the service yields 503, and any other exception
     * yields 500.
     *
     * @param request the search request; {@code topK} and {@code threshold} are passed to the
     *                service unchanged (possibly {@code null})
     * @return 200 with the search response on success, otherwise an empty error response
     */
    @PostMapping("/text")
    @Operation(
            summary = "Search by text",
            description = "Find similar TED procurement documents based on text content using vector similarity (cosine distance)"
    )
    @ApiResponses({
            @ApiResponse(responseCode = "200", description = "Search completed successfully",
                    content = @Content(schema = @Schema(implementation = SimilaritySearchResponse.class))),
            @ApiResponse(responseCode = "400", description = "Invalid request (empty text)"),
            @ApiResponse(responseCode = "503", description = "Vectorization service unavailable")
    })
    public ResponseEntity<SimilaritySearchResponse> searchByText(
            @Parameter(description = "Text content to search for similar documents", required = true)
            @RequestBody TextSearchRequest request
    ) {
        String text = request.getText();
        log.info("Text similarity search request: {} chars, topK={}, threshold={}",
                text != null ? text.length() : 0,
                request.getTopK(),
                request.getThreshold());

        if (text == null || text.isBlank()) {
            return ResponseEntity.badRequest().build();
        }

        try {
            SimilaritySearchResponse response = similaritySearchService.searchByText(
                    text,
                    request.getTopK(),
                    request.getThreshold()
            );
            return ResponseEntity.ok(response);
        } catch (IllegalStateException e) {
            // The service signals an unavailable vectorization backend this way.
            log.error("Vectorization service unavailable: {}", e.getMessage());
            return ResponseEntity.status(503).build();
        } catch (Exception e) {
            log.error("Text similarity search failed: {}", e.getMessage(), e);
            return ResponseEntity.internalServerError().build();
        }
    }

    /**
     * Search for similar documents using PDF file.
     *
     * <p>Status mapping: a missing/empty file, a file that does not look like a PDF, or a
     * failure to read the upload yields 400; an {@link IllegalStateException} from the
     * service yields 503; a {@link RuntimeException} whose message contains
     * {@code "extraction failed"} yields 422; anything else yields 500.
     *
     * @param file      the uploaded multipart part named {@code file}
     * @param topK      number of results requested (request default: 20)
     * @param threshold minimum similarity requested (request default: 0.5)
     * @return 200 with the search response on success, otherwise an empty error response
     */
    @PostMapping(value = "/pdf", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    @Operation(
            summary = "Search by PDF",
            description = "Upload a PDF document to find similar TED procurement documents. " +
                    "Text is extracted from the PDF and used for vector similarity search."
    )
    @ApiResponses({
            @ApiResponse(responseCode = "200", description = "Search completed successfully",
                    content = @Content(schema = @Schema(implementation = SimilaritySearchResponse.class))),
            @ApiResponse(responseCode = "400", description = "Invalid request (no file or not a PDF)"),
            @ApiResponse(responseCode = "422", description = "Could not extract text from PDF"),
            @ApiResponse(responseCode = "503", description = "Vectorization service unavailable")
    })
    public ResponseEntity<SimilaritySearchResponse> searchByPdf(
            @Parameter(description = "PDF file to search for similar documents", required = true)
            @RequestPart("file") MultipartFile file,

            @Parameter(description = "Number of top results to return (default: 20, max: 100)")
            @RequestParam(required = false, defaultValue = "20") Integer topK,

            @Parameter(description = "Minimum similarity threshold (0.0-1.0, default: 0.5)")
            @RequestParam(required = false, defaultValue = "0.5") Double threshold
    ) {
        if (file == null || file.isEmpty()) {
            log.warn("PDF search request with empty file");
            return ResponseEntity.badRequest().build();
        }

        String filename = file.getOriginalFilename();
        String contentType = file.getContentType();

        log.info("PDF similarity search request: filename='{}', size={} bytes, topK={}, threshold={}",
                filename, file.getSize(), topK, threshold);

        // Validate file type. A missing content type must not bypass the check: in that case
        // the filename extension alone decides.
        if (!looksLikePdf(contentType, filename)) {
            log.warn("Invalid file type: {} ({})", filename, contentType);
            return ResponseEntity.badRequest().build();
        }

        try {
            byte[] pdfData = file.getBytes();
            SimilaritySearchResponse response = similaritySearchService.searchByPdf(
                    pdfData,
                    filename,
                    topK,
                    threshold
            );
            return ResponseEntity.ok(response);
        } catch (IOException e) {
            log.error("Failed to read PDF file: {}", e.getMessage(), e);
            return ResponseEntity.badRequest().build();
        } catch (IllegalStateException e) {
            // Must precede the RuntimeException handler, which would otherwise swallow it.
            log.error("Vectorization service unavailable: {}", e.getMessage());
            return ResponseEntity.status(503).build();
        } catch (RuntimeException e) {
            // Extraction failures are recognised by message text only.
            if (e.getMessage() != null && e.getMessage().contains("extraction failed")) {
                log.error("PDF extraction failed: {}", e.getMessage());
                return ResponseEntity.unprocessableEntity().build();
            }
            log.error("PDF similarity search failed: {}", e.getMessage(), e);
            return ResponseEntity.internalServerError().build();
        } catch (Exception e) {
            log.error("PDF similarity search failed: {}", e.getMessage(), e);
            return ResponseEntity.internalServerError().build();
        }
    }

    /**
     * Decides whether an upload should be treated as a PDF, based on metadata only
     * (the file content is not inspected).
     *
     * @param contentType the declared content type of the part, may be {@code null}
     * @param filename    the original filename of the part, may be {@code null}
     * @return {@code true} if the content type contains {@code "pdf"} or the filename ends
     *         with {@code ".pdf"}, compared case-insensitively
     */
    private static boolean looksLikePdf(String contentType, String filename) {
        boolean pdfContentType = contentType != null
                && contentType.toLowerCase(Locale.ROOT).contains("pdf");
        boolean pdfExtension = filename != null
                && filename.toLowerCase(Locale.ROOT).endsWith(".pdf");
        return pdfContentType || pdfExtension;
    }

    /**
     * Request DTO for text-based similarity search.
     *
     * <p>NOTE(review): the defaults and limits named in the schema descriptions are not
     * applied in this controller; presumably the service applies them - confirm.
     */
    @lombok.Data
    @lombok.NoArgsConstructor
    @lombok.AllArgsConstructor
    public static class TextSearchRequest {

        @Schema(description = "Text content to search for similar documents", required = true)
        private String text;

        @Schema(description = "Number of top results to return (default: 20, max: 100)")
        private Integer topK;

        @Schema(description = "Minimum similarity threshold (0.0-1.0, default: 0.5)")
        private Double threshold;
    }
}