Refactor phases 5 - search - tests

master
trifonovt 1 month ago
parent c8659bd45d
commit 40890101b1

@ -19,9 +19,9 @@ public class DocumentTrigramSearchRepositoryImpl implements DocumentTrigramSearc
@Override @Override
public List<SearchHit> search(SearchExecutionContext context, int limit, double threshold) { public List<SearchHit> search(SearchExecutionContext context, int limit, double threshold) {
String scoreExpr = "GREATEST(" + String scoreExpr = "GREATEST(" +
"similarity(COALESCE(d.title, ''), :queryText), " + "doc.similarity(COALESCE(d.title, ''), :queryText), " +
"similarity(COALESCE(d.summary, ''), :queryText), " + "doc.similarity(COALESCE(d.summary, ''), :queryText), " +
"similarity(COALESCE(dtr.text_body, ''), :queryText))"; "doc.similarity(COALESCE(dtr.text_body, ''), :queryText))";
StringBuilder sql = new StringBuilder("SELECT " + StringBuilder sql = new StringBuilder("SELECT " +
"d.id AS document_id, " + "d.id AS document_id, " +
@ -38,9 +38,9 @@ public class DocumentTrigramSearchRepositoryImpl implements DocumentTrigramSearc
"LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet, " + "LEFT(COALESCE(dtr.text_body, COALESCE(d.summary, d.title, '')), 400) AS snippet, " +
scoreExpr + " AS score, " + scoreExpr + " AS score, " +
"CASE " + "CASE " +
"WHEN similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(d.summary, ''), :queryText) " + "WHEN doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(d.summary, ''), :queryText) " +
" AND similarity(COALESCE(d.title, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_TITLE' " + " AND doc.similarity(COALESCE(d.title, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_TITLE' " +
"WHEN similarity(COALESCE(d.summary, ''), :queryText) >= similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_SUMMARY' " + "WHEN doc.similarity(COALESCE(d.summary, ''), :queryText) >= doc.similarity(COALESCE(dtr.text_body, ''), :queryText) THEN 'DOCUMENT_SUMMARY' " +
"ELSE 'REPRESENTATION_TEXT' END AS matched_field " + "ELSE 'REPRESENTATION_TEXT' END AS matched_field " +
"FROM doc.doc_text_representation dtr " + "FROM doc.doc_text_representation dtr " +
"JOIN doc.doc_document d ON d.id = dtr.document_id " + "JOIN doc.doc_document d ON d.id = dtr.document_id " +

@ -1,26 +1,26 @@
-- Slice 1 generic lexical search support. -- Slice 1 generic lexical search support.
-- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search. -- Adds PostgreSQL full-text and trigram search infrastructure for DOC-side search.
CREATE EXTENSION IF NOT EXISTS pg_trgm; CREATE EXTENSION IF NOT EXISTS pg_trgm with schema doc;
ALTER TABLE DOC.doc_text_representation ALTER TABLE doc.doc_text_representation
ADD COLUMN IF NOT EXISTS search_config VARCHAR(64); ADD COLUMN IF NOT EXISTS search_config VARCHAR(64);
ALTER TABLE DOC.doc_text_representation ALTER TABLE doc.doc_text_representation
ADD COLUMN IF NOT EXISTS search_vector tsvector; ADD COLUMN IF NOT EXISTS search_vector tsvector;
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector CREATE INDEX IF NOT EXISTS idx_doc_text_repr_search_vector
ON DOC.doc_text_representation ON doc.doc_text_representation
USING GIN (search_vector); USING GIN (search_vector);
CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm CREATE INDEX IF NOT EXISTS idx_doc_document_title_trgm
ON DOC.doc_document ON doc.doc_document
USING GIN (title gin_trgm_ops); USING GIN (title doc.gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm CREATE INDEX IF NOT EXISTS idx_doc_document_summary_trgm
ON DOC.doc_document ON doc.doc_document
USING GIN (summary gin_trgm_ops); USING GIN (summary doc.gin_trgm_ops);
CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm CREATE INDEX IF NOT EXISTS idx_doc_text_repr_text_trgm
ON DOC.doc_text_representation ON doc.doc_text_representation
USING GIN (text_body gin_trgm_ops); USING GIN (text_body doc.gin_trgm_ops);

Loading…
Cancel
Save