You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
128 lines
4.7 KiB
Java
128 lines
4.7 KiB
Java
package at.procon.dip.search.service;
|
|
|
|
import java.util.List;
|
|
import java.util.UUID;
|
|
import lombok.RequiredArgsConstructor;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.springframework.jdbc.core.JdbcTemplate;
|
|
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
|
|
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.transaction.annotation.Transactional;
|
|
|
|
@Service
|
|
@RequiredArgsConstructor
|
|
@Slf4j
|
|
public class DocumentLexicalIndexService {
|
|
|
|
private final NamedParameterJdbcTemplate namedParameterJdbcTemplate;
|
|
private final JdbcTemplate jdbcTemplate;
|
|
|
|
/**
|
|
* New Slice 2 name kept for current code.
|
|
*/
|
|
@Transactional
|
|
public void indexRepresentation(UUID representationId) {
|
|
refreshRepresentationLexicalIndex(representationId);
|
|
}
|
|
|
|
/**
|
|
* Backward-compatible Slice 1 method name.
|
|
*/
|
|
@Transactional
|
|
public void refreshRepresentationLexicalIndex(UUID representationId) {
|
|
if (!isLexicalSearchSchemaAvailable()) {
|
|
log.debug("Skipping lexical indexing for representation {} because search_vector columns are not present yet", representationId);
|
|
return;
|
|
}
|
|
|
|
MapSqlParameterSource params = new MapSqlParameterSource();
|
|
params.addValue("representationId", representationId);
|
|
namedParameterJdbcTemplate.update("""
|
|
UPDATE doc.doc_text_representation
|
|
SET search_config = CASE
|
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'
|
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'
|
|
ELSE 'simple'
|
|
END,
|
|
search_vector = to_tsvector(
|
|
CASE
|
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig
|
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig
|
|
ELSE 'simple'::regconfig
|
|
END,
|
|
coalesce(text_body, '')
|
|
)
|
|
WHERE id = :representationId
|
|
""", params);
|
|
}
|
|
|
|
/**
|
|
* New Slice 2 method kept for current startup runner.
|
|
*/
|
|
@Transactional
|
|
public int backfillMissingVectors(int limit) {
|
|
if (!isLexicalSearchSchemaAvailable()) {
|
|
return 0;
|
|
}
|
|
List<UUID> ids = jdbcTemplate.query("""
|
|
SELECT id
|
|
FROM doc.doc_text_representation
|
|
WHERE search_vector IS NULL
|
|
ORDER BY created_at ASC
|
|
LIMIT ?
|
|
""", (rs, rowNum) -> rs.getObject(1, UUID.class), limit);
|
|
ids.forEach(this::refreshRepresentationLexicalIndex);
|
|
return ids.size();
|
|
}
|
|
|
|
/**
|
|
* Backward-compatible Slice 1 method name.
|
|
*/
|
|
@Transactional
|
|
public void refreshAllMissingLexicalIndexes() {
|
|
if (!isLexicalSearchSchemaAvailable()) {
|
|
log.info("Lexical search columns are not available yet. Skipping startup backfill for DOC lexical indexes.");
|
|
return;
|
|
}
|
|
jdbcTemplate.update("""
|
|
UPDATE doc.doc_text_representation
|
|
SET search_config = CASE
|
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'
|
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'
|
|
ELSE 'simple'
|
|
END,
|
|
search_vector = to_tsvector(
|
|
CASE
|
|
WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig
|
|
WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig
|
|
ELSE 'simple'::regconfig
|
|
END,
|
|
coalesce(text_body, '')
|
|
)
|
|
WHERE search_vector IS NULL
|
|
""");
|
|
}
|
|
|
|
/**
|
|
* New Slice 2 name kept for current code.
|
|
*/
|
|
public boolean searchVectorColumnsPresent() {
|
|
return isLexicalSearchSchemaAvailable();
|
|
}
|
|
|
|
/**
|
|
* Backward-compatible Slice 1 method name.
|
|
*/
|
|
public boolean isLexicalSearchSchemaAvailable() {
|
|
Integer count = jdbcTemplate.queryForObject("""
|
|
SELECT COUNT(*)
|
|
FROM information_schema.columns
|
|
WHERE table_schema = 'doc'
|
|
AND table_name = 'doc_text_representation'
|
|
AND column_name IN ('search_vector', 'search_config')
|
|
""", Integer.class);
|
|
return count != null && count >= 2;
|
|
}
|
|
}
|