You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

128 lines
4.7 KiB
Java

package at.procon.dip.search.service;
import java.util.List;
import java.util.UUID;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
/**
 * Maintains the lexical (full-text) search columns {@code search_config} and
 * {@code search_vector} on {@code doc.doc_text_representation}.
 *
 * <p>Every write operation first checks that both columns exist and silently
 * does nothing when they do not, so the service can be called before the
 * columns have been added to the table.
 *
 * <p>Several operations are exposed under two names (a "Slice 1" name and a
 * "Slice 2" name); both are kept so that existing callers continue to work.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class DocumentLexicalIndexService {

    /**
     * Shared {@code UPDATE ... SET} prefix that derives the text search configuration from
     * {@code language_code} ('de' -> german, 'en' -> english, anything else -> simple) and
     * rebuilds {@code search_vector} from {@code text_body}. The CASE expression is repeated
     * for the vector because it must not depend on the {@code search_config} value that is
     * being assigned in the same statement. Callers append the WHERE clause.
     */
    private static final String UPDATE_LEXICAL_COLUMNS_SQL = """
            UPDATE doc.doc_text_representation
            SET search_config = CASE
                    WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'
                    WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'
                    ELSE 'simple'
                END,
                search_vector = to_tsvector(
                    CASE
                        WHEN lower(coalesce(language_code, '')) = 'de' THEN 'german'::regconfig
                        WHEN lower(coalesce(language_code, '')) = 'en' THEN 'english'::regconfig
                        ELSE 'simple'::regconfig
                    END,
                    coalesce(text_body, '')
                )
            """;

    /** Refreshes the lexical columns of exactly one representation. */
    private static final String REFRESH_BY_ID_SQL =
            UPDATE_LEXICAL_COLUMNS_SQL + "WHERE id = :representationId";

    /** Refreshes the lexical columns of every representation that has no vector yet. */
    private static final String REFRESH_ALL_MISSING_SQL =
            UPDATE_LEXICAL_COLUMNS_SQL + "WHERE search_vector IS NULL";

    /** Selects the oldest representations (by {@code created_at}) that have no vector yet. */
    private static final String SELECT_MISSING_IDS_SQL = """
            SELECT id
            FROM doc.doc_text_representation
            WHERE search_vector IS NULL
            ORDER BY created_at ASC
            LIMIT ?
            """;

    /** Counts how many of the two lexical search columns exist on the table. */
    private static final String COUNT_LEXICAL_COLUMNS_SQL = """
            SELECT COUNT(*)
            FROM information_schema.columns
            WHERE table_schema = 'doc'
              AND table_name = 'doc_text_representation'
              AND column_name IN ('search_vector', 'search_config')
            """;

    private final NamedParameterJdbcTemplate namedParameterJdbcTemplate;
    private final JdbcTemplate jdbcTemplate;

    /**
     * Recomputes the lexical search columns of one representation.
     * New Slice 2 name kept for current code; delegates to
     * {@link #refreshRepresentationLexicalIndex(UUID)}.
     *
     * @param representationId id of the row in {@code doc.doc_text_representation}
     */
    @Transactional
    public void indexRepresentation(UUID representationId) {
        refreshRepresentationLexicalIndex(representationId);
    }

    /**
     * Recomputes the lexical search columns of one representation.
     * Backward-compatible Slice 1 method name.
     *
     * <p>Does nothing (apart from a debug log entry) when the search columns are not present.
     *
     * @param representationId id of the row in {@code doc.doc_text_representation}
     */
    @Transactional
    public void refreshRepresentationLexicalIndex(UUID representationId) {
        if (!isLexicalSearchSchemaAvailable()) {
            log.debug("Skipping lexical indexing for representation {} because search_vector columns are not present yet", representationId);
            return;
        }
        updateLexicalColumns(representationId);
    }

    /**
     * Rebuilds the lexical search columns for up to {@code limit} representations whose
     * {@code search_vector} is still NULL, oldest first.
     * New Slice 2 method kept for current startup runner.
     *
     * @param limit maximum number of representations to process; a value of zero or less
     *              processes nothing
     * @return number of representations selected for re-indexing; {@code 0} when the search
     *         columns are not present or {@code limit} is not positive
     */
    @Transactional
    public int backfillMissingVectors(int limit) {
        // A non-positive limit can never select a row; returning early also keeps a
        // negative value from reaching the database's LIMIT clause.
        if (limit <= 0) {
            return 0;
        }
        if (!isLexicalSearchSchemaAvailable()) {
            return 0;
        }
        List<UUID> ids = jdbcTemplate.query(
                SELECT_MISSING_IDS_SQL,
                (rs, rowNum) -> rs.getObject(1, UUID.class),
                limit);
        // The schema was verified above, so update each row directly instead of
        // re-running the information_schema check once per id.
        ids.forEach(this::updateLexicalColumns);
        return ids.size();
    }

    /**
     * Rebuilds the lexical search columns for every representation whose
     * {@code search_vector} is still NULL, using a single UPDATE statement.
     * Backward-compatible Slice 1 method name.
     *
     * <p>Does nothing (apart from an info log entry) when the search columns are not present.
     */
    @Transactional
    public void refreshAllMissingLexicalIndexes() {
        if (!isLexicalSearchSchemaAvailable()) {
            log.info("Lexical search columns are not available yet. Skipping startup backfill for DOC lexical indexes.");
            return;
        }
        jdbcTemplate.update(REFRESH_ALL_MISSING_SQL);
    }

    /**
     * Reports whether the lexical search columns exist.
     * New Slice 2 name kept for current code; delegates to
     * {@link #isLexicalSearchSchemaAvailable()}.
     *
     * @return {@code true} when both columns are present
     */
    public boolean searchVectorColumnsPresent() {
        return isLexicalSearchSchemaAvailable();
    }

    /**
     * Reports whether both {@code search_vector} and {@code search_config} exist on
     * {@code doc.doc_text_representation}, by querying {@code information_schema.columns}.
     * Backward-compatible Slice 1 method name.
     *
     * @return {@code true} when both columns are present
     */
    public boolean isLexicalSearchSchemaAvailable() {
        Integer count = jdbcTemplate.queryForObject(COUNT_LEXICAL_COLUMNS_SQL, Integer.class);
        // Two matching rows means both columns were found.
        return count != null && count >= 2;
    }

    /**
     * Runs the single-row UPDATE without checking the schema first; callers must have
     * verified that the search columns exist.
     */
    private void updateLexicalColumns(UUID representationId) {
        MapSqlParameterSource params = new MapSqlParameterSource();
        params.addValue("representationId", representationId);
        namedParameterJdbcTemplate.update(REFRESH_BY_ID_SQL, params);
    }
}