improved embedding model prefix handling using profiles
This commit is contained in:
parent
6ca9936b87
commit
12f0b0604b
|
|
@ -93,7 +93,6 @@ Supported modes:
|
||||||
- `EXTERNAL` - DIP assumes the external service applies the prefixing itself
|
- `EXTERNAL` - DIP assumes the external service applies the prefixing itself
|
||||||
|
|
||||||
For persisted document embeddings, the produced prefix provenance is stored in `doc.doc_embedding`:
|
For persisted document embeddings, the produced prefix provenance is stored in `doc.doc_embedding`:
|
||||||
- `prefix_mode`
|
- `prefix_profile_id` (resolved via `DOC.doc_embedding_prefix_profile`)
|
||||||
- `applied_prefix`
|
|
||||||
|
|
||||||
This makes it possible to identify whether indexed vectors were created with raw text, DIP-side prefixing, or externally handled prefixing before deciding on re-embedding.
|
This makes it possible to identify whether indexed vectors were created with raw text, DIP-side prefixing, or externally handled prefixing before deciding on re-embedding.
|
||||||
|
|
|
||||||
|
|
@ -16,8 +16,8 @@ import org.springframework.scheduling.annotation.EnableAsync;
|
||||||
*/
|
*/
|
||||||
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
|
@SpringBootApplication(scanBasePackages = {"at.procon.dip", "at.procon.ted"})
|
||||||
@EnableAsync
|
@EnableAsync
|
||||||
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity", "at.procon.dip.domain.time.entity"})
|
@EntityScan(basePackages = {"at.procon.ted.model.entity", "at.procon.dip.domain.document.entity", "at.procon.dip.domain.tenant.entity", "at.procon.dip.domain.ted.entity", "at.procon.dip.embedding.job.entity", "at.procon.dip.migration.audit.entity", "at.procon.dip.migration.entity"})
|
||||||
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository", "at.procon.dip.domain.time.repository"})
|
@EnableJpaRepositories(basePackages = {"at.procon.ted.repository", "at.procon.dip.domain.document.repository", "at.procon.dip.domain.tenant.repository", "at.procon.dip.domain.ted.repository", "at.procon.dip.embedding.job.repository", "at.procon.dip.migration.audit.repository", "at.procon.dip.migration.repository"})
|
||||||
public class DocumentIntelligencePlatformApplication {
|
public class DocumentIntelligencePlatformApplication {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ package at.procon.dip.domain.document.entity;
|
||||||
|
|
||||||
import at.procon.dip.architecture.SchemaNames;
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
import at.procon.dip.domain.document.EmbeddingStatus;
|
import at.procon.dip.domain.document.EmbeddingStatus;
|
||||||
import at.procon.dip.embedding.model.EmbeddingPrefixMode;
|
|
||||||
import jakarta.persistence.Column;
|
import jakarta.persistence.Column;
|
||||||
import jakarta.persistence.Entity;
|
import jakarta.persistence.Entity;
|
||||||
import jakarta.persistence.EnumType;
|
import jakarta.persistence.EnumType;
|
||||||
|
|
@ -39,7 +38,7 @@ import lombok.Setter;
|
||||||
@Index(name = "idx_doc_embedding_model", columnList = "model_id"),
|
@Index(name = "idx_doc_embedding_model", columnList = "model_id"),
|
||||||
@Index(name = "idx_doc_embedding_status", columnList = "embedding_status"),
|
@Index(name = "idx_doc_embedding_status", columnList = "embedding_status"),
|
||||||
@Index(name = "idx_doc_embedding_embedded_at", columnList = "embedded_at"),
|
@Index(name = "idx_doc_embedding_embedded_at", columnList = "embedded_at"),
|
||||||
@Index(name = "idx_doc_embedding_prefix_mode", columnList = "prefix_mode")
|
@Index(name = "idx_doc_embedding_prefix_profile", columnList = "prefix_profile_id")
|
||||||
})
|
})
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
|
|
@ -81,13 +80,9 @@ public class DocumentEmbedding {
|
||||||
@Column(name = "embedded_at")
|
@Column(name = "embedded_at")
|
||||||
private OffsetDateTime embeddedAt;
|
private OffsetDateTime embeddedAt;
|
||||||
|
|
||||||
@Enumerated(EnumType.STRING)
|
@ManyToOne(fetch = FetchType.LAZY)
|
||||||
@Column(name = "prefix_mode", nullable = false, length = 32)
|
@JoinColumn(name = "prefix_profile_id")
|
||||||
@Builder.Default
|
private DocumentEmbeddingPrefixProfile prefixProfile;
|
||||||
private EmbeddingPrefixMode prefixMode = EmbeddingPrefixMode.OFF;
|
|
||||||
|
|
||||||
@Column(name = "applied_prefix", length = 64)
|
|
||||||
private String appliedPrefix;
|
|
||||||
|
|
||||||
|
|
||||||
@Builder.Default
|
@Builder.Default
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,75 @@
|
||||||
|
package at.procon.dip.domain.document.entity;
|
||||||
|
|
||||||
|
import at.procon.dip.architecture.SchemaNames;
|
||||||
|
import at.procon.dip.embedding.model.EmbeddingPrefixMode;
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.Index;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.PreUpdate;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import java.time.OffsetDateTime;
|
||||||
|
import java.util.UUID;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Entity
|
||||||
|
@Table(schema = SchemaNames.DOC, name = "doc_embedding_prefix_profile", indexes = {
|
||||||
|
@Index(name = "idx_doc_embedding_prefix_profile_code", columnList = "code", unique = true),
|
||||||
|
@Index(name = "idx_doc_embedding_prefix_profile_mode", columnList = "prefix_mode")
|
||||||
|
})
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@NoArgsConstructor
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class DocumentEmbeddingPrefixProfile {
|
||||||
|
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.UUID)
|
||||||
|
private UUID id;
|
||||||
|
|
||||||
|
@Column(name = "code", nullable = false, length = 128, unique = true)
|
||||||
|
private String code;
|
||||||
|
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
@Column(name = "prefix_mode", nullable = false, length = 32)
|
||||||
|
private EmbeddingPrefixMode prefixMode;
|
||||||
|
|
||||||
|
@Column(name = "prefix_text", nullable = false, columnDefinition = "TEXT")
|
||||||
|
@Builder.Default
|
||||||
|
private String prefixText = "";
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "created_at", nullable = false, updatable = false)
|
||||||
|
private OffsetDateTime createdAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@Builder.Default
|
||||||
|
@Column(name = "updated_at", nullable = false)
|
||||||
|
private OffsetDateTime updatedAt = OffsetDateTime.now();
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
createdAt = OffsetDateTime.now();
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
if (prefixText == null) {
|
||||||
|
prefixText = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@PreUpdate
|
||||||
|
protected void onUpdate() {
|
||||||
|
updatedAt = OffsetDateTime.now();
|
||||||
|
if (prefixText == null) {
|
||||||
|
prefixText = "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
package at.procon.dip.domain.document.repository;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentEmbeddingPrefixProfile;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.UUID;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
|
||||||
|
public interface DocumentEmbeddingPrefixProfileRepository extends JpaRepository<DocumentEmbeddingPrefixProfile, UUID> {
|
||||||
|
|
||||||
|
Optional<DocumentEmbeddingPrefixProfile> findByCode(String code);
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,6 @@ package at.procon.dip.domain.document.repository;
|
||||||
|
|
||||||
import at.procon.dip.domain.document.EmbeddingStatus;
|
import at.procon.dip.domain.document.EmbeddingStatus;
|
||||||
import at.procon.dip.domain.document.entity.DocumentEmbedding;
|
import at.procon.dip.domain.document.entity.DocumentEmbedding;
|
||||||
import at.procon.dip.embedding.model.EmbeddingPrefixMode;
|
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
@ -33,25 +32,17 @@ public interface DocumentEmbeddingRepository extends JpaRepository<DocumentEmbed
|
||||||
"WHERE e.id = :embeddingId")
|
"WHERE e.id = :embeddingId")
|
||||||
Optional<DocumentEmbedding> findDetailedById(@Param("embeddingId") UUID embeddingId);
|
Optional<DocumentEmbedding> findDetailedById(@Param("embeddingId") UUID embeddingId);
|
||||||
|
|
||||||
default int updateEmbeddingVector(@Param("id") UUID id,
|
|
||||||
@Param("vectorData") float[] vectorData,
|
|
||||||
@Param("tokenCount") Integer tokenCount,
|
|
||||||
@Param("dimensions") Integer dimensions) {
|
|
||||||
return updateEmbeddingVector(id, vectorData, tokenCount, dimensions, EmbeddingPrefixMode.OFF.name(), null);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Modifying
|
@Modifying
|
||||||
@Query(value = "UPDATE doc.doc_embedding SET embedding_vector = CAST(:vectorData AS vector), " +
|
@Query(value = "UPDATE doc.doc_embedding SET embedding_vector = CAST(:vectorData AS vector), " +
|
||||||
"embedding_status = 'COMPLETED', embedded_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP, " +
|
"embedding_status = 'COMPLETED', embedded_at = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP, " +
|
||||||
"error_message = NULL, token_count = :tokenCount, embedding_dimensions = :dimensions, " +
|
"error_message = NULL, token_count = :tokenCount, embedding_dimensions = :dimensions, " +
|
||||||
"prefix_mode = :prefixMode, applied_prefix = :appliedPrefix WHERE id = :id",
|
"prefix_profile_id = :prefixProfileId WHERE id = :id",
|
||||||
nativeQuery = true)
|
nativeQuery = true)
|
||||||
int updateEmbeddingVector(@Param("id") UUID id,
|
int updateEmbeddingVector(@Param("id") UUID id,
|
||||||
@Param("vectorData") float[] vectorData,
|
@Param("vectorData") float[] vectorData,
|
||||||
@Param("tokenCount") Integer tokenCount,
|
@Param("tokenCount") Integer tokenCount,
|
||||||
@Param("dimensions") Integer dimensions,
|
@Param("dimensions") Integer dimensions,
|
||||||
@Param("prefixMode") String prefixMode,
|
@Param("prefixProfileId") UUID prefixProfileId);
|
||||||
@Param("appliedPrefix") String appliedPrefix);
|
|
||||||
|
|
||||||
@Modifying
|
@Modifying
|
||||||
@Query("UPDATE DocumentEmbedding e SET e.embeddingStatus = :status, e.errorMessage = :errorMessage, " +
|
@Query("UPDATE DocumentEmbedding e SET e.embeddingStatus = :status, e.errorMessage = :errorMessage, " +
|
||||||
|
|
|
||||||
|
|
@ -9,12 +9,18 @@ import java.time.OffsetDateTime;
|
||||||
import java.time.ZoneId;
|
import java.time.ZoneId;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import at.procon.dip.runtime.condition.ConditionalOnRuntimeMode;
|
||||||
|
import at.procon.dip.runtime.config.RuntimeMode;
|
||||||
import org.springframework.beans.factory.annotation.Qualifier;
|
import org.springframework.beans.factory.annotation.Qualifier;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
import org.springframework.jdbc.core.RowMapper;
|
import org.springframework.jdbc.core.RowMapper;
|
||||||
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
|
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
|
@ConditionalOnRuntimeMode(RuntimeMode.NEW)
|
||||||
|
@ConditionalOnProperty(prefix = "dip.time.leitstand", name = "enabled", havingValue = "true")
|
||||||
public class JdbcLeitstandTimeSourceClient implements LeitstandTimeSourceClient {
|
public class JdbcLeitstandTimeSourceClient implements LeitstandTimeSourceClient {
|
||||||
|
|
||||||
private final TimeDomainProperties properties;
|
private final TimeDomainProperties properties;
|
||||||
|
|
|
||||||
|
|
@ -62,10 +62,11 @@ public class TimeEntryRepresentationMaterializationService {
|
||||||
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|
|| !equalsNullable(projection.getLanguageCode(), existing.get().getLanguageCode())
|
||||||
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
|
|| !BUILDER_KEY.equals(existing.get().getBuilderKey());
|
||||||
|
|
||||||
|
Document finalDocument = document;
|
||||||
DocumentTextRepresentation semantic = existing
|
DocumentTextRepresentation semantic = existing
|
||||||
.map(found -> changed ? updateRepresentation(found, projection) : found)
|
.map(found -> changed ? updateRepresentation(found, projection) : found)
|
||||||
.orElseGet(() -> documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
.orElseGet(() -> documentRepresentationService.addRepresentation(new AddDocumentTextRepresentationCommand(
|
||||||
document.getId(),
|
finalDocument.getId(),
|
||||||
null,
|
null,
|
||||||
RepresentationType.SEMANTIC_TEXT,
|
RepresentationType.SEMANTIC_TEXT,
|
||||||
BUILDER_KEY,
|
BUILDER_KEY,
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ public class EmbeddingPersistenceService {
|
||||||
private final DocumentEmbeddingService documentEmbeddingService;
|
private final DocumentEmbeddingService documentEmbeddingService;
|
||||||
private final DocumentEmbeddingRepository embeddingRepository;
|
private final DocumentEmbeddingRepository embeddingRepository;
|
||||||
private final EmbeddingModelCatalogService modelCatalogService;
|
private final EmbeddingModelCatalogService modelCatalogService;
|
||||||
|
private final EmbeddingPrefixProfileService embeddingPrefixProfileService;
|
||||||
|
|
||||||
public DocumentEmbedding ensurePending(UUID representationId, String modelKey) {
|
public DocumentEmbedding ensurePending(UUID representationId, String modelKey) {
|
||||||
DocumentTextRepresentation representation = representationRepository.findById(representationId)
|
DocumentTextRepresentation representation = representationRepository.findById(representationId)
|
||||||
|
|
@ -59,13 +60,13 @@ public class EmbeddingPersistenceService {
|
||||||
if (vector == null || vector.length == 0) {
|
if (vector == null || vector.length == 0) {
|
||||||
throw new IllegalArgumentException("Embedding vector must not be empty");
|
throw new IllegalArgumentException("Embedding vector must not be empty");
|
||||||
}
|
}
|
||||||
|
UUID prefixProfileId = embeddingPrefixProfileService.resolveProfileId(prefixMode, appliedPrefix);
|
||||||
embeddingRepository.updateEmbeddingVector(
|
embeddingRepository.updateEmbeddingVector(
|
||||||
embeddingId,
|
embeddingId,
|
||||||
vector,
|
vector,
|
||||||
tokenCount,
|
tokenCount,
|
||||||
vector.length,
|
vector.length,
|
||||||
(prefixMode == null ? EmbeddingPrefixMode.OFF : prefixMode).name(),
|
prefixProfileId
|
||||||
appliedPrefix
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
package at.procon.dip.embedding.service;
|
||||||
|
|
||||||
|
import at.procon.dip.domain.document.entity.DocumentEmbeddingPrefixProfile;
|
||||||
|
import at.procon.dip.domain.document.repository.DocumentEmbeddingPrefixProfileRepository;
|
||||||
|
import at.procon.dip.embedding.model.EmbeddingPrefixMode;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.security.MessageDigest;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.util.HexFormat;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ConcurrentMap;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class EmbeddingPrefixProfileService {
|
||||||
|
|
||||||
|
private final DocumentEmbeddingPrefixProfileRepository repository;
|
||||||
|
private final ConcurrentMap<String, UUID> idCache = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public UUID resolveProfileId(EmbeddingPrefixMode prefixMode, String appliedPrefix) {
|
||||||
|
EmbeddingPrefixMode normalizedMode = prefixMode == null ? EmbeddingPrefixMode.OFF : prefixMode;
|
||||||
|
String normalizedPrefix = appliedPrefix == null ? "" : appliedPrefix;
|
||||||
|
String code = buildCode(normalizedMode, normalizedPrefix);
|
||||||
|
|
||||||
|
UUID cached = idCache.get(code);
|
||||||
|
if (cached != null) {
|
||||||
|
return cached;
|
||||||
|
}
|
||||||
|
|
||||||
|
UUID resolved = repository.findByCode(code)
|
||||||
|
.map(DocumentEmbeddingPrefixProfile::getId)
|
||||||
|
.orElseGet(() -> repository.save(
|
||||||
|
DocumentEmbeddingPrefixProfile.builder()
|
||||||
|
.code(code)
|
||||||
|
.prefixMode(normalizedMode)
|
||||||
|
.prefixText(normalizedPrefix)
|
||||||
|
.build()
|
||||||
|
).getId());
|
||||||
|
idCache.putIfAbsent(code, resolved);
|
||||||
|
return resolved;
|
||||||
|
}
|
||||||
|
|
||||||
|
static String buildCode(EmbeddingPrefixMode prefixMode, String prefixText) {
|
||||||
|
return prefixMode.name() + ":" + sha256Hex(prefixText == null ? "" : prefixText);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String sha256Hex(String value) {
|
||||||
|
try {
|
||||||
|
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
||||||
|
byte[] bytes = digest.digest(value.getBytes(StandardCharsets.UTF_8));
|
||||||
|
return HexFormat.of().formatHex(bytes);
|
||||||
|
} catch (NoSuchAlgorithmException e) {
|
||||||
|
throw new IllegalStateException("SHA-256 digest not available", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -95,7 +95,7 @@ public class DocumentEmbeddingProcessingService {
|
||||||
}
|
}
|
||||||
|
|
||||||
String vectorString = vectorizationService.floatArrayToVectorString(embedding);
|
String vectorString = vectorizationService.floatArrayToVectorString(embedding);
|
||||||
embeddingRepository.updateEmbeddingVector(embeddingId, embedding, tokenCount, embedding.length);
|
embeddingRepository.updateEmbeddingVector(embeddingId, embedding, tokenCount, embedding.length, null);
|
||||||
documentService.updateStatus(loaded.getDocument().getId(), DocumentStatus.INDEXED);
|
documentService.updateStatus(loaded.getDocument().getId(), DocumentStatus.INDEXED);
|
||||||
|
|
||||||
if (properties.isDualWriteLegacyTedVectors()) {
|
if (properties.isDualWriteLegacyTedVectors()) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,65 @@
|
||||||
|
-- Migration: move embedding prefix provenance out of doc.doc_embedding
-- (prefix_mode / applied_prefix columns) into the shared lookup table
-- doc.doc_embedding_prefix_profile, referenced through prefix_profile_id.
-- Every step is guarded so the script can be re-run safely.

CREATE TABLE IF NOT EXISTS doc.doc_embedding_prefix_profile (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    code VARCHAR(128) NOT NULL UNIQUE,
    prefix_mode VARCHAR(32) NOT NULL,
    prefix_text TEXT NOT NULL DEFAULT '',
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP
);

ALTER TABLE doc.doc_embedding
    ADD COLUMN IF NOT EXISTS prefix_profile_id UUID;

-- Backfill: only possible while the legacy columns still exist.
-- The profile code MUST use the same format the application computes in
-- EmbeddingPrefixProfileService.buildCode:
--     <prefix_mode> ':' <lowercase hex SHA-256 of the UTF-8 prefix text>
-- Using any other hash (e.g. md5) would make the application's lookup by code miss the
-- migrated rows and create a second, duplicate profile per mode/prefix combination.
DO $$
BEGIN
    IF EXISTS (
        SELECT 1
        FROM information_schema.columns c
        WHERE c.table_schema = 'doc'
          AND c.table_name = 'doc_embedding'
          AND c.column_name = 'prefix_mode'
    ) THEN
        -- Dynamic SQL so the statements are only parsed when the legacy columns are present.
        EXECUTE $sql$
            INSERT INTO doc.doc_embedding_prefix_profile (code, prefix_mode, prefix_text)
            SELECT DISTINCT
                COALESCE(de.prefix_mode, 'OFF') || ':'
                    || encode(sha256(convert_to(COALESCE(de.applied_prefix, ''), 'UTF8')), 'hex'),
                COALESCE(de.prefix_mode, 'OFF'),
                COALESCE(de.applied_prefix, '')
            FROM doc.doc_embedding de
            ON CONFLICT (code) DO NOTHING
        $sql$;

        EXECUTE $sql$
            UPDATE doc.doc_embedding de
            SET prefix_profile_id = pp.id
            FROM doc.doc_embedding_prefix_profile pp
            WHERE de.prefix_profile_id IS NULL
              AND pp.code = COALESCE(de.prefix_mode, 'OFF') || ':'
                    || encode(sha256(convert_to(COALESCE(de.applied_prefix, ''), 'UTF8')), 'hex')
        $sql$;
    END IF;
END $$;

-- Add the foreign key once. The lookup is scoped to doc.doc_embedding so that a
-- same-named constraint on an unrelated table cannot suppress its creation.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'fk_doc_embedding_prefix_profile'
          AND conrelid = 'doc.doc_embedding'::regclass
    ) THEN
        ALTER TABLE doc.doc_embedding
            ADD CONSTRAINT fk_doc_embedding_prefix_profile
            FOREIGN KEY (prefix_profile_id)
            REFERENCES doc.doc_embedding_prefix_profile(id);
    END IF;
END $$;

CREATE INDEX IF NOT EXISTS idx_doc_embedding_prefix_profile
    ON doc.doc_embedding(prefix_profile_id);

-- Remove the legacy index and columns now that the data lives in the profile table.
DROP INDEX IF EXISTS doc.idx_doc_embedding_prefix_mode;

ALTER TABLE doc.doc_embedding
    DROP COLUMN IF EXISTS applied_prefix;

ALTER TABLE doc.doc_embedding
    DROP COLUMN IF EXISTS prefix_mode;
|
||||||
Loading…
Reference in New Issue