vectorization profiles per document type
parent
fbd249e56b
commit
177c61803e
@ -0,0 +1,30 @@
|
||||
# Embedding policy Patch K1
|
||||
|
||||
Patch K1 introduces the configuration and resolver layer for policy-based document embedding selection.
|
||||
|
||||
## Added
|
||||
- `EmbeddingPolicy`
|
||||
- `EmbeddingProfile`
|
||||
- `EmbeddingPolicyCondition`
|
||||
- `EmbeddingPolicyUse`
|
||||
- `EmbeddingPolicyRule`
|
||||
- `EmbeddingPolicyProperties`
|
||||
- `EmbeddingProfileProperties`
|
||||
- `EmbeddingPolicyResolver`
|
||||
- `DefaultEmbeddingPolicyResolver`
|
||||
- `EmbeddingProfileResolver`
|
||||
- `DefaultEmbeddingProfileResolver`
|
||||
|
||||
## Example config
|
||||
See `application-new-example-embedding-policy.yml`.
|
||||
|
||||
## What K1 does not change
|
||||
- no runtime import/orchestrator wiring yet
|
||||
- no `SourceDescriptor` schema change yet
|
||||
- no job persistence/audit changes yet
|
||||
|
||||
## Intended follow-up
|
||||
K2 should wire the following services to use the resolved policy and profile:

- `GenericDocumentImportService`
- `RepresentationEmbeddingOrchestrator`
|
||||
@ -0,0 +1,26 @@
|
||||
# Embedding policy Patch K2
|
||||
|
||||
Patch K2 wires the policy/profile layer into the actual NEW import runtime.
|
||||
|
||||
## What it changes
|
||||
- `GenericDocumentImportService`
|
||||
- resolves `EmbeddingPolicy` per imported document
|
||||
- resolves `EmbeddingProfile`
|
||||
- ensures the selected embedding model is registered
|
||||
- queues embeddings only for representation drafts allowed by the resolved profile
|
||||
- `RepresentationEmbeddingOrchestrator`
|
||||
- adds a convenience overload for `(documentId, modelKey, profile)`
|
||||
- `EmbeddingJobService`
|
||||
- adds a profile-aware enqueue overload
|
||||
- `DefaultEmbeddingSelectionPolicy`
|
||||
- adds profile-aware representation filtering
|
||||
- `DefaultEmbeddingPolicyResolver`
|
||||
- corrected for the current `SourceDescriptor.attributes()` shape
|
||||
|
||||
## Runtime flow after K2
|
||||
```text
document imported
  -> representations built
  -> policy resolved
  -> profile resolved
  -> model ensured
  -> matching representations queued for embedding
```
|
||||
@ -0,0 +1,14 @@
|
||||
package at.procon.dip.embedding.config;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class EmbeddingPolicyCondition {
|
||||
private String documentType;
|
||||
private String documentFamily;
|
||||
private String sourceType;
|
||||
private String mimeType;
|
||||
private String language;
|
||||
private String ownerTenantKey;
|
||||
private String embeddingPolicyHint;
|
||||
}
|
||||
@ -0,0 +1,16 @@
|
||||
package at.procon.dip.embedding.config;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
@ConfigurationProperties(prefix = "dip.embedding.policies")
|
||||
@Data
|
||||
public class EmbeddingPolicyProperties {
|
||||
|
||||
private EmbeddingPolicyUse defaultPolicy = new EmbeddingPolicyUse();
|
||||
private List<EmbeddingPolicyRule> rules = new ArrayList<>();
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package at.procon.dip.embedding.config;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class EmbeddingPolicyRule {
|
||||
private String name;
|
||||
private EmbeddingPolicyCondition when = new EmbeddingPolicyCondition();
|
||||
private EmbeddingPolicyUse use = new EmbeddingPolicyUse();
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
package at.procon.dip.embedding.config;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class EmbeddingPolicyUse {
|
||||
private String policyKey;
|
||||
private String modelKey;
|
||||
private String queryModelKey;
|
||||
private String profileKey;
|
||||
private boolean enabled = true;
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
package at.procon.dip.embedding.config;
|
||||
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
@ConfigurationProperties(prefix = "dip.embedding.profiles")
|
||||
@Data
|
||||
public class EmbeddingProfileProperties {
|
||||
|
||||
private Map<String, ProfileDefinition> definitions = new LinkedHashMap<>();
|
||||
|
||||
@Data
|
||||
public static class ProfileDefinition {
|
||||
private List<RepresentationType> embedRepresentationTypes = new ArrayList<>();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package at.procon.dip.embedding.policy;
|
||||
|
||||
/**
 * Immutable result of embedding policy resolution.
 *
 * @param policyKey     key of the policy that was selected
 * @param modelKey      key of the embedding model for document representations
 * @param queryModelKey key of the embedding model for queries
 * @param profileKey    key of the embedding profile to resolve for this policy
 * @param enabled       whether the policy is enabled
 */
public record EmbeddingPolicy(
        String policyKey,
        String modelKey,
        String queryModelKey,
        String profileKey,
        boolean enabled) {
}
|
||||
@ -0,0 +1,13 @@
|
||||
package at.procon.dip.embedding.policy;
|
||||
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import java.util.List;
|
||||
|
||||
public record EmbeddingProfile(
|
||||
String profileKey,
|
||||
List<RepresentationType> embedRepresentationTypes
|
||||
) {
|
||||
public boolean includes(RepresentationType representationType) {
|
||||
return embedRepresentationTypes != null && embedRepresentationTypes.contains(representationType);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,131 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyCondition;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyProperties;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyRule;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyUse;
|
||||
import at.procon.dip.embedding.policy.EmbeddingPolicy;
|
||||
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.regex.Pattern;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DefaultEmbeddingPolicyResolver implements EmbeddingPolicyResolver {
|
||||
|
||||
private final EmbeddingPolicyProperties properties;
|
||||
|
||||
@Override
|
||||
public EmbeddingPolicy resolve(Document document, SourceDescriptor sourceDescriptor) {
|
||||
String overridePolicy = attributeValue(sourceDescriptor, "embeddingPolicyKey");
|
||||
if (overridePolicy != null) {
|
||||
return policyByKey(overridePolicy);
|
||||
}
|
||||
|
||||
String policyHint = policyHint(sourceDescriptor);
|
||||
if (policyHint != null) {
|
||||
return policyByKey(policyHint);
|
||||
}
|
||||
|
||||
for (EmbeddingPolicyRule rule : properties.getRules()) {
|
||||
if (matches(rule.getWhen(), document, sourceDescriptor)) {
|
||||
return toPolicy(rule.getUse());
|
||||
}
|
||||
}
|
||||
|
||||
return toPolicy(properties.getDefaultPolicy());
|
||||
}
|
||||
|
||||
private EmbeddingPolicy policyByKey(String policyKey) {
|
||||
for (EmbeddingPolicyRule rule : properties.getRules()) {
|
||||
if (rule.getUse() != null && policyKey.equals(rule.getUse().getPolicyKey())) {
|
||||
return toPolicy(rule.getUse());
|
||||
}
|
||||
}
|
||||
EmbeddingPolicyUse def = properties.getDefaultPolicy();
|
||||
if (def != null && policyKey.equals(def.getPolicyKey())) {
|
||||
return toPolicy(def);
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown embedding policy key: " + policyKey);
|
||||
}
|
||||
|
||||
private EmbeddingPolicy toPolicy(EmbeddingPolicyUse use) {
|
||||
if (use == null) {
|
||||
throw new IllegalStateException("Embedding policy configuration is missing");
|
||||
}
|
||||
return new EmbeddingPolicy(
|
||||
use.getPolicyKey(),
|
||||
use.getModelKey(),
|
||||
use.getQueryModelKey(),
|
||||
use.getProfileKey(),
|
||||
use.isEnabled()
|
||||
);
|
||||
}
|
||||
|
||||
private boolean matches(EmbeddingPolicyCondition c, Document document, SourceDescriptor sourceDescriptor) {
|
||||
if (c == null) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!matchesExact(c.getDocumentType(), enumName(document != null ? document.getDocumentType() : null))) {
|
||||
return false;
|
||||
}
|
||||
if (!matchesExact(c.getDocumentFamily(), enumName(document != null ? document.getDocumentFamily() : null))) {
|
||||
return false;
|
||||
}
|
||||
if (!matchesExact(c.getSourceType(), enumName(sourceDescriptor != null ? sourceDescriptor.sourceType() : null))) {
|
||||
return false;
|
||||
}
|
||||
if (!matchesMime(c.getMimeType(), sourceDescriptor != null ? sourceDescriptor.mediaType() : null)) {
|
||||
return false;
|
||||
}
|
||||
if (!matchesExact(c.getLanguage(), document != null ? document.getLanguageCode() : null)) {
|
||||
return false;
|
||||
}
|
||||
if (!matchesExact(c.getOwnerTenantKey(), document != null && document.getOwnerTenant() != null ? document.getOwnerTenant().getTenantKey() : null )) {
|
||||
return false;
|
||||
}
|
||||
return matchesExact(c.getEmbeddingPolicyHint(), policyHint(sourceDescriptor));
|
||||
}
|
||||
|
||||
private boolean matchesExact(String expected, String actual) {
|
||||
if (expected == null || expected.isBlank()) {
|
||||
return true;
|
||||
}
|
||||
return Objects.equals(expected, actual);
|
||||
}
|
||||
|
||||
private boolean matchesMime(String pattern, String actual) {
|
||||
if (pattern == null || pattern.isBlank()) {
|
||||
return true;
|
||||
}
|
||||
if (actual == null || actual.isBlank()) {
|
||||
return false;
|
||||
}
|
||||
return Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).matcher(actual).matches();
|
||||
}
|
||||
|
||||
private String enumName(Enum<?> value) {
|
||||
return value != null ? value.name() : null;
|
||||
}
|
||||
|
||||
private String policyHint(SourceDescriptor sourceDescriptor) {
|
||||
return attributeValue(sourceDescriptor, "embeddingPolicyHint");
|
||||
}
|
||||
|
||||
private String attributeValue(SourceDescriptor sourceDescriptor, String key) {
|
||||
if (sourceDescriptor == null) {
|
||||
return null;
|
||||
}
|
||||
Map<String, String> attributes = sourceDescriptor.attributes();
|
||||
if (attributes == null) {
|
||||
return null;
|
||||
}
|
||||
String value = attributes.get(key);
|
||||
return (value == null || value.isBlank()) ? null : value;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,31 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import at.procon.dip.embedding.config.EmbeddingProfileProperties;
|
||||
import at.procon.dip.embedding.policy.EmbeddingProfile;
|
||||
import java.util.List;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DefaultEmbeddingProfileResolver implements EmbeddingProfileResolver {
|
||||
|
||||
private final EmbeddingProfileProperties properties;
|
||||
|
||||
@Override
|
||||
public EmbeddingProfile resolve(String profileKey) {
|
||||
if (profileKey == null || profileKey.isBlank()) {
|
||||
throw new IllegalArgumentException("Embedding profile key must not be blank");
|
||||
}
|
||||
|
||||
EmbeddingProfileProperties.ProfileDefinition definition = properties.getDefinitions().get(profileKey);
|
||||
if (definition == null) {
|
||||
throw new IllegalArgumentException("Unknown embedding profile: " + profileKey);
|
||||
}
|
||||
|
||||
return new EmbeddingProfile(
|
||||
profileKey,
|
||||
List.copyOf(definition.getEmbedRepresentationTypes())
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,9 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.embedding.policy.EmbeddingPolicy;
|
||||
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
||||
|
||||
public interface EmbeddingPolicyResolver {
|
||||
EmbeddingPolicy resolve(Document document, SourceDescriptor sourceDescriptor);
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import at.procon.dip.embedding.policy.EmbeddingProfile;
|
||||
|
||||
public interface EmbeddingProfileResolver {
|
||||
EmbeddingProfile resolve(String profileKey);
|
||||
}
|
||||
@ -0,0 +1,71 @@
|
||||
# Example embedding policy/profile configuration.
dip:
  embedding:
    # Profiles: which representation types are embedded (EmbeddingProfileProperties).
    profiles:
      definitions:
        primary-only:
          embed-representation-types:
            - SEMANTIC_TEXT

        primary-and-chunks:
          embed-representation-types:
            - SEMANTIC_TEXT
            - CHUNK

        ted-semantic:
          embed-representation-types:
            - SEMANTIC_TEXT
            - TITLE_ABSTRACT
            - CHUNK

        mail-message:
          embed-representation-types:
            - SEMANTIC_TEXT
            - ATTACHMENT_ROLLUP

        attachment-chunks:
          embed-representation-types:
            - CHUNK

        # Embeds nothing.
        disabled:
          embed-representation-types: []

    # Policies: which model and profile a document gets (EmbeddingPolicyProperties).
    policies:
      # Used when no rule below matches.
      default-policy:
        policy-key: generic-default
        model-key: e5-default
        query-model-key: e5-default
        profile-key: primary-and-chunks
        enabled: true

      # Rules are evaluated top to bottom; the first matching rule wins.
      rules:
        - name: ted-notice
          when:
            document-family: TED_NOTICE
          use:
            policy-key: ted-default
            model-key: e5-default
            query-model-key: e5-default
            profile-key: ted-semantic
            enabled: true

        - name: email-root
          when:
            document-type: EMAIL
          use:
            policy-key: mail-default
            model-key: e5-default
            query-model-key: e5-default
            profile-key: mail-message
            enabled: true

        - name: mail-attachment-pdf
          when:
            source-type: MAIL_ATTACHMENT
            # mime-type is a case-insensitive regex matched against the whole media type.
            mime-type: application/pdf
          use:
            policy-key: mail-attachment-pdf
            model-key: e5-default
            query-model-key: e5-default
            profile-key: attachment-chunks
            enabled: true

        - name: skip-images
          when:
            mime-type: image/.*
          use:
            policy-key: no-embedding-images
            model-key: e5-default
            query-model-key: e5-default
            profile-key: disabled
            enabled: false
||||
@ -0,0 +1,135 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
import at.procon.dip.domain.access.DocumentVisibility;
|
||||
import at.procon.dip.domain.document.DocumentFamily;
|
||||
import at.procon.dip.domain.document.DocumentStatus;
|
||||
import at.procon.dip.domain.document.DocumentType;
|
||||
import at.procon.dip.domain.document.SourceType;
|
||||
import at.procon.dip.domain.document.entity.Document;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyCondition;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyProperties;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyRule;
|
||||
import at.procon.dip.embedding.config.EmbeddingPolicyUse;
|
||||
import at.procon.dip.ingestion.spi.OriginalContentStoragePolicy;
|
||||
import at.procon.dip.ingestion.spi.SourceDescriptor;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class DefaultEmbeddingPolicyResolverTest {
|
||||
|
||||
@Test
|
||||
void shouldPreferHintAndOverrideFromAttributes() {
|
||||
EmbeddingPolicyProperties properties = baseProperties();
|
||||
|
||||
EmbeddingPolicyRule mailRule = new EmbeddingPolicyRule();
|
||||
EmbeddingPolicyUse mailUse = new EmbeddingPolicyUse();
|
||||
mailUse.setPolicyKey("mail-default");
|
||||
mailUse.setModelKey("e5-default");
|
||||
mailUse.setQueryModelKey("e5-default");
|
||||
mailUse.setProfileKey("mail-message");
|
||||
mailUse.setEnabled(true);
|
||||
mailRule.setUse(mailUse);
|
||||
properties.getRules().add(mailRule);
|
||||
|
||||
EmbeddingPolicyRule tedRule = new EmbeddingPolicyRule();
|
||||
EmbeddingPolicyUse tedUse = new EmbeddingPolicyUse();
|
||||
tedUse.setPolicyKey("ted-default");
|
||||
tedUse.setModelKey("e5-default");
|
||||
tedUse.setQueryModelKey("e5-default");
|
||||
tedUse.setProfileKey("ted-semantic");
|
||||
tedUse.setEnabled(true);
|
||||
tedRule.setUse(tedUse);
|
||||
properties.getRules().add(tedRule);
|
||||
|
||||
DefaultEmbeddingPolicyResolver resolver = new DefaultEmbeddingPolicyResolver(properties);
|
||||
|
||||
SourceDescriptor descriptor = sourceDescriptor(SourceType.MAIL_MESSAGE, "message/rfc822", Map.of(
|
||||
"embeddingPolicyHint", "mail-default",
|
||||
"embeddingPolicyKey", "ted-default"
|
||||
));
|
||||
|
||||
var policy = resolver.resolve(document(DocumentFamily.GENERIC, DocumentType.EMAIL, "en"), descriptor);
|
||||
|
||||
assertThat(policy.policyKey()).isEqualTo("ted-default");
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldMatchByMimeTypeUsingMediaTypeField() {
|
||||
EmbeddingPolicyProperties properties = baseProperties();
|
||||
EmbeddingPolicyRule rule = new EmbeddingPolicyRule();
|
||||
EmbeddingPolicyCondition when = new EmbeddingPolicyCondition();
|
||||
when.setSourceType("MAIL_ATTACHMENT");
|
||||
when.setMimeType("application/pdf");
|
||||
rule.setWhen(when);
|
||||
EmbeddingPolicyUse use = new EmbeddingPolicyUse();
|
||||
use.setPolicyKey("mail-attachment-pdf");
|
||||
use.setModelKey("e5-default");
|
||||
use.setQueryModelKey("e5-default");
|
||||
use.setProfileKey("attachment-chunks");
|
||||
rule.setUse(use);
|
||||
properties.getRules().add(rule);
|
||||
|
||||
DefaultEmbeddingPolicyResolver resolver = new DefaultEmbeddingPolicyResolver(properties);
|
||||
|
||||
var policy = resolver.resolve(document(DocumentFamily.GENERIC, DocumentType.FILE, "en"),
|
||||
sourceDescriptor(SourceType.MAIL_ATTACHMENT, "application/pdf", Map.of()));
|
||||
|
||||
assertThat(policy.policyKey()).isEqualTo("mail-attachment-pdf");
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldFailForUnknownOverridePolicy() {
|
||||
DefaultEmbeddingPolicyResolver resolver = new DefaultEmbeddingPolicyResolver(baseProperties());
|
||||
SourceDescriptor descriptor = sourceDescriptor(SourceType.FILE_IMPORT, "application/pdf", Map.of(
|
||||
"embeddingPolicyKey", "missing-policy"
|
||||
));
|
||||
assertThatThrownBy(() -> resolver.resolve(document(DocumentFamily.GENERIC, DocumentType.FILE, "en"), descriptor))
|
||||
.isInstanceOf(IllegalArgumentException.class)
|
||||
.hasMessageContaining("Unknown embedding policy key");
|
||||
}
|
||||
|
||||
private EmbeddingPolicyProperties baseProperties() {
|
||||
EmbeddingPolicyProperties properties = new EmbeddingPolicyProperties();
|
||||
EmbeddingPolicyUse defaultPolicy = new EmbeddingPolicyUse();
|
||||
defaultPolicy.setPolicyKey("generic-default");
|
||||
defaultPolicy.setModelKey("e5-default");
|
||||
defaultPolicy.setQueryModelKey("e5-default");
|
||||
defaultPolicy.setProfileKey("primary-and-chunks");
|
||||
defaultPolicy.setEnabled(true);
|
||||
properties.setDefaultPolicy(defaultPolicy);
|
||||
return properties;
|
||||
}
|
||||
|
||||
private Document document(DocumentFamily family, DocumentType type, String language) {
|
||||
return Document.builder()
|
||||
.id(UUID.randomUUID())
|
||||
.documentFamily(family)
|
||||
.documentType(type)
|
||||
.languageCode(language)
|
||||
.status(DocumentStatus.IMPORTED)
|
||||
.visibility(DocumentVisibility.PUBLIC)
|
||||
.title("Test document")
|
||||
.build();
|
||||
}
|
||||
|
||||
private SourceDescriptor sourceDescriptor(SourceType sourceType, String mediaType, Map<String, String> attrs) {
|
||||
return new SourceDescriptor(
|
||||
null,
|
||||
sourceType,
|
||||
"source-ref",
|
||||
"/tmp/source",
|
||||
"source.bin",
|
||||
mediaType,
|
||||
null,
|
||||
null,
|
||||
OffsetDateTime.now(),
|
||||
OriginalContentStoragePolicy.DEFAULT,
|
||||
attrs
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,37 @@
|
||||
package at.procon.dip.embedding.service;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
import at.procon.dip.domain.document.RepresentationType;
|
||||
import at.procon.dip.embedding.config.EmbeddingProfileProperties;
|
||||
import java.util.List;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class DefaultEmbeddingProfileResolverTest {
|
||||
|
||||
@Test
|
||||
void shouldResolveKnownProfile() {
|
||||
EmbeddingProfileProperties properties = new EmbeddingProfileProperties();
|
||||
EmbeddingProfileProperties.ProfileDefinition def = new EmbeddingProfileProperties.ProfileDefinition();
|
||||
def.setEmbedRepresentationTypes(List.of(RepresentationType.SEMANTIC_TEXT, RepresentationType.CHUNK));
|
||||
properties.getDefinitions().put("primary-and-chunks", def);
|
||||
|
||||
DefaultEmbeddingProfileResolver resolver = new DefaultEmbeddingProfileResolver(properties);
|
||||
|
||||
var profile = resolver.resolve("primary-and-chunks");
|
||||
|
||||
assertThat(profile.profileKey()).isEqualTo("primary-and-chunks");
|
||||
assertThat(profile.embedRepresentationTypes()).containsExactly(RepresentationType.SEMANTIC_TEXT, RepresentationType.CHUNK);
|
||||
}
|
||||
|
||||
@Test
|
||||
void shouldFailForUnknownProfile() {
|
||||
EmbeddingProfileProperties properties = new EmbeddingProfileProperties();
|
||||
DefaultEmbeddingProfileResolver resolver = new DefaultEmbeddingProfileResolver(properties);
|
||||
|
||||
assertThatThrownBy(() -> resolver.resolve("missing"))
|
||||
.isInstanceOf(IllegalArgumentException.class)
|
||||
.hasMessageContaining("Unknown embedding profile");
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue