batch embedding support

master
trifonovt 2 weeks ago
parent 678db76415
commit 6ae39b4ea5

@ -1,6 +1,7 @@
dip: dip:
runtime: runtime:
mode: NEW mode: NEW
search: search:
# Default page size for search results # Default page size for search results
default-page-size: 20 default-page-size: 20
@ -36,9 +37,11 @@ dip:
debug-top-hits-per-engine: 10 debug-top-hits-per-engine: 10
embedding: embedding:
enabled: true enabled: false
jobs: jobs:
enabled: true enabled: true
process-in-batches: true
execution-batch-size: 20
default-document-model: e5-default default-document-model: e5-default
default-query-model: e5-default default-query-model: e5-default
@ -58,10 +61,14 @@ dip:
vector-sync-e5: vector-sync-e5:
type: http-vector-sync type: http-vector-sync
base-url: http://localhost:8001 base-url: http://localhost:8001
connect-timeout: 5s connect-timeout: 30s
read-timeout: 60s read-timeout: 300s
headers: headers:
X-Client: dip X-Client: dip
batch-request:
truncate-text: true
truncate-length: 512
chunk-size: 8
models: models:
@ -79,6 +86,7 @@ dip:
dimensions: 1024 dimensions: 1024
distance-metric: COSINE distance-metric: COSINE
supports-query-embedding-mode: true supports-query-embedding-mode: true
supports-batch: true
active: true active: true
profiles: profiles:
@ -215,7 +223,7 @@ dip:
# TED packages download configuration # TED packages download configuration
ted-download: ted-download:
# Enable/disable automatic package download # Enable/disable automatic package download
enabled: true enabled: false
# Base URL for TED Daily Packages # Base URL for TED Daily Packages
base-url: https://ted.europa.eu/packages/daily/ base-url: https://ted.europa.eu/packages/daily/
# Download directory for tar.gz files # Download directory for tar.gz files
@ -238,6 +246,22 @@ dip:
delay-between-downloads: 5000 delay-between-downloads: 5000
# Delete tar.gz after ingestion # Delete tar.gz after ingestion
delete-after-ingestion: true delete-after-ingestion: true
migration:
legacy-audit:
# Enable/disable the Wave 1 / Milestone A legacy integrity audit subsystem
enabled: true
# Optional startup execution; the audit does not modify the legacy data it scans and writes only to the audit run/finding tables
startup-run-enabled: true
# Maximum number of legacy TED documents to scan during startup (0 = all)
startup-run-limit: 0
# Page size for legacy TED document scanning
page-size: 100
# Maximum number of persisted findings in a single run
max-findings-per-run: 10000
# Maximum number of grouped duplicate samples captured for aggregate checks
max-duplicate-samples: 100
ted: # Phase 3 TED projection configuration ted: # Phase 3 TED projection configuration
projection: projection:
# Enable/disable dual-write into the TED projection model on top of DOC.doc_document # Enable/disable dual-write into the TED projection model on top of DOC.doc_document

Loading…
Cancel
Save