batch embedding support

master
trifonovt 2 weeks ago
parent 678db76415
commit 6ae39b4ea5

@ -1,6 +1,7 @@
dip:
runtime:
mode: NEW
search:
# Default page size for search results
default-page-size: 20
@ -36,9 +37,11 @@ dip:
debug-top-hits-per-engine: 10
embedding:
enabled: true
enabled: false
jobs:
enabled: true
process-in-batches: true
execution-batch-size: 20
default-document-model: e5-default
default-query-model: e5-default
@ -58,10 +61,14 @@ dip:
vector-sync-e5:
type: http-vector-sync
base-url: http://localhost:8001
connect-timeout: 5s
read-timeout: 60s
connect-timeout: 30s
read-timeout: 300s
headers:
X-Client: dip
batch-request:
truncate-text: true
truncate-length: 512
chunk-size: 8
models:
@ -79,6 +86,7 @@ dip:
dimensions: 1024
distance-metric: COSINE
supports-query-embedding-mode: true
supports-batch: true
active: true
profiles:
@ -215,7 +223,7 @@ dip:
# TED packages download configuration
ted-download:
# Enable/disable automatic package download
enabled: true
enabled: false
# Base URL for TED Daily Packages
base-url: https://ted.europa.eu/packages/daily/
# Download directory for tar.gz files
@ -238,6 +246,22 @@ dip:
delay-between-downloads: 5000
# Delete tar.gz after ingestion
delete-after-ingestion: true
migration:
legacy-audit:
# Enable/disable the Wave 1 / Milestone A legacy integrity audit subsystem
enabled: true
# Optional startup execution; the audit is read-only apart from writing its own audit run/finding tables
startup-run-enabled: true
# Maximum number of legacy TED documents to scan during startup (0 = all)
startup-run-limit: 0
# Page size for legacy TED document scanning
page-size: 100
# Maximum number of persisted findings in a single run
max-findings-per-run: 10000
# Maximum number of grouped duplicate samples captured for aggregate checks
max-duplicate-samples: 100
ted: # Phase 3 TED projection configuration
projection:
# Enable/disable dual-write into the TED projection model on top of DOC.doc_document

Loading…
Cancel
Save