From 152d9739af0969090ddb7db63ad6a93054147053 Mon Sep 17 00:00:00 2001
From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com>
Date: Thu, 9 Apr 2026 12:55:42 +0200
Subject: [PATCH] batch embedding

---
 docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md | 66 +++++++++++++++------
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md b/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md
index 69f823a..04ccb2f 100644
--- a/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md
+++ b/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md
@@ -1,12 +1,11 @@
 # Vector-sync HTTP embedding provider
 
-This patch adds a new provider type:
+This provider supports two endpoints:
 
-- `http-vector-sync`
+- `POST {baseUrl}/vector-sync` for single-text requests
+- `POST {baseUrl}/vectorize-batch` for batch document requests
 
-## Request
-Endpoint:
-- `POST {baseUrl}/vector-sync`
+## Single request
 
 Request body:
 ```json
@@ -16,24 +15,53 @@ Request body:
 }
 ```
 
-## Response
+## Batch request
+
+Request body:
 ```json
 {
-  "runtime_ms": 472.49,
-  "vector": [0.1, 0.2, 0.3],
-  "incomplete": false,
-  "combined_vector": null,
-  "token_count": 9,
   "model": "intfloat/multilingual-e5-large",
-  "max_seq_length": 512
+  "truncate_text": false,
+  "truncate_length": 512,
+  "chunk_size": 20,
+  "items": [
+    {
+      "id": "2f48fd5c-9d39-4d80-9225-ea0c59c77c9a",
+      "text": "This is a sample text to vectorize"
+    }
+  ]
 }
 ```
 
-## Notes
-- supports a single text per request
-- works for both document and query embeddings
-- validates returned vector dimension against the configured embedding model
-- keeps the existing `/embed` provider in place as `http-json`
+## Provider configuration
+
+```yaml
+batch-request:
+  truncate-text: false
+  truncate-length: 512
+  chunk-size: 20
+```
+
+These values are used for `/vectorize-batch` calls and can also be overridden per request via `EmbeddingRequest.providerOptions()`.
+
+## Orchestrator batch processing
+
+To let `RepresentationEmbeddingOrchestrator` send multiple representations in one provider call, enable batch processing for jobs and for the model:
+
+```yaml
+dip:
+  embedding:
+    jobs:
+      enabled: true
+      process-in-batches: true
+      execution-batch-size: 20
+
+    models:
+      e5-default:
+        supports-batch: true
+```
 
-## Example config
-See `application-new-example-vector-sync-provider.yml`.
+Notes:
+- jobs are grouped by `modelKey`
+- non-batch-capable models still fall back to single-item execution
+- `execution-batch-size` controls how many texts are sent in one `/vectorize-batch` request