From 152d9739af0969090ddb7db63ad6a93054147053 Mon Sep 17 00:00:00 2001
From: trifonovt <87468028+TihomirTrifonov@users.noreply.github.com>
Date: Thu, 9 Apr 2026 12:55:42 +0200
Subject: [PATCH] batch embedding

---
 docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md | 66 +++++++++++++++------
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md b/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md
index 69f823a..04ccb2f 100644
--- a/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md
+++ b/docs/embedding/VECTOR_SYNC_HTTP_PROVIDER.md
@@ -1,12 +1,11 @@
 # Vector-sync HTTP embedding provider
 
-This patch adds a new provider type:
+The `http-vector-sync` provider supports two endpoints:
 
-- `http-vector-sync`
+- `POST {baseUrl}/vector-sync` for single-text requests
+- `POST {baseUrl}/vectorize-batch` for batch document requests
 
-## Request
-Endpoint:
-- `POST {baseUrl}/vector-sync`
+## Single request
 
 Request body:
 ```json
@@ -16,24 +15,53 @@ Request body:
 }
 ```
 
-## Response
+## Batch request
+
+Request body:
 ```json
 {
-  "runtime_ms": 472.49,
-  "vector": [0.1, 0.2, 0.3],
-  "incomplete": false,
-  "combined_vector": null,
-  "token_count": 9,
   "model": "intfloat/multilingual-e5-large",
-  "max_seq_length": 512
+  "truncate_text": false,
+  "truncate_length": 512,
+  "chunk_size": 20,
+  "items": [
+    {
+      "id": "2f48fd5c-9d39-4d80-9225-ea0c59c77c9a",
+      "text": "This is a sample text to vectorize"
+    }
+  ]
 }
 ```
 
-## Notes
-- supports a single text per request
-- works for both document and query embeddings
-- validates returned vector dimension against the configured embedding model
-- keeps the existing `/embed` provider in place as `http-json`
+## Provider configuration
+
+```yaml
+batch-request:
+  truncate-text: false
+  truncate-length: 512
+  chunk-size: 20
+```
+
+These values are the defaults sent as `truncate_text`, `truncate_length`, and `chunk_size` in `/vectorize-batch` calls; each can be overridden per request via `EmbeddingRequest.providerOptions()`.
+
+## Orchestrator batch processing
+
+To let `RepresentationEmbeddingOrchestrator` send multiple representations in one provider call, enable batch processing for jobs and for the model:
+
+```yaml
+dip:
+  embedding:
+    jobs:
+      enabled: true
+      process-in-batches: true
+      execution-batch-size: 20
+
+    models:
+      e5-default:
+        supports-batch: true
+```
 
-## Example config
-See `application-new-example-vector-sync-provider.yml`.
+Notes:
+- jobs are grouped by `modelKey`
+- models without `supports-batch: true` fall back to single-item execution
+- `execution-batch-size` sets the maximum number of texts sent in one `/vectorize-batch` request