Initial import

trifonovt 1 month ago
commit 21edbc35a2

---
# TED Daily Package Download - Implementation
## Overview
The system automatically downloads TED Daily Packages and processes them.
## Components
### 1. Entity: TedDailyPackage ✅
- Download tracking
- Status management
- Idempotency via hash
### 2. Repository: TedDailyPackageRepository ✅
- Package management
- Status queries
- Latest-package lookup
### 3. Configuration: DownloadProperties ✅
- Download settings
- URL configuration
- Rate limiting
### 4. Service: TedPackageDownloadService (in progress)
- Package download
- tar.gz extraction
- Progress tracking
### 5. Camel Route: TedPackageDownloadRoute (pending)
- Scheduled downloads
- Error handling
- Integration with the existing XML processing
## Workflow
1. **Initialization**
   - Determine the most recent package from the DB
   - Compute the starting point (current year, or most recent package +1)
2. **Download loop**
   - Current year: start at most recent +1, continue until 404 (max 4 in a row)
   - Previous years: download backwards, throttled
3. **Package processing**
   - Download the tar.gz
   - Compute the hash (SHA-256)
   - Check against the DB (idempotency)
   - Extract the XML files
   - Forward to the XML processing route
4. **Status tracking**
   - PENDING → DOWNLOADING → DOWNLOADED → PROCESSING → COMPLETED
   - Error handling: FAILED, NOT_FOUND
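The download loop and the idempotency check above can be sketched as follows. This is a plain-Python illustration, not the service code: `fetch` stands in for the real HTTP client, the `known_hashes` set stands in for the DB lookup, and package naming/status transitions are omitted.

```python
import hashlib

MAX_CONSECUTIVE_404 = 4  # mirrors max-consecutive-404 in application.yml

def sha256_of(data: bytes) -> str:
    """SHA-256 hex digest used as the idempotency key (the file_hash column)."""
    return hashlib.sha256(data).hexdigest()

def download_year(year: int, last_serial: int, fetch, known_hashes: set) -> list:
    """Download packages serial by serial, stopping after 4 consecutive 404s.

    fetch(year, serial) is a placeholder for the HTTP download; it returns
    the package bytes, or None on a 404. Returns the serials newly stored.
    """
    stored, misses, serial = [], 0, last_serial + 1
    while misses < MAX_CONSECUTIVE_404:
        data = fetch(year, serial)
        if data is None:               # 404: count the miss, keep probing
            misses += 1
        else:
            misses = 0
            digest = sha256_of(data)
            if digest not in known_hashes:  # idempotency check against the DB
                known_hashes.add(digest)
                stored.append(serial)
        serial += 1
    return stored
```

Re-running the loop over already-seen packages is a no-op, because every package's hash is already in the store; that is the property the `file_hash` column exists to guarantee.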
## Configuration (application.yml)
```yaml
ted:
  download:
    enabled: true
    base-url: https://ted.europa.eu/packages/daily/
    download-directory: D:/ted.europe/downloads
    extract-directory: D:/ted.europe/extracted
    start-year: 2024
    max-consecutive-404: 4
    poll-interval: 3600000          # 1 hour
    download-timeout: 300000        # 5 minutes
    max-concurrent-downloads: 2
    delay-between-downloads: 5000   # 5 seconds
    delete-after-extraction: true
    prioritize-current-year: true
```
## Database Migration
```sql
CREATE TABLE TED.ted_daily_package (
    id UUID PRIMARY KEY,
    package_identifier VARCHAR(20) NOT NULL UNIQUE,
    year INTEGER NOT NULL,
    serial_number INTEGER NOT NULL,
    download_url VARCHAR(500) NOT NULL,
    file_hash VARCHAR(64),
    xml_file_count INTEGER,
    processed_count INTEGER DEFAULT 0,
    failed_count INTEGER DEFAULT 0,
    download_status VARCHAR(30) NOT NULL DEFAULT 'PENDING',
    error_message TEXT,
    downloaded_at TIMESTAMP WITH TIME ZONE,
    processed_at TIMESTAMP WITH TIME ZONE,
    download_duration_ms BIGINT,
    processing_duration_ms BIGINT,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
    UNIQUE(year, serial_number)
);
CREATE INDEX idx_package_identifier ON TED.ted_daily_package(package_identifier);
CREATE INDEX idx_package_year_serial ON TED.ted_daily_package(year, serial_number);
CREATE INDEX idx_package_status ON TED.ted_daily_package(download_status);
CREATE INDEX idx_package_downloaded_at ON TED.ted_daily_package(downloaded_at);
```
## Next Steps
1. Finish the package download service
2. Create the Camel route
3. Run the database migration
4. Testing & integration

---
# Python Embedding Service Dockerfile
# Author: Martin.Schweitzer@procon.co.at and claude.ai
#
# Provides HTTP API for generating text embeddings using sentence-transformers
# Model: intfloat/multilingual-e5-large (1024 dimensions)
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY requirements-embedding.txt .
RUN pip install --no-cache-dir -r requirements-embedding.txt
# Copy application code
COPY embedding_service.py .
# Pre-download model (optional - reduces startup time)
# RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large')"
# Environment variables
ENV MODEL_NAME=intfloat/multilingual-e5-large
ENV MAX_LENGTH=512
ENV HOST=0.0.0.0
ENV PORT=8001
EXPOSE 8001
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8001/health || exit 1
# Run the service
CMD ["python", "embedding_service.py"]

---
# ENUM Type Fix - Execution Instructions
## Problem
The database has PostgreSQL ENUM types, but Hibernate is configured to use VARCHAR with CHECK constraints. This causes the error:
```
ERROR: column "contract_nature" is of type ted.contract_nature but expression is of type character varying
```
## Solution
Execute the `fix-enum-types-comprehensive.sql` script on the remote database.
## Execution Methods
### Option 1: Using psql (Recommended)
```bash
psql -h 94.130.218.54 -p 5432 -U postgres -d Sales -f fix-enum-types-comprehensive.sql
```
When prompted, enter password: `PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc=`
### Option 2: Using psql with inline password (Windows PowerShell)
```powershell
$env:PGPASSWORD="PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc="
psql -h 94.130.218.54 -p 5432 -U postgres -d Sales -f fix-enum-types-comprehensive.sql
```
### Option 3: Copy-paste into database client
If you're using DBeaver, pgAdmin, or another GUI tool:
1. Connect to:
   - Host: `94.130.218.54`
   - Port: `5432`
   - Database: `Sales`
   - Username: `postgres`
   - Password: `PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc=`
2. Open `fix-enum-types-comprehensive.sql` in the query editor
3. Execute the entire script
## What the script does
1. ✅ Converts ENUM columns to VARCHAR(50) while preserving existing data
2. ✅ Drops the old ENUM types
3. ✅ Adds CHECK constraints for data validation
4. ✅ Updates the search function to use VARCHAR parameters
5. ✅ Runs a verification query at the end
## Verification
After execution, you should see:
```
status | column_name | data_type | character_maximum_length
---------------------------------------------|-----------------------|-------------------|--------------------------
ENUM types successfully converted to VARCHAR!| contract_nature | character varying | 50
ENUM types successfully converted to VARCHAR!| notice_type | character varying | 50
ENUM types successfully converted to VARCHAR!| procedure_type | character varying | 50
ENUM types successfully converted to VARCHAR!| vectorization_status | character varying | 50
```
## After execution
Restart your Spring Boot application. The error should be resolved and the application will be able to insert data into the database.

---
European Union Public Licence
V. 1.2
EUPL © the European Union 2007, 2016
This European Union Public Licence (the 'EUPL') applies to the Work (as defined below) which is provided under the terms of this Licence. Any use of the Work, other than as authorised under this Licence is prohibited (to the extent such use is covered by a right of the copyright holder of the Work).
The Work is provided under the terms of this Licence when the Licensor (as defined below) has placed the following notice immediately following the copyright notice for the Work:
Licensed under the EUPL
or has expressed by any other means his willingness to license under the EUPL.
1. Definitions
In this Licence, the following terms have the following meaning:
— 'The Licence': this Licence.
— 'The Original Work': the work or software distributed or communicated by the Licensor under this Licence, available as Source Code and also as Executable Code as the case may be.
— 'Derivative Works': the works or software that could be created by the Licensee, based upon the Original Work or modifications thereof. This Licence does not define the extent of modification or dependence on the Original Work required in order to classify a work as a Derivative Work; this extent is determined by copyright law applicable in the country mentioned in Article 15.
— 'The Work': the Original Work or its Derivative Works.
— 'The Source Code': the human-readable form of the Work which is the most convenient for people to study and modify.
— 'The Executable Code': any code which has generally been compiled and which is meant to be interpreted by a computer as a program.
— 'The Licensor': the natural or legal person that distributes or communicates the Work under the Licence.
— 'Contributor(s)': any natural or legal person who modifies the Work under the Licence, or otherwise contributes to the creation of a Derivative Work.
— 'The Licensee' or 'You': any natural or legal person who makes any usage of the Work under the terms of the Licence.
— 'Distribution' or 'Communication': any act of selling, giving, lending, renting, distributing, communicating, transmitting, or otherwise making available, online or offline, copies of the Work or providing access to its essential functionalities at the disposal of any other natural or legal person.
2. Scope of the rights granted by the Licence
The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, sublicensable licence to do the following, for the duration of copyright vested in the Original Work:
— use the Work in any circumstance and for all usage,
— reproduce the Work,
— modify the Work, and make Derivative Works based upon the Work,
— communicate to the public, including the right to make available or display the Work or copies thereof to the public and perform publicly, as the case may be, the Work,
— distribute the Work or copies thereof,
— lend and rent the Work or copies thereof,
— sublicense rights in the Work or copies thereof.
Those rights can be exercised on any media, supports and formats, whether now known or later invented, as far as the applicable law permits so.
In the countries where moral rights apply, the Licensor waives his right to exercise his moral right to the extent allowed by law in order to make effective the licence of the economic rights here above listed.
The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to any patents held by the Licensor, to the extent necessary to make use of the rights granted on the Work under this Licence.
3. Communication of the Source Code
The Licensor may provide the Work either in its Source Code form, or as Executable Code. If the Work is provided as Executable Code, the Licensor provides in addition a machine-readable copy of the Source Code of the Work along with each copy of the Work that the Licensor distributes or indicates, in a notice following the copyright notice attached to the Work, a repository where the Source Code is easily and freely accessible for as long as the Licensor continues to distribute or communicate the Work.
4. Limitations on copyright
Nothing in this Licence is intended to deprive the Licensee of the benefits from any exception or limitation to the exclusive rights of the rights owners in the Work, of the exhaustion of those rights or of other applicable limitations thereto.
5. Obligations of the Licensee
The grant of the rights mentioned above is subject to some restrictions and obligations imposed on the Licensee. Those obligations are the following:
Attribution right: The Licensee shall keep intact all copyright, patent or trademarks notices and all notices that refer to the Licence and to the disclaimer of warranties. The Licensee must include a copy of such notices and a copy of the Licence with every copy of the Work he/she distributes or communicates. The Licensee must cause any Derivative Work to carry prominent notices stating that the Work has been modified and the date of modification.
Copyleft clause: If the Licensee distributes or communicates copies of the Original Works or Derivative Works, this Distribution or Communication will be done under the terms of this Licence or of a later version of this Licence unless the Original Work is expressly distributed only under this version of the Licence — for example by communicating 'EUPL v. 1.2 only'. The Licensee (becoming Licensor) cannot offer or impose any additional terms or conditions on the Work or Derivative Work that alter or restrict the terms of the Licence.
Compatibility clause: If the Licensee Distributes or Communicates Derivative Works or copies thereof based upon both the Work and another work licensed under a Compatible Licence, this Distribution or Communication can be done under the terms of this Compatible Licence. For the sake of this clause, 'Compatible Licence' refers to the licences listed in the appendix attached to this Licence. Should the Licensee's obligations under the Compatible Licence conflict with his/her obligations under this Licence, the obligations of the Compatible Licence shall prevail.
Provision of Source Code: When distributing or communicating copies of the Work, the Licensee will provide a machine-readable copy of the Source Code or indicate a repository where this Source will be easily and freely available for as long as the Licensee continues to distribute or communicate the Work.
Legal Protection: This Licence does not grant permission to use the trade names, trademarks, service marks, or names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the copyright notice.
6. Chain of Authorship
The original Licensor warrants that the copyright in the Original Work granted hereunder is owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence.
Each Contributor warrants that the copyright in the modifications he/she brings to the Work are owned by him/her or licensed to him/her and that he/she has the power and authority to grant the Licence.
Each time You accept the Licence, the original Licensor and subsequent Contributors grant You a licence to their contributions to the Work, under the terms of this Licence.
7. Disclaimer of Warranty
The Work is a work in progress, which is continuously improved by numerous Contributors. It is not a finished work and may therefore contain defects or 'bugs' inherent to this type of development.
For the above reason, the Work is provided under the Licence on an 'as is' basis and without warranties of any kind concerning the Work, including without limitation merchantability, fitness for a particular purpose, absence of defects or errors, accuracy, non-infringement of intellectual property rights other than copyright as stated in Article 6 of this Licence.
This disclaimer of warranty is an essential part of the Licence and a condition for the grant of any rights to the Work.
8. Disclaimer of Liability
Except in the cases of wilful misconduct or damages directly caused to natural persons, the Licensor will in no event be liable for any direct or indirect, material or moral, damages of any kind, arising out of the Licence or of the use of the Work, including without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, loss of data or any commercial damage, even if the Licensor has been advised of the possibility of such damage. However, the Licensor will be liable under statutory product liability laws as far such laws apply to the Work.
9. Additional agreements
While distributing the Work, You may choose to conclude an additional agreement, defining obligations or services consistent with this Licence. However, if accepting obligations, You may act only on your own behalf and on your sole responsibility, not on behalf of the original Licensor or any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against such Contributor by the fact You have accepted any warranty or additional liability.
10. Acceptance of the Licence
The provisions of this Licence can be accepted by clicking on an icon 'I agree' placed under the bottom of a window displaying the text of this Licence or by affirming consent in any other similar way, in accordance with the rules of applicable law. Clicking on that icon indicates your clear and irrevocable acceptance of this Licence and all of its terms and conditions.
Similarly, you irrevocably accept this Licence and all of its terms and conditions by exercising any rights granted to You by Article 2 of this Licence, such as the use of the Work, the creation by You of a Derivative Work or the Distribution or Communication by You of the Work or copies thereof.
11. Information to the public
In case of any Distribution or Communication of the Work by means of electronic communication by You (for example, by offering to download the Work from a remote location) the distribution channel or media (for example, a website) must at least provide to the public the information requested by the applicable law regarding the Licensor, the Licence and the way it may be accessible, concluded, stored and reproduced by the Licensee.
12. Termination of the Licence
The Licence and the rights granted hereunder will terminate automatically upon any breach by the Licensee of the terms of the Licence.
Such a termination will not terminate the licences of any person who has received the Work from the Licensee under the Licence, provided such persons remain in full compliance with the Licence.
13. Miscellaneous
Without prejudice of Article 9 above, the Licence represents the complete agreement between the Parties as to the Work.
If any provision of the Licence is invalid or unenforceable under applicable law, this will not affect the validity or enforceability of the Licence as a whole. Such provision will be construed or reformed so as necessary to make it valid and enforceable.
The European Commission may publish other linguistic versions or new versions of this Licence or updated versions of the Appendix, so far this is required and reasonable, without reducing the scope of the rights granted by the Licence. New versions of the Licence will be published with a unique version number.
All linguistic versions of this Licence, approved by the European Commission, have identical value. Parties can take advantage of the linguistic version of their choice.
14. Jurisdiction
Without prejudice to specific agreement between parties,
— any litigation resulting from the interpretation of this License, arising between the European Union institutions, bodies, offices or agencies, as a Licensor, and any Licensee, will be subject to the jurisdiction of the Court of Justice of the European Union, as laid down in article 272 of the Treaty on the Functioning of the European Union,
— any litigation arising between other parties and resulting from the interpretation of this License, will be subject to the exclusive jurisdiction of the competent court where the Licensor resides or conducts its primary business.
15. Applicable Law
Without prejudice to specific agreement between parties,
— this Licence shall be governed by the law of the European Union Member State where the Licensor has his seat, resides or has his registered office,
— this licence shall be governed by Belgian law if the Licensor has no seat, residence or registered office inside a European Union Member State.
Appendix
'Compatible Licences' according to Article 5 EUPL are:
— GNU General Public License (GPL) v. 2, v. 3
— GNU Affero General Public License (AGPL) v. 3
— Open Software License (OSL) v. 2.1, v. 3.0
— Eclipse Public License (EPL) v. 1.0
— CeCILL v. 2.0, v. 2.1
— Mozilla Public Licence (MPL) v. 2
— GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3
— Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for works other than software
— European Union Public Licence (EUPL) v. 1.1, v. 1.2
— Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong Reciprocity (LiLiQ-R+).
The European Commission may update this Appendix to later versions of the above licences without producing a new version of the EUPL, as long as they provide the rights granted in Article 2 of this Licence and protect the covered Source Code from exclusive appropriation.
All other changes or additions to this Appendix require the production of a new EUPL version.

---
# Memory Optimization Changes
## Problem
Persistent OutOfMemoryError crashes after ~30 minutes of operation.
## Root Causes Identified
1. **Parallel Processing** - Too many concurrent threads processing XML files
2. **Vectorization** - Heavy memory consumption from embedding service calls
3. **Connection Leaks** - HikariCP pool too large (20 connections)
4. **Duplicate File Processing** - File Consumer route was disabled but still causing issues
## Changes Made (2026-01-07)
### 1. Vectorization DISABLED
**File**: `application.yml`
```yaml
vectorization:
  enabled: false  # Was: true
```
**Reason**: Vectorization can be re-enabled later after stability is proven
### 2. Reduced Database Connection Pool
**File**: `application.yml`
```yaml
hikari:
  maximum-pool-size: 5            # Was: 20
  minimum-idle: 2                 # Was: 5
  idle-timeout: 300000            # Was: 600000
  max-lifetime: 900000            # Was: 1800000
  leak-detection-threshold: 60000 # NEW
```
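For context on the pool-size change: HikariCP's own "About Pool Sizing" guidance suggests roughly `(2 × cores) + effective spindle count` connections, which is already well below the original 20 on typical hardware. A tiny illustration of that rule of thumb (a hint, not the project's sizing logic):

```python
def pool_size_hint(core_count: int, spindle_count: int = 1) -> int:
    """HikariCP pool-sizing rule of thumb: (2 * cores) + effective spindles."""
    return core_count * 2 + spindle_count
```

On a 4-core machine with one disk this suggests about 9 connections; the 5 configured above is simply a more conservative choice while diagnosing the crashes.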
### 3. Sequential Processing (No Parallelism)
**File**: `TedPackageDownloadCamelRoute.java`
- **Parallel Processing DISABLED** in XML file splitter
- Thread pool reduced to 1 thread (was: 3)
- Only 1 package processed at a time (was: 3)
```java
.split(header("xmlFiles"))
    // .parallelProcessing() // DISABLED
    .stopOnException(false)
```
### 4. File Consumer Already Disabled
**File**: `TedDocumentRoute.java`
- File consumer route commented out to prevent duplicate processing
- Only Package Download Route processes files
### 5. Start Script with 8GB Heap
**File**: `start.bat`
```batch
java -Xms4g -Xmx8g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -jar target\ted-procurement-processor-1.0.0-SNAPSHOT.jar
```
## Performance Impact
### Before
- 3 packages in parallel
- 3 XML files in parallel per package
- Vectorization running
- ~150 concurrent operations
- **Crashes after 30 minutes**
### After
- 1 package at a time
- Sequential XML file processing
- No vectorization
- ~10-20 concurrent operations
- **Should run stably indefinitely**
## How to Start
1. **Reset stuck packages** (if any):
```bash
psql -h 94.130.218.54 -p 32333 -U postgres -d RELM -f reset-stuck-packages.sql
```
2. **Start application**:
```bash
start.bat
```
3. **Monitor memory**:
- Check logs for OutOfMemoryError
- Monitor with: `jconsole` or `jvisualvm`
## Re-enabling Features Later
### Step 1: Test with current settings
Run for 24-48 hours to confirm stability
### Step 2: Gradually increase parallelism
```java
// In TedPackageDownloadCamelRoute.java
.split(header("xmlFiles"))
    .parallelProcessing()
    .executorService(executorService()) // Set to 2-3 threads
```
### Step 3: Re-enable vectorization
```yaml
# In application.yml
vectorization:
  enabled: true
```
### Step 4: Increase connection pool (if needed)
```yaml
hikari:
  maximum-pool-size: 10  # Increase gradually
```
## Monitoring Commands
### Check running packages
```sql
SELECT package_identifier, download_status, updated_at
FROM ted.ted_daily_package
WHERE download_status IN ('DOWNLOADING', 'PROCESSING')
ORDER BY updated_at DESC;
```
### Check memory usage
```bash
jcmd <PID> GC.heap_info
```
### Check thread count
```bash
jcmd <PID> Thread.print | grep "ted-" | wc -l
```
## Notes
- **Processing is slower** but stable
- Approx. 50-100 documents/minute (sequential)
- Can process ~100,000 documents/day
- Vectorization can be run as separate batch job later

---
# TED Procurement Document Processor
**AI-Powered Semantic Search Demonstrator for EU Public Procurement**
A production-ready Spring Boot application showcasing advanced AI semantic search capabilities for processing and searching EU eForms public procurement notices from TED (Tenders Electronic Daily).
**Author:** Martin.Schweitzer@procon.co.at and claude.ai
---
## 🎯 Demonstrator Highlights
This application demonstrates the integration of cutting-edge technologies for intelligent document processing:
### 🧠 **AI Semantic Search**
- **Natural Language Queries**: Search 100,000+ procurement documents using plain language
- Example: *"medical equipment for hospitals in Germany"*
- Example: *"IT infrastructure projects in Austria"*
- **Multilingual Support**: 100+ languages supported via `intfloat/multilingual-e5-large` model
- **1024-Dimensional Embeddings**: High-precision vector representations for accurate similarity matching
- **Hybrid Search**: Combine semantic search with traditional filters (country, CPV codes, dates)
### 🗄️ **PostgreSQL Native XML**
- **Native XML Data Type**: Store complete eForms XML documents without serialization overhead
- **XPath Queries**: Direct XML querying within PostgreSQL for complex data extraction
- **Dual Storage Strategy**:
- Original XML preserved for audit trail and reprocessing
- Extracted metadata in structured columns for fast filtering
- Best of both worlds: flexibility + performance
### 🚀 **Production-Grade Features**
- **Fully Automated Pipeline**: Downloads and processes 30,000+ documents daily from ted.europa.eu
- **Apache Camel Integration**: Enterprise Integration Patterns (Timer, Splitter, SEDA, Dead Letter Channel)
- **Idempotent Processing**: SHA-256 hashing prevents duplicate imports
- **Async Vectorization**: Non-blocking background processing with 4 concurrent workers
- **pgvector Extension**: IVFFlat indexing for fast cosine similarity search at scale
- **eForms SDK 1.13**: Full schema validation for EU standard compliance
---
## Key Technologies
| Technology | Purpose | Benefit |
|------------|---------|---------|
| **PostgreSQL 16+** | Database with native XML | Query XML with XPath while maintaining structure |
| **pgvector** | Vector similarity search | Million-scale semantic search with cosine similarity |
| **Apache Camel** | Integration framework | Enterprise patterns for robust data pipelines |
| **Spring Boot 3.x** | Application framework | Modern Java with dependency injection |
| **intfloat/multilingual-e5-large** | Embedding model | State-of-the-art multilingual semantic understanding |
| **eForms SDK** | EU standard | Compliance with official procurement schemas |
## Architecture
```
┌─────────────────────────────────────────────────────────────────────┐
│ TED Procurement Processor │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌─────────────────┐ ┌───────────────────┐ │
│ │ File System │───▶│ Apache Camel │───▶│ Document │ │
│ │ (*.xml) │ │ Route │ │ Processing │ │
│ └──────────────┘ └─────────────────┘ │ Service │ │
│ └─────────┬─────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────┐ ┌─────────────────┐ ┌───────────────────┐ │
│ │ REST API │◀───│ Search │◀───│ PostgreSQL │ │
│ │ Controller │ │ Service │ │ + pgvector │ │
│ └──────────────┘ └─────────────────┘ └───────────────────┘ │
│ ▲ │
│ │ │
│ ┌──────────────────────────────────────────────────────┐ │
│ │ Vectorization Service (Async) │ │
│ │ intfloat/multilingual-e5-large (1024d) │ │
│ └──────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────┘
```
## Prerequisites
- Java 21+
- Maven 3.9+
- PostgreSQL 16+ with pgvector extension
- Python 3.11+ (for embedding service)
- Docker & Docker Compose (optional, for easy setup)
## 🚀 Automated Pipeline
**See [TED_AUTOMATED_PIPELINE.md](TED_AUTOMATED_PIPELINE.md) for complete documentation on the automated download, processing, and vectorization pipeline.**
The application automatically:
1. Downloads TED Daily Packages every hour from ted.europa.eu
2. Extracts and processes XML files
3. Stores in PostgreSQL with native XML support
4. Generates 1024-dimensional embeddings for semantic search
5. Enables REST API queries with natural language
## Quick Start
### 1. Start PostgreSQL with pgvector
Using Docker:
```bash
docker-compose up -d postgres
```
Or manually install PostgreSQL with pgvector extension.
### 2. Configure Application
Edit `src/main/resources/application.yml`:
```yaml
ted:
  input:
    directory: D:/ted.europe/2025-11.tar/2025-11/11 # Your TED XML directory
    pattern: "**/*.xml"
```
### 3. Build and Run
```bash
# Build
mvn clean package -DskipTests
# Run
java -jar target/ted-procurement-processor-1.0.0-SNAPSHOT.jar
```
### 4. Start Embedding Service (Optional)
For semantic search capabilities:
```bash
# Using Docker
docker-compose --profile with-embedding up -d embedding-service
# Or manually
pip install -r requirements-embedding.txt
python embedding_service.py
```
## Database Schema
### Main Tables
| Table | Description |
|-------|-------------|
| `procurement_document` | Main table with extracted metadata and original XML |
| `procurement_lot` | Individual lots within procurement notices |
| `organization` | Organizations mentioned in notices (buyers, review bodies) |
| `processing_log` | Audit trail for document processing events |
### Key Columns in `procurement_document`
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Primary key |
| `document_hash` | VARCHAR(64) | SHA-256 hash for idempotency |
| `publication_id` | VARCHAR(50) | TED publication ID (e.g., "00786665-2025") |
| `notice_url` | VARCHAR(255) | TED website URL (e.g., "https://ted.europa.eu/en/notice/-/detail/786665-2025") |
| `xml_document` | XML | Original document |
| `text_content` | TEXT | Extracted text for vectorization |
| `content_vector` | vector(1024) | Embedding for semantic search |
| `buyer_country_code` | VARCHAR(10) | ISO 3166-1 alpha-3 country code |
| `cpv_codes` | VARCHAR(100)[] | CPV classification codes |
| `nuts_codes` | VARCHAR(20)[] | NUTS region codes |
## REST API
### Search Endpoints
#### GET /api/v1/documents/search
Search with structured filters:
```bash
# Search by country
curl "http://localhost:8080/api/v1/documents/search?countryCode=POL"
# Search by CPV code prefix (medical supplies)
curl "http://localhost:8080/api/v1/documents/search?cpvPrefix=33"
# Search by date range
curl "http://localhost:8080/api/v1/documents/search?publicationDateFrom=2025-01-01&publicationDateTo=2025-12-31"
# Combined filters
curl "http://localhost:8080/api/v1/documents/search?countryCode=DEU&contractNature=SERVICES&noticeType=CONTRACT_NOTICE"
```
#### GET /api/v1/documents/semantic-search
Natural language semantic search:
```bash
# Search for medical equipment tenders
curl "http://localhost:8080/api/v1/documents/semantic-search?query=medical+equipment+hospital+supplies"
# Search with similarity threshold
curl "http://localhost:8080/api/v1/documents/semantic-search?query=construction+works+road+infrastructure&threshold=0.75"
```
#### POST /api/v1/documents/search
Complex search with JSON body:
```bash
curl -X POST "http://localhost:8080/api/v1/documents/search" \
  -H "Content-Type: application/json" \
  -d '{
    "countryCodes": ["DEU", "AUT", "CHE"],
    "contractNature": "SERVICES",
    "cpvPrefix": "72",
    "semanticQuery": "software development IT services",
    "similarityThreshold": 0.7,
    "page": 0,
    "size": 20
  }'
```
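The same POST search can be issued from Python. A minimal `urllib` sketch built only from the fields in the curl example above (nothing beyond those fields is assumed about the API):

```python
import json
import urllib.request

def build_search_request(base_url: str, filters: dict) -> urllib.request.Request:
    """Build a POST /api/v1/documents/search request from a filter dict."""
    body = json.dumps(filters).encode("utf-8")
    return urllib.request.Request(
        base_url + "/api/v1/documents/search",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

req = build_search_request("http://localhost:8080", {
    "countryCodes": ["DEU", "AUT", "CHE"],
    "contractNature": "SERVICES",
    "semanticQuery": "software development IT services",
    "similarityThreshold": 0.7,
    "page": 0,
    "size": 20,
})
# urllib.request.urlopen(req) would run the search against a live instance
```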
### Document Retrieval
```bash
# Get by UUID
curl "http://localhost:8080/api/v1/documents/{uuid}"
# Get by publication ID
curl "http://localhost:8080/api/v1/documents/publication/00786665-2025"
```
### Metadata Endpoints
```bash
# List countries
curl "http://localhost:8080/api/v1/documents/metadata/countries"
# Get statistics
curl "http://localhost:8080/api/v1/documents/statistics"
# Upcoming deadlines
curl "http://localhost:8080/api/v1/documents/upcoming-deadlines?limit=50"
```
### Admin Endpoints
```bash
# Health check
curl "http://localhost:8080/api/v1/admin/health"
# Vectorization status
curl "http://localhost:8080/api/v1/admin/vectorization/status"
# Trigger vectorization for pending documents
curl -X POST "http://localhost:8080/api/v1/admin/vectorization/process-pending?batchSize=100"
```
## Configuration
### Application Properties
| Property | Default | Description |
|----------|---------|-------------|
| `ted.input.directory` | - | Input directory for XML files |
| `ted.input.pattern` | `**/*.xml` | File pattern (Ant-style) |
| `ted.input.poll-interval` | 5000 | Polling interval in ms |
| `ted.schema.enabled` | true | Enable XSD validation |
| `ted.vectorization.enabled` | true | Enable async vectorization |
| `ted.vectorization.model-name` | `intfloat/multilingual-e5-large` | Embedding model |
| `ted.vectorization.dimensions` | 1024 | Vector dimensions |
| `ted.search.default-page-size` | 20 | Default results per page |
| `ted.search.similarity-threshold` | 0.7 | Default similarity threshold |
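For reference, the table above corresponds to an `application.yml` fragment along these lines (values shown are the defaults from the table; the input directory has no default, so the path here is purely illustrative):

```yaml
ted:
  input:
    directory: /data/ted/input   # example path; no default
    pattern: "**/*.xml"
    poll-interval: 5000
  schema:
    enabled: true
  vectorization:
    enabled: true
    model-name: intfloat/multilingual-e5-large
    dimensions: 1024
  search:
    default-page-size: 20
    similarity-threshold: 0.7
```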
### Environment Variables
| Variable | Description |
|----------|-------------|
| `DB_USERNAME` | PostgreSQL username |
| `DB_PASSWORD` | PostgreSQL password |
| `TED_INPUT_DIR` | Override input directory |
## Data Model
### Notice Types
- `CONTRACT_NOTICE` - Standard contract notices
- `PRIOR_INFORMATION_NOTICE` - Prior information notices
- `CONTRACT_AWARD_NOTICE` - Contract award notices
- `MODIFICATION_NOTICE` - Contract modifications
- `OTHER` - Other notice types
### Contract Nature
- `SUPPLIES` - Goods procurement
- `SERVICES` - Service procurement
- `WORKS` - Construction works
- `MIXED` - Mixed contracts
- `UNKNOWN` - Not specified
### Procedure Types
- `OPEN` - Open procedure
- `RESTRICTED` - Restricted procedure
- `COMPETITIVE_DIALOGUE` - Competitive dialogue
- `INNOVATION_PARTNERSHIP` - Innovation partnership
- `NEGOTIATED_WITHOUT_PUBLICATION` - Negotiated without prior publication
- `NEGOTIATED_WITH_PUBLICATION` - Negotiated with prior publication
- `OTHER` - Other procedures
## Semantic Search
**See [VECTORIZATION.md](VECTORIZATION.md) for detailed documentation on the vectorization pipeline.**
The application uses the `intfloat/multilingual-e5-large` model for generating document embeddings:
- **Dimensions**: 1024
- **Languages**: Supports 100+ languages
- **Normalization**: Embeddings are L2 normalized for cosine similarity
### Query Prefixes
For optimal results with e5 models:
- Documents use `passage: ` prefix
- Queries use `query: ` prefix
This is handled automatically by the vectorization service.
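As a minimal sketch of the e5 convention (the helper name is hypothetical; the real vectorization service applies these prefixes internally):

```python
def with_e5_prefix(text: str, is_query: bool) -> str:
    """Apply the e5 input convention: 'query: ' for queries, 'passage: ' for documents."""
    return ("query: " if is_query else "passage: ") + text

# The same string is prefixed differently for indexing vs. searching
doc = with_e5_prefix("Road construction tender in Austria", is_query=False)
qry = with_e5_prefix("road infrastructure works", is_query=True)
```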
## Development
### Running Tests
```bash
mvn test
```
### Building Docker Image
```bash
docker build -t ted-procurement-processor .
```
### OpenAPI Documentation
Access Swagger UI at: `http://localhost:8080/api/swagger-ui.html`
## Performance Considerations
### Indexes
The schema includes optimized indexes for:
- Hash lookup (idempotent processing)
- Publication/notice ID lookups
- Date range queries
- Geographic searches (country, NUTS codes)
- CPV code classification
- Vector similarity search (IVFFlat)
- Full-text trigram search
### Batch Processing
- Configure `ted.input.max-messages-per-poll` for batch sizes
- Vectorization processes documents in batches of 16 by default
- Use the admin API to trigger bulk vectorization
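The two batch knobs mentioned above live in different config sections; a sketch (property names as documented, values are the defaults):

```yaml
ted:
  input:
    max-messages-per-poll: 100   # XML files picked up per poll
  vectorization:
    batch-size: 16               # documents per embedding batch
```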
## Troubleshooting
### Common Issues
**Files not being processed:**
- Check directory path in configuration
- Verify file permissions
- Check Camel route status in logs
**Duplicate detection not working:**
- Ensure `document_hash` column has unique constraint
- Check if XML content is exactly the same
**Vectorization failing:**
- Verify embedding service is running
- Check Python dependencies
- Ensure sufficient memory for model
**Slow searches:**
- Ensure pgvector IVFFlat index is created
- Check if `content_vector` column is populated
- Consider adjusting `lists` parameter in index
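If the IVFFlat index is missing, it can be created with standard pgvector DDL. The index name and `lists` value below are illustrative; a common starting point is `lists ≈ rows / 1000`:

```sql
CREATE INDEX IF NOT EXISTS idx_doc_content_vector
    ON ted.procurement_document
    USING ivfflat (content_vector vector_cosine_ops)
    WITH (lists = 100);
```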
## License
Licensed under the European Union Public Licence (EUPL) v1.2
Copyright (c) 2025 PROCON DATA Gesellschaft m.b.H.
You may use, copy, modify and distribute this work under the terms of the EUPL.
See the [LICENSE](LICENSE) file for details or visit: https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12
## Acknowledgments
- [eForms SDK](https://github.com/OP-TED/eForms-SDK) - EU Publications Office
- [pgvector](https://github.com/pgvector/pgvector) - Vector similarity search for PostgreSQL
- [sentence-transformers](https://www.sbert.net/) - Text embeddings
- [Apache Camel](https://camel.apache.org/) - Integration framework

<#
.SYNOPSIS
Semantic search across TED tenders
.DESCRIPTION
Runs a semantic similarity search against the TED procurement database.
.PARAMETER Query
The search text for the semantic search
.PARAMETER TopK
Number of results to return (default: 10, max: 100)
.PARAMETER Threshold
Minimum similarity threshold 0.0-1.0 (default: 0.5)
.PARAMETER ApiUrl
URL of the API server (default: http://localhost:8888/api)
.EXAMPLE
.\Search-TED.ps1 -Query "Software Entwicklung Cloud Services"
.EXAMPLE
.\Search-TED.ps1 -Query "IT Beratung Digitalisierung" -TopK 20 -Threshold 0.6
#>
param(
[Parameter(Mandatory=$true, Position=0, HelpMessage="Search text for the semantic search")]
[string]$Query,
[Parameter(Mandatory=$false)]
[ValidateRange(1, 100)]
[int]$TopK = 10,
[Parameter(Mandatory=$false)]
[ValidateRange(0.0, 1.0)]
[double]$Threshold = 0.5,
[Parameter(Mandatory=$false)]
[string]$ApiUrl = "http://localhost:8888/api"
)
# Colors for output
$colors = @{
Header = "Cyan"
Success = "Green"
Warning = "Yellow"
Error = "Red"
Info = "White"
Highlight = "Magenta"
}
function Write-Header {
param([string]$Text)
Write-Host ""
Write-Host ("=" * 80) -ForegroundColor $colors.Header
Write-Host $Text -ForegroundColor $colors.Header
Write-Host ("=" * 80) -ForegroundColor $colors.Header
}
function Write-Result {
param(
[int]$Rank,
[PSObject]$Doc
)
$similarity = if ($Doc.similarityPercent) { "$($Doc.similarityPercent)%" } else { "N/A" }
$deadline = "-"
if ($Doc.submissionDeadline) {
try {
# Handle ISO 8601 format (e.g., "2025-04-28T06:00:59Z")
$deadline = ([DateTime]::Parse($Doc.submissionDeadline, [System.Globalization.CultureInfo]::InvariantCulture)).ToString("dd.MM.yyyy")
} catch {
$deadline = $Doc.submissionDeadline.Substring(0, 10) # Fallback: first 10 chars (YYYY-MM-DD)
}
}
Write-Host ""
Write-Host "[$Rank] " -NoNewline -ForegroundColor $colors.Highlight
Write-Host "$similarity similarity" -ForegroundColor $colors.Success
Write-Host "  Title:        " -NoNewline -ForegroundColor $colors.Info
Write-Host $Doc.projectTitle -ForegroundColor $colors.Warning
Write-Host "  Buyer:        " -NoNewline -ForegroundColor $colors.Info
Write-Host "$($Doc.buyerName) ($($Doc.buyerCountryCode))" -ForegroundColor White
Write-Host "  Contract type:" -NoNewline -ForegroundColor $colors.Info
Write-Host " $($Doc.contractNature)" -ForegroundColor White
Write-Host "  Deadline:     " -NoNewline -ForegroundColor $colors.Info
Write-Host "$deadline" -ForegroundColor White
Write-Host "  TED link:     " -NoNewline -ForegroundColor $colors.Info
Write-Host $Doc.noticeUrl -ForegroundColor Cyan
if ($Doc.projectDescription) {
$desc = $Doc.projectDescription
if ($desc.Length -gt 200) {
$desc = $desc.Substring(0, 197) + "..."
}
Write-Host "  Description:  " -NoNewline -ForegroundColor $colors.Info
Write-Host $desc -ForegroundColor DarkGray
}
}
}
# Main program
try {
Write-Header "TED Semantic Search"
Write-Host "Query: " -NoNewline -ForegroundColor $colors.Info
Write-Host $Query -ForegroundColor $colors.Warning
Write-Host "Parameters: TopK=$TopK, Threshold=$Threshold" -ForegroundColor DarkGray
Write-Host ""
# Build the request body
$body = @{
text = $Query
topK = $TopK
threshold = $Threshold
} | ConvertTo-Json
# Call the API
Write-Host "Searching..." -ForegroundColor $colors.Info
$stopwatch = [System.Diagnostics.Stopwatch]::StartNew()
$response = Invoke-RestMethod -Uri "$ApiUrl/similarity/text" `
-Method Post `
-ContentType "application/json; charset=utf-8" `
-Body $body `
-ErrorAction Stop
$stopwatch.Stop()
# Display the results
$resultCount = $response.resultCount
$embeddingTime = $response.embeddingTimeMs
$searchTime = $response.searchTimeMs
$totalTime = $stopwatch.ElapsedMilliseconds
Write-Header "Search results: $resultCount hits"
Write-Host "Timing: embedding=${embeddingTime}ms, search=${searchTime}ms, total=${totalTime}ms" -ForegroundColor DarkGray
if ($resultCount -eq 0) {
Write-Host ""
Write-Host "No matching tenders found." -ForegroundColor $colors.Warning
Write-Host "Try:" -ForegroundColor $colors.Info
Write-Host "  - Different search terms" -ForegroundColor DarkGray
Write-Host "  - A lower threshold (-Threshold 0.3)" -ForegroundColor DarkGray
}
else {
$rank = 1
foreach ($doc in $response.results) {
Write-Result -Rank $rank -Doc $doc
$rank++
}
}
Write-Host ""
Write-Host ("-" * 80) -ForegroundColor DarkGray
Write-Host "Search completed." -ForegroundColor $colors.Success
} catch {
Write-Host ""
Write-Host "ERROR: $_" -ForegroundColor $colors.Error
if ($_.Exception.Response.StatusCode -eq 503) {
Write-Host "The vectorization service is not available." -ForegroundColor $colors.Warning
Write-Host "Make sure the embedding service is running on port 8001." -ForegroundColor $colors.Info
}
elseif ($_.Exception.Message -like "*Unable to connect*") {
Write-Host "Could not connect to the server." -ForegroundColor $colors.Warning
Write-Host "Make sure the application is running at $ApiUrl." -ForegroundColor $colors.Info
}
}
exit 1
}

# TED Automated Download & Processing Pipeline
## Overview
The complete automated pipeline for TED (Tenders Electronic Daily) tenders:
```
┌────────────────────────────────────────────────────────────────────────┐
│ TED Automatisierte Pipeline │
├────────────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────┐ │
│ │ Timer (1h) │ Alle 1 Stunde neue Packages prüfen │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ HTTP Download │ https://ted.europa.eu/packages/daily/ │
│ │ Package │ Format: YYYY-MM-DD_XXXX.tar.gz │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ Extract │ tar.gz → Tausende von XML Files │
│ │ tar.gz │ Extract to: D:/ted.europe/extracted │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ XML Splitter │ Parallel Processing (Streaming) │
│ │ (Parallel) │ Each XML → direct:process-document │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ XML Parser │ XPath Parsing + Metadata Extraction │
│ │ & Validator │ Schema Validation (eForms SDK 1.13) │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ SHA-256 Hash │ Idempotent Processing │
│ │ Check │ Skip if already imported │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ Save to DB │ PostgreSQL (ted.procurement_document) │
│ │ (PostgreSQL) │ + Native XML + Metadata │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ wireTap │ Non-blocking Trigger │
│ │ Vectorization │ direct:vectorize (async) │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ SEDA Queue │ 4 Concurrent Workers │
│ │ (Async) │ vectorize-async queue │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ Extract Text │ Title + Description + Lots │
│ │ Content │ Buyer Info + CPV Codes │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ POST to │ http://localhost:8001/embed │
│ │ Embedding API │ {"text": "...", "is_query": false} │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ Python Service │ intfloat/multilingual-e5-large │
│ │ (FastAPI) │ Returns: 1024-dimensional vector │
│ └────────┬────────┘ │
│ │ │
│ ▼ │
│ ┌─────────────────┐ │
│ │ Save Vector │ content_vector column (pgvector) │
│ │ to Database │ Status: COMPLETED │
│ └─────────────────┘ │
│ │
└────────────────────────────────────────────────────────────────────────┘
```
## Configuration
**application.yml:**
```yaml
ted:
# Input directory (points to extract directory)
input:
directory: D:/ted.europe/extracted
pattern: "**/*.xml" # Recursive scanning
poll-interval: 5000 # Check every 5 seconds
max-messages-per-poll: 100 # Process up to 100 XMLs per poll
# Automatic download from ted.europa.eu
download:
enabled: true # ✅ ENABLED
base-url: https://ted.europa.eu/packages/daily/
download-directory: D:/ted.europe/downloads
extract-directory: D:/ted.europe/extracted
start-year: 2024 # Start downloading from 2024
poll-interval: 3600000 # Check every 1 hour
max-consecutive-404: 4 # Stop after 4 consecutive 404s
delete-after-extraction: true # Clean up tar.gz files
# Vectorization (automatic after save)
vectorization:
enabled: true # ✅ ENABLED
api-url: http://localhost:8001
model-name: intfloat/multilingual-e5-large
dimensions: 1024
batch-size: 16
max-text-length: 8192
```
## Camel Routes
### 1. **TedPackageDownloadCamelRoute** (Download & Extract)
**Route ID:** `ted-package-scheduler`
**Trigger:** timer, every hour
**Flow:**
1. Determines the next package (year + serial number)
2. Checks whether it already exists (idempotent consumer)
3. HTTP GET from `https://ted.europa.eu/packages/daily/YYYY-MM-DD_XXXX.tar.gz`
4. Saves it to `download-directory`
5. Extracts it to `extract-directory`
6. Deletes the tar.gz (optional)
7. Splits the XML files → `direct:process-document`
**Enterprise Integration Patterns:**
- ✅ Timer Pattern
- ✅ Idempotent Consumer
- ✅ Content-Based Router
- ✅ Splitter Pattern (Parallel + Streaming)
- ✅ Dead Letter Channel
### 2. **TedDocumentRoute** (XML Processing)
**Route ID:** `ted-document-processor`
**Trigger:**
- File watcher on `D:/ted.europe/extracted`
- Direct call from the download route
**Flow:**
1. Reads the XML file
2. Parses it with XPath (eForms UBL schema)
3. Extracts the metadata
4. Computes the SHA-256 hash
5. Checks for duplicates in the DB
6. Saves to `ted.procurement_document`
7. **wireTap** → `direct:vectorize` (non-blocking!)
### 3. **VectorizationRoute** (Async Embedding)
**Route ID:** `vectorization-processor`
**Trigger:**
- wireTap from TedDocumentRoute
- Timer scheduler (every 60s for PENDING documents)
**Flow:**
1. Load document from DB
2. Extract text_content (Document + Lots)
3. POST to Python Embedding Service
4. Parse 1024-dimensional vector
5. Save to `content_vector` column
6. Update status → `COMPLETED`
**Queue:** SEDA with 4 concurrent workers
## Directory Structure
```
D:/ted.europe/
├── downloads/                  # Temporary tar.gz downloads
│   ├── 2025-11-30_0001.tar.gz
│   └── 2025-11-30_0002.tar.gz
├── extracted/                  # Extracted XML files
│   ├── 2025-11-30/
│   │   ├── 001/
│   │   │   ├── 00123456_2025.xml
│   │   │   └── 00123457_2025.xml
│   │   └── 002/
│   │       └── ...
│   ├── .processed/             # Successfully processed XMLs
│   └── .error/                 # Failed XMLs
```
## Database Tracking
### ted_daily_package (download tracking)
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Primary key |
| `year` | INT | Package year (2024, 2025) |
| `serial_number` | INT | Package number (1, 2, 3...) |
| `package_id` | VARCHAR | Format: `2025-11-30_0001` |
| `download_url` | VARCHAR | Full URL |
| `download_status` | VARCHAR | PENDING, DOWNLOADING, COMPLETED, NOT_FOUND, FAILED |
| `downloaded_at` | TIMESTAMP | Download timestamp |
| `file_size_bytes` | BIGINT | Size of the tar.gz |
| `xml_file_count` | INT | Number of extracted XMLs |
| `processed_count` | INT | Number of processed XMLs |
### procurement_document (XML data)
| Column | Type | Description |
|--------|------|-------------|
| `id` | UUID | Primary key |
| `document_hash` | VARCHAR(64) | SHA-256 for idempotency |
| `publication_id` | VARCHAR(50) | TED ID (00123456-2025) |
| `notice_url` | VARCHAR(255) | Auto-generated TED URL |
| `xml_document` | XML | Native PostgreSQL XML |
| `text_content` | TEXT | Input for vectorization |
| `content_vector` | vector(1024) | pgvector embedding |
| `vectorization_status` | VARCHAR | PENDING, PROCESSING, COMPLETED, FAILED |
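A minimal DDL sketch matching the columns above (types taken from the table; the status length and default are illustrative assumptions):

```sql
CREATE TABLE ted.procurement_document (
    id                   UUID PRIMARY KEY,
    document_hash        VARCHAR(64) NOT NULL UNIQUE,  -- SHA-256, idempotency
    publication_id       VARCHAR(50),
    notice_url           VARCHAR(255),
    xml_document         XML,
    text_content         TEXT,
    content_vector       vector(1024),                 -- pgvector
    vectorization_status VARCHAR(20) DEFAULT 'PENDING'
);
```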
## Monitoring
### Camel Routes Status
```bash
curl http://localhost:8888/api/actuator/camel/routes
```
**Key routes:**
- `ted-package-scheduler` - Download Timer
- `ted-document-processor` - XML Processing
- `vectorization-processor` - Embedding Generation
- `vectorization-scheduler` - PENDING Documents
### Download Status
```sql
SELECT
year,
COUNT(*) FILTER (WHERE download_status = 'COMPLETED') as completed,
COUNT(*) FILTER (WHERE download_status = 'NOT_FOUND') as not_found,
COUNT(*) FILTER (WHERE download_status = 'FAILED') as failed,
SUM(xml_file_count) as total_xmls,
SUM(processed_count) as processed_xmls
FROM ted.ted_daily_package
GROUP BY year
ORDER BY year DESC;
```
### Vectorization Status
```sql
SELECT
COUNT(*) FILTER (WHERE vectorization_status = 'COMPLETED') as completed,
COUNT(*) FILTER (WHERE vectorization_status = 'PENDING') as pending,
COUNT(*) FILTER (WHERE vectorization_status = 'FAILED') as failed,
COUNT(*) FILTER (WHERE content_vector IS NOT NULL) as has_vector
FROM ted.procurement_document;
```
### Documents Processed Today
```sql
SELECT
COUNT(*) as today_count,
MIN(created_at) as first,
MAX(created_at) as last
FROM ted.procurement_document
WHERE created_at::date = CURRENT_DATE;
```
## Python Embedding Service
**Start:**
```bash
python embedding_service.py
```
**Health Check:**
```bash
curl http://localhost:8001/health
```
**Expected Response:**
```json
{
"status": "healthy",
"model_name": "intfloat/multilingual-e5-large",
"dimensions": 1024,
"max_length": 512
}
```
## Starting the Pipeline
1. **Start the Python embedding service:**
```bash
python embedding_service.py
```
2. **Start the Spring Boot application:**
```bash
mvn spring-boot:run
```
3. **Watch the logs:**
```
INFO: Checking for new TED packages...
INFO: Next package to download: 2025-11-30_0001
INFO: Downloading from https://ted.europa.eu/packages/daily/...
INFO: Extracting package 2025-11-30_0001...
INFO: Processing 1247 XML files from package 2025-11-30_0001
INFO: Document processed successfully: 00123456_2025.xml
DEBUG: Queueing document for vectorization: xxx
INFO: Successfully vectorized document: xxx
```
## Throughput
**Estimated performance:**
| Phase | Rate | Notes |
|-------|------|-------|
| **Download** | 1 package/hour | Timer-based |
| **Extract** | ~10 seconds | tar.gz → XMLs |
| **XML processing** | ~100-200 XMLs/min | CPU-bound |
| **Vectorization** | ~60-90 docs/min | 4 workers, Python service |
**Per day:**
- ~24 packages downloaded
- ~30,000-50,000 documents processed (depending on package size)
- ~30,000-50,000 vectors generated
## Error Handling
### Download Errors
**404 Not Found:** the package does not exist (yet)
- After 4 consecutive 404s → switch to the previous year
- Automatic retry after 1 hour
**Network error:** temporary connectivity problems
- 3 retries with a 10s delay
- Dead letter channel
### Processing Errors
**Duplicates:** SHA-256 hash already present
- Skipped (idempotent processing)
- Log: "Duplicate document skipped"
**XML parsing error:** invalid XML
- 3 retries
- Moved to the `.error` directory
- Status: FAILED in the DB
### Vectorization Errors
**Embedding service unreachable:**
- 2 retries with a 2s delay
- Status: FAILED
- The scheduler retries after 60s
**Invalid embedding dimension:**
- Status: FAILED with an error message
- Manual intervention required
## Troubleshooting
### Pipeline Is Not Running
```bash
# Check the Camel routes
curl http://localhost:8888/api/actuator/camel/routes | jq '.routes[] | {id: .id, status: .status}'
# Check the download route
tail -f logs/ted-procurement-processor.log | grep "ted-package"
# Check the vectorization route
tail -f logs/ted-procurement-processor.log | grep "vectoriz"
```
### No Downloads
1. Check that `ted.download.enabled` is `true`
2. Check the internet connection
3. Check that ted.europa.eu is reachable
4. Check the logs for 404/403 errors
### No Vectorization
1. Check the embedding service: `curl http://localhost:8001/health`
2. Check that `ted.vectorization.enabled` is `true`
3. Check for PENDING documents in the DB
4. Check the logs for HTTP 400/500 errors
## Semantic Search
Once vectorization completes, documents become searchable:
```bash
# Semantic Search
curl "http://localhost:8888/api/v1/documents/semantic-search?query=medical+equipment"
# Combined Search (Semantic + Filters)
curl -X POST "http://localhost:8888/api/v1/documents/search" \
-H "Content-Type: application/json" \
-d '{
"countryCodes": ["DEU", "AUT"],
"semanticQuery": "software development",
"similarityThreshold": 0.7
}'
```
## Performance Tuning
### Speeding Up Vectorization
```yaml
ted:
  vectorization:
    thread-pool-size: 8   # More workers (default: 4)
```
**Caution:** more workers put more load on the Python service!
### Speeding Up XML Processing
```yaml
ted:
  input:
    max-messages-per-poll: 200   # More files per poll (default: 100)
```
### Parallelizing Downloads
```yaml
ted:
  download:
    max-concurrent-downloads: 4   # More parallel downloads (default: 2)
```
**Caution:** respect ted.europa.eu rate limits!
## Summary
- **Fully automated pipeline** from download to semantic search
- **Idempotent processing** - no duplicates
- **Asynchronous vectorization** - non-blocking
- **Enterprise Integration Patterns** - production-ready
- **Error handling** - retries & dead letter channel
- **Monitoring** - Actuator + SQL queries
- **Scalable** - concurrent workers & parallel processing
The pipeline runs fully automatically 24/7 and processes all new TED tenders. 🚀

# TED Notice URL Feature
## Overview
Every document in the database now has an automatically generated `notice_url` column that links directly to the TED website.
## Format
```
https://ted.europa.eu/en/notice/-/detail/{publication_id without leading zeros}
```
**Example:**
- `publication_id`: `00786665-2025`
- `notice_url`: `https://ted.europa.eu/en/notice/-/detail/786665-2025`
## Automatic Generation
The URL is generated automatically when a document is saved:
```java
@PrePersist
@PreUpdate
private void generateNoticeUrl() {
if (publicationId != null && !publicationId.isEmpty()) {
String cleanId = publicationId.replaceFirst("^0+", "");
this.noticeUrl = "https://ted.europa.eu/en/notice/-/detail/" + cleanId;
}
}
```
## Database Structure
### Column
```sql
ALTER TABLE ted.procurement_document
ADD COLUMN notice_url VARCHAR(255);
CREATE INDEX idx_doc_notice_url ON ted.procurement_document(notice_url);
```
### Existing Records
URLs for existing records are generated automatically:
```sql
UPDATE ted.procurement_document
SET notice_url = 'https://ted.europa.eu/en/notice/-/detail/' ||
REGEXP_REPLACE(publication_id, '^0+', '')
WHERE publication_id IS NOT NULL
AND notice_url IS NULL;
```
## Usage
### Repository Query
```java
// Look up by URL
Optional<ProcurementDocument> doc = repository.findByNoticeUrl(
"https://ted.europa.eu/en/notice/-/detail/786665-2025"
);
```
### Entity Access
```java
ProcurementDocument doc = new ProcurementDocument();
doc.setPublicationId("00786665-2025");
// notice_url is generated automatically on save
repository.save(doc);
// Retrieve the URL
String url = doc.getNoticeUrl();
// "https://ted.europa.eu/en/notice/-/detail/786665-2025"
```
### REST API Example
```json
{
"id": "20fde305-844b-46b7-bb72-93e86381978d",
"publicationId": "00786665-2025",
"noticeUrl": "https://ted.europa.eu/en/notice/-/detail/786665-2025",
"buyerName": "Example Organization",
"projectTitle": "Construction Services"
}
```
## Benefits
- **Direct link** to the official TED website
- **Automatic generation** - no manual maintenance required
- **Indexed** for fast lookups
- **Consistent format** - uniform URLs
- **Integrated** into the REST API and search results
## SQL Queries
### Generate the URL for a Publication ID
```sql
SELECT
publication_id,
'https://ted.europa.eu/en/notice/-/detail/' ||
REGEXP_REPLACE(publication_id, '^0+', '') AS notice_url
FROM ted.procurement_document
WHERE publication_id = '00786665-2025';
```
### Show All URLs
```sql
SELECT
publication_id,
notice_url,
buyer_name,
project_title
FROM ted.procurement_document
WHERE notice_url IS NOT NULL
ORDER BY created_at DESC
LIMIT 10;
```
### Search by URL Pattern
```sql
SELECT *
FROM ted.procurement_document
WHERE notice_url LIKE '%/786665-2025';
```
## Migration
Adding this feature involved:
1. ✅ Adding the `notice_url` column to the table
2. ✅ Creating the `idx_doc_notice_url` index
3. ✅ Generating URLs for all existing records
4. ✅ Extending the entity class with automatic generation
5. ✅ Adding the repository method `findByNoticeUrl()`
## Notes
- The URL is generated **only** when a `publication_id` is present
- Leading zeros are stripped automatically (`00786665` → `786665`)
- The URL is refreshed on every update (in case `publication_id` changes)
- The column is **not** `NOT NULL`, since old records may lack a `publication_id`

# TED Package Download - Camel-Native Implementation
## Overview
Fully Camel-based implementation of the automatic TED daily package download, built on Apache Camel Enterprise Integration Patterns (EIP).
## Architecture
### Enterprise Integration Patterns Used
1. **Timer Pattern** - periodic trigger for downloads
2. **Content-Based Router** - branching on HTTP status
3. **Splitter Pattern** - parallel processing of XML files
4. **Dead Letter Channel** - error handling with retry logic
5. **Message Filter** - filtering based on package status
6. **Pipes and Filters** - sequential processing
### Route Components
#### 1. **Timer-Scheduler** (`ted-package-scheduler`)
```
timer → determineNextPackage → choice → download-package
```
- Runs every X milliseconds (configurable, default: 1 hour)
- Determines the next package (current year prioritized)
- Stops automatically after 4 consecutive 404 errors
#### 2. **HTTP-Downloader** (`ted-package-http-downloader`)
```
direct:download-package → createPackageRecord → delay → HTTP GET → choice
├─ 200 OK → process-downloaded-package
├─ 404 → markPackageNotFound
└─ other → markPackageFailed
```
- Native HTTP component for downloads
- Rate limiting via delay()
- Content-based routing on HTTP status
#### 3. **Package-Processor** (`ted-package-processor`)
```
process-downloaded-package → calculateHash → checkDuplicate → choice
├─ duplicate → markPackageDuplicate
└─ new → saveDownloadedPackage → extract-tar-gz
```
- SHA-256 hash computation
- Duplicate detection via hash
- Storage on the filesystem
#### 4. **TAR.GZ-Extractor** (`ted-package-extractor`)
```
extract-tar-gz → extractTarGz → deleteTarGz (optional) → split-xml-files
```
- Apache Commons Compress for tar.gz
- Extraction of all XML files
- Optional cleanup
#### 5. **XML-Splitter** (`ted-package-xml-splitter`)
```
split-xml-files → split(xmlFiles) → prepareXmlForProcessing → direct:process-document
```
- Parallel processing (.parallelProcessing())
- Streaming (.streaming())
- Integration with the existing XML route
## Camel Components
### Camel Components Used
- **timer** - periodic trigger
- **http** - HTTP GET requests
- **direct** - synchronous route connections
- **bean** - processor invocations
- **file** - filesystem operations (indirectly via processors)
### Dependencies (pom.xml)
```xml
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-http</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-bean</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-jackson</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.27.1</version>
</dependency>
```
## Workflow Diagram
```
┌─────────────────────┐
│ Timer (1h) │
│ Scheduler │
└──────┬──────────────┘
┌─────────────────────┐
│ Determine Next │
│ Package (Bean) │
└──────┬──────────────┘
┌─────────────────────┐
│ HTTP GET │
│ https://ted... │
└──────┬──────────────┘
┌──┴───┐
│Choice│
└──┬───┘
┌──┴─────┬─────────┬─────────┐
│ │ │ │
200 404 Other Error
│ │ │ │
▼ ▼ ▼ ▼
Process NotFound Failed Dead Letter
┌─────────────────────┐
│ Calculate Hash │
│ (SHA-256) │
└──────┬──────────────┘
┌─────────────────────┐
│ Check Duplicate │
│ (DB Query) │
└──────┬──────────────┘
┌──┴───┐
│Choice│
└──┬───┘
┌──┴─────┬─────────┐
│ │ │
New Duplicate │
│ │ │
▼ ▼ │
Extract Complete │
│ │
▼ │
┌─────────────────────┤
│ Extract TAR.GZ │
│ (Apache Commons) │
└──────┬──────────────┘
┌─────────────────────┐
│ Split XML Files │
│ (Parallel) │
└──────┬──────────────┘
┌─────────────────────┐
│ Process Document │
│ (existing route) │
└─────────────────────┘
```
## Message Headers
### Download Route Headers
| Header | Type | Description |
|--------|------|-------------|
| `packageId` | String | YYYYSSSSS format |
| `year` | Integer | Package year |
| `serialNumber` | Integer | Serial number |
| `downloadUrl` | String | Full download URL |
| `downloadStartTime` | Long | Start timestamp |
| `CamelHttpResponseCode` | Integer | HTTP status |
| `fileHash` | String | SHA-256 hash |
| `isDuplicate` | Boolean | Duplicate flag |
| `duplicateOf` | String | Original package ID |
### Extraction Headers
| Header | Type | Description |
|--------|------|-------------|
| `downloadPath` | String | Path to the tar.gz file |
| `xmlFiles` | List<Path> | List of XML files |
| `xmlFileCount` | Integer | Number of XML files |
| `deleteAfterExtraction` | Boolean | Cleanup flag |
## Configuration
### application.yml
```yaml
ted:
  download:
    enabled: true                    # Enables the Camel-native route
    base-url: https://ted.europa.eu/packages/daily/
    download-directory: D:/ted.europe/downloads
    extract-directory: D:/ted.europe/extracted
    start-year: 2024
    max-consecutive-404: 4
    poll-interval: 3600000           # 1 hour
    download-timeout: 300000         # 5 minutes
    delay-between-downloads: 5000    # 5 seconds
    delete-after-extraction: true
    prioritize-current-year: true
    # Optional: service-based route (old implementation)
    use-service-based: false         # Disabled
```
## Error Handling
### Dead Letter Channel
```java
errorHandler(deadLetterChannel("direct:package-download-error")
.maximumRedeliveries(3)
.redeliveryDelay(10000)
.retryAttemptedLogLevel(LoggingLevel.WARN))
```
**Retry strategy:**
- Maximum redeliveries: 3
- Delay: 10 seconds
- On failure: dead letter channel
### Error Handling Details
1. **HTTP errors**:
   - 404 → status NOT_FOUND (no retry)
   - 5xx → 3 retries
   - Other → status FAILED
2. **Processing errors**:
   - Hash computation failed → retry
   - Extraction failed → retry
   - XML processing failed → package status stays PROCESSING
## Monitoring & Logging
### Log-Levels
```yaml
logging:
level:
at.procon.ted.camel: DEBUG
org.apache.camel: INFO
```
### Log Messages
- `INFO`: package start, completion, status changes
- `DEBUG`: HTTP responses, hash computations, extractions
- `WARN`: duplicates, HTTP errors, retries
- `ERROR`: dead letter channel, critical failures
## Performance Tuning
### Parallel Processing
```java
.split(header("xmlFiles"))
    .parallelProcessing()  // parallel processing
    .streaming()           // streaming for large lists
```
### Rate Limiting
```java
.delay(simple("{{ted.download.delay-between-downloads:5000}}"))
```
Prevents overloading the server via a configurable delay.
## Database Integration
### Package Tracking
All status changes are stored in `TED.ted_daily_package`:
```sql
SELECT
package_identifier,
download_status,
xml_file_count,
processed_count,
downloaded_at
FROM TED.ted_daily_package
ORDER BY year DESC, serial_number DESC;
```
### Status Workflow
```
PENDING → DOWNLOADING → DOWNLOADED → PROCESSING → COMPLETED
↓ ↓
NOT_FOUND FAILED
```
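A quick way to see how many packages sit in each state of the workflow above (table and column names as documented):

```sql
SELECT download_status, COUNT(*) AS packages
FROM TED.ted_daily_package
GROUP BY download_status
ORDER BY packages DESC;
```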
## Testing
### Manual Test
```bash
# 1. Create the directories
mkdir -p D:/ted.europe/downloads
mkdir -p D:/ted.europe/extracted
# 2. Run the database migration
psql -h 94.130.218.54 -p 5432 -U postgres -d Sales \
  -f src/main/resources/db/migration/V2__add_ted_daily_package_table.sql
# 3. Start the application
mvn spring-boot:run
# 4. Monitor the logs
tail -f logs/spring.log | grep "ted-package"
```
### Successful Download (Logs)
```
INFO - Checking for new TED packages...
INFO - Next package to download: 202400001
INFO - Downloaded package 202400001
INFO - Extracting package 202400001...
INFO - Extracted 1234 XML files from package 202400001
INFO - Processing 1234 XML files from package 202400001
INFO - Completed processing package 202400001
```
## Advantages of the Camel-Native Implementation
1. ✅ **Enterprise Integration Patterns** - proven patterns
2. ✅ **Declarative Configuration** - route definitions in Java
3. ✅ **Native HTTP Component** - optimized and battle-tested
4. ✅ **Monitoring** - Camel JMX management
5. ✅ **Error Handling** - dead letter channel, retries
6. ✅ **Parallel Processing** - split/aggregate pattern
7. ✅ **Message Transformation** - header/body manipulation
8. ✅ **Content-Based Routing** - dynamic branching
## Differences from the Service-Based Route
| Feature | Camel-Native | Service-Based |
|---------|-------------|-----------------|
| HTTP download | Camel HTTP component | Java HttpURLConnection |
| Retry | Camel error handler | Manual |
| Routing | Content-based router | if/else |
| Parallelism | Camel splitter | Java executor |
| Monitoring | Camel JMX | Custom |
| Configuration | `ted.download.enabled` | `ted.download.use-service-based` |
## Next Steps
1. ✅ Run the database migration
2. ✅ Create the directories
3. ✅ Set `ted.download.enabled=true`
4. ✅ Start the application
5. ⏳ Monitor the logs
6. ⏳ Check the DB status
The system is ready for production! 🚀

@ -0,0 +1,385 @@
# Vectorization with Apache Camel
## Overview
Vectorization runs fully asynchronously via **Apache Camel routes** and calls an external **Python embedding service** over REST.
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Vektorisierungs-Pipeline │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌──────────────┐ ┌─────────────────┐ ┌───────────────┐ │
│ │ XML File │───▶│ TedDocumentRoute│───▶│ Document │ │
│ │ Processing │ │ │ │ Saved to DB │ │
│ └──────────────┘ └────────┬────────┘ └───────┬───────┘ │
│ │ │ │
│ │ wireTap │ │
│ ▼ │ │
│ ┌──────────────────────────────────────────────────┼──────┐ │
│ │ direct:vectorize (Trigger) │ │ │
│ └──────────────────────────┬───────────────────────┘ │ │
│ │ │ │
│ ▼ │ │
│ ┌──────────────────────────────────────────────────┐ │ │
│ │ seda:vectorize-async (4 concurrent workers) │ │ │
│ │ │ │ │
│ │ 1. Load document from DB │ │ │
│ │ 2. Extract text_content (includes Lots!) │ │ │
│ │ 3. Set status = PROCESSING │ │ │
│ │ 4. Add "passage: " prefix │ │ │
│ │ 5. Call REST API │ │ │
│ │ 6. Update content_vector │ │ │
│ └──────────────┬───────────────────────────────────┘ │ │
│ │ │ │
│ ▼ │ │
│ ┌──────────────────────────────────────────────────┐ │ │
│ │ Python Embedding Service (Port 8001) │ │ │
│ │ POST /embed │ │ │
│ │ Model: intfloat/multilingual-e5-large │ │ │
│ │ Returns: [1024 floats] │ │ │
│ └──────────────────────────────────────────────────┘ │ │
│ │ │
│ ┌──────────────────────────────────────────────────┐ │ │
│ │ Timer Route (every 60s) │◀─────┘ │
│ │ │ │
│ │ SELECT * FROM procurement_document │ │
│ │ WHERE vectorization_status = 'PENDING' │ │
│ │ LIMIT 16 │ │
│ │ │ │
│ │ → Trigger vectorization for each │ │
│ └──────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
```
## Apache Camel Routes
### 1. Trigger Route (`direct:vectorize`)
**Route ID:** `vectorization-trigger`
**Function:** receives a `documentId` and forwards it to the async queue
**Integration:** invoked from `TedDocumentRoute` via `wireTap` (non-blocking)
```java
from("direct:vectorize")
.to("seda:vectorize-async?concurrentConsumers=4&waitForTaskToComplete=Never");
```
### 2. Async Processor Route (`seda:vectorize-async`)
**Route ID:** `vectorization-processor`
**Concurrent workers:** 4 (configurable)
**Flow:**
1. ✅ Load the document from the DB via `documentId`
2. ✅ Update status → `PROCESSING`
3. ✅ Extract `text_content` (includes the document + lots!)
4. ✅ Truncate if longer than `max-text-length` (8192 chars)
5. ✅ Add prefix: `"passage: " + text`
6. ✅ POST → `http://localhost:8001/embed` with a JSON body
7. ✅ Parse the JSON response → `float[1024]`
8. ✅ Update `content_vector` in the DB
9. ✅ Update status → `COMPLETED`
**Error handling:**
- Max 2 retries with a 2 s delay
- On failure: status → `FAILED` with the error message
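Steps 4-7 of the flow above can be sketched in Python (a sketch only; the real route runs in Camel/Java — the URL, prefix, and 8192-char limit follow this document):

```python
import json
import urllib.request

MAX_TEXT_LENGTH = 8192  # corresponds to ted.vectorization.max-text-length

def prepare_text(text: str, max_length: int = MAX_TEXT_LENGTH) -> str:
    """Steps 4-5: truncate the text and add the E5 'passage: ' prefix."""
    return "passage: " + text[:max_length]

def embed(text: str, api_url: str = "http://localhost:8001/embed") -> list:
    """Steps 6-7: POST the prepared text and parse the JSON float array."""
    body = json.dumps({"text": prepare_text(text)}).encode("utf-8")
    req = urllib.request.Request(
        api_url, data=body, headers={"Content-Type": "application/json"}
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())
```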
### 3. Scheduler Route (`timer:vectorization-scheduler`)
**Route ID:** `vectorization-scheduler`
**Interval:** 60 seconds (after a 5 s delay at startup)
**Function:** processes documents from the DB that have not been vectorized yet
**Flow:**
```java
from("timer:vectorization-scheduler?period=60000&delay=5000")
.process(exchange -> {
// Load PENDING documents from DB
List<ProcurementDocument> pending =
documentRepository.findByVectorizationStatus(PENDING, PageRequest.of(0, 16));
})
.split(body())
    .to("direct:vectorize")  // trigger vectorization for each document
.end();
```
## Text Content for Vectorization
The `text_content` is built in `XmlParserService.generateTextContent()` and contains:
```
Title: Mission de maitrise d'œuvre pour la création...
Description: Désignation d'une équipe de maîtrise d'œuvre...
Contracting Authority: Société Publique Locale, Bannalec (FRA)
Contract Type: SERVICES
Procedure: OTHER
CPV Codes: 71200000
Lots (1):
- LOT-0001: Mission de maîtrise d'œuvre... - Désignation d'une équipe...
```
**All lot titles and descriptions are included!**
## REST API: Python Embedding Service
### Endpoint
**POST** `http://localhost:8001/embed`
### Request
```json
{
"text": "passage: Title: Mission de maitrise d'œuvre..."
}
```
### Response
```json
[0.123, -0.456, 0.789, ..., 0.321]
```
**Format:** JSON array of 1024 floats
### Model
- **Name:** `intfloat/multilingual-e5-large`
- **Dimensions:** 1024
- **Languages:** 100+ (multilingual)
- **Normalization:** L2-normalized for cosine similarity
### E5 Model Prefixes
| Type | Prefix | Usage |
|-----|--------|------------|
| **Documents** | `passage: ` | when storing in the DB |
| **Queries** | `query: ` | for search queries |
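The prefix rule must match on both sides: texts stored in the DB get `passage: `, search queries get `query: `. A minimal helper mirroring `add_prefix` from `embedding_service.py` (variable names here are illustrative):

```python
def add_e5_prefix(text: str, is_query: bool = False) -> str:
    """E5 models expect 'query: ' for search queries and 'passage: ' for stored documents."""
    return ("query: " if is_query else "passage: ") + text

# document side (when storing in the DB)
doc_input = add_e5_prefix("Title: Mission de maitrise d'oeuvre...")
# query side (when searching)
query_input = add_e5_prefix("medical equipment", is_query=True)
```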
## Configuration
**application.yml:**
```yaml
ted:
  vectorization:
    enabled: true                         # enable vectorization
    use-http-api: true                    # REST instead of a subprocess
    api-url: http://localhost:8001        # embedding service URL
    model-name: intfloat/multilingual-e5-large
    dimensions: 1024
    batch-size: 16                        # scheduler batch size
    max-text-length: 8192                 # max chars per vectorization
```
## Starting the Python Embedding Service
### Option 1: Docker Compose
```bash
docker-compose up -d embedding-service
```
### Option 2: Standalone Python
**File:** `embedding_service.py`
```python
from flask import Flask, request, jsonify
from sentence_transformers import SentenceTransformer
app = Flask(__name__)
model = SentenceTransformer('intfloat/multilingual-e5-large')
@app.route('/embed', methods=['POST'])
def embed():
data = request.json
text = data['text']
# Generate embedding
embedding = model.encode(text, normalize_embeddings=True)
return jsonify(embedding.tolist())
@app.route('/health', methods=['GET'])
def health():
return jsonify({"status": "ok"})
if __name__ == '__main__':
app.run(host='0.0.0.0', port=8001)
```
**Start:**
```bash
pip install flask sentence-transformers
python embedding_service.py
```
## Monitoring
### Checking the Vectorization Status
```sql
SELECT
vectorization_status,
COUNT(*) as count
FROM ted.procurement_document
GROUP BY vectorization_status;
```
**Possible statuses:**
- `PENDING` - waiting for vectorization
- `PROCESSING` - currently being vectorized
- `COMPLETED` - vectorized successfully
- `FAILED` - vectorization failed
- `SKIPPED` - no text content available
### Admin REST API
**GET** `/api/v1/admin/vectorization/status`
```json
{
"enabled": true,
"pending": 42,
"completed": 1523,
"failed": 3
}
```
**POST** `/api/v1/admin/vectorization/process-pending?batchSize=100`
Triggers manual processing of PENDING documents.
### Camel Routes Status
**Actuator Endpoint:** `http://localhost:8888/api/actuator/camel`
Shows the status of all Camel routes:
- `vectorization-trigger`
- `vectorization-processor`
- `vectorization-scheduler`
## Error Handling
### Retry Strategy
```java
onException(Exception.class)
.maximumRedeliveries(2)
.redeliveryDelay(2000)
.handled(true)
.process(exchange -> {
// Update status to FAILED in database
});
```
**Retries:** 2x with a 2-second pause
**On final failure:**
- Status → `FAILED`
- The error message is stored in the `vectorization_error` column
- The document no longer appears in the scheduler (which only picks up PENDING)
### Common Errors
| Error | Cause | Solution |
|--------|---------|--------|
| Connection refused | Embedding service is not running | Start the service |
| Invalid dimension | Wrong model | Check the configuration |
| Timeout | Service overloaded | Reduce `concurrentConsumers` |
| No text content | Document is empty | Automatically marked as SKIPPED |
## Performance
### Throughput
**Concurrent workers:** 4
- **Per worker:** ~2-3 seconds per document
- **Overall:** ~60-90 documents/minute
**Tuning:**
```yaml
vectorization:
  thread-pool-size: 8   # more concurrent workers
```
### Memory
**E5-Large model:**
- ~2 GB RAM
- Runs on CPU or GPU
- Loaded once at service startup
### Network
**Request size:** ~8 KB (8192 chars max)
**Response size:** ~4 KB (1024 floats)
## Best Practices
✅ **DO:**
- Run the embedding service as a separate process
- Check service health via the `/health` endpoint
- Adapt the batch size to the server's capacity
- Review failed documents regularly and retry them
❌ **DON'T:**
- Don't run more than 8 concurrent workers (overloads the service)
- Don't set `max-text-length` too high (>10000 chars)
- Don't deploy the service without a health check
## Semantic Search
After successful vectorization, documents can be found via semantic search:
```bash
curl "http://localhost:8888/api/v1/documents/semantic-search?query=medical+equipment"
```
**Technology:**
- PostgreSQL pgvector extension
- Cosine similarity (`1 - (vec1 <=> vec2)`)
- IVFFlat index for fast search
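Because the stored embeddings are L2-normalized, cosine similarity reduces to a plain dot product; pgvector's `<=>` operator returns cosine *distance*, hence the `1 - (vec1 <=> vec2)` expression. A quick illustration of the underlying math:

```python
import math

def cosine_similarity(a, b):
    """Cosine similarity; for L2-normalized vectors this equals the dot product."""
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb)

v1 = [1.0, 0.0]
v2 = [1.0, 0.0]
v3 = [0.0, 1.0]
print(cosine_similarity(v1, v2))  # identical vectors → 1.0
print(cosine_similarity(v1, v3))  # orthogonal vectors → 0.0
```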
## Troubleshooting
### Documents Are Not Being Vectorized
1. ✅ Check the embedding service: `curl http://localhost:8001/health`
2. ✅ Check the logs of the `vectorization-processor` route
3. ✅ Check the DB: `SELECT * FROM procurement_document WHERE vectorization_status = 'FAILED'`
4. ✅ Check the config: `vectorization.enabled = true`
### Embedding Service Not Responding
```bash
# service status
curl http://localhost:8001/health

# test embedding
curl -X POST http://localhost:8001/embed \
  -H "Content-Type: application/json" \
  -d '{"text": "passage: test"}'
```
### Camel Route Not Running
```bash
# Camel routes via Actuator
curl http://localhost:8888/api/actuator/camel/routes
```
Check that the `vectorization-processor` route has status `Started`.

@ -0,0 +1,241 @@
# Native XML Column with XPath Queries
## ✅ Implemented
The `xml_document` column is now a native PostgreSQL XML type with full XPath support.
## Hibernate Configuration
```java
@Column(name = "xml_document", nullable = false)
@JdbcTypeCode(SqlTypes.SQLXML)
private String xmlDocument;
```
## XPath Query Examples
### 1. **Simple XPath Query (Without Namespaces)**
```sql
-- extract all document IDs
SELECT
    id,
    xpath('//ID/text()', xml_document) as document_ids
FROM ted.procurement_document;
```
### 2. **XPath with Namespaces (eForms UBL)**
eForms uses XML namespaces; they must be declared in XPath queries:
```sql
-- extract titles (with namespaces)
SELECT
id,
xpath(
'//cbc:Title/text()',
xml_document,
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2']
]
) as titles
FROM ted.procurement_document;
```
### 3. **Extracting the Buyer Name**
```sql
SELECT
id,
publication_id,
xpath(
'//cac:ContractingParty/cac:Party/cac:PartyName/cbc:Name/text()',
xml_document,
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2']
]
) as buyer_names
FROM ted.procurement_document;
```
### 4. **Extracting CPV Codes**
```sql
SELECT
id,
xpath(
'//cac:ProcurementProject/cac:MainCommodityClassification/cbc:ItemClassificationCode/text()',
xml_document,
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2']
]
) as cpv_codes
FROM ted.procurement_document;
```
### 5. **Filtering by XML Content**
```sql
-- find all documents containing a given CPV code
SELECT
id,
publication_id,
buyer_name
FROM ted.procurement_document
WHERE xpath_exists(
'//cac:ProcurementProject/cac:MainCommodityClassification/cbc:ItemClassificationCode[text()="45000000"]',
xml_document,
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2']
]
);
```
### 6. **Extracting the Estimated Value**
```sql
SELECT
id,
xpath(
'//cac:ProcurementProject/cac:RequestedTenderTotal/cbc:EstimatedOverallContractAmount/text()',
xml_document,
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2']
]
) as estimated_values
FROM ted.procurement_document;
```
## JPA/Hibernate Native Queries
XPath can also be used in Spring Data JPA repositories:
### Repository Example
```java
@Repository
public interface ProcurementDocumentRepository extends JpaRepository<ProcurementDocument, UUID> {
    /**
     * Finds all documents whose title contains the given text (via XPath).
     * Note: bind parameters are not expanded inside SQL string literals,
     * so the XPath expression is built by concatenation here.
     */
    @Query(value = """
        SELECT * FROM ted.procurement_document
        WHERE xpath_exists(
            ('//cbc:Title[contains(text(), "' || :searchText || '")]'),
            xml_document,
            ARRAY[
                ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2']
            ]
        )
        """, nativeQuery = true)
    List<ProcurementDocument> findByTitleContaining(@Param("searchText") String searchText);
    /**
     * Extracts CPV codes via XPath
     */
@Query(value = """
SELECT
unnest(xpath(
'//cac:ProcurementProject/cac:MainCommodityClassification/cbc:ItemClassificationCode/text()',
xml_document,
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2']
]
))::text as cpv_code
FROM ted.procurement_document
WHERE id = :documentId
""", nativeQuery = true)
List<String> extractCpvCodes(@Param("documentId") UUID documentId);
    /**
     * Finds documents by CPV code.
     * The XPath is concatenated because bind parameters are not expanded
     * inside SQL string literals.
     */
    @Query(value = """
        SELECT * FROM ted.procurement_document
        WHERE xpath_exists(
            ('//cbc:ItemClassificationCode[text()="' || :cpvCode || '"]'),
            xml_document,
            ARRAY[
                ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2']
            ]
        )
        """, nativeQuery = true)
    List<ProcurementDocument> findByCpvCode(@Param("cpvCode") String cpvCode);
}
```
## PostgreSQL XML Functions
More useful XML functions:
### `xml_is_well_formed()`
```sql
SELECT xml_is_well_formed(xml_document) FROM ted.procurement_document;
```
### `xpath_exists()` - Checks Whether a Path Exists
```sql
SELECT xpath_exists('//cbc:Title', xml_document, ...) FROM ted.procurement_document;
```
### `unnest()` - Expanding an Array into Rows
```sql
SELECT
id,
unnest(xpath('//cbc:Title/text()', xml_document, ...))::text as title
FROM ted.procurement_document;
```
## Common eForms Namespaces
```sql
ARRAY[
ARRAY['cbc', 'urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2'],
ARRAY['cac', 'urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2'],
ARRAY['ext', 'urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2'],
ARRAY['efac', 'http://data.europa.eu/p27/eforms-ubl-extension-aggregate-components/1'],
ARRAY['efbc', 'http://data.europa.eu/p27/eforms-ubl-extension-basic-components/1']
]
```
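The same namespace URIs apply outside SQL as well. For illustration, extracting a title from an eForms fragment in Python (a sketch; the XML snippet below is a hypothetical minimal example, not a real notice):

```python
import xml.etree.ElementTree as ET

# same prefix → URI mapping as in the SQL ARRAY above
NS = {
    "cbc": "urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2",
    "cac": "urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2",
}

# hypothetical minimal eForms fragment
xml = """<root xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
  <cbc:Title>Mission de maitrise d'oeuvre</cbc:Title>
</root>"""

tree = ET.fromstring(xml)
title = tree.find(".//cbc:Title", NS)  # namespace-aware lookup
print(title.text)
```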
## Performance Tips
1. **Indexing**: For frequent XPath queries, you can create functional indexes:
```sql
CREATE INDEX idx_doc_title ON ted.procurement_document
USING GIN ((xpath('//cbc:Title/text()', xml_document, ...)));
```
2. **Materialized views**: for complex XPath queries:
```sql
CREATE MATERIALIZED VIEW ted.document_titles AS
SELECT
id,
unnest(xpath('//cbc:Title/text()', xml_document, ...))::text as title
FROM ted.procurement_document;
```
## Advantages of the Native XML Column
✅ Native XPath queries
✅ XML validation possible
✅ Efficient storage
✅ PostgreSQL XML functions available
✅ Structured queries on XML elements
✅ Functional indexes possible
## Hibernate Now Works Correctly
With `@JdbcTypeCode(SqlTypes.SQLXML)`, Hibernate knows it must use `SQLXML` for INSERT/UPDATE.
This prevents the error:
```
ERROR: column "xml_document" is of type xml but expression is of type character varying
```

@ -0,0 +1,102 @@
# TED Procurement Document Processor - Docker Compose
# Author: Martin.Schweitzer@procon.co.at and claude.ai
#
# Services:
#   - PostgreSQL 16 with pgvector extension (local development only)
#   - Python embedding service for vectorization
#
# Default usage (embedding service only):
#   docker-compose --profile with-embedding up -d
#
# Use the local database:
#   docker-compose --profile local-db up -d
#
# Remote database (94.130.218.54):
#   configured in application.yml (default setting)
version: '3.8'
services:
  # PostgreSQL database with pgvector extension (LOCAL DEVELOPMENT ONLY)
  # Disabled by default - use the remote server 94.130.218.54
postgres:
image: pgvector/pgvector:pg16
container_name: ted-postgres
environment:
POSTGRES_DB: sales
POSTGRES_USER: postgresql
POSTGRES_PASSWORD: "PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc="
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
- ./init-db.sql:/docker-entrypoint-initdb.d/init-db.sql:ro
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgresql -d sales"]
interval: 10s
timeout: 5s
retries: 5
restart: unless-stopped
profiles:
- local-db
# Python embedding service (optional - for production vectorization)
embedding-service:
build:
context: .
dockerfile: Dockerfile.embedding
container_name: ted-embedding-service
ports:
- "8001:8001"
environment:
MODEL_NAME: intfloat/multilingual-e5-large
MAX_LENGTH: 512
BATCH_SIZE: 16
volumes:
- model_cache:/root/.cache/huggingface
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
# Remove GPU section if not using NVIDIA GPU
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
interval: 30s
timeout: 10s
retries: 3
restart: unless-stopped
profiles:
- with-embedding
  # PgAdmin for database management (local database only)
pgadmin:
image: dpage/pgadmin4:latest
container_name: ted-pgadmin
environment:
PGADMIN_DEFAULT_EMAIL: admin@procon.co.at
PGADMIN_DEFAULT_PASSWORD: admin
ports:
- "5050:80"
volumes:
- pgadmin_data:/var/lib/pgadmin
depends_on:
- postgres
restart: unless-stopped
profiles:
- local-db
- tools
volumes:
postgres_data:
driver: local
model_cache:
driver: local
pgadmin_data:
driver: local
networks:
default:
name: ted-network

@ -0,0 +1,35 @@
<?xml version="1.0" encoding="UTF-8"?>
<archimate:model xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:archimate="http://www.archimatetool.com/archimate" name="ted-procurement-processor" id="id-2b4d39e8f5d64fc0a7b560c5a8d003ab" version="5.0.0">
<folder name="Strategy" id="id-6bd5b01c2de749c1a35405ac999f5c05" type="strategy"/>
<folder name="Business" id="id-adeb15efa07e4b4fbd936e5f74b7a2ad" type="business">
<element xsi:type="archimate:BusinessActor" name="martin.schweitzer@procon.co.at" id="id-96b1ac07b653419cac40bb5e22fa62a8"/>
</folder>
<folder name="Application" id="id-dee9a6b313134cfca3443c5263b27b0c" type="application">
<element xsi:type="archimate:ApplicationInterface" name="packages/daily" id="id-30caa1d5f47d4eecb032665b458823ed">
<documentation>https://ted.europa.eu/packages/daily/</documentation>
</element>
</folder>
<folder name="Technology &amp; Physical" id="id-0996cd6e353b490f9c3b1af648feacee" type="technology">
<element xsi:type="archimate:Node" name="ted.europa.eu" id="id-f224255413704a16bb1c8c0245c56ab8"/>
</folder>
<folder name="Motivation" id="id-94439d1a94be45fc94efb0a0b7fb4c7f" type="motivation"/>
<folder name="Implementation &amp; Migration" id="id-45801b8267d94a519c9709c539b3ffec" type="implementation_migration"/>
<folder name="Other" id="id-25599f80418d41c68a34250804a4b741" type="other"/>
<folder name="Relations" id="id-281c02835b76408ca16710173b3c9dba" type="relations">
<element xsi:type="archimate:RealizationRelationship" id="id-45fb6cc328ab496da117339bb1de6c02" source="id-f224255413704a16bb1c8c0245c56ab8" target="id-30caa1d5f47d4eecb032665b458823ed"/>
</folder>
<folder name="Views" id="id-d63ccd74e382447a96d72593fb9d037a" type="diagrams">
<element xsi:type="archimate:ArchimateDiagramModel" name="Architecture" id="id-bd2670b514e445b683e21882001cfd88">
<child xsi:type="archimate:DiagramObject" id="id-388106b1105f4fdca6605f6da5922427" archimateElement="id-96b1ac07b653419cac40bb5e22fa62a8">
<bounds x="100" y="84" width="213" height="37"/>
</child>
<child xsi:type="archimate:DiagramObject" id="id-47ed1980dd044da8816138adec5bbc4e" archimateElement="id-f224255413704a16bb1c8c0245c56ab8">
<bounds x="629" y="69" width="260" height="88"/>
<sourceConnection xsi:type="archimate:Connection" id="id-7c867101990b4e59a105b4b8c5444858" source="id-47ed1980dd044da8816138adec5bbc4e" target="id-283844ff35584547a4ca1ba266504a98" archimateRelationship="id-45fb6cc328ab496da117339bb1de6c02"/>
<child xsi:type="archimate:DiagramObject" id="id-283844ff35584547a4ca1ba266504a98" targetConnections="id-7c867101990b4e59a105b4b8c5444858" archimateElement="id-30caa1d5f47d4eecb032665b458823ed">
<bounds x="58" y="32" width="159" height="41"/>
</child>
</child>
</element>
</folder>
</archimate:model>

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<archimate:model xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:archimate="http://www.archimatetool.com/archimate" name="ted-procurement-processor" id="id-2b4d39e8f5d64fc0a7b560c5a8d003ab" version="5.0.0">
<folder name="Strategy" id="id-6bd5b01c2de749c1a35405ac999f5c05" type="strategy"/>
<folder name="Business" id="id-adeb15efa07e4b4fbd936e5f74b7a2ad" type="business"/>
<folder name="Application" id="id-dee9a6b313134cfca3443c5263b27b0c" type="application"/>
<folder name="Technology &amp; Physical" id="id-0996cd6e353b490f9c3b1af648feacee" type="technology"/>
<folder name="Motivation" id="id-94439d1a94be45fc94efb0a0b7fb4c7f" type="motivation"/>
<folder name="Implementation &amp; Migration" id="id-45801b8267d94a519c9709c539b3ffec" type="implementation_migration"/>
<folder name="Other" id="id-25599f80418d41c68a34250804a4b741" type="other"/>
<folder name="Relations" id="id-281c02835b76408ca16710173b3c9dba" type="relations"/>
<folder name="Views" id="id-d63ccd74e382447a96d72593fb9d037a" type="diagrams">
<element xsi:type="archimate:ArchimateDiagramModel" name="Default View" id="id-bd2670b514e445b683e21882001cfd88"/>
</folder>
</archimate:model>

@ -0,0 +1,340 @@
#!/usr/bin/env python3
"""
TED Procurement Document Embedding Service
HTTP API for generating text embeddings using sentence-transformers.
Model: intfloat/multilingual-e5-large (1024 dimensions)
Author: Martin.Schweitzer@procon.co.at and claude.ai
Usage:
python embedding_service.py
Environment Variables:
MODEL_NAME: Model to use (default: intfloat/multilingual-e5-large)
MAX_LENGTH: Maximum token length (default: 512)
HOST: Server host (default: 0.0.0.0)
PORT: Server port (default: 8001)
API Endpoints:
POST /embed - Generate embedding for single text
POST /embed/batch - Generate embeddings for multiple texts
GET /health - Health check
"""
import os
import logging
import threading
import time
from typing import List
from contextlib import asynccontextmanager
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import uvicorn
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
# Suppress noisy HTTP warnings from uvicorn and asyncio
logging.getLogger("uvicorn.error").setLevel(logging.CRITICAL)
logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
logging.getLogger("asyncio").setLevel(logging.CRITICAL)
# Configuration from environment
MODEL_NAME = os.getenv("MODEL_NAME", "intfloat/multilingual-e5-large")
#MODEL_NAME = os.getenv("MODEL_NAME", "BAAI/bge-m3")
MAX_LENGTH = int(os.getenv("MAX_LENGTH", "512"))
HOST = os.getenv("HOST", "0.0.0.0")
PORT = int(os.getenv("PORT", "8001"))
# Global model instance (single model) with thread-safe access
model = None
model_lock = threading.Lock()
model_dimensions = None
# Statistics
embedding_count = 0
total_embedding_time = 0.0
stats_lock = threading.Lock()
class EmbedRequest(BaseModel):
"""Request model for single text embedding."""
text: str = Field(..., description="Text to embed")
is_query: bool = Field(False, description="If True, use 'query:' prefix for e5 models")
class EmbedBatchRequest(BaseModel):
"""Request model for batch text embedding."""
texts: List[str] = Field(..., description="List of texts to embed")
is_query: bool = Field(False, description="If True, use 'query:' prefix for e5 models")
class EmbedResponse(BaseModel):
"""Response model for embedding result."""
embedding: List[float] = Field(..., description="Vector embedding")
dimensions: int = Field(..., description="Number of dimensions")
token_count: int = Field(..., description="Number of input tokens")
class EmbedBatchResponse(BaseModel):
"""Response model for batch embedding result."""
embeddings: List[List[float]] = Field(..., description="List of vector embeddings")
dimensions: int = Field(..., description="Number of dimensions")
count: int = Field(..., description="Number of embeddings generated")
token_counts: List[int] = Field(..., description="Number of input tokens for each text")
class HealthResponse(BaseModel):
"""Health check response."""
status: str
model_name: str
dimensions: int
max_length: int
embeddings_processed: int
avg_time_ms: float
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Initialize single model on startup."""
global model, model_dimensions
from sentence_transformers import SentenceTransformer
logger.info(f"Loading single model: {MODEL_NAME}")
try:
model = SentenceTransformer(MODEL_NAME)
model_dimensions = model.get_sentence_embedding_dimension()
logger.info(f"Model loaded successfully. Embedding dimension: {model_dimensions}")
logger.info("Ready to process embeddings - statistics will be logged every 100 embeddings")
except Exception as e:
logger.error(f"Failed to load model: {e}")
raise
yield
# Cleanup
with stats_lock:
avg_time_ms = (total_embedding_time / embedding_count * 1000) if embedding_count > 0 else 0.0
logger.info(f"Shutting down embedding service - Final statistics: {embedding_count} embeddings processed, average time: {avg_time_ms:.2f}ms per embedding")
# Create FastAPI app
app = FastAPI(
title="TED Embedding Service",
description="Generate text embeddings using sentence-transformers for semantic search",
version="1.0.0",
lifespan=lifespan
)
def add_prefix(text: str, is_query: bool) -> str:
"""Add appropriate prefix for e5 models."""
if "e5" in MODEL_NAME.lower():
prefix = "query: " if is_query else "passage: "
return prefix + text
return text
def check_token_length(text: str, model) -> tuple[int, bool]:
"""
Check if text exceeds MAX_LENGTH tokens and return token count.
Returns:
tuple: (token_count, is_truncated)
"""
try:
# Get tokenizer from model
tokenizer = model.tokenizer
tokens = tokenizer.encode(text, add_special_tokens=True)
token_count = len(tokens)
byte_count = len(text.encode('utf-8'))
if token_count > MAX_LENGTH:
logger.warning(
f"Text exceeds MAX_LENGTH ({MAX_LENGTH} tokens). "
f"Actual: {token_count} tokens, {byte_count} bytes ({len(text)} chars). "
f"Text will be truncated by the model. "
f"Preview: {text[:100]}..."
)
return token_count, True
return token_count, False
except Exception as e:
logger.debug(f"Could not check token length: {e}")
return 0, False
@app.post("/embed", response_model=EmbedResponse)
async def embed_text(request: EmbedRequest) -> EmbedResponse:
"""Generate embedding for a single text using thread-safe single model."""
global embedding_count, total_embedding_time
if model is None:
raise HTTPException(status_code=503, detail="Model not initialized")
try:
start_time = time.time()
# Thread-safe access to single model
with model_lock:
# Add prefix for e5 models
text = add_prefix(request.text, request.is_query)
# Check token length and warn if exceeding MAX_LENGTH
token_count, is_truncated = check_token_length(text, model)
byte_count = len(text.encode('utf-8'))
if is_truncated:
logger.info(f"Processing text: {token_count} tokens, {byte_count} bytes ({len(text)} chars) - exceeds {MAX_LENGTH}, will be truncated")
# Generate embedding
embedding = model.encode(
text,
normalize_embeddings=True,
convert_to_numpy=True
)
# Update statistics
elapsed_time = time.time() - start_time
with stats_lock:
embedding_count += 1
total_embedding_time += elapsed_time
# Log statistics every 100 embeddings
if embedding_count % 100 == 0:
avg_time = total_embedding_time / embedding_count
logger.info(f"Statistics: {embedding_count} embeddings processed, average time: {avg_time*1000:.2f}ms per embedding")
return EmbedResponse(
embedding=embedding.tolist(),
dimensions=len(embedding),
token_count=token_count
)
except Exception as e:
logger.error(f"Embedding failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.post("/embed/batch", response_model=EmbedBatchResponse)
async def embed_batch(request: EmbedBatchRequest) -> EmbedBatchResponse:
"""Generate embeddings for multiple texts using thread-safe single model."""
global embedding_count, total_embedding_time
if model is None:
raise HTTPException(status_code=503, detail="Model not initialized")
if not request.texts:
raise HTTPException(status_code=400, detail="Empty text list")
try:
start_time = time.time()
batch_count = len(request.texts)
# Thread-safe access to single model
with model_lock:
# Add prefixes
texts = [add_prefix(text, request.is_query) for text in request.texts]
# Check token length for each text and warn if exceeding MAX_LENGTH
truncated_count = 0
token_counts = []
for i, text in enumerate(texts):
token_count, is_truncated = check_token_length(text, model)
token_counts.append(token_count)
if is_truncated:
truncated_count += 1
byte_count = len(text.encode('utf-8'))
logger.info(
f"Batch item {i + 1}/{len(texts)}: {token_count} tokens, "
f"{byte_count} bytes ({len(text)} chars) - exceeds {MAX_LENGTH}, will be truncated"
)
if truncated_count > 0:
logger.warning(
f"Batch processing: {truncated_count}/{len(texts)} texts exceed "
f"MAX_LENGTH ({MAX_LENGTH} tokens) and will be truncated"
)
# Generate embeddings
embeddings = model.encode(
texts,
normalize_embeddings=True,
convert_to_numpy=True,
batch_size=16,
show_progress_bar=False
)
# Update statistics
elapsed_time = time.time() - start_time
with stats_lock:
embedding_count += batch_count
total_embedding_time += elapsed_time
# Log statistics every 100 embeddings
if embedding_count % 100 == 0 or (embedding_count // 100) != ((embedding_count - batch_count) // 100):
avg_time = total_embedding_time / embedding_count
logger.info(f"Statistics: {embedding_count} embeddings processed, average time: {avg_time*1000:.2f}ms per embedding")
return EmbedBatchResponse(
embeddings=[emb.tolist() for emb in embeddings],
dimensions=embeddings.shape[1] if len(embeddings.shape) > 1 else len(embeddings),
count=len(embeddings),
token_counts=token_counts
)
except Exception as e:
logger.error(f"Batch embedding failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@app.get("/health", response_model=HealthResponse)
async def health_check() -> HealthResponse:
"""Health check endpoint."""
if model is None:
raise HTTPException(status_code=503, detail="Model not initialized")
with stats_lock:
avg_time_ms = (total_embedding_time / embedding_count * 1000) if embedding_count > 0 else 0.0
return HealthResponse(
status="healthy",
model_name=MODEL_NAME,
dimensions=model_dimensions,
max_length=MAX_LENGTH,
embeddings_processed=embedding_count,
avg_time_ms=round(avg_time_ms, 2)
)
@app.get("/")
async def root():
"""Root endpoint with API info."""
return {
"service": "TED Embedding Service",
"model": MODEL_NAME,
"endpoints": {
"embed": "POST /embed - Generate single embedding",
"embed_batch": "POST /embed/batch - Generate batch embeddings",
"health": "GET /health - Health check"
}
}
if __name__ == "__main__":
logger.info(f"Starting embedding service on {HOST}:{PORT}")
uvicorn.run(
"embedding_service:app",
host=HOST,
port=PORT,
log_level="info",
reload=False
)
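
The batch endpoint above can be exercised with a small client. This is a minimal sketch, not part of the service itself: the host/port (`localhost:8000`) is an assumption, and the request fields `texts`/`is_query` are inferred from how the handler reads them (`request.texts`, `request.is_query`).

```python
import json
import urllib.request

EMBED_BATCH_URL = "http://localhost:8000/embed/batch"  # assumed host/port

def build_batch_request(texts, is_query=False):
    """Build the JSON body the /embed/batch handler reads (texts, is_query)."""
    return json.dumps({"texts": list(texts), "is_query": is_query}).encode("utf-8")

def parse_batch_response(raw):
    """Extract embeddings and token counts from an EmbedBatchResponse payload."""
    data = json.loads(raw)
    assert data["count"] == len(data["embeddings"])
    return data["embeddings"], data["token_counts"]

def embed_batch(texts, is_query=False):
    """POST texts to a running service instance; returns (embeddings, token_counts)."""
    req = urllib.request.Request(
        EMBED_BATCH_URL,
        data=build_batch_request(texts, is_query),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=30) as resp:
        return parse_batch_response(resp.read())
```

`embed_batch` needs the service running; the two helpers are pure and can be unit-tested offline.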

@ -0,0 +1,30 @@
@echo off
REM Batch script to execute ENUM fix using psql
REM If psql is not in PATH, update the PSQL_PATH variable below
SET PSQL_PATH=psql
SET PGPASSWORD=PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc=
SET PGHOST=94.130.218.54
SET PGPORT=5432
SET PGUSER=postgres
SET PGDATABASE=Sales
echo Executing ENUM fix on remote database...
echo Host: %PGHOST%:%PGPORT%
echo Database: %PGDATABASE%
echo.
%PSQL_PATH% -h %PGHOST% -p %PGPORT% -U %PGUSER% -d %PGDATABASE% -f "fix-enum-types-comprehensive.sql"
if %ERRORLEVEL% EQU 0 (
echo.
echo SUCCESS: ENUM fix executed successfully!
echo Please restart your Spring Boot application.
) else (
echo.
echo ERROR: Failed to execute ENUM fix. Error code: %ERRORLEVEL%
echo.
echo If psql is not found, please install PostgreSQL client tools or use a GUI tool like DBeaver.
)
pause

@ -0,0 +1,21 @@
@echo off
REM Fix organization table schema - extend VARCHAR fields
REM This applies the V2 migration manually if Flyway hasn't picked it up
echo Applying schema fix to ted.organization table...
set PGPASSWORD=PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc=
psql -h 94.130.218.54 -p 32333 -U postgres -d RELM -c "ALTER TABLE ted.organization ALTER COLUMN postal_code TYPE VARCHAR(255); ALTER TABLE ted.organization ALTER COLUMN street_name TYPE TEXT; ALTER TABLE ted.organization ALTER COLUMN city TYPE VARCHAR(255); ALTER TABLE ted.organization ALTER COLUMN phone TYPE VARCHAR(100); ALTER TABLE ted.organization ALTER COLUMN org_reference TYPE VARCHAR(100); ALTER TABLE ted.organization ALTER COLUMN role TYPE VARCHAR(100); SELECT 'Schema updated successfully!' AS result;"
if %ERRORLEVEL% EQU 0 (
echo.
echo Schema fix applied successfully!
echo You can now restart the application.
) else (
echo.
echo ERROR: Failed to apply schema fix.
echo Please check the error messages above.
)
pause

@ -0,0 +1,20 @@
-- Fix organization table schema - extend VARCHAR fields to handle long TED data
-- Run this manually if Flyway migration V2 hasn't been applied yet
-- Usage: psql -h 94.130.218.54 -p 32333 -U postgres -d RELM -f fix-organization-schema.sql
-- Check current schema
\d ted.organization
-- Extend VARCHAR fields in organization table
ALTER TABLE ted.organization ALTER COLUMN postal_code TYPE VARCHAR(255);
ALTER TABLE ted.organization ALTER COLUMN street_name TYPE TEXT;
ALTER TABLE ted.organization ALTER COLUMN city TYPE VARCHAR(255);
ALTER TABLE ted.organization ALTER COLUMN phone TYPE VARCHAR(100);
ALTER TABLE ted.organization ALTER COLUMN org_reference TYPE VARCHAR(100);
ALTER TABLE ted.organization ALTER COLUMN role TYPE VARCHAR(100);
-- Verify changes
\d ted.organization
-- Show what was changed
SELECT 'Schema updated successfully' AS result;

@ -0,0 +1,265 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
TED Procurement Document Processor
Author: Martin.Schweitzer@procon.co.at and claude.ai
Spring Boot application for processing EU eForms public procurement notices.
Features:
- Apache Camel directory watching and processing
- PostgreSQL storage with XML and vector columns
- Async vectorization using sentence-transformers
- REST API for structured and semantic search
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.3.5</version>
<relativePath/>
</parent>
<groupId>at.procon.ted</groupId>
<artifactId>ted-procurement-processor</artifactId>
<version>1.0.0-SNAPSHOT</version>
<name>TED Procurement Processor</name>
<description>EU eForms TED document processor with vector search capabilities</description>
<properties>
<java.version>21</java.version>
<camel.version>4.8.1</camel.version>
<eforms-sdk.version>1.13.1</eforms-sdk.version>
<pgvector.version>0.1.6</pgvector.version>
<djl.version>0.30.0</djl.version>
</properties>
<dependencies>
<!-- Spring Boot Core -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Apache Camel -->
<dependency>
<groupId>org.apache.camel.springboot</groupId>
<artifactId>camel-spring-boot-starter</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-file</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-jaxb</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-validator</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-http</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-bean</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-jackson</artifactId>
<version>${camel.version}</version>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-mail</artifactId>
<version>${camel.version}</version>
</dependency>
<!-- JSoup for HTML to Text conversion -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.18.1</version>
</dependency>
<!-- Apache PDFBox for PDF text extraction -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.3</version>
</dependency>
<!-- PostgreSQL -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.pgvector</groupId>
<artifactId>pgvector</artifactId>
<version>${pgvector.version}</version>
</dependency>
<!-- EU eForms SDK for schema validation -->
<dependency>
<groupId>eu.europa.ted.eforms</groupId>
<artifactId>eforms-sdk</artifactId>
<version>${eforms-sdk.version}</version>
</dependency>
<!-- XML Processing -->
<dependency>
<groupId>jakarta.xml.bind</groupId>
<artifactId>jakarta.xml.bind-api</artifactId>
<version>4.0.2</version>
</dependency>
<dependency>
<groupId>org.glassfish.jaxb</groupId>
<artifactId>jaxb-runtime</artifactId>
<version>4.0.5</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>4.0.5</version>
<scope>runtime</scope>
</dependency>
<!-- DJL (Deep Java Library) for sentence-transformers integration -->
<dependency>
<groupId>ai.djl</groupId>
<artifactId>api</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl.huggingface</groupId>
<artifactId>tokenizers</artifactId>
<version>${djl.version}</version>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-engine</artifactId>
<version>${djl.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>ai.djl.pytorch</groupId>
<artifactId>pytorch-model-zoo</artifactId>
<version>${djl.version}</version>
</dependency>
<!-- Utilities -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct</artifactId>
<version>1.6.2</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>33.3.1-jre</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.27.1</version>
</dependency>
<!-- Apache POI for Excel generation -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.3.0</version>
</dependency>
<!-- OpenAPI Documentation -->
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
<version>2.6.0</version>
</dependency>
<!-- Testing -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-test-spring-junit5</artifactId>
<version>${camel.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>postgresql</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
<path>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>1.6.2</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -0,0 +1,16 @@
# Python dependencies for embedding service
# Author: Martin.Schweitzer@procon.co.at and claude.ai
# Sentence Transformers for embedding generation
sentence-transformers>=2.7.0
# FastAPI for HTTP API
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
# PyTorch (the CPU wheel suffices; install a CUDA-enabled build for GPU)
torch>=2.0.0
# Utilities
numpy>=1.24.0
pydantic>=2.0.0

@ -0,0 +1,15 @@
-- Reset stuck packages to PENDING
-- Run this if packages are stuck in DOWNLOADING or PROCESSING status
-- Usage: psql -h 94.130.218.54 -p 32333 -U postgres -d RELM -f reset-stuck-packages.sql
UPDATE ted.ted_daily_package
SET download_status = 'PENDING',
error_message = 'Reset from stuck state - manual'
WHERE download_status IN ('DOWNLOADING', 'PROCESSING');
-- Show what was reset
SELECT package_identifier, year, serial_number, download_status, error_message, updated_at
FROM ted.ted_daily_package
WHERE error_message LIKE '%Reset from stuck state%'
ORDER BY year DESC, serial_number DESC
LIMIT 10;

@ -0,0 +1,26 @@
package at.procon.ted;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableAsync;
/**
* TED Procurement Document Processor Application.
*
* Processes EU eForms public procurement notices from TED (Tenders Electronic Daily).
* Features:
* - Directory watching with Apache Camel for automated XML processing
* - PostgreSQL storage with native XML support and pgvector for semantic search
* - Asynchronous document vectorization using multilingual-e5-large model
* - REST API for structured and semantic search
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@SpringBootApplication
@EnableAsync
public class TedProcurementProcessorApplication {
public static void main(String[] args) {
SpringApplication.run(TedProcurementProcessorApplication.class, args);
}
}

@ -0,0 +1,485 @@
package at.procon.ted.camel;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.service.attachment.AttachmentExtractor;
import at.procon.ted.service.attachment.AttachmentProcessingService;
import jakarta.mail.BodyPart;
import jakarta.mail.Message;
import jakarta.mail.Multipart;
import jakarta.mail.Part;
import jakarta.mail.Session;
import jakarta.mail.internet.MimeMessage;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.Exchange;
import org.apache.camel.LoggingLevel;
import org.apache.camel.builder.RouteBuilder;
import org.jsoup.Jsoup;
import org.springframework.stereotype.Component;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
* Apache Camel route for IMAP mail processing.
*
* Features:
* - IMAP connection with SSL/TLS to mail server
* - MIME message decoding
* - Asynchronous attachment processing with idempotency
* - PDF text extraction
* - ZIP file extraction with recursive processing
* - HTML to plain text conversion
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class MailRoute extends RouteBuilder {
private static final String ROUTE_ID_IMAP = "mail-imap-consumer";
private static final String ROUTE_ID_MIME_FILE = "mail-mime-file-consumer";
private static final String ROUTE_ID_MIME = "mail-mime-decoder";
private static final String ROUTE_ID_ATTACHMENT = "mail-attachment-processor";
private static final String ROUTE_ID_ATTACHMENT_ASYNC = "mail-attachment-async";
private final TedProcessorProperties properties;
private final AttachmentProcessingService attachmentProcessingService;
@Override
public void configure() throws Exception {
TedProcessorProperties.MailProperties mail = properties.getMail();
if (!mail.isEnabled()) {
log.info("Mail processing is disabled, skipping route configuration");
return;
}
log.info("Configuring mail routes (host={}, port={}, ssl={}, user={})",
mail.getHost(), mail.getPort(), mail.isSsl(), mail.getUsername());
// Ensure attachment output directory exists
File attachmentDir = new File(mail.getAttachmentOutputDirectory());
if (!attachmentDir.exists()) {
attachmentDir.mkdirs();
log.info("Created attachment output directory: {}", attachmentDir.getAbsolutePath());
}
// Error handler for mail processing
errorHandler(deadLetterChannel("direct:mail-error-handler")
.maximumRedeliveries(3)
.redeliveryDelay(5000)
.retryAttemptedLogLevel(LoggingLevel.WARN)
.logStackTrace(true));
// Mail error handler route
from("direct:mail-error-handler")
.routeId("mail-error-handler")
.process(exchange -> {
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
String subject = exchange.getIn().getHeader("mailSubject", String.class);
if (exception != null) {
log.error("Mail processing error for subject '{}': {}", subject, exception.getMessage(), exception);
}
})
.log(LoggingLevel.ERROR, "Mail processing failed: ${exception.message}");
// IMAP consumer route
from(buildImapUri())
.routeId(ROUTE_ID_IMAP)
.log(LoggingLevel.INFO, "Received email: ${header.subject} from ${header.from}")
.to("direct:mime");
// MIME file consumer route - reads .eml files from directory
if (mail.isMimeInputEnabled()) {
configureMimeFileConsumer(mail);
}
// MIME decoder route - decodes the email and extracts content/attachments
from("direct:mime")
.routeId(ROUTE_ID_MIME)
.process(exchange -> {
Message mailMessage = exchange.getIn().getBody(Message.class);
if (mailMessage == null) {
log.warn("Received null mail message, skipping");
return;
}
String subject = mailMessage.getSubject();
String from = mailMessage.getFrom() != null && mailMessage.getFrom().length > 0
? mailMessage.getFrom()[0].toString() : "unknown";
log.info("Processing MIME message: subject='{}', from='{}'", subject, from);
// Store mail metadata in headers
exchange.getIn().setHeader("mailSubject", subject);
exchange.getIn().setHeader("mailFrom", from);
exchange.getIn().setHeader("mailReceivedDate", mailMessage.getReceivedDate());
// Process the content
List<AttachmentInfo> attachments = new ArrayList<>();
StringBuilder textContent = new StringBuilder();
StringBuilder htmlContent = new StringBuilder();
processMessageContent(mailMessage, textContent, htmlContent, attachments);
// Convert HTML to plain text if we have HTML but no plain text
String finalTextContent;
if (textContent.length() == 0 && htmlContent.length() > 0) {
finalTextContent = convertHtmlToText(htmlContent.toString());
log.debug("Converted HTML mail to plain text ({} chars)", finalTextContent.length());
} else {
finalTextContent = textContent.toString();
}
// Store results
exchange.getIn().setHeader("mailTextContent", finalTextContent);
exchange.getIn().setHeader("mailHtmlContent", htmlContent.toString());
exchange.getIn().setHeader("mailAttachments", attachments);
exchange.getIn().setHeader("mailAttachmentCount", attachments.size());
log.info("MIME decoded: subject='{}', textLength={}, htmlLength={}, attachments={}",
subject, finalTextContent.length(), htmlContent.length(), attachments.size());
})
// Queue attachments for async processing
.choice()
.when(simple("${header.mailAttachmentCount} > 0"))
.log(LoggingLevel.INFO, "Queueing ${header.mailAttachmentCount} attachments for async processing")
.otherwise()
.log(LoggingLevel.DEBUG, "No attachments in email: ${header.mailSubject}")
.end()
// Process attachments asynchronously via SEDA
.filter(simple("${header.mailAttachmentCount} > 0"))
.split(header("mailAttachments"))
.to("seda:attachment-async?waitForTaskToComplete=Never&size=500")
.end()
.end()
.log(LoggingLevel.INFO, "Mail processing completed: ${header.mailSubject}");
// Async attachment processor route via SEDA
from("seda:attachment-async?concurrentConsumers=2&size=500")
.routeId(ROUTE_ID_ATTACHMENT_ASYNC)
.to("direct:attachment");
// Attachment processor route - handles individual attachments with idempotency
from("direct:attachment")
.routeId(ROUTE_ID_ATTACHMENT)
.process(exchange -> {
AttachmentInfo attachment = exchange.getIn().getBody(AttachmentInfo.class);
if (attachment == null) {
log.warn("Received null attachment info, skipping");
return;
}
String mailSubject = exchange.getIn().getHeader("mailSubject", String.class);
String mailFrom = exchange.getIn().getHeader("mailFrom", String.class);
String parentHash = exchange.getIn().getHeader("parentHash", String.class);
log.info("Processing attachment: '{}' ({} bytes, type={}) from email '{}'",
attachment.getFilename(), attachment.getSize(),
attachment.getContentType(), mailSubject);
// Process attachment with idempotency check
AttachmentProcessingService.ProcessingResult result = attachmentProcessingService.processAttachment(
attachment.getData(),
attachment.getFilename(),
attachment.getContentType(),
mailSubject,
mailFrom,
parentHash
);
if (result.isDuplicate()) {
log.info("Attachment is duplicate, skipping: '{}'", attachment.getFilename());
exchange.setProperty("isDuplicate", true);
return;
}
if (!result.isSuccess()) {
log.warn("Attachment processing failed: '{}' - {}",
attachment.getFilename(), result.errorMessage());
return;
}
// Store result in exchange
exchange.getIn().setHeader("attachmentId", result.attachment().getId());
exchange.getIn().setHeader("attachmentHash", result.attachment().getContentHash());
exchange.getIn().setHeader("extractedText",
result.attachment().getExtractedText() != null
? result.attachment().getExtractedText().length() + " chars"
: "none");
// Queue child attachments (from ZIP) for recursive processing
if (result.hasChildren()) {
    log.info("Queueing {} child attachments from ZIP '{}'",
            result.childAttachments().size(), attachment.getFilename());
    // Create the producer template once (not per child) so it can be stopped and not leaked
    org.apache.camel.ProducerTemplate template = getContext().createProducerTemplate();
    try {
        for (AttachmentExtractor.ChildAttachment child : result.childAttachments()) {
            // Create AttachmentInfo for child and send to SEDA queue
            AttachmentInfo childInfo = new AttachmentInfo(
                    child.filename(),
                    child.contentType(),
                    child.data(),
                    child.data().length
            );
            // Send to SEDA for async processing with parent hash
            template.sendBodyAndHeaders(
                    "seda:attachment-async?waitForTaskToComplete=Never",
                    childInfo,
                    java.util.Map.of(
                            "mailSubject", mailSubject != null ? mailSubject : "",
                            "mailFrom", mailFrom != null ? mailFrom : "",
                            "parentHash", result.attachment().getContentHash(),
                            "pathInArchive", child.pathInArchive()
                    )
            );
        }
    } finally {
        template.stop();
    }
}
})
.choice()
.when(exchangeProperty("isDuplicate").isEqualTo(true))
.log(LoggingLevel.DEBUG, "Skipped duplicate attachment")
.otherwise()
.log(LoggingLevel.INFO, "Attachment processed: ${header.attachmentId}, extracted=${header.extractedText}")
.end();
}
/**
* Configure the MIME file consumer route.
*/
private void configureMimeFileConsumer(TedProcessorProperties.MailProperties mail) throws Exception {
// Ensure MIME input directory exists
File mimeInputDir = new File(mail.getMimeInputDirectory());
if (!mimeInputDir.exists()) {
mimeInputDir.mkdirs();
log.info("Created MIME input directory: {}", mimeInputDir.getAbsolutePath());
}
String mimeFileUri = buildMimeFileUri(mail);
log.info("Configuring MIME file consumer: {}", mimeFileUri);
// MIME file consumer - reads .eml files and sends to direct:mime
from(mimeFileUri)
.routeId(ROUTE_ID_MIME_FILE)
.log(LoggingLevel.INFO, "Reading MIME file: ${header.CamelFileName}")
.process(exchange -> {
// Read file content as bytes
byte[] fileContent = exchange.getIn().getBody(byte[].class);
String filename = exchange.getIn().getHeader(Exchange.FILE_NAME, String.class);
if (fileContent == null || fileContent.length == 0) {
log.warn("Empty MIME file: {}", filename);
throw new RuntimeException("Empty MIME file: " + filename);
}
log.debug("Parsing MIME file: {} ({} bytes)", filename, fileContent.length);
// Parse the file as a MimeMessage
Session session = Session.getInstance(new Properties()); // getInstance: avoid the JVM-wide cached default session
try (ByteArrayInputStream bais = new ByteArrayInputStream(fileContent)) {
MimeMessage mimeMessage = new MimeMessage(session, bais);
// Set the parsed message as body for direct:mime
exchange.getIn().setBody(mimeMessage);
log.info("Parsed MIME file: {} -> subject='{}'",
filename, mimeMessage.getSubject());
}
})
.to("direct:mime")
.log(LoggingLevel.INFO, "MIME file processed successfully: ${header.CamelFileName}");
}
/**
* Build the file URI for MIME file consumer.
*/
private String buildMimeFileUri(TedProcessorProperties.MailProperties mail) {
String directory = mail.getMimeInputDirectory().replace("\\", "/");
StringBuilder uri = new StringBuilder("file:");
uri.append(directory);
uri.append("?");
// File pattern
uri.append("include=").append(mail.getMimeInputPattern());
// Polling interval
uri.append("&delay=").append(mail.getMimeInputPollInterval());
// Move to .processed after successful processing
uri.append("&move=.processed");
// Move to .error on failure
uri.append("&moveFailed=.error");
// Read lock to prevent processing incomplete files
uri.append("&readLock=changed");
uri.append("&readLockCheckInterval=1000");
uri.append("&readLockTimeout=30000");
// Sort by name for consistent ordering
uri.append("&sortBy=file:name");
// Don't process hidden files
uri.append("&exclude=^\\..*");
// Recursive scanning disabled by default
uri.append("&recursive=false");
return uri.toString();
}
/**
* Build the IMAP URI for the mail consumer.
*/
private String buildImapUri() {
TedProcessorProperties.MailProperties mail = properties.getMail();
StringBuilder uri = new StringBuilder();
uri.append(mail.isSsl() ? "imaps://" : "imap://");
uri.append(mail.getHost());
uri.append(":").append(mail.getPort());
uri.append("?username=").append(encodeUriComponent(mail.getUsername()));
uri.append("&password=").append(encodeUriComponent(mail.getPassword()));
uri.append("&folderName=").append(mail.getFolderName());
uri.append("&delete=").append(mail.isDelete());
// peek=false means messages will be marked as SEEN after fetch
// peek=true means messages will NOT be marked as SEEN (peek only)
uri.append("&peek=").append(!mail.isSeen());
uri.append("&unseen=").append(mail.isUnseen());
uri.append("&delay=").append(mail.getDelay());
uri.append("&maxMessagesPerPoll=").append(mail.getMaxMessagesPerPoll());
// Connection settings
uri.append("&connectionTimeout=30000");
uri.append("&fetchSize=-1"); // -1 disables the per-poll message limit
uri.append("&debugMode=false");
log.info("IMAP URI configured (password hidden): {}://{}:{}?username={}&folderName={}",
mail.isSsl() ? "imaps" : "imap", mail.getHost(), mail.getPort(),
mail.getUsername(), mail.getFolderName());
return uri.toString();
}
/**
* URL-encode a URI component.
*/
private String encodeUriComponent(String value) {
if (value == null) return "";
try {
return java.net.URLEncoder.encode(value, StandardCharsets.UTF_8);
} catch (Exception e) {
return value;
}
}
/**
* Recursively process message content to extract text, HTML, and attachments.
*/
private void processMessageContent(Part part, StringBuilder textContent,
StringBuilder htmlContent, List<AttachmentInfo> attachments) throws Exception {
String contentType = part.getContentType().toLowerCase();
String disposition = part.getDisposition();
// Check if this is an attachment
if (disposition != null && (disposition.equalsIgnoreCase(Part.ATTACHMENT)
|| disposition.equalsIgnoreCase(Part.INLINE))) {
extractAttachment(part, attachments);
return;
}
Object content = part.getContent();
if (content instanceof Multipart multipart) {
// Process each part of the multipart message
for (int i = 0; i < multipart.getCount(); i++) {
BodyPart bodyPart = multipart.getBodyPart(i);
processMessageContent(bodyPart, textContent, htmlContent, attachments);
}
} else if (contentType.contains("text/plain")) {
// Plain text content
String text = content.toString();
textContent.append(text);
} else if (contentType.contains("text/html")) {
// HTML content
String html = content.toString();
htmlContent.append(html);
} else if (part.getFileName() != null) {
// Has filename - treat as attachment
extractAttachment(part, attachments);
}
}
/**
* Extract attachment data from a message part.
*/
private void extractAttachment(Part part, List<AttachmentInfo> attachments) throws Exception {
String filename = part.getFileName();
if (filename == null) {
filename = "unnamed_attachment";
}
// Decode filename if necessary (might be MIME-encoded)
try {
filename = jakarta.mail.internet.MimeUtility.decodeText(filename);
} catch (Exception e) {
log.debug("Could not decode filename: {}", filename);
}
String contentType = part.getContentType();
// Read attachment data
byte[] data;
try (InputStream is = part.getInputStream()) {
data = is.readAllBytes();
}
AttachmentInfo info = new AttachmentInfo(filename, contentType, data, data.length);
attachments.add(info);
log.debug("Extracted attachment: '{}' ({} bytes, type={})", filename, data.length, contentType);
}
/**
* Convert HTML content to plain text using JSoup.
*/
private String convertHtmlToText(String html) {
if (html == null || html.isBlank()) {
return "";
}
try {
// Parse HTML and extract text
org.jsoup.nodes.Document doc = Jsoup.parse(html);
// Remove script and style elements
doc.select("script, style").remove();
// Get text with whitespace preservation
String text = doc.text();
// Clean up excessive whitespace
text = text.replaceAll("\\s+", " ").trim();
return text;
} catch (Exception e) {
log.warn("Failed to convert HTML to text: {}", e.getMessage());
// Fallback: strip HTML tags with regex
return html.replaceAll("<[^>]+>", " ").replaceAll("\\s+", " ").trim();
}
}
/**
* DTO for attachment information.
*/
@lombok.Data
@lombok.AllArgsConstructor
public static class AttachmentInfo {
private String filename;
private String contentType;
private byte[] data;
private int size;
}
}
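
The MIME file consumer above reads `.eml` files from a drop directory. A test message for that route can be assembled with Python's standard `email` library; this is an illustrative sketch — the addresses and attachment name are made up, not taken from the route's configuration.

```python
from email.message import EmailMessage

def build_test_eml(subject, body, attachment_name, attachment_bytes):
    """Assemble a multipart message with one binary attachment, as raw RFC 822 bytes."""
    msg = EmailMessage()
    msg["Subject"] = subject
    msg["From"] = "sender@example.com"
    msg["To"] = "inbox@example.com"
    msg.set_content(body)                      # text/plain part
    msg.add_attachment(                        # application/pdf attachment part
        attachment_bytes,
        maintype="application",
        subtype="pdf",
        filename=attachment_name,
    )
    return msg.as_bytes()
```

Writing the returned bytes to a `.eml` file in the configured `mimeInputDirectory` should trigger the `mail-mime-file-consumer` route, which parses it into a `MimeMessage` and hands it to `direct:mime`.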

@ -0,0 +1,180 @@
package at.procon.ted.camel;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.service.ExcelExportService;
import at.procon.ted.service.SimilaritySearchService;
import at.procon.ted.service.SimilaritySearchService.SimilaritySearchResponse;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.Exchange;
import org.apache.camel.LoggingLevel;
import org.apache.camel.builder.RouteBuilder;
import org.springframework.stereotype.Component;
import java.io.File;
import java.nio.file.Paths;
/**
* Apache Camel route for processing Solution Brief PDF files.
*
* Features:
* - Scans input directory for PDF files
* - Performs semantic similarity search against TED documents
* - Generates Excel (XLSX) reports with hyperlinks to matching tenders
* - Files are NOT moved (noop mode)
* - Idempotent processing to avoid reprocessing same files
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class SolutionBriefRoute extends RouteBuilder {
private static final String ROUTE_ID = "solution-brief-processor";
private final TedProcessorProperties properties;
private final SimilaritySearchService similaritySearchService;
private final ExcelExportService excelExportService;
@Override
public void configure() throws Exception {
TedProcessorProperties.SolutionBriefProperties config = properties.getSolutionBrief();
if (!config.isEnabled()) {
log.info("Solution Brief processing is disabled, skipping route configuration");
return;
}
// Determine result directory (absolute or relative to input)
String resultDir = config.getResultDirectory();
if (!Paths.get(resultDir).isAbsolute()) {
resultDir = Paths.get(config.getInputDirectory(), resultDir).toString();
}
// Ensure directories exist
File inputDir = new File(config.getInputDirectory());
File outputDir = new File(resultDir);
if (!inputDir.exists()) {
inputDir.mkdirs();
log.info("Created Solution Brief input directory: {}", inputDir.getAbsolutePath());
}
if (!outputDir.exists()) {
outputDir.mkdirs();
log.info("Created Solution Brief result directory: {}", outputDir.getAbsolutePath());
}
final String finalResultDir = resultDir;
String fileUri = buildFileUri(config);
log.info("=== Solution Brief Route Configuration ===");
log.info("Input Directory: {}", config.getInputDirectory());
log.info("Input Directory exists: {}", inputDir.exists());
log.info("Input Directory is directory: {}", inputDir.isDirectory());
log.info("Result Directory: {}", resultDir);
log.info("File Pattern: {}", config.getFilePattern());
log.info("TopK: {}, Threshold: {}", config.getTopK(), config.getSimilarityThreshold());
log.info("Poll Interval: {}ms", config.getPollInterval());
log.info("Idempotent: {}", config.isIdempotent());
log.info("File URI: {}", fileUri);
log.info("===========================================");
// List existing PDFs in directory
File[] pdfFiles = inputDir.listFiles((dir, name) -> name.toLowerCase().endsWith(".pdf"));
if (pdfFiles != null && pdfFiles.length > 0) {
log.info("Found {} PDF files in input directory:", pdfFiles.length);
for (File pdf : pdfFiles) {
log.info(" - {} ({} bytes)", pdf.getName(), pdf.length());
}
} else {
log.warn("No PDF files found in input directory: {}", inputDir.getAbsolutePath());
}
// Error handler - handled(false) ensures file goes to .error directory
onException(Exception.class)
.routeId("solution-brief-error-handler")
.handled(false)
.log(LoggingLevel.ERROR, "Solution Brief processing failed for ${header.CamelFileName}: ${exception.message}")
.process(exchange -> {
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
String filename = exchange.getIn().getHeader(Exchange.FILE_NAME, String.class);
log.error("Error processing Solution Brief '{}': {}", filename,
exception != null ? exception.getMessage() : "Unknown error", exception);
});
// Main processing route
from(fileUri)
.routeId(ROUTE_ID)
.log(LoggingLevel.INFO, "Processing Solution Brief PDF: ${header.CamelFileName}")
.process(exchange -> {
String filename = exchange.getIn().getHeader(Exchange.FILE_NAME, String.class);
byte[] pdfData = exchange.getIn().getBody(byte[].class);
if (pdfData == null || pdfData.length == 0) {
log.warn("Empty PDF file: {}", filename);
exchange.setProperty("skipProcessing", true);
return;
}
log.info("Searching similar documents for: {} ({} bytes)", filename, pdfData.length);
try {
// Perform similarity search
SimilaritySearchResponse response = similaritySearchService.searchByPdf(
pdfData,
filename,
config.getTopK(),
config.getSimilarityThreshold()
);
if (response.getResults().isEmpty()) {
log.info("No similar documents found for: {}", filename);
exchange.setProperty("noResults", true);
return;
}
log.info("Found {} similar documents for: {}", response.getResultCount(), filename);
// Export to Excel
String excelPath = excelExportService.exportToExcel(
response,
filename,
finalResultDir
);
exchange.getIn().setHeader("excelOutputPath", excelPath);
exchange.getIn().setHeader("resultCount", response.getResultCount());
log.info("Excel report generated: {} ({} results)", excelPath, response.getResultCount());
} catch (Exception e) {
log.error("Failed to process Solution Brief '{}': {}", filename, e.getMessage(), e);
throw e;
}
})
.choice()
.when(exchangeProperty("skipProcessing").isEqualTo(true))
.log(LoggingLevel.WARN, "Skipped empty PDF: ${header.CamelFileName}")
.when(exchangeProperty("noResults").isEqualTo(true))
.log(LoggingLevel.INFO, "No similar documents found for: ${header.CamelFileName}")
.otherwise()
.log(LoggingLevel.INFO, "Solution Brief completed: ${header.CamelFileName} -> ${header.excelOutputPath} (${header.resultCount} results)")
.end();
}
/**
* Build the file URI for the Solution Brief consumer.
*/
private String buildFileUri(TedProcessorProperties.SolutionBriefProperties config) {
String directory = config.getInputDirectory().replace("\\", "/");
StringBuilder uri = new StringBuilder("file:");
uri.append(directory);
uri.append("?includeExt=pdf");
uri.append("&delay=").append(config.getPollInterval());
uri.append("&move=.done");
uri.append("&moveFailed=.error");
return uri.toString();
}
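// Illustrative result (directory and interval values are examples, not the
// configured defaults): with inputDirectory "D:/briefs" and pollInterval 5000
// the consumer URI becomes
//   file:D:/briefs?includeExt=pdf&delay=5000&move=.done&moveFailed=.error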
}

@ -0,0 +1,164 @@
package at.procon.ted.camel;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.service.DocumentProcessingService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.Exchange;
import org.apache.camel.LoggingLevel;
import org.apache.camel.builder.RouteBuilder;
import org.springframework.stereotype.Component;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
/**
* Apache Camel route for watching and processing TED XML documents.
*
* Features:
* - Recursive directory scanning for *.xml files
* - File locking to prevent concurrent processing
* - Move to .processed or .error directories after processing
* - Error handling with retry and dead letter channel
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class TedDocumentRoute extends RouteBuilder {
private static final String ROUTE_ID = "ted-document-processor";
private static final String ROUTE_ID_PROCESSOR = "ted-document-processor-handler";
private final TedProcessorProperties properties;
private final DocumentProcessingService documentProcessingService;
@Override
public void configure() throws Exception {
// Error handler configuration
errorHandler(deadLetterChannel("direct:error-handler")
.maximumRedeliveries(3)
.redeliveryDelay(1000)
.retryAttemptedLogLevel(LoggingLevel.WARN)
.logStackTrace(true)
.logRetryAttempted(true)
.logExhausted(true));
// Error handler route
from("direct:error-handler")
.routeId("ted-error-handler")
.process(exchange -> {
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
if (exception != null) {
log.debug("Document processing error", exception);
}
})
.to(buildErrorUri());
// Main file processing route - DISABLED
// File consumer disabled to prevent memory leak with package download route
// The package download route processes XML files directly after extraction
// from(buildFileUri())
// from("file://{{ted.input.directory}}")
// .routeId(ROUTE_ID)
// .to("direct:process-document");
// Document processing sub-route
from("direct:process-document")
.routeId(ROUTE_ID_PROCESSOR)
.process(exchange -> {
// Extract file information
String filename = exchange.getIn().getHeader(Exchange.FILE_NAME, String.class);
String absolutePath = exchange.getIn().getHeader(Exchange.FILE_PATH, String.class);
Long fileSize = exchange.getIn().getHeader(Exchange.FILE_LENGTH, Long.class);
// Read XML content
byte[] body = exchange.getIn().getBody(byte[].class);
String xmlContent = new String(body, StandardCharsets.UTF_8);
log.debug("Processing file: {} ({} bytes)", filename, fileSize);
// Process the document
DocumentProcessingService.ProcessingResult result =
documentProcessingService.processDocument(xmlContent, filename, absolutePath, fileSize);
// Set result in exchange for logging
exchange.setProperty("processingResult", result);
if (result.isError()) {
throw new RuntimeException("Document processing failed: " + result.errorMessage());
}
})
.choice()
.when(exchange -> {
DocumentProcessingService.ProcessingResult result =
exchange.getProperty("processingResult", DocumentProcessingService.ProcessingResult.class);
return result != null && result.isDuplicate();
})
// Move duplicate to processed directory (it's not an error)
.to(buildProcessedUri())
.otherwise()
// Vectorization is already triggered in DocumentProcessingService after DB save
.to(buildProcessedUri())
.end();
}
/**
* Build the file component URI for watching the input directory.
*/
private String buildFileUri() {
TedProcessorProperties.InputProperties input = properties.getInput();
// Normalize path for Camel (convert backslashes to forward slashes)
String directory = input.getDirectory().replace("\\", "/");
StringBuilder uri = new StringBuilder("file:");
uri.append(directory);
uri.append("?");
// Recursive scanning only if pattern contains **
boolean recursive = input.getPattern().contains("**");
uri.append("recursive=").append(recursive);
// File pattern (always use antInclude for Ant-style patterns like *.xml)
uri.append("&antInclude=").append(input.getPattern());
// Polling configuration
uri.append("&delay=").append(input.getPollInterval());
uri.append("&maxMessagesPerPoll=").append(input.getMaxMessagesPerPoll());
// Read lock strategy to prevent processing incomplete files
uri.append("&readLock=changed");
uri.append("&readLockCheckInterval=1000");
uri.append("&readLockTimeout=30000");
// Move files after processing
uri.append("&move=").append(input.getProcessedDirectory());
uri.append("&moveFailed=").append(input.getErrorDirectory());
// Sort by name for consistent ordering
uri.append("&sortBy=file:name");
// Don't process hidden files
uri.append("&exclude=.*");
// Max depth only for recursive scanning
if (recursive) {
uri.append("&maxDepth=10");
}
log.info("File consumer URI: {}", uri);
return uri.toString();
}
/**
* Build URI for successfully processed files.
*/
private String buildProcessedUri() {
// Files are automatically moved by the file component's 'move' option
// This is a no-op endpoint used for explicit routing
return "log:ted-processed?level=DEBUG";
}
/**
* Build URI for failed files.
*/
private String buildErrorUri() {
// Files are automatically moved by the file component's 'moveFailed' option
return "log:ted-error?level=ERROR";
}
}

@ -0,0 +1,635 @@
package at.procon.ted.camel;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.TedDailyPackage;
import at.procon.ted.repository.TedDailyPackageRepository;
import at.procon.ted.service.BatchDocumentProcessingService;
import at.procon.ted.service.TedPackageDownloadService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.Exchange;
import org.apache.camel.LoggingLevel;
import org.apache.camel.builder.RouteBuilder;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.time.OffsetDateTime;
import java.time.Year;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
* Camel-Native Route for automatic download of TED Daily Packages.
*
* Uses Camel HTTP Component for downloads and Enterprise Integration Patterns:
* - Timer-based triggers
* - Idempotent Consumer Pattern
* - Content-Based Router
* - Splitter Pattern
* - Dead Letter Channel
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@ConditionalOnProperty(name = "ted.download.enabled", havingValue = "true")
@RequiredArgsConstructor
@Slf4j
public class TedPackageDownloadCamelRoute extends RouteBuilder {
private static final String ROUTE_ID_SCHEDULER = "ted-package-scheduler";
private static final String ROUTE_ID_DOWNLOADER = "ted-package-http-downloader";
private static final String ROUTE_ID_EXTRACTOR = "ted-package-extractor";
private static final String ROUTE_ID_XML_SPLITTER = "ted-package-xml-splitter";
private final TedProcessorProperties properties;
private final TedDailyPackageRepository packageRepository;
private final TedPackageDownloadService downloadService;
private final BatchDocumentProcessingService batchProcessingService;
/**
* Creates thread pool for parallel XML processing.
* Maximum 1 parallel process to prevent OutOfMemory.
*/
private java.util.concurrent.ExecutorService executorService() {
return java.util.concurrent.Executors.newFixedThreadPool(
1,
r -> {
Thread thread = new Thread(r);
thread.setName("ted-xml-processor-" + thread.getId());
thread.setDaemon(true);
thread.setPriority(Thread.NORM_PRIORITY - 1);
return thread;
}
);
}
@Override
public void configure() throws Exception {
// Error Handler with Dead Letter Channel
errorHandler(deadLetterChannel("direct:package-download-error")
.maximumRedeliveries(3)
.redeliveryDelay(10000)
.retryAttemptedLogLevel(LoggingLevel.WARN)
.logStackTrace(true));
// Error Handler Route
from("direct:package-download-error")
.routeId("ted-package-error-handler")
.log(LoggingLevel.ERROR, "Failed to process package: ${exception.message}")
.process(this::handleDownloadError);
// Timer-based Scheduler (starts immediately with delay=0)
from("timer:ted-package-scheduler?period={{ted.download.poll-interval:120000}}&delay=0")
.routeId(ROUTE_ID_SCHEDULER)
.autoStartup(true)
.log(LoggingLevel.INFO, "TED Package Scheduler: Checking for new packages...")
.process(this::checkRunningPackages)
.choice()
.when(header("tooManyRunning").isEqualTo(true))
.log(LoggingLevel.INFO, "Skipping download - already ${header.runningCount} packages in progress (max 2)")
.otherwise()
.process(this::determineNextPackage)
.choice()
.when(header("packageId").isNotNull())
.to("direct:download-package")
.otherwise()
.log(LoggingLevel.INFO, "No more packages to download - all complete")
.end()
.end();
// Package Download Route (HTTP)
from("direct:download-package")
.routeId(ROUTE_ID_DOWNLOADER)
.log(LoggingLevel.INFO, "Starting download of package ${header.packageId}")
// Record the start time per exchange; constant() would be evaluated only once at route startup
.process(exchange -> exchange.getIn().setHeader("downloadStartTime", System.currentTimeMillis()))
// Check if already downloaded
.process(this::createPackageRecord)
// Rate Limiting
.delay(simple("{{ted.download.delay-between-downloads:5000}}"))
// HTTP Download
.setHeader(Exchange.HTTP_METHOD, constant("GET"))
.setHeader("CamelHttpConnectionClose", constant(true))
.toD("${header.downloadUrl}?bridgeEndpoint=true&throwExceptionOnFailure=false&socketTimeout={{ted.download.download-timeout:300000}}")
.choice()
// HTTP 200: Success
.when(header(Exchange.HTTP_RESPONSE_CODE).isEqualTo(200))
.log(LoggingLevel.INFO, "Processing package ${header.packageId}")
.to("direct:process-downloaded-package")
// HTTP 404: Not Found
.when(header(Exchange.HTTP_RESPONSE_CODE).isEqualTo(404))
.log(LoggingLevel.DEBUG, "Package not found (404): ${header.packageId}")
.process(this::markPackageNotFound)
// Other HTTP errors
.otherwise()
.log(LoggingLevel.WARN, "HTTP ${header.CamelHttpResponseCode} for package ${header.packageId}")
.process(this::markPackageFailed)
.end();
// Downloaded Package Processing
from("direct:process-downloaded-package")
.routeId("ted-package-processor")
.process(this::calculateHash)
.process(this::checkDuplicateByHash)
.choice()
.when(header("isDuplicate").isEqualTo(true))
.log(LoggingLevel.WARN, "Duplicate package detected via hash: ${header.packageId}")
.process(this::markPackageDuplicate)
.otherwise()
.process(this::saveDownloadedPackage)
.to("direct:extract-tar-gz")
.end();
// tar.gz Extraction Route
from("direct:extract-tar-gz")
.routeId(ROUTE_ID_EXTRACTOR)
.log(LoggingLevel.DEBUG, "Extracting package ${header.packageId}...")
.process(this::extractTarGz)
.choice()
.when(header("deleteAfterExtraction").isEqualTo(true))
.log(LoggingLevel.DEBUG, "Deleting tar.gz: ${header.downloadPath}")
.process(this::deleteTarGz)
.end()
.to("direct:split-xml-files");
// XML Files Batch Processor
from("direct:split-xml-files")
.routeId(ROUTE_ID_XML_SPLITTER)
.process(this::updatePackageProcessing)
// Record the start time per exchange; constant() would be evaluated only once at route startup
.process(exchange -> exchange.getIn().setHeader("processingStartTime", System.currentTimeMillis()))
.process(this::processBatchDocuments)
.process(this::markPackageCompleted)
.process(this::logPackageStatistics);
}
/**
* Checks how many packages are currently being processed.
* Resets stuck packages (older than 30 minutes) to PENDING.
* Sets header "tooManyRunning" to true if 2 or more packages are in progress.
*/
private void checkRunningPackages(Exchange exchange) {
OffsetDateTime thirtyMinutesAgo = OffsetDateTime.now().minusMinutes(30);
// Find stuck DOWNLOADING packages
List<TedDailyPackage> stuckDownloading = packageRepository.findByDownloadStatus(
TedDailyPackage.DownloadStatus.DOWNLOADING).stream()
.filter(pkg -> pkg.getUpdatedAt() != null && pkg.getUpdatedAt().isBefore(thirtyMinutesAgo))
.toList();
// Find stuck PROCESSING packages
List<TedDailyPackage> stuckProcessing = packageRepository.findByDownloadStatus(
TedDailyPackage.DownloadStatus.PROCESSING).stream()
.filter(pkg -> pkg.getUpdatedAt() != null && pkg.getUpdatedAt().isBefore(thirtyMinutesAgo))
.toList();
List<TedDailyPackage> stuckPackages = new ArrayList<>();
stuckPackages.addAll(stuckDownloading);
stuckPackages.addAll(stuckProcessing);
if (!stuckPackages.isEmpty()) {
log.warn("Found {} stuck packages (older than 30 minutes), resetting to PENDING", stuckPackages.size());
stuckPackages.forEach(pkg -> {
log.warn("Resetting stuck package: {} (status: {}, last update: {})",
pkg.getPackageIdentifier(), pkg.getDownloadStatus(), pkg.getUpdatedAt());
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.PENDING);
pkg.setErrorMessage("Reset from stuck state");
packageRepository.save(pkg);
});
}
// Count currently running packages (after reset)
long downloadingCount = packageRepository.findByDownloadStatus(
TedDailyPackage.DownloadStatus.DOWNLOADING).size();
long processingCount = packageRepository.findByDownloadStatus(
TedDailyPackage.DownloadStatus.PROCESSING).size();
long runningCount = downloadingCount + processingCount;
exchange.getIn().setHeader("runningCount", runningCount);
exchange.getIn().setHeader("tooManyRunning", runningCount >= 2);
if (runningCount > 0) {
log.info("Currently {} packages in progress ({} downloading, {} processing)",
runningCount, downloadingCount, processingCount);
}
}
/**
* Determines the next package to download.
* Strategy:
* 1. First check for PENDING packages (previously failed/stuck)
* 2. Then use download service strategy:
* - Current year: Forward from max(nr) until 404
* - All years: Fill gaps (if min(nr) > 1, then backward to 1)
* - If current year complete (min=1 and 404 after max) -> previous year
* - Repeat until startYear
*/
private void determineNextPackage(Exchange exchange) {
// First check for PENDING packages
List<TedDailyPackage> pendingPackages = packageRepository.findByDownloadStatus(
TedDailyPackage.DownloadStatus.PENDING);
if (!pendingPackages.isEmpty()) {
TedDailyPackage pkg = pendingPackages.get(0);
log.info("Retrying PENDING package: {}", pkg.getPackageIdentifier());
setPackageHeaders(exchange, pkg.getYear(), pkg.getSerialNumber());
return;
}
// Use download service to find next package
TedPackageDownloadService.PackageInfo packageInfo = downloadService.getNextPackageToDownload();
if (packageInfo == null) {
// No more packages
exchange.getIn().setHeader("packageId", null);
return;
}
setPackageHeaders(exchange, packageInfo.year(), packageInfo.serialNumber());
}
/**
* Sets package headers for download.
*/
private void setPackageHeaders(Exchange exchange, int year, int serialNumber) {
String packageId = String.format("%04d%05d", year, serialNumber);
String downloadUrl = properties.getDownload().getBaseUrl() + packageId;
exchange.getIn().setHeader("packageId", packageId);
exchange.getIn().setHeader("year", year);
exchange.getIn().setHeader("serialNumber", serialNumber);
exchange.getIn().setHeader("downloadUrl", downloadUrl);
}
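// Example (values are illustrative): year 2024 and serial number 123 yield the
// identifier "202400123" - "%04d" pads the year to four digits, "%05d" the
// serial number to five:
//   String id = String.format("%04d%05d", 2024, 123); // "202400123"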
/**
* Creates package record in DB.
*/
private void createPackageRecord(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
Integer year = exchange.getIn().getHeader("year", Integer.class);
Integer serialNumber = exchange.getIn().getHeader("serialNumber", Integer.class);
String downloadUrl = exchange.getIn().getHeader("downloadUrl", String.class);
Optional<TedDailyPackage> existing = packageRepository.findByPackageIdentifier(packageId);
if (existing.isPresent()) {
TedDailyPackage pkg = existing.get();
if (pkg.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND) {
log.info("Retrying existing NOT_FOUND package in Camel route: {}", packageId);
pkg.setDownloadUrl(downloadUrl);
pkg.setErrorMessage(null);
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.DOWNLOADING);
pkg = packageRepository.save(pkg);
exchange.getIn().setHeader("packageDbId", pkg.getId());
return;
}
log.debug("Package {} already exists in DB with status {}", packageId, pkg.getDownloadStatus());
exchange.getIn().setHeader("packageDbId", pkg.getId());
return;
}
TedDailyPackage pkg = TedDailyPackage.builder()
.packageIdentifier(packageId)
.year(year)
.serialNumber(serialNumber)
.downloadUrl(downloadUrl)
.downloadStatus(TedDailyPackage.DownloadStatus.DOWNLOADING)
.build();
pkg = packageRepository.save(pkg);
exchange.getIn().setHeader("packageDbId", pkg.getId());
}
/**
* Calculates SHA-256 hash.
*/
private void calculateHash(Exchange exchange) throws Exception {
byte[] body = exchange.getIn().getBody(byte[].class);
MessageDigest digest = MessageDigest.getInstance("SHA-256");
byte[] hashBytes = digest.digest(body);
StringBuilder sb = new StringBuilder();
for (byte b : hashBytes) {
sb.append(String.format("%02x", b));
}
String hash = sb.toString();
exchange.getIn().setHeader("fileHash", hash);
log.debug("Calculated hash: {}", hash);
}
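// The manual hex loop above is equivalent to JDK 17+'s HexFormat (a possible
// simplification, sketched here as a comment):
//   String hash = java.util.HexFormat.of().formatHex(hashBytes);
// e.g. the bytes {0x0a, 0xff} encode to "0aff"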
/**
* Checks for duplicate via hash.
*/
private void checkDuplicateByHash(Exchange exchange) {
String hash = exchange.getIn().getHeader("fileHash", String.class);
// Full-table scan; acceptable for the modest number of daily packages, but a
// dedicated repository query (e.g. a findByFileHash method) would scale better
Optional<TedDailyPackage> duplicate = packageRepository.findAll().stream()
.filter(p -> hash.equals(p.getFileHash()))
.findFirst();
exchange.getIn().setHeader("isDuplicate", duplicate.isPresent());
if (duplicate.isPresent()) {
exchange.getIn().setHeader("duplicateOf", duplicate.get().getPackageIdentifier());
}
}
/**
* Saves downloaded package.
*/
private void saveDownloadedPackage(Exchange exchange) throws IOException {
String packageId = exchange.getIn().getHeader("packageId", String.class);
String hash = exchange.getIn().getHeader("fileHash", String.class);
byte[] body = exchange.getIn().getBody(byte[].class);
// Save tar.gz
Path downloadDir = Paths.get(properties.getDownload().getDownloadDirectory());
Files.createDirectories(downloadDir);
Path downloadPath = downloadDir.resolve(packageId + ".tar.gz");
Files.write(downloadPath, body);
long downloadDuration = System.currentTimeMillis() -
exchange.getIn().getHeader("downloadStartTime", Long.class);
// Update DB
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setFileHash(hash);
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.DOWNLOADED);
pkg.setDownloadedAt(OffsetDateTime.now());
pkg.setDownloadDurationMs(downloadDuration);
packageRepository.save(pkg);
});
exchange.getIn().setHeader("downloadPath", downloadPath.toString());
exchange.getIn().setHeader("deleteAfterExtraction",
properties.getDownload().isDeleteAfterExtraction());
}
/**
* Extracts tar.gz.
*/
private void extractTarGz(Exchange exchange) throws IOException {
String packageId = exchange.getIn().getHeader("packageId", String.class);
String downloadPath = exchange.getIn().getHeader("downloadPath", String.class);
Path tarGzFile = Paths.get(downloadPath);
Path extractDir = Paths.get(properties.getDownload().getExtractDirectory())
.resolve(packageId);
Files.createDirectories(extractDir);
List<Path> xmlFiles = new ArrayList<>();
try (FileInputStream fis = new FileInputStream(tarGzFile.toFile());
GzipCompressorInputStream gzis = new GzipCompressorInputStream(fis);
TarArchiveInputStream tais = new TarArchiveInputStream(gzis)) {
TarArchiveEntry entry;
while ((entry = tais.getNextTarEntry()) != null) {
if (entry.isDirectory()) {
continue;
}
String name = entry.getName();
if (!name.toLowerCase().endsWith(".xml")) {
continue;
}
Path outputPath = extractDir.resolve(new File(name).getName());
try (OutputStream os = Files.newOutputStream(outputPath)) {
byte[] buffer = new byte[8192];
int read;
while ((read = tais.read(buffer)) > 0) {
os.write(buffer, 0, read);
}
}
xmlFiles.add(outputPath);
}
}
exchange.getIn().setHeader("xmlFiles", xmlFiles);
exchange.getIn().setHeader("xmlFileCount", xmlFiles.size());
// Update DB
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setXmlFileCount(xmlFiles.size());
packageRepository.save(pkg);
});
log.debug("Extracted {} XML files from package {}", xmlFiles.size(), packageId);
}
/**
* Deletes tar.gz.
*/
private void deleteTarGz(Exchange exchange) throws IOException {
String downloadPath = exchange.getIn().getHeader("downloadPath", String.class);
Files.deleteIfExists(Paths.get(downloadPath));
}
/**
* Process XML files in chunks to avoid connection leaks.
* Splits large packages into batches of 25 files.
*/
@SuppressWarnings("unchecked")
private void processBatchDocuments(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
List<Path> xmlFiles = (List<Path>) exchange.getIn().getHeader("xmlFiles");
log.info("Package {}: Processing {} XML files in batches of 25", packageId, xmlFiles.size());
int totalInserted = 0;
int totalDuplicates = 0;
int totalErrors = 0;
long totalDuration = 0;
// Process in chunks of 25 to avoid connection leaks (must complete in <60s)
int chunkSize = 25;
for (int i = 0; i < xmlFiles.size(); i += chunkSize) {
int end = Math.min(i + chunkSize, xmlFiles.size());
List<Path> chunk = xmlFiles.subList(i, end);
log.debug("Package {}: Processing chunk {}-{} of {}",
packageId, i + 1, end, xmlFiles.size());
// Process chunk in one transaction
BatchDocumentProcessingService.BatchProcessingResult result =
batchProcessingService.processBatch(chunk);
totalInserted += result.insertedCount();
totalDuplicates += result.duplicateCount();
totalErrors += result.errorCount();
totalDuration += result.durationMs();
// Update package statistics after each chunk
updatePackageStatistics(packageId, totalInserted + totalDuplicates, totalErrors);
}
// Store final result in exchange for logging
exchange.setProperty("processedCount", totalInserted + totalDuplicates);
exchange.setProperty("failedCount", totalErrors);
log.info("Package {}: Batch processing completed - {} inserted, {} duplicates, {} errors in {}ms",
packageId, totalInserted, totalDuplicates, totalErrors, totalDuration);
}
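// Chunking example (file count is illustrative): with 60 files and chunkSize 25
// the loop above produces the sub-lists [0,25), [25,50), [50,60):
//   Math.min(0 + 25, 60)  -> 25
//   Math.min(25 + 25, 60) -> 50
//   Math.min(50 + 25, 60) -> 60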
/**
* Update package processing statistics.
*/
private void updatePackageStatistics(String packageId, int processedCount, int failedCount) {
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setProcessedCount(processedCount);
pkg.setFailedCount(failedCount);
packageRepository.save(pkg);
});
}
/**
* Updates status to PROCESSING.
*/
private void updatePackageProcessing(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.PROCESSING);
packageRepository.save(pkg);
});
}
/**
* Marks package as COMPLETED and cleans up extracted XML files.
*/
private void markPackageCompleted(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
Long processingStartTime = exchange.getIn().getHeader("processingStartTime", Long.class);
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.COMPLETED);
pkg.setProcessedAt(OffsetDateTime.now());
if (processingStartTime != null) {
long processingDuration = System.currentTimeMillis() - processingStartTime;
pkg.setProcessingDurationMs(processingDuration);
}
packageRepository.save(pkg);
});
// Clean up extracted XML files and package directory to free memory
List<Path> xmlFiles = exchange.getIn().getHeader("xmlFiles", List.class);
if (xmlFiles != null) {
int deletedCount = 0;
for (Path xmlFile : xmlFiles) {
try {
if (Files.deleteIfExists(xmlFile)) {
deletedCount++;
}
} catch (IOException e) {
log.warn("Failed to delete XML file {}: {}", xmlFile, e.getMessage());
}
}
// Delete package directory if empty
if (!xmlFiles.isEmpty()) {
try {
Path packageDir = xmlFiles.get(0).getParent();
if (packageDir != null && Files.isDirectory(packageDir)) {
try (var stream = Files.list(packageDir)) {
if (stream.findAny().isEmpty()) {
Files.deleteIfExists(packageDir);
log.debug("Deleted empty package directory: {}", packageDir);
}
}
}
} catch (IOException e) {
log.debug("Could not delete package directory: {}", e.getMessage());
}
}
if (deletedCount > 0) {
log.debug("Cleaned up {} XML files for package {}", deletedCount, packageId);
}
}
}
/**
* Marks package as NOT_FOUND.
*/
private void markPackageNotFound(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.NOT_FOUND);
pkg.setErrorMessage("Package not found (404)");
packageRepository.save(pkg);
});
}
/**
* Marks package as FAILED.
*/
private void markPackageFailed(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
Integer httpCode = exchange.getIn().getHeader(Exchange.HTTP_RESPONSE_CODE, Integer.class);
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.FAILED);
pkg.setErrorMessage("HTTP " + httpCode);
packageRepository.save(pkg);
});
}
/**
* Marks package as DUPLICATE.
*/
private void markPackageDuplicate(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
String duplicateOf = exchange.getIn().getHeader("duplicateOf", String.class);
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.COMPLETED);
pkg.setErrorMessage("Duplicate of " + duplicateOf);
packageRepository.save(pkg);
});
}
/**
* Logs package processing statistics.
*/
private void logPackageStatistics(Exchange exchange) {
String packageId = exchange.getIn().getHeader("packageId", String.class);
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
long totalDuration = (pkg.getDownloadDurationMs() != null ? pkg.getDownloadDurationMs() : 0) +
(pkg.getProcessingDurationMs() != null ? pkg.getProcessingDurationMs() : 0);
log.info("Package {} completed: {} XML files, {} processed, {} failed, total duration: {}ms",
packageId,
pkg.getXmlFileCount(),
pkg.getProcessedCount(),
pkg.getFailedCount(),
totalDuration);
});
}
/**
* Error Handler.
*/
private void handleDownloadError(Exchange exchange) {
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
String packageId = exchange.getIn().getHeader("packageId", String.class);
if (packageId != null) {
packageRepository.findByPackageIdentifier(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(TedDailyPackage.DownloadStatus.FAILED);
pkg.setErrorMessage(exception != null ? exception.getMessage() : "Unknown error");
packageRepository.save(pkg);
});
}
}
}

@ -0,0 +1,158 @@
package at.procon.ted.camel;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.service.TedPackageDownloadService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.Exchange;
import org.apache.camel.LoggingLevel;
import org.apache.camel.builder.RouteBuilder;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.stereotype.Component;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
/**
* Apache Camel Route for automatic download of TED Daily Packages.
*
* Features:
* - Scheduled Download (hourly configurable)
* - Idempotent Downloads via Hash
* - Rate Limiting
* - Integration with XML processing
* - Error Handling
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@ConditionalOnProperty(name = "ted.download.use-service-based", havingValue = "true")
@RequiredArgsConstructor
@Slf4j
public class TedPackageDownloadRoute extends RouteBuilder {
private static final String ROUTE_ID_SCHEDULER = "ted-package-download-scheduler";
private static final String ROUTE_ID_DOWNLOADER = "ted-package-downloader";
private static final String ROUTE_ID_XML_PROCESSOR = "ted-package-xml-processor";
private final TedProcessorProperties properties;
private final TedPackageDownloadService downloadService;
/**
* Creates thread pool for parallel XML processing.
* Maximum 3 parallel processes for DB loading (lower priority due to vectorization).
*/
private java.util.concurrent.ExecutorService executorService() {
return java.util.concurrent.Executors.newFixedThreadPool(
3,
r -> {
Thread thread = new Thread(r);
thread.setName("ted-xml-processor-" + thread.getId());
thread.setDaemon(true);
thread.setPriority(Thread.NORM_PRIORITY - 1); // Lower priority than vectorization
return thread;
}
);
}
@Override
public void configure() throws Exception {
// Error Handler
errorHandler(defaultErrorHandler()
.maximumRedeliveries(3)
.redeliveryDelay(5000)
.retryAttemptedLogLevel(LoggingLevel.WARN));
// Scheduler: Periodic check for new packages
from("timer:package-download-scheduler?period={{ted.download.poll-interval:3600000}}")
.routeId(ROUTE_ID_SCHEDULER)
.autoStartup("{{ted.download.enabled:false}}")
.log(LoggingLevel.DEBUG, "Checking for new TED packages to download...")
.bean(downloadService, "getNextPackageToDownload")
.choice()
.when(body().isNull())
.log(LoggingLevel.DEBUG, "No more packages to download")
.otherwise()
.to("direct:download-package")
.end();
// Download Route
from("direct:download-package")
.routeId(ROUTE_ID_DOWNLOADER)
.log(LoggingLevel.INFO, "Processing package: ${body.identifier}")
.process(exchange -> {
TedPackageDownloadService.PackageInfo packageInfo =
exchange.getIn().getBody(TedPackageDownloadService.PackageInfo.class);
// Rate Limiting
long delay = properties.getDownload().getDelayBetweenDownloads();
if (delay > 0) {
Thread.sleep(delay);
}
// Download Package
TedPackageDownloadService.DownloadResult result =
downloadService.downloadPackage(packageInfo.year(), packageInfo.serialNumber());
exchange.setProperty("downloadResult", result);
exchange.getIn().setBody(result);
})
.choice()
.when(simple("${exchangeProperty.downloadResult.success} == true"))
.to("direct:process-package-xml-files")
.when(simple("${exchangeProperty.downloadResult.status.name} == 'NOT_FOUND'"))
.log(LoggingLevel.DEBUG, "Package not found (404): ${body.packageEntity.packageIdentifier}")
.when(simple("${exchangeProperty.downloadResult.status.name} == 'ALREADY_EXISTS'"))
.log(LoggingLevel.DEBUG, "Package already exists: ${body.packageEntity.packageIdentifier}")
.when(simple("${exchangeProperty.downloadResult.status.name} == 'DUPLICATE'"))
.log(LoggingLevel.WARN, "Duplicate package detected: ${body.packageEntity.packageIdentifier}")
.otherwise()
.log(LoggingLevel.ERROR, "Failed to download package: ${exchangeProperty.downloadResult.error.message}")
.end();
// XML Files Processing Route
from("direct:process-package-xml-files")
.routeId(ROUTE_ID_XML_PROCESSOR)
.setProperty("packageIdentifier", simple("${body.packageEntity.packageIdentifier}"))
.setProperty("xmlFileCount", simple("${body.xmlFiles.size}"))
.process(exchange -> {
// Use AtomicInteger counters: the parallel split copies exchange properties
// into sub-exchanges, so plain Integer properties would neither be visible
// to the parent exchange nor be updated race-free across threads.
exchange.setProperty("processedCount", new java.util.concurrent.atomic.AtomicInteger(0));
exchange.setProperty("failedCount", new java.util.concurrent.atomic.AtomicInteger(0));
})
.split(simple("${body.xmlFiles}"))
.parallelProcessing()
.executorService(executorService())
// Continue even if individual documents fail (stopOnException is off by default)
.shareUnitOfWork()
.doTry()
.process(exchange -> {
Path xmlFile = exchange.getIn().getBody(Path.class);
// Set headers for existing XML processing route
exchange.getIn().setHeader(Exchange.FILE_NAME, xmlFile.getFileName().toString());
exchange.getIn().setHeader(Exchange.FILE_PATH, xmlFile.toString());
exchange.getIn().setHeader(Exchange.FILE_LENGTH, Files.size(xmlFile));
// Read XML content
byte[] content = Files.readAllBytes(xmlFile);
exchange.getIn().setBody(content);
})
// Forward to existing processing route
.to("direct:process-document")
// Thread-safe increment of the shared success counter
.process(exchange -> exchange.getProperty("processedCount", java.util.concurrent.atomic.AtomicInteger.class).incrementAndGet())
.doCatch(Exception.class)
.log(LoggingLevel.WARN, "Failed to process ${header.CamelFileName}: ${exception.message}")
// Thread-safe increment of the shared error counter
.process(exchange -> exchange.getProperty("failedCount", java.util.concurrent.atomic.AtomicInteger.class).incrementAndGet())
.end()
.end()
.log(LoggingLevel.INFO, "Package ${exchangeProperty.packageIdentifier} completed: ${exchangeProperty.xmlFileCount} XML files, ${exchangeProperty.processedCount} processed, ${exchangeProperty.failedCount} failed");
}
}
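The route above hands each package to TedPackageDownloadService, which (per the project overview) deduplicates packages by SHA-256 content hash. A minimal, JDK-only sketch of such an idempotency check — `PackageHasher` and `isDuplicate` are illustrative names, not part of the actual service:

```java
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.util.HexFormat;
import java.util.Set;

public class PackageHasher {

    /** Computes the SHA-256 hex digest of a downloaded tar.gz file. */
    public static String sha256(Path file) throws Exception {
        MessageDigest digest = MessageDigest.getInstance("SHA-256");
        return HexFormat.of().formatHex(digest.digest(Files.readAllBytes(file)));
    }

    /** Idempotency check: a package is a duplicate if its hash was seen before. */
    public static boolean isDuplicate(String hash, Set<String> knownHashes) {
        return !knownHashes.add(hash);
    }
}
```

In the real service the known hashes live in the `ted_daily_package` table rather than an in-memory set.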

@ -0,0 +1,360 @@
package at.procon.ted.camel;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.VectorizationStatus;
import at.procon.ted.repository.ProcurementDocumentRepository;
import at.procon.ted.service.VectorizationProcessorService;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.Exchange;
import org.apache.camel.LoggingLevel;
import org.apache.camel.builder.RouteBuilder;
import org.apache.camel.model.dataformat.JsonLibrary;
import org.springframework.data.domain.PageRequest;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.UUID;
/**
* Apache Camel route for asynchronous document vectorization.
*
* Features:
* - Async vectorization triggered after document processing
* - Scheduled processing of pending vectorizations from database
* - Direct REST calls to Python embedding service
* - Error handling with retry mechanism
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class VectorizationRoute extends RouteBuilder {
private static final String ROUTE_ID_TRIGGER = "vectorization-trigger";
private static final String ROUTE_ID_PROCESSOR = "vectorization-processor";
private static final String ROUTE_ID_SCHEDULER = "vectorization-scheduler";
private final TedProcessorProperties properties;
private final ProcurementDocumentRepository documentRepository;
private final VectorizationProcessorService vectorizationProcessorService;
private final ObjectMapper objectMapper;
/**
* Creates thread pool for vectorization with highest priority.
* Only 1 thread since only one embedding service is available.
*/
private java.util.concurrent.ExecutorService executorService() {
return java.util.concurrent.Executors.newFixedThreadPool(
1,
r -> {
Thread thread = new Thread(r);
thread.setName("ted-vectorization-" + thread.getId());
thread.setDaemon(true);
thread.setPriority(Thread.MAX_PRIORITY); // Highest priority
return thread;
}
);
}
@Override
public void configure() throws Exception {
if (!properties.getVectorization().isEnabled()) {
log.info("Vectorization is disabled, skipping route configuration");
return;
}
log.info("Configuring vectorization routes (enabled=true, apiUrl={}, connectTimeout={}ms, socketTimeout={}ms, maxRetries={}, scheduler every 6s)",
properties.getVectorization().getApiUrl(),
properties.getVectorization().getConnectTimeout(),
properties.getVectorization().getSocketTimeout(),
properties.getVectorization().getMaxRetries());
// Global error handler for unexpected exceptions (like NullPointer, Connection pool shutdown, etc.)
// Only catches severe exceptions that are not handled by route-specific doCatch
onException(NullPointerException.class, IllegalStateException.class)
.routeId("vectorization-error-handler")
.handled(true)
.process(exchange -> {
UUID documentId = exchange.getIn().getHeader("documentId", UUID.class);
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
String errorMsg = exception != null ? exception.getClass().getSimpleName() + ": " + exception.getMessage() : "Unknown error";
// If connection pool is shut down, it's likely during application shutdown - just log warning
if (errorMsg.contains("Connection pool shut down")) {
log.warn("Vectorization aborted for document {} - connection pool shut down (application shutting down?)", documentId);
return;
}
log.error("Unexpected error in vectorization for document {}: {}", documentId, errorMsg, exception);
// Update document status to FAILED via service (transactional)
if (documentId != null) {
try {
vectorizationProcessorService.markAsFailed(documentId, errorMsg);
} catch (Exception e) {
log.warn("Failed to mark document {} as failed: {}", documentId, e.getMessage());
}
}
})
.to("log:vectorization-error?level=WARN");
// Trigger route: Receives document ID and queues for async processing
// Queue size limited to 1000 to prevent memory issues
from("direct:vectorize")
.routeId(ROUTE_ID_TRIGGER)
.doTry()
.to("seda:vectorize-async?waitForTaskToComplete=Never&size=1000&blockWhenFull=true&offerTimeout=5000")
.doCatch(Exception.class)
.log(LoggingLevel.WARN, "Failed to queue document ${header.documentId} for vectorization (queue may be full or shutting down): ${exception.message}")
.end();
// Async processor route: Performs actual vectorization with highest priority
// Uses dedicated single-thread pool with MAX_PRIORITY (1 thread for 1 embedding service)
from("seda:vectorize-async?size=1000")
.routeId(ROUTE_ID_PROCESSOR)
.threads().executorService(executorService())
.process(exchange -> {
UUID documentId = exchange.getIn().getHeader("documentId", UUID.class);
log.debug("Starting vectorization for document: {}", documentId);
// Prepare document for vectorization (transactional)
VectorizationProcessorService.DocumentContent docContent =
vectorizationProcessorService.prepareDocumentForVectorization(documentId);
if (docContent == null) {
// Document was skipped (no content)
log.debug("Document {} has no content, skipping vectorization", documentId);
exchange.setProperty("skipVectorization", true);
return;
}
// Prepare request object
EmbedRequest embedRequest = new EmbedRequest();
embedRequest.text = docContent.textContent();
embedRequest.isQuery = false;
// Set headers and body for REST call
exchange.getIn().setHeader("documentId", documentId);
exchange.getIn().setHeader(Exchange.HTTP_METHOD, "POST");
exchange.getIn().setHeader(Exchange.CONTENT_TYPE, "application/json");
exchange.getIn().setBody(embedRequest);
})
.choice()
.when(exchangeProperty("skipVectorization").isEqualTo(true))
.log(LoggingLevel.DEBUG, "Skipping vectorization (no content): ${header.documentId}")
.otherwise()
// Marshal request to JSON
.marshal().json(JsonLibrary.Jackson)
// Initialize retry counter
.setProperty("retryCount", constant(0))
.setProperty("maxRetries", constant(properties.getVectorization().getMaxRetries()))
.setProperty("vectorizationSuccess", constant(false))
// Retry loop with exponential backoff
.loopDoWhile(simple("${exchangeProperty.vectorizationSuccess} == false && ${exchangeProperty.retryCount} < ${exchangeProperty.maxRetries}"))
.process(exchange -> {
Integer retryCount = exchange.getProperty("retryCount", Integer.class);
exchange.setProperty("retryCount", retryCount + 1);
// Exponential backoff: 2^retryCount seconds (2s, 4s, 8s, ...)
if (retryCount > 0) {
long backoffMs = (long) Math.pow(2, retryCount) * 1000;
UUID documentId = exchange.getIn().getHeader("documentId", UUID.class);
log.warn("Retry #{} for document {} after {}ms backoff", retryCount, documentId, backoffMs);
Thread.sleep(backoffMs);
}
})
.doTry()
// HTTP call with configurable timeouts
.toD(properties.getVectorization().getApiUrl() + "/embed?bridgeEndpoint=true&throwExceptionOnFailure=false&connectTimeout=" +
properties.getVectorization().getConnectTimeout() + "&socketTimeout=" +
properties.getVectorization().getSocketTimeout())
.process(exchange -> {
UUID documentId = exchange.getIn().getHeader("documentId", UUID.class);
Integer statusCode = exchange.getIn().getHeader(Exchange.HTTP_RESPONSE_CODE, Integer.class);
if (statusCode == null) {
log.error("No response from embedding service for document {} (service may be down!)", documentId);
throw new RuntimeException("Embedding service not reachable (no HTTP response)");
}
if (statusCode != 200) {
String responseBody = exchange.getIn().getBody(String.class);
String errorMsg = "HTTP " + statusCode + " from embedding service: " + responseBody;
log.error("Embedding service error for document {}: {}", documentId, errorMsg);
throw new RuntimeException(errorMsg);
}
})
.unmarshal().json(JsonLibrary.Jackson, EmbedResponse.class)
.process(exchange -> {
UUID documentId = exchange.getIn().getHeader("documentId", UUID.class);
EmbedResponse response = exchange.getIn().getBody(EmbedResponse.class);
if (response == null || response.embedding == null) {
throw new RuntimeException("Embedding service returned null response");
}
log.debug("Successfully vectorized document {}: {} dimensions, {} tokens",
documentId, response.dimensions, response.tokenCount);
// Save embedding with token count via service (transactional)
vectorizationProcessorService.saveEmbedding(documentId, response.embedding, response.tokenCount);
// Mark as successful to stop retry loop
exchange.setProperty("vectorizationSuccess", true);
})
.doCatch(Exception.class)
.process(exchange -> {
UUID documentId = exchange.getIn().getHeader("documentId", UUID.class);
Integer retryCount = exchange.getProperty("retryCount", Integer.class);
Integer maxRetries = exchange.getProperty("maxRetries", Integer.class);
Exception exception = exchange.getProperty(Exchange.EXCEPTION_CAUGHT, Exception.class);
String errorMsg = exception != null ? (exception.getMessage() != null ? exception.getMessage() : exception.getClass().getSimpleName()) : "Unknown error";
// Check if error is due to shutdown
if (errorMsg != null && errorMsg.contains("Connection pool shut down")) {
log.warn("Vectorization aborted for document {} - connection pool shut down (application shutting down)", documentId);
// Don't mark as failed - it will be retried on next startup
exchange.setProperty("vectorizationSuccess", true); // Stop retry loop
return;
}
if (retryCount >= maxRetries) {
log.error("Vectorization failed for document {} after {} retries: {}", documentId, maxRetries, errorMsg, exception);
try {
vectorizationProcessorService.markAsFailed(documentId, errorMsg);
} catch (Exception e) {
log.warn("Failed to mark document {} as failed (may be shutting down): {}", documentId, e.getMessage());
}
} else {
log.warn("Vectorization attempt #{} failed for document {}: {}", retryCount, documentId, errorMsg);
}
})
.end()
.end()
.end();
// Scheduled route: Process pending and failed vectorizations from database
// Runs every 6 seconds to catch documents that need (re-)vectorization
from("timer:vectorization-scheduler?period=6000&delay=500")
.routeId(ROUTE_ID_SCHEDULER)
.log(LoggingLevel.DEBUG, "Vectorization scheduler: Checking for pending/failed documents...")
.process(exchange -> {
int batchSize = properties.getVectorization().getBatchSize();
// First get PENDING documents (highest priority)
List<ProcurementDocument> pending = documentRepository.findByVectorizationStatus(
VectorizationStatus.PENDING,
PageRequest.of(0, batchSize)
);
// If no PENDING, get FAILED documents for retry
List<ProcurementDocument> failed = List.of();
if (pending.isEmpty()) {
failed = documentRepository.findByVectorizationStatus(
VectorizationStatus.FAILED,
PageRequest.of(0, batchSize)
);
}
List<ProcurementDocument> toProcess = !pending.isEmpty() ? pending : failed;
if (!toProcess.isEmpty()) {
String status = !pending.isEmpty() ? "PENDING" : "FAILED";
log.debug("Processing {} {} vectorizations from database", toProcess.size(), status);
exchange.getIn().setBody(toProcess);
} else {
exchange.setProperty("noPendingDocs", true);
}
})
.choice()
.when(exchangeProperty("noPendingDocs").isEqualTo(true))
.log(LoggingLevel.DEBUG, "Vectorization scheduler: No pending or failed vectorizations found")
.otherwise()
.split(body())
.process(exchange -> {
ProcurementDocument doc = exchange.getIn().getBody(ProcurementDocument.class);
exchange.getIn().setHeader("documentId", doc.getId());
})
.to("direct:vectorize")
.end()
.end();
}
/**
* Request model for embedding service.
* Matches Python FastAPI EmbedRequest model with snake_case field names.
*/
public static class EmbedRequest {
@JsonProperty("text")
public String text;
@JsonProperty("is_query")
public boolean isQuery;
public EmbedRequest() {}
public String getText() {
return text;
}
public void setText(String text) {
this.text = text;
}
@JsonProperty("is_query")
public boolean isIsQuery() {
return isQuery;
}
@JsonProperty("is_query")
public void setIsQuery(boolean isQuery) {
this.isQuery = isQuery;
}
}
/**
* Response model for embedding service.
*/
public static class EmbedResponse {
public float[] embedding;
public int dimensions;
@JsonProperty("token_count")
public int tokenCount;
public EmbedResponse() {}
public float[] getEmbedding() {
return embedding;
}
public void setEmbedding(float[] embedding) {
this.embedding = embedding;
}
public int getDimensions() {
return dimensions;
}
public void setDimensions(int dimensions) {
this.dimensions = dimensions;
}
@JsonProperty("token_count")
public int getTokenCount() {
return tokenCount;
}
@JsonProperty("token_count")
public void setTokenCount(int tokenCount) {
this.tokenCount = tokenCount;
}
}
}
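The retry loop in this route sleeps 2^retryCount seconds between attempts before giving up after maxRetries. The schedule in isolation, as a small JDK-only sketch (class and method names are illustrative, not part of the route):

```java
public class BackoffSchedule {

    /** Backoff before retry attempt n (n >= 1): 2^n seconds, i.e. 2s, 4s, 8s, ... */
    public static long backoffMs(int retryCount) {
        return (long) Math.pow(2, retryCount) * 1000L;
    }
}
```

With the default maxRetries of 5, the route therefore waits at most 2+4+8+16 seconds across all retries of one document.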

@ -0,0 +1,78 @@
package at.procon.ted.config;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.aop.interceptor.AsyncUncaughtExceptionHandler;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.AsyncConfigurer;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.lang.reflect.Method;
import java.util.concurrent.Executor;
/**
* Async configuration for document vectorization processing.
* Provides thread pool executor optimized for ML inference workloads.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Configuration
@EnableAsync
@RequiredArgsConstructor
@Slf4j
public class AsyncConfig implements AsyncConfigurer {
private final TedProcessorProperties properties;
/**
* Thread pool executor for async vectorization tasks.
*/
@Bean(name = "vectorizationExecutor")
public Executor vectorizationExecutor() {
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
executor.setCorePoolSize(properties.getVectorization().getThreadPoolSize());
executor.setMaxPoolSize(properties.getVectorization().getThreadPoolSize() * 2);
executor.setQueueCapacity(500);
executor.setThreadNamePrefix("vectorization-");
executor.setRejectedExecutionHandler((r, e) ->
log.warn("Vectorization task rejected, queue full"));
executor.initialize();
return executor;
}
/**
* Default async executor for general async tasks.
*/
@Override
@Bean(name = "taskExecutor")
public Executor getAsyncExecutor() {
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
executor.setCorePoolSize(4);
executor.setMaxPoolSize(8);
executor.setQueueCapacity(200);
executor.setThreadNamePrefix("async-");
executor.initialize();
return executor;
}
/**
* Exception handler for async tasks.
*/
@Override
public AsyncUncaughtExceptionHandler getAsyncUncaughtExceptionHandler() {
return new AsyncExceptionHandler();
}
/**
* Handles uncaught exceptions in async methods.
*/
@Slf4j
private static class AsyncExceptionHandler implements AsyncUncaughtExceptionHandler {
@Override
public void handleUncaughtException(Throwable ex, Method method, Object... params) {
log.error("Async exception in method {}: {}", method.getName(), ex.getMessage(), ex);
}
}
}
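The vectorizationExecutor above installs a RejectedExecutionHandler that logs and drops tasks instead of throwing when the queue is full. The same drop-and-count policy on a plain JDK ThreadPoolExecutor, as an illustrative sketch (the counter stands in for the log statement):

```java
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

public class DropAndLogPool {

    /** Fixed-size pool with a bounded queue that silently drops (and counts) overflow tasks. */
    public static ThreadPoolExecutor create(int poolSize, int queueCapacity, AtomicInteger rejectedCount) {
        return new ThreadPoolExecutor(
                poolSize, poolSize,
                0L, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<>(queueCapacity),
                // Mirror of the handler in AsyncConfig: record the rejection and drop the task
                (task, executor) -> rejectedCount.incrementAndGet());
    }
}
```

Dropping silently trades reliability for liveness; it is acceptable here only because the scheduler re-queues pending vectorizations from the database.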

@ -0,0 +1,34 @@
package at.procon.ted.config;
import org.apache.camel.spi.IdempotentRepository;
import org.apache.camel.support.processor.idempotent.FileIdempotentRepository;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.io.File;
/**
* Camel configuration beans.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Configuration
public class CamelConfig {
@Value("${ted.solution-brief.idempotent-repository:./solution-brief-processed.dat}")
private String idempotentRepositoryPath;
/**
* File-based idempotent repository for tracking processed files.
*/
@Bean
public IdempotentRepository fileIdempotentRepository() {
File repoFile = new File(idempotentRepositoryPath);
// Ensure parent directory exists
if (repoFile.getParentFile() != null && !repoFile.getParentFile().exists()) {
repoFile.getParentFile().mkdirs();
}
return FileIdempotentRepository.fileIdempotentRepository(repoFile, 10000);
}
}

@ -0,0 +1,58 @@
package at.procon.ted.config;
import io.swagger.v3.oas.models.OpenAPI;
import io.swagger.v3.oas.models.info.Contact;
import io.swagger.v3.oas.models.info.Info;
import io.swagger.v3.oas.models.info.License;
import io.swagger.v3.oas.models.servers.Server;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.List;
/**
* OpenAPI/Swagger documentation configuration.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Configuration
public class OpenApiConfig {
@Bean
public OpenAPI customOpenAPI() {
return new OpenAPI()
.info(new Info()
.title("TED Procurement Document API")
.version("1.0.0")
.description("""
REST API for searching and retrieving EU eForms public procurement documents
from TED (Tenders Electronic Daily).
## Features
- **Structured Search**: Filter by country, CPV codes, dates, procedure types
- **Semantic Search**: Natural language queries using vector similarity
- **Document Retrieval**: Full document details with lots and organizations
## Authentication
Currently no authentication required (development mode).
## Rate Limits
No rate limits currently enforced.
""")
.contact(new Contact()
.name("PROCON DATA")
.email("Martin.Schweitzer@procon.co.at")
.url("https://www.procon.co.at"))
.license(new License()
.name("Proprietary")
.url("https://www.procon.co.at")))
.servers(List.of(
new Server()
.url("http://localhost:8080/api")
.description("Local Development Server"),
new Server()
.url("https://ted-api.procon.co.at/api")
.description("Production Server")
));
}
}

@ -0,0 +1,431 @@
package at.procon.ted.config;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import org.springframework.validation.annotation.Validated;
import jakarta.validation.constraints.Min;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.Positive;
/**
* Configuration properties for TED Procurement Processor.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Configuration
@ConfigurationProperties(prefix = "ted")
@Data
@Validated
public class TedProcessorProperties {
private InputProperties input = new InputProperties();
private SchemaProperties schema = new SchemaProperties();
private VectorizationProperties vectorization = new VectorizationProperties();
private SearchProperties search = new SearchProperties();
private DownloadProperties download = new DownloadProperties();
private MailProperties mail = new MailProperties();
private SolutionBriefProperties solutionBrief = new SolutionBriefProperties();
/**
* Input directory configuration for Apache Camel file consumer.
*/
@Data
public static class InputProperties {
/**
* Base directory for watching incoming TED XML files.
*/
@NotBlank
private String directory = "D:/ted.europe/2025-11.tar/2025-11/11";
/**
* File pattern to match (supports Ant-style patterns).
*/
private String pattern = "**/*.xml";
/**
* Directory to move successfully processed files.
*/
private String processedDirectory = ".processed";
/**
* Directory to move failed files.
*/
private String errorDirectory = ".error";
/**
* Polling interval in milliseconds.
*/
@Positive
private long pollInterval = 5000;
/**
* Maximum number of messages per poll.
*/
@Positive
private int maxMessagesPerPoll = 100;
}
/**
* XML Schema validation configuration.
*/
@Data
public static class SchemaProperties {
/**
* Enable/disable XSD validation.
*/
private boolean enabled = true;
/**
* Path to the eForms XSD schema file.
*/
private String path = "classpath:schemas/maindoc/UBL-ContractNotice-2.3.xsd";
}
/**
* Document vectorization configuration.
*/
@Data
public static class VectorizationProperties {
/**
* Enable/disable async vectorization.
*/
private boolean enabled = true;
/**
* Use external HTTP API instead of Python subprocess.
*/
private boolean useHttpApi = false;
/**
* Embedding service HTTP API URL.
*/
private String apiUrl = "http://localhost:8001";
/**
* Sentence transformer model name.
*/
private String modelName = "intfloat/multilingual-e5-large";
/**
* Vector dimensions (must match model output).
*/
@Positive
private int dimensions = 1024;
/**
* Batch size for vectorization processing.
*/
@Min(1)
private int batchSize = 16;
/**
* Thread pool size for async vectorization.
*/
@Min(1)
private int threadPoolSize = 4;
/**
* Maximum text length for vectorization (characters).
*/
@Positive
private int maxTextLength = 8192;
/**
* HTTP connection timeout in milliseconds.
*/
@Positive
private int connectTimeout = 10000;
/**
* HTTP socket/read timeout in milliseconds.
*/
@Positive
private int socketTimeout = 60000;
/**
* Maximum retries on connection failure.
*/
@Min(0)
private int maxRetries = 5;
}
/**
* Search configuration.
*/
@Data
public static class SearchProperties {
/**
* Default page size for search results.
*/
@Positive
private int defaultPageSize = 20;
/**
* Maximum allowed page size.
*/
@Positive
private int maxPageSize = 100;
/**
* Similarity threshold for vector search (0.0 - 1.0).
*/
private double similarityThreshold = 0.7;
}
/**
* TED Daily Package Download configuration.
*/
@Data
public static class DownloadProperties {
/**
* Enable/disable automatic package download.
*/
private boolean enabled = false;
/**
* Base URL for TED daily packages.
*/
private String baseUrl = "https://ted.europa.eu/packages/daily/";
/**
* Download directory for tar.gz files.
*/
private String downloadDirectory = "D:/ted.europe/downloads";
/**
* Extraction directory for XML files.
*/
private String extractDirectory = "D:/ted.europe/extracted";
/**
* Start year for downloads.
*/
@Positive
private int startYear = 2015;
/**
* Number of consecutive 404 errors before the download stops.
* NOTE: No longer used; the system now stops immediately on the first 404.
* @deprecated No longer used since the switch to immediate 404 handling
*/
@Positive
@Deprecated
private int maxConsecutive404 = 1;
/**
* Polling interval for new packages (milliseconds).
*/
@Positive
private long pollInterval = 3600000; // 1 hour
/**
* Retry interval for tail NOT_FOUND packages.
* Current year packages remain retryable indefinitely.
*/
@Positive
private long notFoundRetryInterval = 21600000; // 6 hours
/**
* Grace period for previous years after year end before a tail-NOT_FOUND is treated as final.
*/
@Min(0)
private int previousYearGracePeriodDays = 30;
/**
* Keep retrying current-year tail NOT_FOUND packages indefinitely.
*/
private boolean retryCurrentYearNotFoundIndefinitely = true;
/**
* Download timeout (milliseconds).
*/
@Positive
private long downloadTimeout = 300000; // 5 minutes
/**
* Maximum number of concurrent downloads.
*/
@Positive
private int maxConcurrentDownloads = 2;
/**
* Delay between downloads (milliseconds) for rate limiting.
*/
@Positive
private long delayBetweenDownloads = 5000; // 5 seconds
/**
* Automatically delete the tar.gz file after extraction.
*/
private boolean deleteAfterExtraction = true;
/**
* Prioritization: current year first, then backwards.
* NOTE: No longer used; the system always prioritizes the current year.
* @deprecated No longer used - always active
*/
@Deprecated
private boolean prioritizeCurrentYear = true;
}
/**
* IMAP Mail configuration for email processing.
*/
@Data
public static class MailProperties {
/**
* Enable/disable mail processing.
*/
private boolean enabled = false;
/**
* IMAP server hostname.
*/
@NotBlank
private String host = "mail.mymagenta.business";
/**
* IMAP server port.
*/
@Positive
private int port = 993;
/**
* Mail account username (email address).
*/
@NotBlank
private String username = "archiv@procon.co.at";
/**
* Mail account password.
*/
@NotBlank
private String password = "";
/**
* Use SSL/TLS connection.
*/
private boolean ssl = true;
/**
* Mail folder to read from.
*/
private String folderName = "INBOX";
/**
* Delete messages after processing.
*/
private boolean delete = false;
/**
* Mark messages as seen after processing.
*/
private boolean seen = true;
/**
* Only process unseen messages.
*/
private boolean unseen = true;
/**
* Polling delay in milliseconds.
*/
@Positive
private long delay = 60000;
/**
* Max messages per poll.
*/
@Positive
private int maxMessagesPerPoll = 10;
/**
* Output directory for processed attachments.
*/
private String attachmentOutputDirectory = "D:/ted.europe/mail-attachments";
/**
* Enable/disable MIME file input processing.
*/
private boolean mimeInputEnabled = false;
/**
* Input directory for MIME files (.eml, .msg).
*/
private String mimeInputDirectory = "D:/ted.europe/mime-input";
/**
* File pattern for MIME files.
*/
private String mimeInputPattern = "*.eml";
/**
* Polling interval for MIME input directory (milliseconds).
*/
@Positive
private long mimeInputPollInterval = 10000;
}
/**
* Solution Brief processing configuration.
* Scans PDF files and generates Excel reports with similar TED documents.
*/
@Data
public static class SolutionBriefProperties {
/**
* Enable/disable Solution Brief processing.
*/
private boolean enabled = false;
/**
* Input directory for Solution Brief PDF files.
*/
private String inputDirectory = "C:/work/SolutionBrief";
/**
* Output directory for Excel result files (relative to input or absolute).
*/
private String resultDirectory = "./result";
/**
* Number of top similar documents to include in results.
*/
@Positive
private int topK = 20;
/**
* Minimum similarity threshold (0.0-1.0).
*/
private double similarityThreshold = 0.5;
/**
* Polling interval in milliseconds.
*/
@Positive
private long pollInterval = 30000;
/**
* File pattern for PDF files.
*/
private String filePattern = ".*\\.pdf";
/**
* Process files only once (idempotent based on filename+size+date).
*/
private boolean idempotent = true;
/**
* Idempotent repository file path.
*/
private String idempotentRepository = "./solution-brief-processed.dat";
}
}

@ -0,0 +1,264 @@
package at.procon.ted.controller;
import at.procon.ted.model.entity.ProcessingLog;
import at.procon.ted.model.entity.VectorizationStatus;
import at.procon.ted.repository.ProcurementDocumentRepository;
import at.procon.ted.service.DocumentProcessingService;
import at.procon.ted.service.VectorizationService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.ProducerTemplate;
import org.springframework.data.domain.PageRequest;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.time.OffsetDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
/**
* REST API controller for administrative operations.
*
* Provides endpoints for:
* - System health checks
* - Manual vectorization triggers
* - Processing log access
* - Document reprocessing
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@RestController
@RequestMapping("/v1/admin")
@RequiredArgsConstructor
@Slf4j
@Tag(name = "Admin", description = "Administrative Operations API")
public class AdminController {
private final VectorizationService vectorizationService;
private final DocumentProcessingService documentProcessingService;
private final ProcurementDocumentRepository documentRepository;
private final at.procon.ted.repository.ProcessingLogRepository logRepository;
private final ProducerTemplate producerTemplate;
private final at.procon.ted.service.DataCleanupService dataCleanupService;
/**
* Health check endpoint.
*/
@GetMapping("/health")
@Operation(summary = "Health check", description = "Check system health and service availability")
public ResponseEntity<Map<String, Object>> healthCheck() {
Map<String, Object> health = new HashMap<>();
health.put("status", "UP");
health.put("timestamp", OffsetDateTime.now());
health.put("vectorizationAvailable", vectorizationService.isAvailable());
health.put("documentCount", documentRepository.count());
return ResponseEntity.ok(health);
}
/**
* Get vectorization status overview.
*/
@GetMapping("/vectorization/status")
@Operation(summary = "Vectorization status", description = "Get overview of document vectorization status")
public ResponseEntity<Map<String, Object>> getVectorizationStatus() {
Map<String, Object> status = new HashMap<>();
List<Object[]> counts = documentRepository.countByVectorizationStatus();
Map<String, Long> statusCounts = new HashMap<>();
for (Object[] row : counts) {
statusCounts.put(((VectorizationStatus) row[0]).name(), (Long) row[1]);
}
status.put("counts", statusCounts);
status.put("serviceAvailable", vectorizationService.isAvailable());
status.put("timestamp", OffsetDateTime.now());
return ResponseEntity.ok(status);
}
/**
* Manually trigger vectorization for a specific document.
*/
@PostMapping("/vectorization/trigger/{documentId}")
@Operation(summary = "Trigger vectorization", description = "Manually trigger vectorization for a specific document")
public ResponseEntity<Map<String, Object>> triggerVectorization(@PathVariable UUID documentId) {
Map<String, Object> result = new HashMap<>();
if (!vectorizationService.isAvailable()) {
result.put("success", false);
result.put("message", "Vectorization service is not available");
return ResponseEntity.badRequest().body(result);
}
// Verify document exists
if (!documentRepository.existsById(documentId)) {
result.put("success", false);
result.put("message", "Document not found: " + documentId);
return ResponseEntity.badRequest().body(result);
}
// Trigger vectorization via Camel route
producerTemplate.sendBodyAndHeader("direct:vectorize", null, "documentId", documentId);
result.put("success", true);
result.put("message", "Vectorization triggered for document " + documentId);
result.put("documentId", documentId);
return ResponseEntity.ok(result);
}
/**
* Trigger vectorization for all pending documents.
*/
@PostMapping("/vectorization/process-pending")
@Operation(summary = "Process pending vectorizations", description = "Trigger vectorization for all pending documents")
public ResponseEntity<Map<String, Object>> processPendingVectorizations(
@RequestParam(required = false, defaultValue = "100") Integer batchSize) {
Map<String, Object> result = new HashMap<>();
if (!vectorizationService.isAvailable()) {
result.put("success", false);
result.put("message", "Vectorization service is not available");
return ResponseEntity.badRequest().body(result);
}
var pending = documentRepository.findByVectorizationStatus(
VectorizationStatus.PENDING,
PageRequest.of(0, Math.min(batchSize, 500)));
int count = 0;
for (var doc : pending) {
// Trigger vectorization via Camel route
producerTemplate.sendBodyAndHeader("direct:vectorize", null, "documentId", doc.getId());
count++;
}
result.put("success", true);
result.put("message", "Triggered vectorization for " + count + " documents");
result.put("documentsQueued", count);
return ResponseEntity.ok(result);
}
/**
* Reprocess a document by publication ID.
*/
@PostMapping("/reprocess/{publicationId}")
@Operation(summary = "Reprocess document", description = "Reparse and revectorize a document by publication ID")
public ResponseEntity<Map<String, Object>> reprocessDocument(@PathVariable String publicationId) {
Map<String, Object> result = new HashMap<>();
var updated = documentProcessingService.reprocessDocument(publicationId);
if (updated.isPresent()) {
result.put("success", true);
result.put("message", "Document reprocessed successfully");
result.put("documentId", updated.get().getId());
result.put("publicationId", publicationId);
return ResponseEntity.ok(result);
} else {
result.put("success", false);
result.put("message", "Document not found or reprocessing failed");
result.put("publicationId", publicationId);
return ResponseEntity.status(404).body(result);
}
}
/**
* Get recent processing logs.
*/
@GetMapping("/logs/recent")
@Operation(summary = "Recent processing logs", description = "Get recent document processing log entries")
public ResponseEntity<List<ProcessingLog>> getRecentLogs(
@RequestParam(required = false, defaultValue = "24") Integer hoursBack,
@RequestParam(required = false, defaultValue = "100") Integer limit) {
OffsetDateTime since = OffsetDateTime.now().minusHours(hoursBack);
List<ProcessingLog> logs = logRepository.findRecentLogs(since);
if (logs.size() > limit) {
logs = logs.subList(0, limit);
}
return ResponseEntity.ok(logs);
}
/**
* Get processing logs for a specific document.
*/
@GetMapping("/logs/document/{documentId}")
@Operation(summary = "Document processing logs", description = "Get processing log entries for a specific document")
public ResponseEntity<List<ProcessingLog>> getDocumentLogs(@PathVariable UUID documentId) {
List<ProcessingLog> logs = logRepository.findByDocumentIdOrderByCreatedAtDesc(documentId);
return ResponseEntity.ok(logs);
}
/**
* Get system information.
*/
@GetMapping("/info")
@Operation(summary = "System information", description = "Get system configuration and runtime information")
public ResponseEntity<Map<String, Object>> getSystemInfo() {
Map<String, Object> info = new HashMap<>();
info.put("javaVersion", System.getProperty("java.version"));
info.put("osName", System.getProperty("os.name"));
info.put("availableProcessors", Runtime.getRuntime().availableProcessors());
info.put("maxMemory", Runtime.getRuntime().maxMemory());
info.put("freeMemory", Runtime.getRuntime().freeMemory());
info.put("totalMemory", Runtime.getRuntime().totalMemory());
info.put("vectorizationEnabled", vectorizationService.isAvailable());
return ResponseEntity.ok(info);
}
/**
* Count documents older than specified retention period.
*/
@GetMapping("/cleanup/count")
@Operation(summary = "Count old documents", description = "Count documents older than specified years")
public ResponseEntity<Map<String, Object>> countOldDocuments(
@RequestParam(required = false, defaultValue = "7") Integer years) {
Map<String, Object> result = new HashMap<>();
long count = dataCleanupService.countDocumentsOlderThan(years);
result.put("years", years);
result.put("count", count);
result.put("message", String.format("Found %d documents older than %d years", count, years));
return ResponseEntity.ok(result);
}
/**
* Delete documents older than specified retention period.
*/
@DeleteMapping("/cleanup/delete")
@Operation(summary = "Delete old documents", description = "Delete documents older than specified years (default: 7)")
public ResponseEntity<Map<String, Object>> deleteOldDocuments(
@RequestParam(required = false, defaultValue = "7") Integer years) {
Map<String, Object> result = new HashMap<>();
try {
int deletedCount = dataCleanupService.deleteDocumentsOlderThan(years);
result.put("success", true);
result.put("years", years);
result.put("deletedCount", deletedCount);
result.put("message", String.format("Deleted %d documents older than %d years", deletedCount, years));
return ResponseEntity.ok(result);
} catch (Exception e) {
log.error("Error deleting old documents", e);
result.put("success", false);
result.put("error", e.getMessage());
return ResponseEntity.internalServerError().body(result);
}
}
}
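The `/info` endpoint above returns the raw byte counts reported by `Runtime` (`maxMemory`, `freeMemory`, `totalMemory`). A client will usually want these human-readable; a minimal standalone sketch of such a conversion (the `MemoryFormat` class name is hypothetical, not part of the codebase):

```java
class MemoryFormat {
    // Convert a raw byte count, as returned by the /info endpoint, into a
    // human-readable string using binary (1024-based) units.
    static String humanReadable(long bytes) {
        if (bytes < 1024) {
            return bytes + " B";
        }
        String[] units = {"KiB", "MiB", "GiB", "TiB"};
        double value = bytes;
        int unit = -1;
        while (value >= 1024 && unit < units.length - 1) {
            value /= 1024;
            unit++;
        }
        // Locale.ROOT keeps the decimal separator stable across JVM locales.
        return String.format(java.util.Locale.ROOT, "%.1f %s", value, units[unit]);
    }

    public static void main(String[] args) {
        System.out.println(humanReadable(512));                      // 512 B
        System.out.println(humanReadable(1536));                     // 1.5 KiB
        System.out.println(humanReadable(4L * 1024 * 1024 * 1024));  // 4.0 GiB
    }
}
```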

@ -0,0 +1,314 @@
package at.procon.ted.controller;
import at.procon.ted.model.dto.DocumentDtos.*;
import at.procon.ted.model.entity.ContractNature;
import at.procon.ted.model.entity.NoticeType;
import at.procon.ted.model.entity.ProcedureType;
import at.procon.ted.service.SearchService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Content;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.format.annotation.DateTimeFormat;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.UUID;
/**
* REST API controller for searching and retrieving TED procurement documents.
*
* Provides endpoints for:
* - Structured search with filters (country, type, dates, etc.)
* - Semantic search using natural language queries
* - Document retrieval by ID or publication ID
* - Statistics and metadata
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@RestController
@RequestMapping("/v1/documents")
@RequiredArgsConstructor
@Slf4j
@Tag(name = "Documents", description = "TED Procurement Document Search API")
public class DocumentController {
private final SearchService searchService;
/**
* Search documents with structured and/or semantic filters.
*/
@GetMapping("/search")
@Operation(
summary = "Search procurement documents",
description = "Search documents using structured filters (country, type, dates) and/or semantic search with natural language queries"
)
@ApiResponses(value = {
@ApiResponse(responseCode = "200", description = "Search results returned successfully",
content = @Content(schema = @Schema(implementation = SearchResponse.class))),
@ApiResponse(responseCode = "400", description = "Invalid search parameters")
})
public ResponseEntity<SearchResponse> searchDocuments(
@Parameter(description = "Country code (ISO 3166-1 alpha-3, e.g., POL, DEU, FRA)")
@RequestParam(required = false) String countryCode,
@Parameter(description = "Multiple country codes")
@RequestParam(required = false) List<String> countryCodes,
@Parameter(description = "Notice type filter")
@RequestParam(required = false) NoticeType noticeType,
@Parameter(description = "Contract nature filter")
@RequestParam(required = false) ContractNature contractNature,
@Parameter(description = "Procedure type filter")
@RequestParam(required = false) ProcedureType procedureType,
@Parameter(description = "CPV code prefix (e.g., '33' for medical supplies)")
@RequestParam(required = false) String cpvPrefix,
@Parameter(description = "NUTS region code")
@RequestParam(required = false) String nutsCode,
@Parameter(description = "Publication date from (inclusive)")
@RequestParam(required = false) @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate publicationDateFrom,
@Parameter(description = "Publication date to (inclusive)")
@RequestParam(required = false) @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate publicationDateTo,
@Parameter(description = "Only documents with submission deadline after this date")
@RequestParam(required = false) @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME) OffsetDateTime submissionDeadlineAfter,
@Parameter(description = "Filter by EU funding status")
@RequestParam(required = false) Boolean euFunded,
@Parameter(description = "Search in buyer name (case-insensitive)")
@RequestParam(required = false) String buyerNameContains,
@Parameter(description = "Search in project title (case-insensitive)")
@RequestParam(required = false) String projectTitleContains,
@Parameter(description = "Natural language semantic search query")
@RequestParam(required = false) String q,
@Parameter(description = "Similarity threshold for semantic search (0.0-1.0)")
@RequestParam(required = false, defaultValue = "0.7") Double similarityThreshold,
@Parameter(description = "Page number (0-based)")
@RequestParam(required = false, defaultValue = "0") Integer page,
@Parameter(description = "Page size (max 100)")
@RequestParam(required = false, defaultValue = "20") Integer size,
@Parameter(description = "Sort field (publicationDate, submissionDeadline, buyerName, projectTitle)")
@RequestParam(required = false, defaultValue = "publicationDate") String sortBy,
@Parameter(description = "Sort direction (asc, desc)")
@RequestParam(required = false, defaultValue = "desc") String sortDirection
) {
SearchRequest request = SearchRequest.builder()
.countryCode(countryCode)
.countryCodes(countryCodes)
.noticeType(noticeType)
.contractNature(contractNature)
.procedureType(procedureType)
.cpvPrefix(cpvPrefix)
.nutsCode(nutsCode)
.publicationDateFrom(publicationDateFrom)
.publicationDateTo(publicationDateTo)
.submissionDeadlineAfter(submissionDeadlineAfter)
.euFunded(euFunded)
.buyerNameContains(buyerNameContains)
.projectTitleContains(projectTitleContains)
.semanticQuery(q)
.similarityThreshold(similarityThreshold)
.page(page)
.size(size)
.sortBy(sortBy)
.sortDirection(sortDirection)
.build();
log.debug("Search request: {}", request);
SearchResponse response = searchService.search(request);
return ResponseEntity.ok(response);
}
/**
* Search documents using POST with request body.
* Useful for complex queries with many parameters.
*/
@PostMapping("/search")
@Operation(
summary = "Search procurement documents (POST)",
description = "Search documents using a JSON request body for complex queries"
)
public ResponseEntity<SearchResponse> searchDocumentsPost(@RequestBody SearchRequest request) {
log.debug("Search request (POST): {}", request);
SearchResponse response = searchService.search(request);
return ResponseEntity.ok(response);
}
/**
* Get document by internal UUID.
*/
@GetMapping("/{id}")
@Operation(
summary = "Get document by ID",
description = "Retrieve full document details by internal UUID"
)
@ApiResponses(value = {
@ApiResponse(responseCode = "200", description = "Document found",
content = @Content(schema = @Schema(implementation = DocumentDetail.class))),
@ApiResponse(responseCode = "404", description = "Document not found")
})
public ResponseEntity<DocumentDetail> getDocument(
@Parameter(description = "Document UUID") @PathVariable UUID id) {
return searchService.getDocumentDetail(id)
.map(ResponseEntity::ok)
.orElse(ResponseEntity.notFound().build());
}
/**
* Get document by TED publication ID.
*/
@GetMapping("/publication/{publicationId}")
@Operation(
summary = "Get document by publication ID",
description = "Retrieve document by TED publication ID (e.g., '00786665-2025')"
)
@ApiResponses(value = {
@ApiResponse(responseCode = "200", description = "Document found"),
@ApiResponse(responseCode = "404", description = "Document not found")
})
public ResponseEntity<DocumentDetail> getDocumentByPublicationId(
@Parameter(description = "TED Publication ID") @PathVariable String publicationId) {
return searchService.getDocumentByPublicationId(publicationId)
.map(ResponseEntity::ok)
.orElse(ResponseEntity.notFound().build());
}
/**
* Get documents with upcoming submission deadlines.
*/
@GetMapping("/upcoming-deadlines")
@Operation(
summary = "Get documents with upcoming deadlines",
description = "List documents with submission deadlines in the future, sorted by deadline"
)
public ResponseEntity<List<DocumentSummary>> getUpcomingDeadlines(
@Parameter(description = "Maximum number of results")
@RequestParam(required = false, defaultValue = "20") Integer limit) {
List<DocumentSummary> documents = searchService.getUpcomingDeadlines(Math.min(limit, 100));
return ResponseEntity.ok(documents);
}
/**
* Get collection statistics.
*/
@GetMapping("/statistics")
@Operation(
summary = "Get collection statistics",
description = "Retrieve statistics about the document collection including counts by country, type, and vectorization status"
)
public ResponseEntity<StatisticsResponse> getStatistics() {
StatisticsResponse stats = searchService.getStatistics();
return ResponseEntity.ok(stats);
}
/**
* Get list of all countries in the collection.
*/
@GetMapping("/metadata/countries")
@Operation(
summary = "Get available countries",
description = "List all distinct country codes present in the collection"
)
public ResponseEntity<List<String>> getCountries() {
List<String> countries = searchService.getDistinctCountries();
return ResponseEntity.ok(countries);
}
/**
* Get available notice types.
*/
@GetMapping("/metadata/notice-types")
@Operation(
summary = "Get available notice types",
description = "List all notice type enum values"
)
public ResponseEntity<NoticeType[]> getNoticeTypes() {
return ResponseEntity.ok(NoticeType.values());
}
/**
* Get available contract natures.
*/
@GetMapping("/metadata/contract-natures")
@Operation(
summary = "Get available contract natures",
description = "List all contract nature enum values"
)
public ResponseEntity<ContractNature[]> getContractNatures() {
return ResponseEntity.ok(ContractNature.values());
}
/**
* Get available procedure types.
*/
@GetMapping("/metadata/procedure-types")
@Operation(
summary = "Get available procedure types",
description = "List all procedure type enum values"
)
public ResponseEntity<ProcedureType[]> getProcedureTypes() {
return ResponseEntity.ok(ProcedureType.values());
}
/**
* Semantic search endpoint - convenience method for natural language queries.
*/
@GetMapping("/semantic-search")
@Operation(
summary = "Semantic search",
description = "Search documents using natural language query with vector similarity"
)
public ResponseEntity<SearchResponse> semanticSearch(
@Parameter(description = "Natural language search query", required = true)
@RequestParam String query,
@Parameter(description = "Minimum similarity score (0.0-1.0)")
@RequestParam(required = false, defaultValue = "0.7") Double threshold,
@Parameter(description = "Country code filter")
@RequestParam(required = false) String countryCode,
@Parameter(description = "Notice type filter")
@RequestParam(required = false) NoticeType noticeType,
@Parameter(description = "Page number")
@RequestParam(required = false, defaultValue = "0") Integer page,
@Parameter(description = "Page size")
@RequestParam(required = false, defaultValue = "20") Integer size
) {
SearchRequest request = SearchRequest.builder()
.semanticQuery(query)
.similarityThreshold(threshold)
.countryCode(countryCode)
.noticeType(noticeType)
.page(page)
.size(size)
.build();
SearchResponse response = searchService.search(request);
return ResponseEntity.ok(response);
}
}
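The GET `/v1/documents/search` endpoint above combines structured filters and the semantic `q` parameter in a single query string. A minimal client-side sketch of assembling such a URL with proper encoding (the `SearchUrlExample` class and the parameter values are illustrative only):

```java
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;

class SearchUrlExample {
    // Build a URL-encoded query string for GET /v1/documents/search.
    static String buildQuery(Map<String, String> params) {
        return params.entrySet().stream()
                .map(e -> e.getKey() + "=" + URLEncoder.encode(e.getValue(), StandardCharsets.UTF_8))
                .collect(Collectors.joining("&"));
    }

    public static void main(String[] args) {
        // LinkedHashMap preserves insertion order, so the query string is deterministic.
        Map<String, String> params = new LinkedHashMap<>();
        params.put("countryCode", "DEU");
        params.put("cpvPrefix", "33");
        params.put("publicationDateFrom", "2025-01-01");
        params.put("q", "hospital construction");
        System.out.println("/v1/documents/search?" + buildQuery(params));
        // → /v1/documents/search?countryCode=DEU&cpvPrefix=33&publicationDateFrom=2025-01-01&q=hospital+construction
    }
}
```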

@ -0,0 +1,175 @@
package at.procon.ted.controller;
import at.procon.ted.service.SimilaritySearchService;
import at.procon.ted.service.SimilaritySearchService.SimilaritySearchResponse;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.media.Content;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
/**
* REST Controller for similarity search on TED procurement documents.
* Provides endpoints for searching similar documents using text or PDF input.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@RestController
@RequestMapping("/similarity")
@RequiredArgsConstructor
@Slf4j
@Tag(name = "Similarity Search", description = "Vector-based semantic similarity search on TED procurement documents")
public class SimilaritySearchController {
private final SimilaritySearchService similaritySearchService;
/**
* Search for similar documents using text query.
*/
@PostMapping("/text")
@Operation(
summary = "Search by text",
description = "Find similar TED procurement documents based on text content using vector similarity (cosine distance)"
)
@ApiResponses({
@ApiResponse(responseCode = "200", description = "Search completed successfully",
content = @Content(schema = @Schema(implementation = SimilaritySearchResponse.class))),
@ApiResponse(responseCode = "400", description = "Invalid request (empty text)"),
@ApiResponse(responseCode = "503", description = "Vectorization service unavailable")
})
public ResponseEntity<SimilaritySearchResponse> searchByText(
@Parameter(description = "Text content to search for similar documents", required = true)
@RequestBody TextSearchRequest request
) {
log.info("Text similarity search request: {} chars, topK={}, threshold={}",
request.getText() != null ? request.getText().length() : 0,
request.getTopK(),
request.getThreshold());
if (request.getText() == null || request.getText().isBlank()) {
return ResponseEntity.badRequest().build();
}
try {
SimilaritySearchResponse response = similaritySearchService.searchByText(
request.getText(),
request.getTopK(),
request.getThreshold()
);
return ResponseEntity.ok(response);
} catch (IllegalStateException e) {
log.error("Vectorization service unavailable: {}", e.getMessage());
return ResponseEntity.status(503).build();
} catch (Exception e) {
log.error("Text similarity search failed: {}", e.getMessage(), e);
return ResponseEntity.internalServerError().build();
}
}
/**
* Search for similar documents using PDF file.
*/
@PostMapping(value = "/pdf", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
@Operation(
summary = "Search by PDF",
description = "Upload a PDF document to find similar TED procurement documents. " +
"Text is extracted from the PDF and used for vector similarity search."
)
@ApiResponses({
@ApiResponse(responseCode = "200", description = "Search completed successfully",
content = @Content(schema = @Schema(implementation = SimilaritySearchResponse.class))),
@ApiResponse(responseCode = "400", description = "Invalid request (no file or not a PDF)"),
@ApiResponse(responseCode = "422", description = "Could not extract text from PDF"),
@ApiResponse(responseCode = "503", description = "Vectorization service unavailable")
})
public ResponseEntity<SimilaritySearchResponse> searchByPdf(
@Parameter(description = "PDF file to search for similar documents", required = true)
@RequestPart("file") MultipartFile file,
@Parameter(description = "Number of top results to return (default: 20, max: 100)")
@RequestParam(required = false, defaultValue = "20") Integer topK,
@Parameter(description = "Minimum similarity threshold (0.0-1.0, default: 0.5)")
@RequestParam(required = false, defaultValue = "0.5") Double threshold
) {
if (file == null || file.isEmpty()) {
log.warn("PDF search request with empty file");
return ResponseEntity.badRequest().build();
}
String filename = file.getOriginalFilename();
String contentType = file.getContentType();
log.info("PDF similarity search request: filename='{}', size={} bytes, topK={}, threshold={}",
filename, file.getSize(), topK, threshold);
// Validate file type: accept when either the content type or the filename indicates a PDF
// (previously a null content type skipped validation entirely)
boolean pdfContentType = contentType != null && contentType.toLowerCase().contains("pdf");
boolean pdfFilename = filename != null && filename.toLowerCase().endsWith(".pdf");
if (!pdfContentType && !pdfFilename) {
log.warn("Invalid file type: {} ({})", filename, contentType);
return ResponseEntity.badRequest().build();
}
try {
byte[] pdfData = file.getBytes();
SimilaritySearchResponse response = similaritySearchService.searchByPdf(
pdfData,
filename,
topK,
threshold
);
return ResponseEntity.ok(response);
} catch (IOException e) {
log.error("Failed to read PDF file: {}", e.getMessage());
return ResponseEntity.badRequest().build();
} catch (IllegalStateException e) {
log.error("Vectorization service unavailable: {}", e.getMessage());
return ResponseEntity.status(503).build();
} catch (RuntimeException e) {
if (e.getMessage() != null && e.getMessage().contains("extraction failed")) {
log.error("PDF extraction failed: {}", e.getMessage());
return ResponseEntity.unprocessableEntity().build();
}
log.error("PDF similarity search failed: {}", e.getMessage(), e);
return ResponseEntity.internalServerError().build();
} catch (Exception e) {
log.error("PDF similarity search failed: {}", e.getMessage(), e);
return ResponseEntity.internalServerError().build();
}
}
/**
* Request DTO for text-based similarity search.
*/
@lombok.Data
@lombok.NoArgsConstructor
@lombok.AllArgsConstructor
public static class TextSearchRequest {
@Schema(description = "Text content to search for similar documents", required = true)
private String text;
@Schema(description = "Number of top results to return (default: 20, max: 100)")
private Integer topK;
@Schema(description = "Minimum similarity threshold (0.0-1.0, default: 0.5)")
private Double threshold;
}
}
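The `/pdf` endpoint above accepts an upload when either the MIME type or the filename indicates a PDF. That validation rule can be isolated as a small pure function, sketched here standalone (the `PdfTypeCheck` class name is hypothetical):

```java
class PdfTypeCheck {
    // Accept the upload if either the declared MIME type or the filename
    // suggests a PDF; both signals are optional and checked case-insensitively.
    static boolean looksLikePdf(String contentType, String filename) {
        boolean byType = contentType != null && contentType.toLowerCase().contains("pdf");
        boolean byName = filename != null && filename.toLowerCase().endsWith(".pdf");
        return byType || byName;
    }

    public static void main(String[] args) {
        System.out.println(looksLikePdf("application/pdf", null));                  // true
        System.out.println(looksLikePdf("application/octet-stream", "tender.PDF")); // true
        System.out.println(looksLikePdf("text/plain", "notes.txt"));                // false
    }
}
```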

@ -0,0 +1,12 @@
package at.procon.ted.event;
import java.util.UUID;
/**
* Event published after a document has been successfully saved to the database.
* Triggers async vectorization after transaction commit.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public record DocumentSavedEvent(UUID documentId, String publicationId) {
}

@ -0,0 +1,46 @@
package at.procon.ted.event;
import at.procon.ted.config.TedProcessorProperties;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.ProducerTemplate;
import org.springframework.stereotype.Component;
import org.springframework.transaction.event.TransactionPhase;
import org.springframework.transaction.event.TransactionalEventListener;
/**
* Event listener that triggers vectorization after document save transaction commits.
* This ensures the document is visible in the database before vectorization starts.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class VectorizationEventListener {
private final ProducerTemplate producerTemplate;
private final TedProcessorProperties properties;
/**
* Triggered AFTER the transaction commits (document is now visible in DB).
* Queues the document for async vectorization.
*/
@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
public void onDocumentSaved(DocumentSavedEvent event) {
if (!properties.getVectorization().isEnabled()) {
return;
}
try {
log.debug("Document saved event received, triggering vectorization for: {}", event.documentId());
producerTemplate.sendBodyAndHeader("direct:vectorize", null, "documentId", event.documentId());
log.debug("Vectorization queued for document: {} (publication: {})",
event.documentId(), event.publicationId());
} catch (Exception e) {
log.warn("Failed to queue document {} for vectorization: {}",
event.documentId(), e.getMessage());
// Non-critical: scheduler will pick it up later
}
}
}

@ -0,0 +1,264 @@
package at.procon.ted.model.dto;
import at.procon.ted.model.entity.ContractNature;
import at.procon.ted.model.entity.NoticeType;
import at.procon.ted.model.entity.ProcedureType;
import at.procon.ted.model.entity.VectorizationStatus;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.UUID;
/**
* DTOs for procurement document API responses.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public class DocumentDtos {
/**
* Summary DTO for list views and search results.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class DocumentSummary {
private UUID id;
private String publicationId;
private String noticeId;
private NoticeType noticeType;
private String projectTitle;
private String buyerName;
private String buyerCountryCode;
private String buyerCity;
private ContractNature contractNature;
private ProcedureType procedureType;
private LocalDate publicationDate;
private OffsetDateTime submissionDeadline;
private List<String> cpvCodes;
private Integer totalLots;
private BigDecimal estimatedValue;
private String estimatedValueCurrency;
private Double similarity; // For semantic search results
}
/**
* Detailed DTO for single document view.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class DocumentDetail {
private UUID id;
private String documentHash;
// Identifiers
private String publicationId;
private String noticeId;
private String ojsId;
private String contractFolderId;
// Classification
private NoticeType noticeType;
private String noticeSubtypeCode;
private String sdkVersion;
private String ublVersion;
private String languageCode;
// Dates
private OffsetDateTime issueDateTime;
private LocalDate publicationDate;
private OffsetDateTime submissionDeadline;
// Buyer information
private String buyerName;
private String buyerCountryCode;
private String buyerCity;
private String buyerPostalCode;
private String buyerNutsCode;
private String buyerActivityType;
private String buyerLegalType;
// Project information
private String projectTitle;
private String projectDescription;
private String internalReference;
private ContractNature contractNature;
private ProcedureType procedureType;
// Classification codes
private List<String> cpvCodes;
private List<String> nutsCodes;
// Financial
private BigDecimal estimatedValue;
private String estimatedValueCurrency;
// Lots
private Integer totalLots;
private Integer maxLotsAwarded;
private Integer maxLotsSubmitted;
private List<LotSummary> lots;
// Organizations
private List<OrganizationSummary> organizations;
// Legal
private String regulatoryDomain;
private Boolean euFunded;
// Vectorization
private VectorizationStatus vectorizationStatus;
private OffsetDateTime vectorizedAt;
// Metadata
private String sourceFilename;
private Long fileSizeBytes;
private OffsetDateTime createdAt;
private OffsetDateTime updatedAt;
}
/**
* Lot summary for document detail view.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class LotSummary {
private UUID id;
private String lotId;
private String internalId;
private String title;
private String description;
private List<String> cpvCodes;
private List<String> nutsCodes;
private BigDecimal estimatedValue;
private String estimatedValueCurrency;
private Double durationValue;
private String durationUnit;
private OffsetDateTime submissionDeadline;
private Boolean euFunded;
}
/**
* Organization summary for document detail view.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class OrganizationSummary {
private UUID id;
private String orgReference;
private String role;
private String name;
private String companyId;
private String countryCode;
private String city;
private String postalCode;
private String nutsCode;
private String websiteUri;
private String email;
private String phone;
}
/**
* Search request for structured + semantic search.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class SearchRequest {
// Structured filters
private String countryCode;
private List<String> countryCodes;
private NoticeType noticeType;
private ContractNature contractNature;
private ProcedureType procedureType;
private String cpvPrefix;
private List<String> cpvCodes;
private String nutsCode;
private List<String> nutsCodes;
private LocalDate publicationDateFrom;
private LocalDate publicationDateTo;
private OffsetDateTime submissionDeadlineAfter;
private Boolean euFunded;
private String buyerNameContains;
private String projectTitleContains;
// Semantic search
private String semanticQuery;
private Double similarityThreshold;
// Pagination
private Integer page;
private Integer size;
private String sortBy;
private String sortDirection;
}
/**
* Search response with pagination.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class SearchResponse {
private List<DocumentSummary> documents;
private int page;
private int size;
private long totalElements;
private int totalPages;
private boolean hasNext;
private boolean hasPrevious;
}
/**
* Statistics response.
*/
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class StatisticsResponse {
private long totalDocuments;
private long vectorizedDocuments;
private long pendingVectorization;
private long failedVectorization;
private int uniqueCountries;
private LocalDate earliestPublication;
private LocalDate latestPublication;
private long totalLots;
private List<CountryStats> countryStatistics;
private List<NoticeTypeStats> noticeTypeStatistics;
}
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class CountryStats {
private String countryCode;
private long documentCount;
}
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public static class NoticeTypeStats {
private NoticeType noticeType;
private long documentCount;
}
}
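The derived fields in `SearchResponse` (`totalPages`, `hasNext`, `hasPrevious`) follow standard page arithmetic over 0-based page numbers, matching the controller's `page` defaults. A minimal sketch of that computation (the `PaginationExample` class name is illustrative):

```java
class PaginationExample {
    // Ceiling division: number of pages needed to hold totalElements items.
    static int totalPages(long totalElements, int size) {
        return size <= 0 ? 0 : (int) ((totalElements + size - 1) / size);
    }

    public static void main(String[] args) {
        long totalElements = 105;
        int page = 2, size = 20;
        int pages = totalPages(totalElements, size);  // 6 pages: 0..5
        boolean hasNext = page < pages - 1;
        boolean hasPrevious = page > 0;
        System.out.println(pages + " " + hasNext + " " + hasPrevious);
        // → 6 true true
    }
}
```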

@ -0,0 +1,15 @@
package at.procon.ted.model.entity;
/**
* Enum representing the nature of the procurement contract.
* Maps to eForms contract-nature codelist.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public enum ContractNature {
SUPPLIES, // Procurement of goods/supplies
SERVICES, // Procurement of services
WORKS, // Procurement of construction works
MIXED, // Mixed contracts
UNKNOWN // Unknown or not specified
}

@ -0,0 +1,15 @@
package at.procon.ted.model.entity;
/**
* Enum representing the type of TED procurement notice.
* Based on eForms notice categories.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public enum NoticeType {
CONTRACT_NOTICE, // Contract notices (cn-*)
PRIOR_INFORMATION_NOTICE, // Prior information notices (pin-*)
CONTRACT_AWARD_NOTICE, // Contract award notices (can-*)
MODIFICATION_NOTICE, // Contract modification notices (mod-*)
OTHER // Other or unrecognized notice types
}
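The comments above associate each enum value with an eForms subtype prefix (`cn-*`, `pin-*`, `can-*`, `mod-*`). A standalone sketch of mapping a subtype code to the enum by prefix; the enum is redeclared locally so the snippet is self-contained, and the sample codes like `"cn-standard"` are illustrative, not taken from the codebase:

```java
class NoticeTypeMapping {
    enum NoticeType { CONTRACT_NOTICE, PRIOR_INFORMATION_NOTICE, CONTRACT_AWARD_NOTICE, MODIFICATION_NOTICE, OTHER }

    // Classify an eForms notice subtype code by its prefix; anything
    // unrecognized (or null) falls back to OTHER.
    static NoticeType fromSubtype(String code) {
        if (code == null) return NoticeType.OTHER;
        String c = code.toLowerCase();
        if (c.startsWith("cn-"))  return NoticeType.CONTRACT_NOTICE;
        if (c.startsWith("pin-")) return NoticeType.PRIOR_INFORMATION_NOTICE;
        if (c.startsWith("can-")) return NoticeType.CONTRACT_AWARD_NOTICE;
        if (c.startsWith("mod-")) return NoticeType.MODIFICATION_NOTICE;
        return NoticeType.OTHER;
    }

    public static void main(String[] args) {
        System.out.println(fromSubtype("cn-standard"));   // CONTRACT_NOTICE
        System.out.println(fromSubtype("can-standard"));  // CONTRACT_AWARD_NOTICE
        System.out.println(fromSubtype("unknown"));       // OTHER
    }
}
```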

@ -0,0 +1,90 @@
package at.procon.ted.model.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
* JPA Entity representing an organization mentioned in a procurement notice.
* Can be buyers, review bodies, service providers, etc.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Entity
@Table(name = "organization", indexes = {
@Index(name = "idx_org_document", columnList = "document_id"),
@Index(name = "idx_org_country", columnList = "country_code")
}, uniqueConstraints = {
@UniqueConstraint(columnNames = {"document_id", "org_reference"})
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class Organization {
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "document_id", nullable = false)
private ProcurementDocument document;
/**
* Internal organization reference from XML (e.g., "ORG-0001").
*/
@Column(name = "org_reference", length = 50)
private String orgReference;
/**
* Role of the organization (e.g., "buyer", "review-body", "ted-esen").
*/
@Column(name = "role", length = 50)
private String role;
@Column(name = "name", columnDefinition = "TEXT")
private String name;
/**
* Company/tax registration ID.
*/
@Column(name = "company_id", length = 1000)
private String companyId;
@Column(name = "country_code", length = 10)
private String countryCode;
@Column(name = "city", length = 255)
private String city;
@Column(name = "postal_code", length = 255)
private String postalCode;
@Column(name = "street_name", columnDefinition = "TEXT")
private String streetName;
@Column(name = "nuts_code", length = 10)
private String nutsCode;
@Column(name = "website_uri", columnDefinition = "TEXT")
private String websiteUri;
@Column(name = "email", length = 255)
private String email;
@Column(name = "phone", length = 50)
private String phone;
@Column(name = "created_at", nullable = false, updatable = false)
@Builder.Default
private OffsetDateTime createdAt = OffsetDateTime.now();
@PrePersist
protected void onCreate() {
createdAt = OffsetDateTime.now();
}
}

@ -0,0 +1,17 @@
package at.procon.ted.model.entity;
/**
* Enum representing the procurement procedure type.
* Maps to eForms procurement-procedure-type codelist.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public enum ProcedureType {
OPEN, // Open procedure
RESTRICTED, // Restricted procedure
COMPETITIVE_DIALOGUE, // Competitive dialogue
INNOVATION_PARTNERSHIP, // Innovation partnership
NEGOTIATED_WITHOUT_PUBLICATION, // Negotiated without prior publication
NEGOTIATED_WITH_PUBLICATION, // Negotiated with prior publication
OTHER // Other or not specified
}

@ -0,0 +1,146 @@
package at.procon.ted.model.entity;
import jakarta.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.time.LocalDateTime;
import java.util.UUID;
/**
* Entity for tracking processed mail attachments.
* Uses content hash for idempotent processing to avoid duplicate handling.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Entity
@Table(name = "processed_attachment", schema = "ted",
indexes = {
@Index(name = "idx_processed_attachment_hash", columnList = "content_hash", unique = true),
@Index(name = "idx_processed_attachment_status", columnList = "processing_status"),
@Index(name = "idx_processed_attachment_type", columnList = "file_type")
})
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class ProcessedAttachment {
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
/**
* SHA-256 hash of the attachment content for idempotent processing.
*/
@Column(name = "content_hash", nullable = false, unique = true, length = 64)
private String contentHash;
/**
* Original filename of the attachment.
*/
@Column(name = "original_filename", nullable = false, length = 500)
private String originalFilename;
/**
* Detected or declared file type (e.g., PDF, ZIP, XML).
*/
@Column(name = "file_type", length = 50)
private String fileType;
/**
* MIME content type.
*/
@Column(name = "content_type", length = 255)
private String contentType;
/**
* File size in bytes.
*/
@Column(name = "file_size")
private Long fileSize;
/**
* Processing status of the attachment.
*/
@Enumerated(EnumType.STRING)
@Column(name = "processing_status", nullable = false, length = 20)
private ProcessingStatus processingStatus;
/**
* Extracted text content (for PDF, etc.).
*/
@Column(name = "extracted_text", columnDefinition = "TEXT")
private String extractedText;
/**
* Path where the attachment was saved.
*/
@Column(name = "saved_path", length = 1000)
private String savedPath;
/**
* Email subject from which the attachment was extracted.
*/
@Column(name = "mail_subject", length = 500)
private String mailSubject;
/**
* Email sender.
*/
@Column(name = "mail_from", length = 500)
private String mailFrom;
/**
* Parent attachment hash (for files extracted from ZIP).
*/
@Column(name = "parent_hash", length = 64)
private String parentHash;
/**
* Error message if processing failed.
*/
@Column(name = "error_message", columnDefinition = "TEXT")
private String errorMessage;
/**
* Number of child attachments (for ZIP files).
*/
@Column(name = "child_count")
private Integer childCount;
/**
* When the attachment was first received.
*/
@Column(name = "received_at", nullable = false)
private LocalDateTime receivedAt;
/**
* When processing was completed.
*/
@Column(name = "processed_at")
private LocalDateTime processedAt;
/**
* Processing status enum.
*/
public enum ProcessingStatus {
PENDING,
PROCESSING,
COMPLETED,
FAILED,
DUPLICATE
}
@PrePersist
protected void onCreate() {
if (receivedAt == null) {
receivedAt = LocalDateTime.now();
}
if (processingStatus == null) {
processingStatus = ProcessingStatus.PENDING;
}
}
}
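
The `content_hash` column above stores a 64-character SHA-256 hex digest as the idempotency key. A self-contained sketch of computing it (the helper class name is an assumption; the entity does not prescribe one):

```java
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HexFormat;

// Computes the hex-encoded SHA-256 digest used as content_hash (64 chars).
public class ContentHasher {
    public static String sha256Hex(byte[] content) {
        try {
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            return HexFormat.of().formatHex(md.digest(content));
        } catch (NoSuchAlgorithmException e) {
            // SHA-256 is mandatory in every JRE, so this cannot happen
            throw new IllegalStateException("SHA-256 unavailable", e);
        }
    }

    public static void main(String[] args) {
        System.out.println(sha256Hex("hello".getBytes()).length()); // 64
    }
}
```

`existsByContentHash(...)` in the repository below can then short-circuit duplicates before any parsing work.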

@ -0,0 +1,91 @@
package at.procon.ted.model.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
* JPA Entity for logging document processing events.
* Provides audit trail and debugging information.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Entity
@Table(name = "processing_log", indexes = {
@Index(name = "idx_log_document", columnList = "document_id"),
@Index(name = "idx_log_created", columnList = "created_at"),
@Index(name = "idx_log_event_type", columnList = "event_type")
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ProcessingLog {
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "document_id")
private ProcurementDocument document;
@Column(name = "document_hash", length = 64)
private String documentHash;
@Column(name = "event_type", nullable = false, length = 50)
private String eventType;
@Column(name = "event_status", nullable = false, length = 20)
private String eventStatus;
@Column(name = "message", columnDefinition = "TEXT")
private String message;
@Column(name = "error_details", columnDefinition = "TEXT")
private String errorDetails;
@Column(name = "source_filename", length = 500)
private String sourceFilename;
@Column(name = "duration_ms")
private Integer durationMs;
@Column(name = "created_at", nullable = false, updatable = false)
@Builder.Default
private OffsetDateTime createdAt = OffsetDateTime.now();
@PrePersist
protected void onCreate() {
createdAt = OffsetDateTime.now();
}
/**
* Event types for processing log entries.
*/
public static final class EventType {
public static final String RECEIVED = "RECEIVED";
public static final String VALIDATED = "VALIDATED";
public static final String PARSED = "PARSED";
public static final String STORED = "STORED";
public static final String VECTORIZED = "VECTORIZED";
public static final String DUPLICATE = "DUPLICATE";
public static final String ERROR = "ERROR";
private EventType() {}
}
/**
* Status values for processing log entries.
*/
public static final class EventStatus {
public static final String SUCCESS = "SUCCESS";
public static final String FAILURE = "FAILURE";
public static final String SKIPPED = "SKIPPED";
private EventStatus() {}
}
}

@ -0,0 +1,281 @@
package at.procon.ted.model.entity;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
* JPA Entity representing an EU eForms procurement document from TED.
*
* Stores the complete XML document along with extracted metadata for efficient querying.
* Supports semantic search via pgvector embeddings.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Entity
@Table(name = "procurement_document", indexes = {
@Index(name = "idx_doc_hash", columnList = "document_hash"),
@Index(name = "idx_doc_publication_id", columnList = "publication_id"),
@Index(name = "idx_doc_buyer_country", columnList = "buyer_country_code"),
@Index(name = "idx_doc_publication_date", columnList = "publication_date")
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ProcurementDocument {
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
/**
* SHA-256 hash of the XML content for idempotent processing.
* Used as the unique key to prevent duplicate document imports.
*/
@Column(name = "document_hash", nullable = false, unique = true, length = 64)
private String documentHash;
// TED/eForms identifiers
@Column(name = "notice_id", length = 100)
private String noticeId;
@Column(name = "publication_id", length = 50)
private String publicationId;
/**
* TED notice URL generated from publication_id.
* Format: https://ted.europa.eu/en/notice/-/detail/{publication_id without leading zeros}
* Example: https://ted.europa.eu/en/notice/-/detail/786665-2025
*/
@Column(name = "notice_url", length = 255)
private String noticeUrl;
@Column(name = "ojs_id", length = 20)
private String ojsId;
@Column(name = "contract_folder_id", length = 100)
private String contractFolderId;
// Document classification
@Enumerated(EnumType.STRING)
@Column(name = "notice_type", nullable = false, length = 50)
@Builder.Default
private NoticeType noticeType = NoticeType.OTHER;
@Column(name = "notice_subtype_code", length = 10)
private String noticeSubtypeCode;
@Column(name = "sdk_version", length = 20)
private String sdkVersion;
@Column(name = "ubl_version", length = 10)
private String ublVersion;
@Column(name = "language_code", length = 10)
private String languageCode;
// Timestamps
@Column(name = "issue_datetime")
private OffsetDateTime issueDateTime;
@Column(name = "publication_date")
private LocalDate publicationDate;
@Column(name = "submission_deadline")
private OffsetDateTime submissionDeadline;
// Contracting authority (buyer) information
@Column(name = "buyer_name", columnDefinition = "TEXT")
private String buyerName;
@Column(name = "buyer_country_code", length = 10)
private String buyerCountryCode;
@Column(name = "buyer_city", length = 255)
private String buyerCity;
@Column(name = "buyer_postal_code", length = 100)
private String buyerPostalCode;
@Column(name = "buyer_nuts_code", length = 10)
private String buyerNutsCode;
@Column(name = "buyer_activity_type", length = 50)
private String buyerActivityType;
@Column(name = "buyer_legal_type", length = 50)
private String buyerLegalType;
// Procurement project details
@Column(name = "project_title", columnDefinition = "TEXT")
private String projectTitle;
@Column(name = "project_description", columnDefinition = "TEXT")
private String projectDescription;
@Column(name = "internal_reference", length = 500)
private String internalReference;
@Enumerated(EnumType.STRING)
@Column(name = "contract_nature", nullable = false, length = 50)
@Builder.Default
private ContractNature contractNature = ContractNature.UNKNOWN;
@Enumerated(EnumType.STRING)
@Column(name = "procedure_type", length = 50)
@Builder.Default
private ProcedureType procedureType = ProcedureType.OTHER;
// Classification codes (stored as PostgreSQL arrays)
@Column(name = "cpv_codes", columnDefinition = "VARCHAR(100)[]")
@JdbcTypeCode(SqlTypes.ARRAY)
private String[] cpvCodes;
@Column(name = "nuts_codes", columnDefinition = "VARCHAR(20)[]")
@JdbcTypeCode(SqlTypes.ARRAY)
private String[] nutsCodes;
// Financial information
@Column(name = "estimated_value", precision = 20, scale = 2)
private BigDecimal estimatedValue;
@Column(name = "estimated_value_currency", length = 3)
private String estimatedValueCurrency;
// Lot information
@Column(name = "total_lots")
@Builder.Default
private Integer totalLots = 0;
@Column(name = "max_lots_awarded")
private Integer maxLotsAwarded;
@Column(name = "max_lots_submitted")
private Integer maxLotsSubmitted;
// Legal basis
@Column(name = "regulatory_domain", length = 50)
private String regulatoryDomain;
@Column(name = "eu_funded")
@Builder.Default
private Boolean euFunded = false;
/**
* Normalized text content extracted from the XML for vectorization.
* Contains title, description, buyer info, and other searchable text.
*/
@Column(name = "text_content", columnDefinition = "TEXT")
private String textContent;
/**
* Original XML document stored in PostgreSQL native XML type.
* Enables XPath queries like: xpath('/ContractNotice/cbc:ID/text()', xml_document)
*/
@Column(name = "xml_document", nullable = false)
@JdbcTypeCode(SqlTypes.SQLXML)
private String xmlDocument;
/**
* 1024-dimensional vector embedding for semantic search.
* Generated using intfloat/multilingual-e5-large model.
*
* Note: This field is @Transient because the pgvector type is not natively
* supported by Hibernate/JDBC. Vectors are written via native SQL queries only.
*/
@Transient
private float[] contentVector;
// Vectorization tracking
@Enumerated(EnumType.STRING)
@Column(name = "vectorization_status", length = 50)
@Builder.Default
private VectorizationStatus vectorizationStatus = VectorizationStatus.PENDING;
@Column(name = "vectorization_error", columnDefinition = "TEXT")
private String vectorizationError;
@Column(name = "vectorized_at")
private OffsetDateTime vectorizedAt;
@Column(name = "embedding_token_count")
private Integer embeddingTokenCount;
// Processing metadata
@Column(name = "source_filename", length = 500)
private String sourceFilename;
@Column(name = "source_path", columnDefinition = "TEXT")
private String sourcePath;
@Column(name = "file_size_bytes")
private Long fileSizeBytes;
// Audit fields
@Column(name = "created_at", nullable = false, updatable = false)
@Builder.Default
private OffsetDateTime createdAt = OffsetDateTime.now();
@Column(name = "updated_at")
@Builder.Default
private OffsetDateTime updatedAt = OffsetDateTime.now();
@Column(name = "processing_duration_ms")
private Integer processingDurationMs;
// Relationships
@OneToMany(mappedBy = "document", cascade = CascadeType.ALL, orphanRemoval = true)
@Builder.Default
private List<ProcurementLot> lots = new ArrayList<>();
@OneToMany(mappedBy = "document", cascade = CascadeType.ALL, orphanRemoval = true)
@Builder.Default
private List<Organization> organizations = new ArrayList<>();
// Helper methods
public void addLot(ProcurementLot lot) {
lots.add(lot);
lot.setDocument(this);
}
public void addOrganization(Organization organization) {
organizations.add(organization);
organization.setDocument(this);
}
@PrePersist
protected void onCreate() {
createdAt = OffsetDateTime.now();
updatedAt = OffsetDateTime.now();
generateNoticeUrl();
}
@PreUpdate
protected void onUpdate() {
updatedAt = OffsetDateTime.now();
generateNoticeUrl();
}
/**
* Generates TED notice URL from publication_id.
* Format: https://ted.europa.eu/en/notice/-/detail/{publication_id without leading zeros}
*/
private void generateNoticeUrl() {
if (publicationId != null && !publicationId.isEmpty()) {
// Remove leading zeros from publication_id
String cleanId = publicationId.replaceFirst("^0+", "");
this.noticeUrl = "https://ted.europa.eu/en/notice/-/detail/" + cleanId;
}
}
}
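
A standalone replica of the `generateNoticeUrl()` logic above, runnable outside JPA, illustrating the leading-zero stripping:

```java
// Mirrors ProcurementDocument.generateNoticeUrl(): strip leading zeros
// from the publication id and prepend the TED detail-page base URL.
public class NoticeUrls {
    static final String BASE = "https://ted.europa.eu/en/notice/-/detail/";

    public static String noticeUrl(String publicationId) {
        if (publicationId == null || publicationId.isEmpty()) return null;
        return BASE + publicationId.replaceFirst("^0+", "");
    }

    public static void main(String[] args) {
        System.out.println(noticeUrl("00786665-2025"));
        // https://ted.europa.eu/en/notice/-/detail/786665-2025
    }
}
```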

@ -0,0 +1,92 @@
package at.procon.ted.model.entity;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.annotations.JdbcTypeCode;
import org.hibernate.type.SqlTypes;
import java.math.BigDecimal;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
* JPA Entity representing a lot within a procurement notice.
* A procurement document can have multiple lots.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Entity
@Table(name = "procurement_lot", indexes = {
@Index(name = "idx_lot_document", columnList = "document_id")
}, uniqueConstraints = {
@UniqueConstraint(columnNames = {"document_id", "lot_id"})
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class ProcurementLot {
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
@ManyToOne(fetch = FetchType.LAZY)
@JoinColumn(name = "document_id", nullable = false)
private ProcurementDocument document;
/**
* Lot identifier from the XML (e.g., "LOT-0001").
*/
@Column(name = "lot_id", nullable = false, length = 50)
private String lotId;
/**
* Buyer's internal reference for this lot.
*/
@Column(name = "internal_id", columnDefinition = "TEXT")
private String internalId;
@Column(name = "title", columnDefinition = "TEXT")
private String title;
@Column(name = "description", columnDefinition = "TEXT")
private String description;
@Column(name = "cpv_codes", columnDefinition = "VARCHAR(100)[]")
@JdbcTypeCode(SqlTypes.ARRAY)
private String[] cpvCodes;
@Column(name = "nuts_codes", columnDefinition = "VARCHAR(20)[]")
@JdbcTypeCode(SqlTypes.ARRAY)
private String[] nutsCodes;
@Column(name = "estimated_value", precision = 20, scale = 2)
private BigDecimal estimatedValue;
@Column(name = "estimated_value_currency", length = 3)
private String estimatedValueCurrency;
@Column(name = "duration_value")
private Double durationValue;
@Column(name = "duration_unit", length = 20)
private String durationUnit;
@Column(name = "submission_deadline")
private OffsetDateTime submissionDeadline;
@Column(name = "eu_funded")
@Builder.Default
private Boolean euFunded = false;
@Column(name = "created_at", nullable = false, updatable = false)
@Builder.Default
private OffsetDateTime createdAt = OffsetDateTime.now();
@PrePersist
protected void onCreate() {
createdAt = OffsetDateTime.now();
}
}

@ -0,0 +1,163 @@
package at.procon.ted.model.entity;
import jakarta.persistence.*;
import lombok.*;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
* JPA Entity for tracking downloaded TED Daily Packages.
*
* Stores information about each downloaded package to ensure idempotency
* and track download progress.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Entity
@Table(name = "ted_daily_package", indexes = {
@Index(name = "idx_package_identifier", columnList = "package_identifier", unique = true),
@Index(name = "idx_package_year_serial", columnList = "year,serial_number", unique = true),
@Index(name = "idx_package_status", columnList = "download_status"),
@Index(name = "idx_package_downloaded_at", columnList = "downloaded_at")
})
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
@Builder
public class TedDailyPackage {
@Id
@GeneratedValue(strategy = GenerationType.UUID)
private UUID id;
/**
* Package identifier in format YYYYSSSSS (e.g. 202400001)
*/
@Column(name = "package_identifier", nullable = false, unique = true, length = 20)
private String packageIdentifier;
/**
* Year of the package (e.g. 2024)
*/
@Column(name = "year", nullable = false)
private Integer year;
/**
* Serial number within the year (e.g. 1, 2, 3...)
*/
@Column(name = "serial_number", nullable = false)
private Integer serialNumber;
/**
* Download URL
*/
@Column(name = "download_url", nullable = false, length = 500)
private String downloadUrl;
/**
* SHA-256 hash of the downloaded tar.gz file
*/
@Column(name = "file_hash", length = 64)
private String fileHash;
/**
* Number of extracted XML files
*/
@Column(name = "xml_file_count")
private Integer xmlFileCount;
/**
* Number of successfully processed documents
*/
@Column(name = "processed_count")
@Builder.Default
private Integer processedCount = 0;
/**
* Number of failed documents
*/
@Column(name = "failed_count")
@Builder.Default
private Integer failedCount = 0;
/**
* Download status
*/
@Enumerated(EnumType.STRING)
@Column(name = "download_status", nullable = false, length = 30)
@Builder.Default
private DownloadStatus downloadStatus = DownloadStatus.PENDING;
/**
* Error message for failed download
*/
@Column(name = "error_message", columnDefinition = "TEXT")
private String errorMessage;
/**
* Timestamp of successful download
*/
@Column(name = "downloaded_at")
private OffsetDateTime downloadedAt;
/**
* Timestamp of complete processing
*/
@Column(name = "processed_at")
private OffsetDateTime processedAt;
/**
* Download duration in milliseconds
*/
@Column(name = "download_duration_ms")
private Long downloadDurationMs;
/**
* Processing duration in milliseconds
*/
@Column(name = "processing_duration_ms")
private Long processingDurationMs;
/**
* Timestamp of creation
*/
@Column(name = "created_at", nullable = false)
@Builder.Default
private OffsetDateTime createdAt = OffsetDateTime.now();
/**
* Timestamp of last update
*/
@Column(name = "updated_at", nullable = false)
@Builder.Default
private OffsetDateTime updatedAt = OffsetDateTime.now();
/**
* Download status enum
*/
public enum DownloadStatus {
PENDING, // Not yet downloaded
DOWNLOADING, // Download in progress
DOWNLOADED, // Downloaded, not processed
PROCESSING, // Being processed
COMPLETED, // Fully processed
FAILED, // Download or processing failed
NOT_FOUND // Package does not exist (404)
}
@PrePersist
protected void onCreate() {
if (createdAt == null) {
createdAt = OffsetDateTime.now();
}
if (updatedAt == null) {
updatedAt = OffsetDateTime.now();
}
}
@PreUpdate
protected void onUpdate() {
updatedAt = OffsetDateTime.now();
}
}
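
The `packageIdentifier` Javadoc above documents the YYYYSSSSS format (e.g. 202400001). A minimal sketch of building and splitting that identifier, assuming a 4-digit year and a zero-padded 5-digit serial; the helper class is hypothetical:

```java
// Builds and parses the YYYYSSSSS package identifier, e.g. 2024 + 1 -> "202400001".
public class PackageIds {
    public static String identifierFor(int year, int serialNumber) {
        return String.format("%04d%05d", year, serialNumber);
    }

    public static int yearOf(String identifier) {
        return Integer.parseInt(identifier.substring(0, 4));
    }

    public static int serialOf(String identifier) {
        return Integer.parseInt(identifier.substring(4));
    }

    public static void main(String[] args) {
        System.out.println(identifierFor(2024, 1)); // 202400001
    }
}
```

The unique index on (`year`, `serial_number`) together with the unique `package_identifier` keeps both representations consistent.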

@ -0,0 +1,16 @@
package at.procon.ted.model.entity;
/**
* Enum representing the status of document vectorization.
* Used for tracking asynchronous vectorization processing.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public enum VectorizationStatus {
PENDING, // Awaiting vectorization
PROCESSING, // Currently being vectorized
COMPLETED, // Successfully vectorized
COMPLETED_Temporal, // Legacy: Successfully vectorized (temporal)
FAILED, // Vectorization failed
SKIPPED // Skipped (e.g., no text content)
}

@ -0,0 +1,58 @@
package at.procon.ted.repository;
import at.procon.ted.model.entity.ProcessedAttachment;
import at.procon.ted.model.entity.ProcessedAttachment.ProcessingStatus;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
/**
* Repository for ProcessedAttachment entity.
* Provides idempotent attachment tracking via content hash.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Repository
public interface ProcessedAttachmentRepository extends JpaRepository<ProcessedAttachment, UUID> {
/**
* Find attachment by content hash for idempotency check.
*/
Optional<ProcessedAttachment> findByContentHash(String contentHash);
/**
* Check if an attachment with given hash already exists.
*/
boolean existsByContentHash(String contentHash);
/**
* Find all attachments with given processing status.
*/
List<ProcessedAttachment> findByProcessingStatus(ProcessingStatus status);
/**
* Find all child attachments (extracted from a ZIP).
*/
List<ProcessedAttachment> findByParentHash(String parentHash);
/**
* Find attachments by file type.
*/
List<ProcessedAttachment> findByFileType(String fileType);
/**
* Count attachments by status.
*/
long countByProcessingStatus(ProcessingStatus status);
/**
* Find pending attachments for retry processing.
*/
@Query("SELECT a FROM ProcessedAttachment a WHERE a.processingStatus = 'PENDING' OR a.processingStatus = 'FAILED' ORDER BY a.receivedAt ASC")
List<ProcessedAttachment> findPendingOrFailed();
}

@ -0,0 +1,31 @@
package at.procon.ted.repository;
import at.procon.ted.model.entity.ProcessingLog;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.UUID;
/**
* Repository for ProcessingLog entities.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Repository
public interface ProcessingLogRepository extends JpaRepository<ProcessingLog, UUID> {
List<ProcessingLog> findByDocumentIdOrderByCreatedAtDesc(UUID documentId);
List<ProcessingLog> findByDocumentHashOrderByCreatedAtDesc(String documentHash);
Page<ProcessingLog> findByEventTypeOrderByCreatedAtDesc(String eventType, Pageable pageable);
@Query("SELECT l FROM ProcessingLog l WHERE l.createdAt >= :since ORDER BY l.createdAt DESC")
List<ProcessingLog> findRecentLogs(@Param("since") OffsetDateTime since);
}

@ -0,0 +1,232 @@
package at.procon.ted.repository;
import at.procon.ted.model.entity.*;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.JpaSpecificationExecutor;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
/**
* Repository for ProcurementDocument entities.
* Provides standard CRUD operations plus custom queries for search and statistics.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Repository
public interface ProcurementDocumentRepository extends
JpaRepository<ProcurementDocument, UUID>,
JpaSpecificationExecutor<ProcurementDocument> {
/**
* Find document by its SHA-256 hash (for idempotent processing).
*/
Optional<ProcurementDocument> findByDocumentHash(String documentHash);
/**
* Check if a document with the given hash already exists.
*/
boolean existsByDocumentHash(String documentHash);
/**
* Find document by TED publication ID.
*/
Optional<ProcurementDocument> findByPublicationId(String publicationId);
/**
* Find document by TED notice URL.
* Example: https://ted.europa.eu/en/notice/-/detail/786665-2025
*/
Optional<ProcurementDocument> findByNoticeUrl(String noticeUrl);
/**
* Find documents by buyer country code.
*/
Page<ProcurementDocument> findByBuyerCountryCode(String countryCode, Pageable pageable);
/**
* Find documents by notice type.
*/
Page<ProcurementDocument> findByNoticeType(NoticeType noticeType, Pageable pageable);
/**
* Find documents pending vectorization (ordered by creation date).
*/
@Query("SELECT d FROM ProcurementDocument d WHERE d.vectorizationStatus = :status ORDER BY d.createdAt ASC")
List<ProcurementDocument> findByVectorizationStatus(
@Param("status") VectorizationStatus status,
Pageable pageable);
/**
* Get only text content for vectorization (memory efficient - does not load XML).
*/
@Query("SELECT p.textContent FROM ProcurementDocument p WHERE p.id = :id")
String findTextContentById(@Param("id") UUID id);
/**
* Find document IDs by vectorization status (memory efficient - does not load full entities).
*/
@Query("SELECT d.id FROM ProcurementDocument d WHERE d.vectorizationStatus = :status ORDER BY d.createdAt ASC")
List<UUID> findIdsByVectorizationStatus(@Param("status") VectorizationStatus status, Pageable pageable);
/**
* Update vectorization status for a document.
*/
@Modifying
@Query("UPDATE ProcurementDocument d SET d.vectorizationStatus = :status, " +
"d.vectorizationError = :error, d.vectorizedAt = :vectorizedAt " +
"WHERE d.id = :id")
int updateVectorizationStatus(
@Param("id") UUID id,
@Param("status") VectorizationStatus status,
@Param("error") String error,
@Param("vectorizedAt") OffsetDateTime vectorizedAt);
/**
* Update document vector after successful vectorization.
*/
@Modifying
@Query(value = "UPDATE ted.procurement_document SET content_vector = CAST(:vectorData AS vector), " +
"vectorization_status = 'COMPLETED', vectorized_at = CURRENT_TIMESTAMP, " +
"vectorization_error = NULL, embedding_token_count = :tokenCount WHERE id = :id",
nativeQuery = true)
int updateContentVector(@Param("id") UUID id, @Param("vectorData") String vectorData, @Param("tokenCount") Integer tokenCount);
/**
* Simple semantic search using cosine similarity without filters.
* Returns document IDs and similarity scores sorted by similarity.
* Note: We only select id and similarity to avoid XML column deserialization issues.
*/
@Query(value = """
SELECT d.id, 1 - (d.content_vector <=> CAST(:queryVector AS vector)) AS similarity
FROM ted.procurement_document d
WHERE d.content_vector IS NOT NULL
AND (1 - (d.content_vector <=> CAST(:queryVector AS vector))) >= :threshold
ORDER BY similarity DESC
LIMIT :limit
""", nativeQuery = true)
List<Object[]> findBySimilarity(
@Param("queryVector") String queryVector,
@Param("threshold") double threshold,
@Param("limit") int limit);
/**
* Semantic search using cosine similarity with filters.
* Returns documents sorted by similarity score.
*/
@Query(value = """
SELECT d.*, 1 - (d.content_vector <=> CAST(:queryVector AS vector)) AS similarity
FROM ted.procurement_document d
WHERE d.content_vector IS NOT NULL
AND (1 - (d.content_vector <=> CAST(:queryVector AS vector))) >= :threshold
AND (:countryCode IS NULL OR d.buyer_country_code = :countryCode)
AND (:noticeType IS NULL OR d.notice_type = :noticeType)
AND (:contractNature IS NULL OR d.contract_nature = :contractNature)
AND (:cpvPrefix IS NULL OR EXISTS (
SELECT 1 FROM unnest(d.cpv_codes) code WHERE code LIKE :cpvPrefix || '%'
))
AND (CAST(:dateFrom AS DATE) IS NULL OR d.publication_date >= CAST(:dateFrom AS DATE))
AND (CAST(:dateTo AS DATE) IS NULL OR d.publication_date <= CAST(:dateTo AS DATE))
ORDER BY similarity DESC
LIMIT :limit OFFSET :offset
""", nativeQuery = true)
List<Object[]> findBySemanticSearch(
@Param("queryVector") String queryVector,
@Param("threshold") double threshold,
@Param("countryCode") String countryCode,
@Param("noticeType") String noticeType,
@Param("contractNature") String contractNature,
@Param("cpvPrefix") String cpvPrefix,
@Param("dateFrom") LocalDate dateFrom,
@Param("dateTo") LocalDate dateTo,
@Param("limit") int limit,
@Param("offset") int offset);
/**
* Count total documents for semantic search (for pagination).
*/
@Query(value = """
SELECT COUNT(*)
FROM ted.procurement_document d
WHERE d.content_vector IS NOT NULL
AND (1 - (d.content_vector <=> CAST(:queryVector AS vector))) >= :threshold
AND (:countryCode IS NULL OR d.buyer_country_code = :countryCode)
AND (:noticeType IS NULL OR d.notice_type = :noticeType)
AND (:contractNature IS NULL OR d.contract_nature = :contractNature)
""", nativeQuery = true)
long countBySemanticSearch(
@Param("queryVector") String queryVector,
@Param("threshold") double threshold,
@Param("countryCode") String countryCode,
@Param("noticeType") String noticeType,
@Param("contractNature") String contractNature);
/**
* Get document count by country.
*/
@Query("SELECT d.buyerCountryCode, COUNT(d) FROM ProcurementDocument d " +
"WHERE d.buyerCountryCode IS NOT NULL " +
"GROUP BY d.buyerCountryCode ORDER BY COUNT(d) DESC")
List<Object[]> countByCountry();
/**
* Get document count by notice type.
*/
@Query("SELECT d.noticeType, COUNT(d) FROM ProcurementDocument d " +
"GROUP BY d.noticeType ORDER BY COUNT(d) DESC")
List<Object[]> countByNoticeType();
/**
* Get vectorization statistics.
*/
@Query("SELECT d.vectorizationStatus, COUNT(d) FROM ProcurementDocument d " +
"GROUP BY d.vectorizationStatus")
List<Object[]> countByVectorizationStatus();
/**
* Find documents with submission deadline in the future.
*/
@Query("SELECT d FROM ProcurementDocument d " +
"WHERE d.submissionDeadline > CURRENT_TIMESTAMP " +
"ORDER BY d.submissionDeadline ASC")
Page<ProcurementDocument> findUpcomingDeadlines(Pageable pageable);
/**
* Substring search on text content (ILIKE), ranked by pg_trgm trigram similarity.
* Requires the PostgreSQL pg_trgm extension.
*/
@Query(value = "SELECT * FROM ted.procurement_document d " +
"WHERE d.text_content ILIKE '%' || :query || '%' " +
"ORDER BY similarity(d.text_content, :query) DESC",
nativeQuery = true)
List<ProcurementDocument> findByTextContentContaining(@Param("query") String query, Pageable pageable);
/**
* Delete all documents created before the specified date.
* Cascading deletes will automatically remove related lots, organizations, and logs.
*
* @param cutoffDate Documents created before this date will be deleted
* @return Number of deleted documents
*/
@Modifying
@Query(value = "DELETE FROM ted.procurement_document WHERE created_at < :cutoffDate", nativeQuery = true)
int deleteByCreatedAtBefore(@Param("cutoffDate") OffsetDateTime cutoffDate);
/**
* Count documents created before the specified date.
*
* @param cutoffDate Documents created before this date will be counted
* @return Number of documents
*/
long countByCreatedAtBefore(OffsetDateTime cutoffDate);
}
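The semantic-search queries above compare `1 - (content_vector <=> :queryVector)` against a threshold: pgvector's `<=>` operator is cosine *distance*, so subtracting it from 1 recovers cosine similarity. A minimal sketch of that math (an illustration of the operator's semantics, not pgvector's actual implementation):

```java
// Sketch: the math behind pgvector's `<=>` (cosine distance) and the
// `1 - distance` conversion used in the repository's native queries.
public final class CosineMath {

    /** Cosine similarity: dot(a, b) / (|a| * |b|), in [-1, 1]. */
    public static double cosineSimilarity(double[] a, double[] b) {
        double dot = 0, na = 0, nb = 0;
        for (int i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            na += a[i] * a[i];
            nb += b[i] * b[i];
        }
        return dot / (Math.sqrt(na) * Math.sqrt(nb));
    }

    /** pgvector's `<=>` returns this; the queries convert it back via 1 - distance. */
    public static double cosineDistance(double[] a, double[] b) {
        return 1.0 - cosineSimilarity(a, b);
    }

    public static void main(String[] args) {
        double[] v = {1, 2, 3};
        double[] w = {2, 4, 6};
        // Parallel vectors: similarity 1.0, distance 0.0
        System.out.println(cosineSimilarity(v, w));
    }
}
```

A document passes the `:threshold` filter exactly when its cosine similarity to the query embedding is at least the threshold, which is why a higher threshold returns fewer, closer matches.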

@ -0,0 +1,90 @@
package at.procon.ted.repository;
import at.procon.ted.model.entity.TedDailyPackage;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
/**
* Repository for TED Daily Package entities.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Repository
public interface TedDailyPackageRepository extends JpaRepository<TedDailyPackage, UUID> {
/**
* Finds a package by its identifier (YYYYSSSSS).
*/
Optional<TedDailyPackage> findByPackageIdentifier(String packageIdentifier);
/**
* Finds a package by year and serial number.
*/
Optional<TedDailyPackage> findByYearAndSerialNumber(Integer year, Integer serialNumber);
/**
* Checks whether a package already exists.
*/
boolean existsByPackageIdentifier(String packageIdentifier);
/**
* Finds the most recently successfully downloaded package.
*/
@Query("SELECT p FROM TedDailyPackage p " +
"WHERE p.downloadStatus IN ('DOWNLOADED', 'PROCESSING', 'COMPLETED') " +
"ORDER BY p.year DESC, p.serialNumber DESC " +
"LIMIT 1")
Optional<TedDailyPackage> findLatestDownloaded();
/**
* Finds the package with the highest serial number for a given year.
*/
@Query("SELECT p FROM TedDailyPackage p " +
"WHERE p.year = :year " +
"ORDER BY p.serialNumber DESC " +
"LIMIT 1")
Optional<TedDailyPackage> findLatestByYear(@Param("year") Integer year);
/**
* Finds the package with the lowest serial number for a given year.
*/
@Query("SELECT p FROM TedDailyPackage p " +
"WHERE p.year = :year " +
"ORDER BY p.serialNumber ASC " +
"LIMIT 1")
Optional<TedDailyPackage> findFirstByYear(@Param("year") Integer year);
/**
* Finds all packages with a given download status.
*/
List<TedDailyPackage> findByDownloadStatus(TedDailyPackage.DownloadStatus status);
/**
* Finds all packages that still need to be processed.
*/
@Query("SELECT p FROM TedDailyPackage p " +
"WHERE p.downloadStatus IN ('DOWNLOADED', 'PROCESSING') " +
"ORDER BY p.year ASC, p.serialNumber ASC")
List<TedDailyPackage> findPendingProcessing();
/**
* Checks if there is a NOT_FOUND package directly after the given serial number.
* Returns 1 if the next package (fromSerial+1) is NOT_FOUND, 0 otherwise.
*/
@Query("SELECT COUNT(p) FROM TedDailyPackage p " +
"WHERE p.year = :year " +
"AND p.serialNumber = :fromSerial + 1 " +
"AND p.downloadStatus = 'NOT_FOUND'")
long countConsecutiveNotFound(@Param("year") Integer year, @Param("fromSerial") Integer fromSerial);
/**
* Finds all packages for a given year.
*/
List<TedDailyPackage> findByYearOrderBySerialNumberAsc(Integer year);
}
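The YYYYSSSSS identifier referenced in `findByPackageIdentifier` combines the 4-digit year with a zero-padded 5-digit serial number. A hypothetical helper sketching that format (the class name and padding rule are assumptions inferred from the Javadoc, not code from the project):

```java
// Hypothetical sketch of the YYYYSSSSS package-identifier format
// (4-digit year + 5-digit zero-padded serial; padding is an assumption).
public final class PackageId {

    public static String format(int year, int serialNumber) {
        return String.format("%04d%05d", year, serialNumber);
    }

    public static void main(String[] args) {
        // e.g. year 2024, serial 123 -> "202400123"
        System.out.println(format(2024, 123));
    }
}
```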

@ -0,0 +1,183 @@
package at.procon.ted.service;
import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.ProcessingLog;
import at.procon.ted.repository.ProcurementDocumentRepository;
import at.procon.ted.util.HashUtils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
* Service for batch processing of TED procurement documents.
*
* Processes all XML files from a Daily Package in a single transaction:
* 1. Parse all XML files
* 2. Check for duplicates
* 3. Batch insert all new documents (saveAll)
* 4. Trigger vectorization for all inserted documents
*
* This is much more efficient than processing documents one by one.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class BatchDocumentProcessingService {
private final XmlParserService xmlParserService;
private final ProcurementDocumentRepository documentRepository;
private final ProcessingLogService processingLogService;
/**
* Process a batch of XML files from a Daily Package.
*
* @param xmlFiles List of XML file paths to process
* @return Batch processing result with statistics
*/
@Transactional
public BatchProcessingResult processBatch(List<Path> xmlFiles) {
long startTime = System.currentTimeMillis();
List<ProcurementDocument> documentsToInsert = new ArrayList<>();
List<String> duplicateHashes = new ArrayList<>();
List<ProcessingError> errors = new ArrayList<>();
log.debug("Processing batch of {} XML files", xmlFiles.size());
// Step 1: Parse all XML files and check for duplicates
for (Path xmlFile : xmlFiles) {
try {
// Read XML content
String xmlContent = Files.readString(xmlFile, StandardCharsets.UTF_8);
String filename = xmlFile.getFileName().toString();
String filePath = xmlFile.toString();
long fileSize = Files.size(xmlFile);
// Compute document hash
String documentHash = HashUtils.computeSha256(xmlContent);
// Check for duplicate
if (documentRepository.existsByDocumentHash(documentHash)) {
log.debug("Duplicate document detected, skipping: {} (hash: {})", filename, documentHash);
duplicateHashes.add(documentHash);
processingLogService.logEvent(
null, documentHash, ProcessingLog.EventType.DUPLICATE,
ProcessingLog.EventStatus.SKIPPED,
"Document already exists in database", null, filename, 0);
continue;
}
// Parse XML document
ProcurementDocument document = xmlParserService.parseDocument(xmlContent);
document.setDocumentHash(documentHash);
document.setSourceFilename(filename);
document.setSourcePath(filePath);
document.setFileSizeBytes(fileSize);
documentsToInsert.add(document);
} catch (IOException e) {
log.warn("Failed to read XML file {}: {}", xmlFile, e.getMessage());
errors.add(new ProcessingError(xmlFile.toString(), "File read error: " + e.getMessage()));
} catch (XmlParserService.XmlParsingException e) {
log.warn("Failed to parse XML file {}: {}", xmlFile, e.getMessage());
errors.add(new ProcessingError(xmlFile.toString(), "XML parsing error: " + e.getMessage()));
String hash = "unknown";
try {
String xmlContent = Files.readString(xmlFile, StandardCharsets.UTF_8);
hash = HashUtils.computeSha256(xmlContent);
} catch (IOException ignored) {}
processingLogService.logEvent(
null, hash, ProcessingLog.EventType.ERROR,
ProcessingLog.EventStatus.FAILURE,
"XML parsing failed", e.getMessage(), xmlFile.getFileName().toString(), 0);
} catch (Exception e) {
log.error("Unexpected error processing file {}: {}", xmlFile, e.getMessage(), e);
errors.add(new ProcessingError(xmlFile.toString(), "Unexpected error: " + e.getMessage()));
}
}
// Step 2: Batch insert all new documents
List<UUID> insertedDocumentIds = new ArrayList<>();
if (!documentsToInsert.isEmpty()) {
log.info("Batch inserting {} new documents into database", documentsToInsert.size());
List<ProcurementDocument> savedDocuments = documentRepository.saveAll(documentsToInsert);
// Log success for each document
for (ProcurementDocument doc : savedDocuments) {
insertedDocumentIds.add(doc.getId());
processingLogService.logEvent(
doc, doc.getDocumentHash(), ProcessingLog.EventType.STORED,
ProcessingLog.EventStatus.SUCCESS,
"Document parsed and stored successfully (batch)", null,
doc.getSourceFilename(), 0);
}
log.info("Successfully inserted {} documents in batch", savedDocuments.size());
}
// Step 3: Vectorization will be picked up by VectorizationRoute scheduler
// No need to publish individual events - the scheduler checks for PENDING documents
// This avoids creating 149k+ inflight exchanges in the queue
if (!insertedDocumentIds.isEmpty()) {
log.debug("Inserted {} documents with vectorization_status=PENDING, " +
"will be picked up by vectorization scheduler", insertedDocumentIds.size());
}
long duration = System.currentTimeMillis() - startTime;
log.info("Batch processing completed: {} inserted, {} duplicates, {} errors in {}ms",
insertedDocumentIds.size(), duplicateHashes.size(), errors.size(), duration);
return new BatchProcessingResult(
insertedDocumentIds.size(),
duplicateHashes.size(),
errors.size(),
duration,
insertedDocumentIds,
errors
);
}
/**
* Result of batch processing operation.
*/
public record BatchProcessingResult(
int insertedCount,
int duplicateCount,
int errorCount,
long durationMs,
List<UUID> insertedDocumentIds,
List<ProcessingError> errors
) {
public int getTotalProcessed() {
return insertedCount + duplicateCount + errorCount;
}
public boolean hasErrors() {
return errorCount > 0;
}
}
/**
* Processing error details.
*/
public record ProcessingError(String filename, String errorMessage) {}
}
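The duplicate check above hinges on `HashUtils.computeSha256` producing a stable content hash. A minimal sketch of what such a SHA-256 helper might look like using only the JDK (the project's actual `HashUtils` implementation may differ):

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

// Sketch of a SHA-256 content hash for idempotency checks, as a
// lowercase hex string (assumption: mirrors HashUtils.computeSha256).
public final class Sha256Sketch {

    public static String computeSha256(String content) {
        try {
            byte[] digest = MessageDigest.getInstance("SHA-256")
                    .digest(content.getBytes(StandardCharsets.UTF_8));
            StringBuilder hex = new StringBuilder(digest.length * 2);
            for (byte b : digest) {
                hex.append(String.format("%02x", b));
            }
            return hex.toString();
        } catch (NoSuchAlgorithmException e) {
            // SHA-256 is guaranteed on every JVM, so this is unreachable in practice.
            throw new IllegalStateException("SHA-256 not available", e);
        }
    }

    public static void main(String[] args) {
        // Standard test vector:
        // ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
        System.out.println(computeSha256("abc"));
    }
}
```

Because the hash is computed over the raw XML string, any byte-identical re-delivery of a notice is skipped by `existsByDocumentHash` before parsing.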

@ -0,0 +1,95 @@
package at.procon.ted.service;
import at.procon.ted.repository.ProcurementDocumentRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.OffsetDateTime;
/**
* Service for cleaning up old procurement documents.
*
* Automatically deletes documents older than a configurable retention period.
* Default: 7 years
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class DataCleanupService {
private final ProcurementDocumentRepository documentRepository;
@Value("${ted.cleanup.retention-years:7}")
private int retentionYears;
@Value("${ted.cleanup.enabled:false}")
private boolean cleanupEnabled;
/**
* Delete procurement documents older than retention period.
* Runs daily at 2 AM.
*/
@Scheduled(cron = "${ted.cleanup.cron:0 0 2 * * *}")
@Transactional
public void deleteOldDocuments() {
if (!cleanupEnabled) {
log.debug("Data cleanup is disabled");
return;
}
OffsetDateTime cutoffDate = OffsetDateTime.now().minusYears(retentionYears);
log.info("Starting cleanup of documents older than {} (retention: {} years)",
cutoffDate, retentionYears);
try {
int deletedCount = documentRepository.deleteByCreatedAtBefore(cutoffDate);
if (deletedCount > 0) {
log.info("✅ Deleted {} documents older than {} years", deletedCount, retentionYears);
} else {
log.debug("No documents to delete");
}
} catch (Exception e) {
log.error("❌ Error during cleanup: {}", e.getMessage(), e);
}
}
/**
* Manually trigger cleanup of old documents.
*
* @param years Number of years to retain (documents older than this will be deleted)
* @return Number of deleted documents
*/
@Transactional
public int deleteDocumentsOlderThan(int years) {
OffsetDateTime cutoffDate = OffsetDateTime.now().minusYears(years);
log.info("Manual cleanup: Deleting documents older than {} (retention: {} years)",
cutoffDate, years);
int deletedCount = documentRepository.deleteByCreatedAtBefore(cutoffDate);
log.info("✅ Manually deleted {} documents", deletedCount);
return deletedCount;
}
/**
* Get count of documents that would be deleted.
*
* @param years Number of years to check
* @return Number of documents older than specified years
*/
public long countDocumentsOlderThan(int years) {
OffsetDateTime cutoffDate = OffsetDateTime.now().minusYears(years);
return documentRepository.countByCreatedAtBefore(cutoffDate);
}
}
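The cleanup cutoff is simply "now minus the retention period". A tiny sketch with a fixed "now" for reproducibility (the service itself uses `OffsetDateTime.now()`):

```java
import java.time.OffsetDateTime;
import java.time.ZoneOffset;

// Sketch of the retention-cutoff calculation in deleteOldDocuments:
// documents with created_at < cutoff are deleted.
public final class RetentionCutoff {

    public static OffsetDateTime cutoff(OffsetDateTime now, int retentionYears) {
        return now.minusYears(retentionYears);
    }

    public static void main(String[] args) {
        OffsetDateTime now = OffsetDateTime.of(2025, 6, 15, 2, 0, 0, 0, ZoneOffset.UTC);
        // With the default 7-year retention: 2018-06-15T02:00Z
        System.out.println(cutoff(now, 7));
    }
}
```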

@ -0,0 +1,195 @@
package at.procon.ted.service;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.event.DocumentSavedEvent;
import at.procon.ted.model.entity.*;
import at.procon.ted.repository.ProcurementDocumentRepository;
import at.procon.ted.util.HashUtils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.OffsetDateTime;
import java.util.Optional;
/**
* Service for processing TED procurement documents.
*
* Handles the complete processing pipeline:
* 1. Hash computation for idempotency check
* 2. XML parsing and data extraction
* 3. Database storage
*
* Note: Vectorization is triggered separately by VectorizationRoute via Camel wireTap.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class DocumentProcessingService {
private final XmlParserService xmlParserService;
private final ProcurementDocumentRepository documentRepository;
private final ProcessingLogService processingLogService;
private final TedProcessorProperties properties;
private final ApplicationEventPublisher eventPublisher;
/**
* Process an XML document from the file system.
*
* @param xmlContent The XML content
* @param filename Source filename
* @param filePath Source file path
* @param fileSize File size in bytes
* @return Processing result with document ID if successful
*/
@Transactional
public ProcessingResult processDocument(String xmlContent, String filename, String filePath, Long fileSize) {
long startTime = System.currentTimeMillis();
// Step 1: Compute document hash
String documentHash = HashUtils.computeSha256(xmlContent);
log.debug("Processing document {} with hash {}", filename, documentHash);
// Step 2: Check for duplicate (idempotent processing)
if (documentRepository.existsByDocumentHash(documentHash)) {
log.debug("Duplicate document detected, skipping: {} (hash: {})", filename, documentHash);
processingLogService.logEvent(
null, documentHash, ProcessingLog.EventType.DUPLICATE,
ProcessingLog.EventStatus.SKIPPED,
"Document already exists in database", null, filename,
(int) (System.currentTimeMillis() - startTime));
return ProcessingResult.duplicate(documentHash);
}
try {
// Step 3: Parse XML document
ProcurementDocument document = xmlParserService.parseDocument(xmlContent);
document.setDocumentHash(documentHash);
document.setSourceFilename(filename);
document.setSourcePath(filePath);
document.setFileSizeBytes(fileSize);
document.setProcessingDurationMs((int) (System.currentTimeMillis() - startTime));
// Step 4: Save to database
document = documentRepository.save(document);
log.debug("Successfully processed document: {} -> {} (publication: {})",
filename, document.getId(), document.getPublicationId());
// Log success
processingLogService.logEvent(
document, documentHash, ProcessingLog.EventType.STORED,
ProcessingLog.EventStatus.SUCCESS,
"Document parsed and stored successfully", null, filename,
(int) (System.currentTimeMillis() - startTime));
// Publish event to trigger vectorization AFTER transaction commit
// This ensures document is visible in DB and avoids transaction isolation issues
eventPublisher.publishEvent(new DocumentSavedEvent(document.getId(), document.getPublicationId()));
log.debug("Document saved successfully, vectorization event published: {}", document.getId());
return ProcessingResult.success(document.getId(), documentHash, document.getPublicationId());
} catch (XmlParserService.XmlParsingException e) {
log.error("Failed to parse XML document {}: {}", filename, e.getMessage());
processingLogService.logEvent(
null, documentHash, ProcessingLog.EventType.ERROR,
ProcessingLog.EventStatus.FAILURE,
"XML parsing failed", e.getMessage(), filename,
(int) (System.currentTimeMillis() - startTime));
return ProcessingResult.error(documentHash, "XML parsing failed: " + e.getMessage());
} catch (Exception e) {
log.error("Unexpected error processing document {}: {}", filename, e.getMessage(), e);
processingLogService.logEvent(
null, documentHash, ProcessingLog.EventType.ERROR,
ProcessingLog.EventStatus.FAILURE,
"Processing failed", e.getMessage(), filename,
(int) (System.currentTimeMillis() - startTime));
return ProcessingResult.error(documentHash, "Processing failed: " + e.getMessage());
}
}
/**
* Reprocess a document (e.g., after schema update).
*/
@Transactional
public Optional<ProcurementDocument> reprocessDocument(String publicationId) {
return documentRepository.findByPublicationId(publicationId)
.map(existing -> {
try {
// Re-parse the stored XML
ProcurementDocument updated = xmlParserService.parseDocument(existing.getXmlDocument());
// Preserve identity and tracking fields
updated.setId(existing.getId());
updated.setDocumentHash(existing.getDocumentHash());
updated.setSourceFilename(existing.getSourceFilename());
updated.setSourcePath(existing.getSourcePath());
updated.setFileSizeBytes(existing.getFileSizeBytes());
updated.setCreatedAt(existing.getCreatedAt());
// Reset vectorization
updated.setVectorizationStatus(VectorizationStatus.PENDING);
updated.setContentVector(null);
updated.setVectorizedAt(null);
updated.setVectorizationError(null);
documentRepository.save(updated);
// Note: Re-vectorization will be triggered automatically by
// VectorizationRoute scheduler (checks for PENDING documents every 60s)
return updated;
} catch (Exception e) {
log.error("Failed to reprocess document {}: {}", publicationId, e.getMessage());
return null;
}
});
}
/**
* Result of document processing operation.
*/
public record ProcessingResult(
Status status,
java.util.UUID documentId,
String documentHash,
String publicationId,
String errorMessage
) {
public enum Status {
SUCCESS,
DUPLICATE,
ERROR
}
public static ProcessingResult success(java.util.UUID id, String hash, String pubId) {
return new ProcessingResult(Status.SUCCESS, id, hash, pubId, null);
}
public static ProcessingResult duplicate(String hash) {
return new ProcessingResult(Status.DUPLICATE, null, hash, null, null);
}
public static ProcessingResult error(String hash, String message) {
return new ProcessingResult(Status.ERROR, null, hash, null, message);
}
public boolean isSuccess() {
return status == Status.SUCCESS;
}
public boolean isDuplicate() {
return status == Status.DUPLICATE;
}
public boolean isError() {
return status == Status.ERROR;
}
}
}

@ -0,0 +1,241 @@
package at.procon.ted.service;
import at.procon.ted.service.SimilaritySearchService.SimilarDocument;
import at.procon.ted.service.SimilaritySearchService.SimilaritySearchResponse;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.common.usermodel.HyperlinkType;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.stereotype.Service;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
/**
* Service for exporting similarity search results to Excel (XLSX) format.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@Slf4j
public class ExcelExportService {
private static final String[] HEADERS = {
"Rang",
"Ähnlichkeit %",
"Publication ID",
"Projekt Titel",
"Auftraggeber",
"Land",
"Stadt",
"Vertragsart",
"Verfahrensart",
"Publikationsdatum",
"Einreichfrist",
"Geschätzter Wert",
"Währung",
"CPV Codes",
"TED Link"
};
/**
* Export similarity search results to an Excel file.
*
* @param response the similarity search response
* @param sourceFilename the name of the source PDF file
* @param outputDir the output directory for the Excel file
* @return the path to the generated Excel file
*/
public String exportToExcel(SimilaritySearchResponse response, String sourceFilename, String outputDir) throws IOException {
// Ensure output directory exists
File dir = new File(outputDir);
if (!dir.exists()) {
dir.mkdirs();
}
// Generate output filename
String baseName = sourceFilename != null
? sourceFilename.replaceAll("\\.[^.]+$", "") // Remove extension
: "search_results";
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
String outputFilename = baseName + "_results_" + timestamp + ".xlsx";
File outputFile = new File(dir, outputFilename);
log.info("Exporting {} results to Excel: {}", response.getResultCount(), outputFile.getAbsolutePath());
try (Workbook workbook = new XSSFWorkbook()) {
Sheet sheet = workbook.createSheet("Ähnliche Ausschreibungen");
// Create styles
CellStyle headerStyle = createHeaderStyle(workbook);
CellStyle linkStyle = createLinkStyle(workbook);
CellStyle percentStyle = createPercentStyle(workbook);
CellStyle dateStyle = createDateStyle(workbook);
CellStyle currencyStyle = createCurrencyStyle(workbook);
// Create header row
Row headerRow = sheet.createRow(0);
for (int i = 0; i < HEADERS.length; i++) {
Cell cell = headerRow.createCell(i);
cell.setCellValue(HEADERS[i]);
cell.setCellStyle(headerStyle);
}
// Add data rows (source info is in filename, no need to add it here)
List<SimilarDocument> results = response.getResults();
int rowNum = 1; // Start directly after header
int rank = 1;
for (SimilarDocument doc : results) {
Row row = sheet.createRow(rowNum++);
int colNum = 0;
// Rang
row.createCell(colNum++).setCellValue(rank++);
// Ähnlichkeit %
Cell simCell = row.createCell(colNum++);
if (doc.getSimilarityPercent() != null) {
simCell.setCellValue(doc.getSimilarityPercent());
simCell.setCellStyle(percentStyle);
}
// Publication ID
row.createCell(colNum++).setCellValue(doc.getPublicationId() != null ? doc.getPublicationId() : "");
// Projekt Titel
row.createCell(colNum++).setCellValue(doc.getProjectTitle() != null ? doc.getProjectTitle() : "");
// Auftraggeber
row.createCell(colNum++).setCellValue(doc.getBuyerName() != null ? doc.getBuyerName() : "");
// Land
row.createCell(colNum++).setCellValue(doc.getBuyerCountryCode() != null ? doc.getBuyerCountryCode() : "");
// Stadt
row.createCell(colNum++).setCellValue(doc.getBuyerCity() != null ? doc.getBuyerCity() : "");
// Vertragsart
row.createCell(colNum++).setCellValue(doc.getContractNature() != null ? doc.getContractNature() : "");
// Verfahrensart
row.createCell(colNum++).setCellValue(doc.getProcedureType() != null ? doc.getProcedureType() : "");
// Publikationsdatum
Cell pubDateCell = row.createCell(colNum++);
if (doc.getPublicationDate() != null) {
pubDateCell.setCellValue(doc.getPublicationDate().toString());
pubDateCell.setCellStyle(dateStyle);
}
// Einreichfrist
Cell deadlineCell = row.createCell(colNum++);
if (doc.getSubmissionDeadline() != null) {
deadlineCell.setCellValue(doc.getSubmissionDeadline().toLocalDate().toString());
deadlineCell.setCellStyle(dateStyle);
}
// Geschätzter Wert
Cell valueCell = row.createCell(colNum++);
if (doc.getEstimatedValue() != null) {
valueCell.setCellValue(doc.getEstimatedValue().doubleValue());
valueCell.setCellStyle(currencyStyle);
}
// Währung
row.createCell(colNum++).setCellValue(
doc.getEstimatedValueCurrency() != null ? doc.getEstimatedValueCurrency() : "");
// CPV Codes
String cpvCodes = doc.getCpvCodes() != null && !doc.getCpvCodes().isEmpty()
? String.join(", ", doc.getCpvCodes())
: "";
row.createCell(colNum++).setCellValue(cpvCodes);
// TED Link (Hyperlink)
Cell linkCell = row.createCell(colNum);
if (doc.getNoticeUrl() != null && !doc.getNoticeUrl().isEmpty()) {
linkCell.setCellValue("Zur Ausschreibung");
linkCell.setCellStyle(linkStyle);
CreationHelper createHelper = workbook.getCreationHelper();
Hyperlink hyperlink = createHelper.createHyperlink(HyperlinkType.URL);
hyperlink.setAddress(doc.getNoticeUrl());
linkCell.setHyperlink(hyperlink);
}
}
// Auto-size columns
for (int i = 0; i < HEADERS.length; i++) {
sheet.autoSizeColumn(i);
// Set minimum width for some columns
if (i == 3) { // Projekt Titel
sheet.setColumnWidth(i, Math.max(sheet.getColumnWidth(i), 50 * 256));
}
if (i == 4) { // Auftraggeber
sheet.setColumnWidth(i, Math.max(sheet.getColumnWidth(i), 30 * 256));
}
}
// Add filter
sheet.setAutoFilter(new org.apache.poi.ss.util.CellRangeAddress(0, 0, 0, HEADERS.length - 1));
// Freeze header row
sheet.createFreezePane(0, 1);
// Write to file
try (FileOutputStream fos = new FileOutputStream(outputFile)) {
workbook.write(fos);
}
log.info("Excel export completed: {} ({} results)", outputFile.getAbsolutePath(), results.size());
return outputFile.getAbsolutePath();
}
}
private CellStyle createHeaderStyle(Workbook workbook) {
CellStyle style = workbook.createCellStyle();
Font font = workbook.createFont();
font.setBold(true);
font.setColor(IndexedColors.WHITE.getIndex());
style.setFont(font);
style.setFillForegroundColor(IndexedColors.DARK_BLUE.getIndex());
style.setFillPattern(FillPatternType.SOLID_FOREGROUND);
style.setBorderBottom(BorderStyle.THIN);
style.setAlignment(HorizontalAlignment.CENTER);
return style;
}
private CellStyle createLinkStyle(Workbook workbook) {
CellStyle style = workbook.createCellStyle();
Font font = workbook.createFont();
font.setUnderline(Font.U_SINGLE);
font.setColor(IndexedColors.BLUE.getIndex());
style.setFont(font);
return style;
}
private CellStyle createPercentStyle(Workbook workbook) {
CellStyle style = workbook.createCellStyle();
style.setAlignment(HorizontalAlignment.CENTER);
return style;
}
private CellStyle createDateStyle(Workbook workbook) {
CellStyle style = workbook.createCellStyle();
style.setAlignment(HorizontalAlignment.CENTER);
return style;
}
private CellStyle createCurrencyStyle(Workbook workbook) {
CellStyle style = workbook.createCellStyle();
DataFormat format = workbook.createDataFormat();
style.setDataFormat(format.getFormat("#,##0.00"));
style.setAlignment(HorizontalAlignment.RIGHT);
return style;
}
}
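The output filename is derived by stripping the source file's extension and appending `_results_` plus a timestamp. A sketch of that logic with the timestamp passed in for illustration (the service uses `LocalDateTime.now()`):

```java
// Sketch of the output-filename logic in exportToExcel; timestamp is a
// parameter here only so the behavior is deterministic.
public final class ExportName {

    public static String outputFilename(String sourceFilename, String timestamp) {
        String baseName = sourceFilename != null
                ? sourceFilename.replaceAll("\\.[^.]+$", "") // strip last extension
                : "search_results";
        return baseName + "_results_" + timestamp + ".xlsx";
    }

    public static void main(String[] args) {
        // tender_results_20250101_120000.xlsx
        System.out.println(outputFilename("tender.pdf", "20250101_120000"));
    }
}
```

Note that POI column widths are set in 1/256ths of a character width, which is why the service multiplies the desired character count by 256 in `setColumnWidth`.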

@ -0,0 +1,40 @@
package at.procon.ted.service;
import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.ProcessingLog;
import at.procon.ted.repository.ProcessingLogRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
/**
* Service for logging processing events.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class ProcessingLogService {
private final ProcessingLogRepository logRepository;
@Transactional
public void logEvent(ProcurementDocument document, String documentHash, ProcessingLog.EventType eventType,
ProcessingLog.EventStatus eventStatus, String message, String errorDetails,
String sourceFilename, Integer durationMs) {
ProcessingLog logEntry = ProcessingLog.builder()
.document(document)
.documentHash(documentHash)
.eventType(eventType)
.eventStatus(eventStatus)
.message(message)
.errorDetails(errorDetails)
.sourceFilename(sourceFilename)
.durationMs(durationMs)
.build();
logRepository.save(logEntry);
}
}

@ -0,0 +1,456 @@
package at.procon.ted.service;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.dto.DocumentDtos.*;
import at.procon.ted.model.entity.*;
import at.procon.ted.repository.ProcurementDocumentRepository;
import jakarta.persistence.EntityManager;
import jakarta.persistence.PersistenceContext;
import jakarta.persistence.criteria.*;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Sort;
import org.springframework.data.jpa.domain.Specification;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.util.*;
import java.util.stream.Collectors;
/**
* Service for searching procurement documents.
*
* Supports:
* - Structured search with filters (country, type, CPV codes, dates, etc.)
* - Semantic search using vector similarity
* - Combined structured + semantic search
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
@Transactional(readOnly = true)
public class SearchService {
private final ProcurementDocumentRepository documentRepository;
private final VectorizationService vectorizationService;
private final TedProcessorProperties properties;
@PersistenceContext
private EntityManager entityManager;
/**
* Search documents with combined structured and semantic filters.
*/
public SearchResponse search(SearchRequest request) {
// Normalize pagination parameters
int page = request.getPage() != null ? request.getPage() : 0;
int size = Math.min(
request.getSize() != null ? request.getSize() : properties.getSearch().getDefaultPageSize(),
properties.getSearch().getMaxPageSize()
);
// Check if semantic search is requested
boolean hasSemanticQuery = request.getSemanticQuery() != null && !request.getSemanticQuery().isBlank();
if (hasSemanticQuery && vectorizationService.isAvailable()) {
return semanticSearch(request, page, size);
} else {
return structuredSearch(request, page, size);
}
}
/**
* Perform structured search using JPA Specifications.
*/
private SearchResponse structuredSearch(SearchRequest request, int page, int size) {
Specification<ProcurementDocument> spec = buildSpecification(request);
Sort sort = buildSort(request.getSortBy(), request.getSortDirection());
Pageable pageable = PageRequest.of(page, size, sort);
Page<ProcurementDocument> result = documentRepository.findAll(spec, pageable);
List<DocumentSummary> summaries = result.getContent().stream()
.map(this::toSummary)
.collect(Collectors.toList());
return SearchResponse.builder()
.documents(summaries)
.page(page)
.size(size)
.totalElements(result.getTotalElements())
.totalPages(result.getTotalPages())
.hasNext(result.hasNext())
.hasPrevious(result.hasPrevious())
.build();
}
/**
* Perform semantic search with vector similarity.
*/
private SearchResponse semanticSearch(SearchRequest request, int page, int size) {
try {
// Generate query embedding
float[] queryEmbedding = vectorizationService.generateQueryEmbedding(request.getSemanticQuery());
String vectorStr = vectorizationService.floatArrayToVectorString(queryEmbedding);
double threshold = request.getSimilarityThreshold() != null
? request.getSimilarityThreshold()
: properties.getSearch().getSimilarityThreshold();
// Execute native query for vector search
List<Object[]> results = documentRepository.findBySemanticSearch(
vectorStr,
threshold,
request.getCountryCode(),
request.getNoticeType() != null ? request.getNoticeType().name() : null,
request.getContractNature() != null ? request.getContractNature().name() : null,
request.getCpvPrefix(),
request.getPublicationDateFrom(),
request.getPublicationDateTo(),
size,
page * size
);
// Count total for pagination
long totalElements = documentRepository.countBySemanticSearch(
vectorStr,
threshold,
request.getCountryCode(),
request.getNoticeType() != null ? request.getNoticeType().name() : null,
request.getContractNature() != null ? request.getContractNature().name() : null
);
// Map results to summaries
List<DocumentSummary> summaries = results.stream()
.map(this::mapSemanticResult)
.collect(Collectors.toList());
int totalPages = (int) Math.ceil((double) totalElements / size);
return SearchResponse.builder()
.documents(summaries)
.page(page)
.size(size)
.totalElements(totalElements)
.totalPages(totalPages)
.hasNext(page < totalPages - 1)
.hasPrevious(page > 0)
.build();
} catch (Exception e) {
log.error("Semantic search failed, falling back to structured search: {}", e.getMessage());
// Fallback to structured search without semantic component
return structuredSearch(request, page, size);
}
}
/**
* Build JPA Specification from search request.
*/
private Specification<ProcurementDocument> buildSpecification(SearchRequest request) {
return (root, query, cb) -> {
List<Predicate> predicates = new ArrayList<>();
// Country filter
if (request.getCountryCode() != null && !request.getCountryCode().isBlank()) {
predicates.add(cb.equal(root.get("buyerCountryCode"), request.getCountryCode()));
}
// Multiple countries filter
if (request.getCountryCodes() != null && !request.getCountryCodes().isEmpty()) {
predicates.add(root.get("buyerCountryCode").in(request.getCountryCodes()));
}
// Notice type filter
if (request.getNoticeType() != null) {
predicates.add(cb.equal(root.get("noticeType"), request.getNoticeType()));
}
// Contract nature filter
if (request.getContractNature() != null) {
predicates.add(cb.equal(root.get("contractNature"), request.getContractNature()));
}
// Procedure type filter
if (request.getProcedureType() != null) {
predicates.add(cb.equal(root.get("procedureType"), request.getProcedureType()));
}
// Publication date range
if (request.getPublicationDateFrom() != null) {
predicates.add(cb.greaterThanOrEqualTo(root.get("publicationDate"), request.getPublicationDateFrom()));
}
if (request.getPublicationDateTo() != null) {
predicates.add(cb.lessThanOrEqualTo(root.get("publicationDate"), request.getPublicationDateTo()));
}
// Submission deadline filter
if (request.getSubmissionDeadlineAfter() != null) {
predicates.add(cb.greaterThan(root.get("submissionDeadline"), request.getSubmissionDeadlineAfter()));
}
// EU funded filter
if (request.getEuFunded() != null) {
predicates.add(cb.equal(root.get("euFunded"), request.getEuFunded()));
}
// Text search on buyer name
if (request.getBuyerNameContains() != null && !request.getBuyerNameContains().isBlank()) {
predicates.add(cb.like(cb.lower(root.get("buyerName")),
"%" + request.getBuyerNameContains().toLowerCase() + "%"));
}
// Text search on project title
if (request.getProjectTitleContains() != null && !request.getProjectTitleContains().isBlank()) {
predicates.add(cb.like(cb.lower(root.get("projectTitle")),
"%" + request.getProjectTitleContains().toLowerCase() + "%"));
}
return cb.and(predicates.toArray(new Predicate[0]));
};
}
/**
* Build Sort from request parameters.
*/
private Sort buildSort(String sortBy, String sortDirection) {
String field = sortBy != null ? sortBy : "publicationDate";
Sort.Direction direction = "asc".equalsIgnoreCase(sortDirection)
? Sort.Direction.ASC
: Sort.Direction.DESC;
return Sort.by(direction, field);
}
/**
* Convert entity to summary DTO.
*/
private DocumentSummary toSummary(ProcurementDocument doc) {
return DocumentSummary.builder()
.id(doc.getId())
.publicationId(doc.getPublicationId())
.noticeId(doc.getNoticeId())
.noticeType(doc.getNoticeType())
.projectTitle(doc.getProjectTitle())
.buyerName(doc.getBuyerName())
.buyerCountryCode(doc.getBuyerCountryCode())
.buyerCity(doc.getBuyerCity())
.contractNature(doc.getContractNature())
.procedureType(doc.getProcedureType())
.publicationDate(doc.getPublicationDate())
.submissionDeadline(doc.getSubmissionDeadline())
.cpvCodes(doc.getCpvCodes() != null ? Arrays.asList(doc.getCpvCodes()) : List.of())
.totalLots(doc.getTotalLots())
.estimatedValue(doc.getEstimatedValue())
.estimatedValueCurrency(doc.getEstimatedValueCurrency())
.build();
}
/**
* Map semantic search result array to summary DTO.
*/
private DocumentSummary mapSemanticResult(Object[] row) {
// Results from native query: id, publication_id, project_title, buyer_name, buyer_country_code, publication_date, similarity
UUID id = (UUID) row[0];
// Re-fetch the full entity so the summary is complete (costs one extra query per row)
return documentRepository.findById(id)
.map(doc -> {
DocumentSummary summary = toSummary(doc);
// Set similarity score from query result
if (row.length > 6 && row[6] != null) {
summary.setSimilarity(((Number) row[6]).doubleValue());
}
return summary;
})
.orElse(DocumentSummary.builder()
.id(id)
.publicationId(row[1] != null ? row[1].toString() : null)
.projectTitle(row[2] != null ? row[2].toString() : null)
.buyerName(row[3] != null ? row[3].toString() : null)
.buyerCountryCode(row[4] != null ? row[4].toString() : null)
.publicationDate(row[5] != null ? (LocalDate) row[5] : null)
.similarity(row.length > 6 && row[6] != null ? ((Number) row[6]).doubleValue() : null)
.build());
}
/**
* Get document by ID with full details.
*/
public Optional<DocumentDetail> getDocumentDetail(UUID id) {
return documentRepository.findById(id).map(this::toDetail);
}
/**
* Get document by publication ID.
*/
public Optional<DocumentDetail> getDocumentByPublicationId(String publicationId) {
return documentRepository.findByPublicationId(publicationId).map(this::toDetail);
}
/**
* Convert entity to detail DTO.
*/
private DocumentDetail toDetail(ProcurementDocument doc) {
return DocumentDetail.builder()
.id(doc.getId())
.documentHash(doc.getDocumentHash())
.publicationId(doc.getPublicationId())
.noticeId(doc.getNoticeId())
.ojsId(doc.getOjsId())
.contractFolderId(doc.getContractFolderId())
.noticeType(doc.getNoticeType())
.noticeSubtypeCode(doc.getNoticeSubtypeCode())
.sdkVersion(doc.getSdkVersion())
.ublVersion(doc.getUblVersion())
.languageCode(doc.getLanguageCode())
.issueDateTime(doc.getIssueDateTime())
.publicationDate(doc.getPublicationDate())
.submissionDeadline(doc.getSubmissionDeadline())
.buyerName(doc.getBuyerName())
.buyerCountryCode(doc.getBuyerCountryCode())
.buyerCity(doc.getBuyerCity())
.buyerPostalCode(doc.getBuyerPostalCode())
.buyerNutsCode(doc.getBuyerNutsCode())
.buyerActivityType(doc.getBuyerActivityType())
.buyerLegalType(doc.getBuyerLegalType())
.projectTitle(doc.getProjectTitle())
.projectDescription(doc.getProjectDescription())
.internalReference(doc.getInternalReference())
.contractNature(doc.getContractNature())
.procedureType(doc.getProcedureType())
.cpvCodes(doc.getCpvCodes() != null ? Arrays.asList(doc.getCpvCodes()) : List.of())
.nutsCodes(doc.getNutsCodes() != null ? Arrays.asList(doc.getNutsCodes()) : List.of())
.estimatedValue(doc.getEstimatedValue())
.estimatedValueCurrency(doc.getEstimatedValueCurrency())
.totalLots(doc.getTotalLots())
.maxLotsAwarded(doc.getMaxLotsAwarded())
.maxLotsSubmitted(doc.getMaxLotsSubmitted())
.lots(doc.getLots().stream().map(this::toLotSummary).collect(Collectors.toList()))
.organizations(doc.getOrganizations().stream().map(this::toOrgSummary).collect(Collectors.toList()))
.regulatoryDomain(doc.getRegulatoryDomain())
.euFunded(doc.getEuFunded())
.vectorizationStatus(doc.getVectorizationStatus())
.vectorizedAt(doc.getVectorizedAt())
.sourceFilename(doc.getSourceFilename())
.fileSizeBytes(doc.getFileSizeBytes())
.createdAt(doc.getCreatedAt())
.updatedAt(doc.getUpdatedAt())
.build();
}
private LotSummary toLotSummary(ProcurementLot lot) {
return LotSummary.builder()
.id(lot.getId())
.lotId(lot.getLotId())
.internalId(lot.getInternalId())
.title(lot.getTitle())
.description(lot.getDescription())
.cpvCodes(lot.getCpvCodes() != null ? Arrays.asList(lot.getCpvCodes()) : List.of())
.nutsCodes(lot.getNutsCodes() != null ? Arrays.asList(lot.getNutsCodes()) : List.of())
.estimatedValue(lot.getEstimatedValue())
.estimatedValueCurrency(lot.getEstimatedValueCurrency())
.durationValue(lot.getDurationValue())
.durationUnit(lot.getDurationUnit())
.submissionDeadline(lot.getSubmissionDeadline())
.euFunded(lot.getEuFunded())
.build();
}
private OrganizationSummary toOrgSummary(Organization org) {
return OrganizationSummary.builder()
.id(org.getId())
.orgReference(org.getOrgReference())
.role(org.getRole())
.name(org.getName())
.companyId(org.getCompanyId())
.countryCode(org.getCountryCode())
.city(org.getCity())
.postalCode(org.getPostalCode())
.nutsCode(org.getNutsCode())
.websiteUri(org.getWebsiteUri())
.email(org.getEmail())
.phone(org.getPhone())
.build();
}
/**
* Get statistics about the document collection.
*/
public StatisticsResponse getStatistics() {
// Get vectorization stats
List<Object[]> vectorStats = documentRepository.countByVectorizationStatus();
Map<VectorizationStatus, Long> vectorCounts = vectorStats.stream()
.collect(Collectors.toMap(
row -> (VectorizationStatus) row[0],
row -> (Long) row[1]
));
// Get country stats
List<Object[]> countryStats = documentRepository.countByCountry();
List<CountryStats> countries = countryStats.stream()
.map(row -> CountryStats.builder()
.countryCode((String) row[0])
.documentCount((Long) row[1])
.build())
.collect(Collectors.toList());
// Get notice type stats
List<Object[]> noticeTypeStats = documentRepository.countByNoticeType();
List<NoticeTypeStats> noticeTypes = noticeTypeStats.stream()
.map(row -> NoticeTypeStats.builder()
.noticeType((NoticeType) row[0])
.documentCount((Long) row[1])
.build())
.collect(Collectors.toList());
long total = documentRepository.count();
return StatisticsResponse.builder()
.totalDocuments(total)
.vectorizedDocuments(vectorCounts.getOrDefault(VectorizationStatus.COMPLETED, 0L))
.pendingVectorization(vectorCounts.getOrDefault(VectorizationStatus.PENDING, 0L))
.failedVectorization(vectorCounts.getOrDefault(VectorizationStatus.FAILED, 0L))
.uniqueCountries(countries.size())
.countryStatistics(countries)
.noticeTypeStatistics(noticeTypes)
.build();
}
/**
* Get documents with upcoming deadlines.
*/
public List<DocumentSummary> getUpcomingDeadlines(int limit) {
Page<ProcurementDocument> page = documentRepository.findUpcomingDeadlines(
PageRequest.of(0, limit));
return page.getContent().stream()
.map(this::toSummary)
.collect(Collectors.toList());
}
/**
* Get distinct countries in the database.
*/
public List<String> getDistinctCountries() {
return documentRepository.countByCountry().stream()
.map(row -> (String) row[0])
.filter(Objects::nonNull)
.sorted()
.collect(Collectors.toList());
}
/**
* Get distinct CPV codes (main classification).
*/
public List<String> getDistinctCpvCodes() {
// This would require a native query to unnest the array
// For now, return empty list - could be implemented with @Query
return List.of();
}
}
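Both search paths above assemble the same pagination metadata from `totalElements`, `page`, and `size`; a minimal standalone sketch of that math (class and helper names are illustrative, not part of the service):

```java
// Sketch of the pagination math used in semanticSearch above
// (the service computes these values inline when building SearchResponse).
public class PaginationSketch {
    static int totalPages(long totalElements, int size) {
        return (int) Math.ceil((double) totalElements / size);
    }

    static boolean hasNext(int page, int totalPages) {
        return page < totalPages - 1; // page is a zero-based index
    }

    static boolean hasPrevious(int page) {
        return page > 0;
    }

    public static void main(String[] args) {
        int tp = totalPages(95, 20);        // 95 hits at page size 20 -> 5 pages
        System.out.println(tp);
        System.out.println(hasNext(4, tp)); // zero-based page 4 is the last page
        System.out.println(hasPrevious(0));
    }
}
```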

@ -0,0 +1,253 @@
package at.procon.ted.service;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.repository.ProcurementDocumentRepository;
import at.procon.ted.service.attachment.PdfExtractionService;
import at.procon.ted.service.attachment.AttachmentExtractor.ExtractionResult;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
/**
* Service for similarity search on TED procurement documents.
* Uses vector embeddings and cosine similarity for semantic matching.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
@Transactional(readOnly = true)
public class SimilaritySearchService {
private final VectorizationService vectorizationService;
private final ProcurementDocumentRepository documentRepository;
private final PdfExtractionService pdfExtractionService;
private final TedProcessorProperties properties;
private static final int DEFAULT_TOP_K = 20;
private static final double DEFAULT_THRESHOLD = 0.5;
/**
* Search for similar documents using text query.
*
* @param queryText the text to search for similar documents
* @param topK number of top results to return (default 20)
* @param threshold minimum similarity threshold (default 0.5)
* @return list of similar documents with similarity scores
*/
public SimilaritySearchResponse searchByText(String queryText, Integer topK, Double threshold) {
if (queryText == null || queryText.isBlank()) {
throw new IllegalArgumentException("Query text cannot be empty");
}
if (!vectorizationService.isAvailable()) {
throw new IllegalStateException("Vectorization service is not available");
}
int limit = topK != null && topK > 0 ? Math.min(topK, 100) : DEFAULT_TOP_K;
double similarityThreshold = threshold != null ? threshold : DEFAULT_THRESHOLD;
log.info("Similarity search: query='{}...', topK={}, threshold={}",
queryText.substring(0, Math.min(50, queryText.length())), limit, similarityThreshold);
try {
// Generate query embedding
long startTime = System.currentTimeMillis();
float[] queryEmbedding = vectorizationService.generateQueryEmbedding(queryText);
String vectorStr = vectorizationService.floatArrayToVectorString(queryEmbedding);
long embeddingTime = System.currentTimeMillis() - startTime;
log.debug("Query embedding generated in {}ms ({} dimensions)", embeddingTime, queryEmbedding.length);
// Execute similarity search (simple query without filters)
startTime = System.currentTimeMillis();
List<Object[]> results = documentRepository.findBySimilarity(
vectorStr,
similarityThreshold,
limit
);
long searchTime = System.currentTimeMillis() - startTime;
log.info("Similarity search completed: {} results in {}ms", results.size(), searchTime);
// Map results to response
List<SimilarDocument> documents = new ArrayList<>();
for (Object[] row : results) {
SimilarDocument doc = mapToSimilarDocument(row);
if (doc != null) {
documents.add(doc);
}
}
return SimilaritySearchResponse.builder()
.query(truncateText(queryText, 200))
.results(documents)
.resultCount(documents.size())
.threshold(similarityThreshold)
.embeddingTimeMs(embeddingTime)
.searchTimeMs(searchTime)
.build();
} catch (Exception e) {
log.error("Similarity search failed: {}", e.getMessage(), e);
throw new RuntimeException("Similarity search failed: " + e.getMessage(), e);
}
}
/**
* Search for similar documents using PDF content.
*
* @param pdfData the PDF file content
* @param filename original filename
* @param topK number of top results to return (default 20)
* @param threshold minimum similarity threshold (default 0.5)
* @return list of similar documents with similarity scores
*/
public SimilaritySearchResponse searchByPdf(byte[] pdfData, String filename, Integer topK, Double threshold) {
if (pdfData == null || pdfData.length == 0) {
throw new IllegalArgumentException("PDF data cannot be empty");
}
log.info("Extracting text from PDF: {} ({} bytes)", filename, pdfData.length);
// Extract text from PDF
long startTime = System.currentTimeMillis();
ExtractionResult extractionResult = pdfExtractionService.extract(pdfData, filename, "application/pdf");
long extractionTime = System.currentTimeMillis() - startTime;
if (!extractionResult.success()) {
throw new RuntimeException("PDF text extraction failed: " + extractionResult.errorMessage());
}
String extractedText = extractionResult.extractedText();
if (extractedText == null || extractedText.isBlank()) {
throw new RuntimeException("No text content could be extracted from PDF");
}
log.info("PDF text extracted in {}ms: {} characters", extractionTime, extractedText.length());
// Search using extracted text
SimilaritySearchResponse response = searchByText(extractedText, topK, threshold);
// Add PDF extraction info to response
return SimilaritySearchResponse.builder()
.query("PDF: " + filename + " (" + extractedText.length() + " chars extracted)")
.results(response.getResults())
.resultCount(response.getResultCount())
.threshold(response.getThreshold())
.embeddingTimeMs(response.getEmbeddingTimeMs())
.searchTimeMs(response.getSearchTimeMs())
.pdfExtractionTimeMs(extractionTime)
.extractedTextLength(extractedText.length())
.build();
}
/**
* Map database result row to SimilarDocument DTO.
* Result format: [id (UUID), similarity (Double)]
*/
private SimilarDocument mapToSimilarDocument(Object[] row) {
if (row == null || row.length < 2) {
return null;
}
try {
UUID id = (UUID) row[0];
Double similarity = row[1] != null ? ((Number) row[1]).doubleValue() : null;
// Fetch full document for detailed mapping
return documentRepository.findById(id)
.map(doc -> SimilarDocument.builder()
.id(doc.getId())
.publicationId(doc.getPublicationId())
.noticeId(doc.getNoticeId())
.noticeUrl(doc.getNoticeUrl())
.noticeType(doc.getNoticeType() != null ? doc.getNoticeType().name() : null)
.projectTitle(doc.getProjectTitle())
.projectDescription(truncateText(doc.getProjectDescription(), 500))
.buyerName(doc.getBuyerName())
.buyerCountryCode(doc.getBuyerCountryCode())
.buyerCity(doc.getBuyerCity())
.contractNature(doc.getContractNature() != null ? doc.getContractNature().name() : null)
.procedureType(doc.getProcedureType() != null ? doc.getProcedureType().name() : null)
.publicationDate(doc.getPublicationDate())
.submissionDeadline(doc.getSubmissionDeadline())
.cpvCodes(doc.getCpvCodes() != null ? List.of(doc.getCpvCodes()) : List.of())
.estimatedValue(doc.getEstimatedValue())
.estimatedValueCurrency(doc.getEstimatedValueCurrency())
.similarity(similarity)
.similarityPercent(similarity != null ? Math.round(similarity * 100) : null)
.build())
.orElse(null);
} catch (Exception e) {
log.warn("Failed to map search result: {}", e.getMessage());
return null;
}
}
/**
* Truncate text to specified length.
*/
private String truncateText(String text, int maxLength) {
if (text == null) return null;
if (text.length() <= maxLength) return text;
return text.substring(0, maxLength - 3) + "...";
}
/**
* Response DTO for similarity search.
*/
@lombok.Data
@lombok.Builder
@lombok.NoArgsConstructor
@lombok.AllArgsConstructor
public static class SimilaritySearchResponse {
private String query;
private List<SimilarDocument> results;
private int resultCount;
private double threshold;
private long embeddingTimeMs;
private long searchTimeMs;
private Long pdfExtractionTimeMs;
private Integer extractedTextLength;
}
/**
* DTO for a similar document result.
*/
@lombok.Data
@lombok.Builder
@lombok.NoArgsConstructor
@lombok.AllArgsConstructor
public static class SimilarDocument {
private UUID id;
private String publicationId;
private String noticeId;
private String noticeUrl;
private String noticeType;
private String projectTitle;
private String projectDescription;
private String buyerName;
private String buyerCountryCode;
private String buyerCity;
private String contractNature;
private String procedureType;
private LocalDate publicationDate;
private java.time.OffsetDateTime submissionDeadline;
private List<String> cpvCodes;
private BigDecimal estimatedValue;
private String estimatedValueCurrency;
private Double similarity;
private Long similarityPercent;
}
}
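The package download service in the next file formats identifiers as a four-digit year plus a five-digit zero-padded serial; a tiny standalone sketch of that scheme (class name is illustrative, the format string mirrors `generatePackageIdentifier`):

```java
// Standalone sketch of the YYYYSSSSS identifier scheme used by
// TedPackageDownloadService.generatePackageIdentifier: 4-digit year
// followed by the serial number zero-padded to 5 digits.
public class PackageIdSketch {
    static String packageIdentifier(int year, int serialNumber) {
        return String.format("%04d%05d", year, serialNumber);
    }

    public static void main(String[] args) {
        System.out.println(packageIdentifier(2024, 37)); // 202400037
        System.out.println(packageIdentifier(2025, 1));  // 202500001
    }
}
```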

@ -0,0 +1,558 @@
package at.procon.ted.service;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.TedDailyPackage;
import at.procon.ted.repository.TedDailyPackageRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.security.MessageDigest;
import java.time.Duration;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.Year;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
/**
* Service for downloading and processing TED Daily Packages.
*
* Features:
* - Automatic download from https://ted.europa.eu/packages/daily/
* - Hash-based idempotency check
* - tar.gz extraction
* - Integration with XML processing
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class TedPackageDownloadService {
private final TedProcessorProperties properties;
private final TedDailyPackageRepository packageRepository;
/**
* Generates package identifier in YYYYSSSSS format.
*/
public String generatePackageIdentifier(int year, int serialNumber) {
return String.format("%04d%05d", year, serialNumber);
}
/**
* Determines the next package to download.
*
* Strategy (starts with current year FIRST, then goes backward):
 * 1. CURRENT YEAR (e.g. 2026): forward from max(serial) until 404 (get today's packages first!)
 * 2. All years (e.g. 2026 -> 2025 -> 2024 ...): fill gaps (if min(serial) > 1, crawl backward to 1)
* 3. If current year complete (min=1 and 404 after max) -> previous year
* 4. Repeat backward until startYear
*
* This ensures we always get the newest data first!
*/
public PackageInfo getNextPackageToDownload() {
int currentYear = Year.now().getValue();
log.debug("Determining next package to download (current year: {})", currentYear);
// 1. PRIORITY: Current year forward crawling (max+1) - GET TODAY'S PACKAGES FIRST!
PackageInfo nextInCurrentYear = getNextForwardPackage(currentYear);
if (nextInCurrentYear != null) {
log.info("Next package: {} (CURRENT YEAR {} forward - newest data first!)",
nextInCurrentYear.getIdentifier(), currentYear);
return nextInCurrentYear;
}
log.debug("Current year {} complete or has 404, checking older years backward", currentYear);
// 2. Go through all years BACKWARD (current year -> startYear) for gaps and completion
for (int year = currentYear; year >= properties.getDownload().getStartYear(); year--) {
// 2a. Check if there are gaps (min > 1)
PackageInfo gapFiller = getGapFillerPackage(year);
if (gapFiller != null) {
log.info("Next package: {} (filling gap in year {})", gapFiller.getIdentifier(), year);
return gapFiller;
}
// 2b. If no gap filler, check if year is complete
if (!isYearComplete(year)) {
// Year not complete, try forward crawling
PackageInfo forwardPackage = getNextForwardPackage(year);
if (forwardPackage != null) {
log.info("Next package: {} (forward in year {})", forwardPackage.getIdentifier(), year);
return forwardPackage;
}
} else {
log.debug("Year {} is complete", year);
}
}
// 3. Check if we can open a new previous year
int oldestYear = getOldestYearWithData();
if (oldestYear > properties.getDownload().getStartYear()) {
int previousYear = oldestYear - 1;
if (previousYear >= properties.getDownload().getStartYear()) {
// Open new year, start with 1
log.info("Next package: {} (opening new year {})", String.format("%04d%05d", previousYear, 1), previousYear);
return new PackageInfo(previousYear, 1);
}
}
log.info("All years from {} to {} are complete - no more packages",
properties.getDownload().getStartYear(), currentYear);
return null; // No more packages
}
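    // Illustrative crawl order (assuming startYear = 2024 and a current year of 2026):
    // 2026 forward: 202600001, 202600002, ... until a tail 404; then 2025 gap fill /
    // forward crawl; finally 2024 is opened at 202400001 and back-filled to serial 1.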
/**
* Finds the next package for forward crawling (max+1).
*
* Stronger NOT_FOUND handling:
* - Current year: a tail 404 is treated as "not available yet" and retried indefinitely
* - Older years: a tail 404 remains retryable until the configured grace period after year end expires
* - Final year completion is only assumed after that grace period
*/
private PackageInfo getNextForwardPackage(int year) {
Optional<TedDailyPackage> latest = packageRepository.findLatestByYear(year);
if (latest.isEmpty()) {
// No package for this year -> start with 1
log.debug("Year {} has no packages, starting with 1", year);
return new PackageInfo(year, 1);
}
TedDailyPackage latestPackage = latest.get();
if (latestPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND) {
if (shouldRetryNotFoundPackage(latestPackage)) {
log.info("Retrying tail NOT_FOUND package {} for year {}", latestPackage.getPackageIdentifier(), year);
return new PackageInfo(year, latestPackage.getSerialNumber());
}
if (isNotFoundRetryableForYear(latestPackage)) {
log.debug("Year {} waiting until {} before retrying tail package {}",
year, calculateNextRetryAt(latestPackage), latestPackage.getPackageIdentifier());
return null;
}
log.debug("Year {} finalized after grace period at tail package {}",
year, latestPackage.getPackageIdentifier());
return null;
}
// Next package (max+1)
log.debug("Year {} continues from package {} to {}", year, latestPackage.getSerialNumber(), latestPackage.getSerialNumber() + 1);
return new PackageInfo(year, latestPackage.getSerialNumber() + 1);
}
/**
* Finds package for gap filling (min-1).
* Returns null if no gap exists (min = 1).
*/
private PackageInfo getGapFillerPackage(int year) {
Optional<TedDailyPackage> first = packageRepository.findFirstByYear(year);
if (first.isEmpty()) {
// No package for this year
return null;
}
int minSerial = first.get().getSerialNumber();
if (minSerial <= 1) {
// No gap, already starts at 1
return null;
}
// Gap found: Get (min-1)
return new PackageInfo(year, minSerial - 1);
}
/**
* Checks if a year is complete.
* A year is complete only when:
* - package numbering starts at 1, and
* - the current tail package is NOT_FOUND, and
* - that NOT_FOUND is no longer retryable (grace period expired)
*/
private boolean isYearComplete(int year) {
Optional<TedDailyPackage> first = packageRepository.findFirstByYear(year);
Optional<TedDailyPackage> latest = packageRepository.findLatestByYear(year);
if (first.isEmpty() || latest.isEmpty()) {
return false;
}
if (first.get().getSerialNumber() != 1) {
return false;
}
TedDailyPackage latestPackage = latest.get();
return latestPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND
&& !isNotFoundRetryableForYear(latestPackage);
}
private boolean shouldRetryNotFoundPackage(TedDailyPackage pkg) {
if (!isNotFoundRetryableForYear(pkg)) {
return false;
}
OffsetDateTime nextRetryAt = calculateNextRetryAt(pkg);
return !nextRetryAt.isAfter(OffsetDateTime.now());
}
private boolean isNotFoundRetryableForYear(TedDailyPackage pkg) {
int currentYear = Year.now().getValue();
int packageYear = pkg.getYear() != null ? pkg.getYear() : currentYear;
if (packageYear >= currentYear) {
return properties.getDownload().isRetryCurrentYearNotFoundIndefinitely();
}
return OffsetDateTime.now().isBefore(getYearRetryGraceDeadline(packageYear));
}
private OffsetDateTime calculateNextRetryAt(TedDailyPackage pkg) {
OffsetDateTime lastAttemptAt = pkg.getUpdatedAt() != null
? pkg.getUpdatedAt()
: (pkg.getCreatedAt() != null ? pkg.getCreatedAt() : OffsetDateTime.now());
return lastAttemptAt.plus(Duration.ofMillis(properties.getDownload().getNotFoundRetryInterval()));
}
private OffsetDateTime getYearRetryGraceDeadline(int year) {
return LocalDate.of(year + 1, 1, 1)
.atStartOfDay()
.atOffset(ZoneOffset.UTC)
.plusDays(properties.getDownload().getPreviousYearGracePeriodDays());
}
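    // Illustrative example (config value assumed): with previousYearGracePeriodDays = 30,
    // the grace deadline for year 2024 is 2025-01-01T00:00Z plus 30 days, i.e. 2025-01-31T00:00Z.
    // Tail 404s for 2024 stay retryable until that instant; only afterwards can the year
    // be marked complete.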
/**
* Finds the oldest year for which we have data.
*/
private int getOldestYearWithData() {
// Start from startYear and go forward to find the first year with data
int currentYear = Year.now().getValue();
for (int year = properties.getDownload().getStartYear(); year <= currentYear; year++) {
if (packageRepository.findLatestByYear(year).isPresent()) {
// Found the oldest year with data
return year;
}
}
return currentYear;
}
/**
 * Downloads a package and processes it.
*/
@Transactional
public DownloadResult downloadPackage(int year, int serialNumber) {
String packageId = generatePackageIdentifier(year, serialNumber);
log.debug("Starting download of package: {}", packageId);
// Check whether the package already exists
Optional<TedDailyPackage> existing = packageRepository.findByPackageIdentifier(packageId);
String downloadUrl = buildDownloadUrl(packageId);
TedDailyPackage packageEntity;
if (existing.isPresent()) {
TedDailyPackage existingPackage = existing.get();
if (existingPackage.getDownloadStatus() == TedDailyPackage.DownloadStatus.NOT_FOUND
&& isNotFoundRetryableForYear(existingPackage)) {
log.info("Retrying previously NOT_FOUND package: {}", packageId);
existingPackage.setDownloadUrl(downloadUrl);
existingPackage.setErrorMessage(null);
existingPackage.setDownloadStatus(TedDailyPackage.DownloadStatus.PENDING);
packageEntity = packageRepository.save(existingPackage);
} else {
log.debug("Package {} already exists with status: {}", packageId, existingPackage.getDownloadStatus());
return DownloadResult.alreadyExists(existingPackage);
}
} else {
// Create the package record
packageEntity = TedDailyPackage.builder()
.packageIdentifier(packageId)
.year(year)
.serialNumber(serialNumber)
.downloadUrl(downloadUrl)
.downloadStatus(TedDailyPackage.DownloadStatus.PENDING)
.build();
packageEntity = packageRepository.save(packageEntity);
}
long startTime = System.currentTimeMillis();
try {
// Update Status: DOWNLOADING
updatePackageStatus(packageEntity.getId(), TedDailyPackage.DownloadStatus.DOWNLOADING, null);
// Download tar.gz file
Path downloadPath = downloadFile(downloadUrl, packageId);
if (downloadPath == null) {
// 404 - package does not exist
updatePackageStatus(packageEntity.getId(), TedDailyPackage.DownloadStatus.NOT_FOUND,
"Package not found (404)");
return DownloadResult.notFound(packageEntity);
}
// Compute hash
String fileHash = calculateSHA256(downloadPath);
// Check for a duplicate via hash (note: findAll() scans every package in memory;
// a dedicated findByFileHash repository query would scale better)
Optional<TedDailyPackage> duplicateByHash = packageRepository.findAll().stream()
.filter(p -> fileHash.equals(p.getFileHash()))
.findFirst();
if (duplicateByHash.isPresent()) {
log.debug("Duplicate package detected via hash: {} = {}", packageId, duplicateByHash.get().getPackageIdentifier());
cleanupDownload(downloadPath);
updatePackageStatus(packageEntity.getId(), TedDailyPackage.DownloadStatus.COMPLETED,
"Duplicate of " + duplicateByHash.get().getPackageIdentifier());
return DownloadResult.duplicate(packageEntity);
}
long downloadDuration = System.currentTimeMillis() - startTime;
// Update: DOWNLOADED
packageEntity = packageRepository.findById(packageEntity.getId()).orElseThrow();
packageEntity.setFileHash(fileHash);
packageEntity.setDownloadStatus(TedDailyPackage.DownloadStatus.DOWNLOADED);
packageEntity.setDownloadedAt(OffsetDateTime.now());
packageEntity.setDownloadDurationMs(downloadDuration);
packageEntity = packageRepository.save(packageEntity);
// Extract the XML files
List<Path> xmlFiles = extractTarGz(downloadPath, packageId);
packageEntity.setXmlFileCount(xmlFiles.size());
packageEntity = packageRepository.save(packageEntity);
// Cleanup tar.gz if configured
if (properties.getDownload().isDeleteAfterExtraction()) {
cleanupDownload(downloadPath);
}
log.debug("Successfully downloaded package {}: {} XML files", packageId, xmlFiles.size());
return DownloadResult.success(packageEntity, xmlFiles);
} catch (Exception e) {
log.error("Failed to download package {}: {}", packageId, e.getMessage(), e);
updatePackageStatus(packageEntity.getId(), TedDailyPackage.DownloadStatus.FAILED,
e.getMessage());
return DownloadResult.failed(packageEntity, e);
}
}
/**
 * Builds the download URL.
*/
private String buildDownloadUrl(String packageId) {
return properties.getDownload().getBaseUrl() + packageId;
}
/**
 * Downloads a file.
 * Returns null on 404.
*/
private Path downloadFile(String urlString, String packageId) throws IOException {
URL url = URI.create(urlString).toURL();
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("GET");
connection.setConnectTimeout((int) properties.getDownload().getDownloadTimeout());
connection.setReadTimeout((int) properties.getDownload().getDownloadTimeout());
connection.setInstanceFollowRedirects(true);
int responseCode = connection.getResponseCode();
if (responseCode == 404) {
log.info("Package not found (404): {}", urlString);
return null;
}
if (responseCode != 200) {
throw new IOException("HTTP " + responseCode + " for URL: " + urlString);
}
// Create the download directory
Path downloadDir = Paths.get(properties.getDownload().getDownloadDirectory());
Files.createDirectories(downloadDir);
// Download file
Path targetPath = downloadDir.resolve(packageId + ".tar.gz");
try (InputStream in = connection.getInputStream()) {
Files.copy(in, targetPath, StandardCopyOption.REPLACE_EXISTING);
}
log.debug("Downloaded {} bytes to {}", Files.size(targetPath), targetPath);
return targetPath;
}
/**
* Calculates the SHA-256 hash of a file.
*/
private String calculateSHA256(Path file) throws Exception {
MessageDigest digest = MessageDigest.getInstance("SHA-256");
try (InputStream is = Files.newInputStream(file)) {
byte[] buffer = new byte[8192];
int read;
while ((read = is.read(buffer)) != -1) {
digest.update(buffer, 0, read);
}
}
byte[] hashBytes = digest.digest();
StringBuilder sb = new StringBuilder();
for (byte b : hashBytes) {
sb.append(String.format("%02x", b));
}
return sb.toString();
}
/**
* Extracts a tar.gz archive and returns the list of contained XML files.
*/
private List<Path> extractTarGz(Path tarGzFile, String packageId) throws IOException {
List<Path> xmlFiles = new ArrayList<>();
Path extractDir = Paths.get(properties.getDownload().getExtractDirectory())
.resolve(packageId);
Files.createDirectories(extractDir);
try (FileInputStream fis = new FileInputStream(tarGzFile.toFile());
GzipCompressorInputStream gzis = new GzipCompressorInputStream(fis);
TarArchiveInputStream tais = new TarArchiveInputStream(gzis)) {
TarArchiveEntry entry;
while ((entry = tais.getNextTarEntry()) != null) {
if (entry.isDirectory()) {
continue;
}
String name = entry.getName();
if (!name.toLowerCase().endsWith(".xml")) {
continue;
}
// Extract the XML file
Path outputPath = extractDir.resolve(new File(name).getName());
try (OutputStream os = Files.newOutputStream(outputPath)) {
byte[] buffer = new byte[8192];
int read;
while ((read = tais.read(buffer)) != -1) {
os.write(buffer, 0, read);
}
}
xmlFiles.add(outputPath);
}
}
log.debug("Extracted {} XML files from {}", xmlFiles.size(), tarGzFile.getFileName());
return xmlFiles;
}
/**
* Updates the package status.
*/
private void updatePackageStatus(java.util.UUID packageId, TedDailyPackage.DownloadStatus status, String errorMessage) {
packageRepository.findById(packageId).ifPresent(pkg -> {
pkg.setDownloadStatus(status);
if (errorMessage != null) {
pkg.setErrorMessage(errorMessage);
}
packageRepository.save(pkg);
});
}
/**
* Deletes the downloaded file.
*/
private void cleanupDownload(Path file) {
try {
Files.deleteIfExists(file);
log.debug("Cleaned up download: {}", file);
} catch (IOException e) {
log.warn("Failed to delete file {}: {}", file, e.getMessage());
}
}
/**
* Package info record.
*/
public record PackageInfo(int year, int serialNumber) {
public String getIdentifier() {
return String.format("%04d%05d", year, serialNumber);
}
}
/**
* Download result.
*/
public record DownloadResult(
TedDailyPackage packageEntity,
Status status,
List<Path> xmlFiles,
Exception error
) {
public enum Status {
SUCCESS,
ALREADY_EXISTS,
NOT_FOUND,
DUPLICATE,
FAILED
}
public static DownloadResult success(TedDailyPackage pkg, List<Path> files) {
return new DownloadResult(pkg, Status.SUCCESS, files, null);
}
public static DownloadResult alreadyExists(TedDailyPackage pkg) {
return new DownloadResult(pkg, Status.ALREADY_EXISTS, List.of(), null);
}
public static DownloadResult notFound(TedDailyPackage pkg) {
return new DownloadResult(pkg, Status.NOT_FOUND, List.of(), null);
}
public static DownloadResult duplicate(TedDailyPackage pkg) {
return new DownloadResult(pkg, Status.DUPLICATE, List.of(), null);
}
public static DownloadResult failed(TedDailyPackage pkg, Exception e) {
return new DownloadResult(pkg, Status.FAILED, List.of(), e);
}
public boolean isSuccess() {
return status == Status.SUCCESS;
}
}
}
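The `PackageInfo` record above derives the package identifier from year and serial number via `String.format("%04d%05d", ...)`. A minimal standalone sketch of that formatting, lifted out of the record for illustration (class and method names here are hypothetical):

```java
public class PackageIdDemo {
    // Mirrors PackageInfo.getIdentifier(): 4-digit year followed by 5-digit zero-padded serial
    static String identifier(int year, int serialNumber) {
        return String.format("%04d%05d", year, serialNumber);
    }

    public static void main(String[] args) {
        System.out.println(identifier(2024, 87));    // 202400087
        System.out.println(identifier(2023, 12345)); // 202312345
    }
}
```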

@@ -0,0 +1,123 @@
package at.procon.ted.service;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.ProcurementDocument;
import at.procon.ted.model.entity.VectorizationStatus;
import at.procon.ted.repository.ProcurementDocumentRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import java.time.OffsetDateTime;
import java.util.UUID;
/**
* Service for vectorization processing with transactional support.
* Called by VectorizationRoute to ensure proper transaction management.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class VectorizationProcessorService {
private final ProcurementDocumentRepository documentRepository;
private final TedProcessorProperties properties;
/**
* Load document text content for vectorization (memory efficient - does NOT load XML).
* Updates status to PROCESSING.
*
* @return DocumentContent with text and documentId, or null if should skip
*/
@Transactional(propagation = Propagation.REQUIRES_NEW)
public DocumentContent prepareDocumentForVectorization(UUID documentId) {
// Update status to PROCESSING first
documentRepository.updateVectorizationStatus(
documentId,
VectorizationStatus.PROCESSING,
null,
null
);
// Load ONLY text content (not the whole document with XML) - memory efficient
String textContent = documentRepository.findTextContentById(documentId);
if (textContent == null || textContent.isBlank()) {
documentRepository.updateVectorizationStatus(
documentId,
VectorizationStatus.SKIPPED,
"No text content available",
OffsetDateTime.now()
);
return null; // Skip vectorization
}
// Truncate if necessary
int maxLength = properties.getVectorization().getMaxTextLength();
if (textContent.length() > maxLength) {
log.debug("Truncating text content for document {} from {} to {} chars",
documentId, textContent.length(), maxLength);
textContent = textContent.substring(0, maxLength);
}
return new DocumentContent(documentId, textContent);
}
/**
* Save embedding vector to database.
* Updates status to COMPLETED.
*/
@Transactional(propagation = Propagation.REQUIRES_NEW)
public void saveEmbedding(UUID documentId, float[] embedding, Integer tokenCount) {
if (embedding == null || embedding.length != properties.getVectorization().getDimensions()) {
throw new RuntimeException("Invalid embedding dimension: expected " +
properties.getVectorization().getDimensions() +
", got " + (embedding != null ? String.valueOf(embedding.length) : "null"));
}
// Convert to PostgreSQL vector format
String vectorStr = floatArrayToVectorString(embedding);
// Update document with vector and token count
documentRepository.updateContentVector(documentId, vectorStr, tokenCount);
log.debug("Successfully vectorized document: {} ({} tokens)", documentId, tokenCount);
}
/**
* Mark document as failed with error message.
*/
@Transactional(propagation = Propagation.REQUIRES_NEW)
public void markAsFailed(UUID documentId, String errorMessage) {
log.warn("Vectorization failed for document {}: {}", documentId, errorMessage);
documentRepository.updateVectorizationStatus(
documentId,
VectorizationStatus.FAILED,
errorMessage,
OffsetDateTime.now()
);
}
/**
* Convert float array to PostgreSQL vector format: [0.1,0.2,0.3]
*/
private String floatArrayToVectorString(float[] embedding) {
StringBuilder sb = new StringBuilder("[");
for (int i = 0; i < embedding.length; i++) {
if (i > 0) sb.append(",");
sb.append(embedding[i]);
}
sb.append("]");
return sb.toString();
}
/**
* Document content holder for vectorization.
*/
public record DocumentContent(UUID documentId, String textContent) {}
}

@@ -0,0 +1,164 @@
package at.procon.ted.service;
import at.procon.ted.config.TedProcessorProperties;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
/**
* Service for vectorization-related utilities.
*
* This service provides helper methods for:
* - Generating query embeddings for semantic search
* - Converting float arrays to PostgreSQL vector format
* - Checking embedding service availability
*
* Note: Document vectorization is now handled by VectorizationRoute (Camel-based)
* with proper transaction management and concurrent processing.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class VectorizationService {
private final TedProcessorProperties properties;
/**
* Generate query embedding with appropriate prefix for e5 model.
* Used for semantic search queries.
*/
public float[] generateQueryEmbedding(String query) {
if (!properties.getVectorization().isEnabled()) {
throw new IllegalStateException("Vectorization is disabled");
}
try {
String embeddingApiUrl = properties.getVectorization().getApiUrl() + "/embed";
URL url = URI.create(embeddingApiUrl).toURL();
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("POST");
conn.setRequestProperty("Content-Type", "application/json");
conn.setDoOutput(true);
conn.setConnectTimeout(30000);
conn.setReadTimeout(60000);
// Add query prefix for e5 model
String prefixedQuery = "query: " + query;
// Send request
String requestBody = "{\"text\": " + escapeJson(prefixedQuery) + ", \"is_query\": true}";
try (OutputStream os = conn.getOutputStream()) {
os.write(requestBody.getBytes(StandardCharsets.UTF_8));
}
// Read response
if (conn.getResponseCode() == 200) {
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
String response = reader.lines().collect(Collectors.joining());
return parseEmbeddingJson(response);
}
} else {
throw new RuntimeException("Embedding API returned status: " + conn.getResponseCode());
}
} catch (Exception e) {
throw new RuntimeException("Failed to generate query embedding", e);
}
}
/**
* Parse JSON response from embedding service.
* Expected format: {"embedding": [0.1, 0.2, ...], "dimensions": 1024}
*/
private float[] parseEmbeddingJson(String json) {
json = json.trim();
// Check for error response
if (json.startsWith("{\"error\"")) {
throw new RuntimeException("Embedding error: " + json);
}
// Extract embedding array from response
// Format: {"embedding": [...], "dimensions": 1024}
int embeddingStart = json.indexOf("\"embedding\":");
if (embeddingStart == -1) {
throw new RuntimeException("Invalid embedding response: " + json);
}
int arrayStart = json.indexOf("[", embeddingStart);
int arrayEnd = json.indexOf("]", arrayStart);
if (arrayStart == -1 || arrayEnd == -1) {
throw new RuntimeException("Invalid embedding array in response");
}
String arrayContent = json.substring(arrayStart + 1, arrayEnd);
String[] parts = arrayContent.split(",");
float[] result = new float[parts.length];
for (int i = 0; i < parts.length; i++) {
result[i] = Float.parseFloat(parts[i].trim());
}
return result;
}
/**
* Escape string for JSON.
*/
private String escapeJson(String text) {
return "\"" + text
.replace("\\", "\\\\")
.replace("\"", "\\\"")
.replace("\n", "\\n")
.replace("\r", "\\r")
.replace("\t", "\\t")
+ "\"";
}
/**
* Convert float array to PostgreSQL vector format: [0.1,0.2,0.3]
* Used by VectorizationProcessorService.
*/
public String floatArrayToVectorString(float[] embedding) {
StringBuilder sb = new StringBuilder("[");
for (int i = 0; i < embedding.length; i++) {
if (i > 0) sb.append(",");
sb.append(embedding[i]);
}
sb.append("]");
return sb.toString();
}
/**
* Check if vectorization service is available.
*/
public boolean isAvailable() {
return properties.getVectorization().isEnabled() && isHttpApiAvailable();
}
/**
* Check if HTTP embedding API is reachable.
*/
private boolean isHttpApiAvailable() {
try {
String healthUrl = properties.getVectorization().getApiUrl() + "/health";
URL url = URI.create(healthUrl).toURL();
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setRequestMethod("GET");
conn.setConnectTimeout(5000);
return conn.getResponseCode() == 200;
} catch (Exception e) {
return false;
}
}
}
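The pgvector literal produced by `floatArrayToVectorString` is plain text of the form `[v1,v2,...]`. A minimal standalone sketch of that conversion, with the method body lifted out for illustration (class and method names here are hypothetical):

```java
public class VectorFormatDemo {
    // Builds a PostgreSQL pgvector literal such as [0.1,0.2,0.3]
    static String toVectorString(float[] embedding) {
        StringBuilder sb = new StringBuilder("[");
        for (int i = 0; i < embedding.length; i++) {
            if (i > 0) sb.append(",");
            sb.append(embedding[i]); // Float.toString rendering, e.g. "0.1"
        }
        return sb.append("]").toString();
    }

    public static void main(String[] args) {
        System.out.println(toVectorString(new float[]{0.1f, 0.2f, 0.3f})); // [0.1,0.2,0.3]
    }
}
```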

@@ -0,0 +1,597 @@
package at.procon.ted.service;
import at.procon.ted.model.entity.*;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.w3c.dom.*;
import org.xml.sax.InputSource;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.*;
import java.io.StringReader;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.*;
/**
* Service for parsing EU eForms XML documents.
* Extracts structured data from TED procurement notices.
*
* Uses XPath for navigation through the UBL 2.3 document structure with eForms extensions.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class XmlParserService {
// Namespace URIs for eForms/UBL documents
private static final String NS_CN = "urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2";
private static final String NS_CAC = "urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2";
private static final String NS_CBC = "urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2";
private static final String NS_EFAC = "http://data.europa.eu/p27/eforms-ubl-extension-aggregate-components/1";
private static final String NS_EFBC = "http://data.europa.eu/p27/eforms-ubl-extension-basic-components/1";
private static final String NS_EFEXT = "http://data.europa.eu/p27/eforms-ubl-extensions/1";
private static final String NS_EXT = "urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2";
private final DocumentBuilderFactory documentBuilderFactory;
private final XPathFactory xPathFactory;
public XmlParserService() {
this.documentBuilderFactory = DocumentBuilderFactory.newInstance();
this.documentBuilderFactory.setNamespaceAware(true);
this.xPathFactory = XPathFactory.newInstance();
}
/**
* Parse an eForms XML document and extract structured data.
*
* @param xmlContent The XML content as string
* @return Populated ProcurementDocument entity (without ID or hash)
*/
public ProcurementDocument parseDocument(String xmlContent) {
try {
DocumentBuilder builder = documentBuilderFactory.newDocumentBuilder();
Document doc = builder.parse(new InputSource(new StringReader(xmlContent)));
XPath xpath = xPathFactory.newXPath();
xpath.setNamespaceContext(createNamespaceContext());
ProcurementDocument document = ProcurementDocument.builder()
.xmlDocument(xmlContent)
.build();
// Parse basic notice information
parseNoticeMetadata(doc, xpath, document);
// Parse contracting party (buyer) information
parseContractingParty(doc, xpath, document);
// Parse procurement project information
parseProcurementProject(doc, xpath, document);
// Parse tendering process
parseTenderingProcess(doc, xpath, document);
// Parse organizations from extensions
parseOrganizations(doc, xpath, document);
// Parse lots
parseLots(doc, xpath, document);
// Parse publication information
parsePublication(doc, xpath, document);
// Generate text content for vectorization
document.setTextContent(generateTextContent(document));
return document;
} catch (Exception e) {
log.error("Error parsing XML document: {}", e.getMessage(), e);
throw new XmlParsingException("Failed to parse XML document", e);
}
}
private void parseNoticeMetadata(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
// UBL Version
document.setUblVersion(getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:UBLVersionID"));
// SDK Version (customization ID)
document.setSdkVersion(getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:CustomizationID"));
// Notice ID
document.setNoticeId(getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:ID"));
// Contract Folder ID
document.setContractFolderId(getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:ContractFolderID"));
// Issue Date and Time - combined into single OffsetDateTime
String issueDateStr = getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:IssueDate");
String issueTimeStr = getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:IssueTime");
if (issueDateStr != null) {
document.setIssueDateTime(parseDateTime(issueDateStr, issueTimeStr));
}
// Notice Language
document.setLanguageCode(getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:NoticeLanguageCode"));
// Notice Type Code
String noticeTypeCode = getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:NoticeTypeCode");
document.setNoticeType(mapNoticeType(noticeTypeCode));
// Regulatory Domain
document.setRegulatoryDomain(getTextContent(xpath, doc, "/*[local-name()='ContractNotice']/cbc:RegulatoryDomain"));
// Notice Subtype from extensions
String subtypeCode = getTextContent(xpath, doc,
"//efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode");
document.setNoticeSubtypeCode(subtypeCode);
}
private void parseContractingParty(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
// Activity type
document.setBuyerActivityType(getTextContent(xpath, doc,
"//cac:ContractingParty/cac:ContractingActivity/cbc:ActivityTypeCode"));
// Legal type
document.setBuyerLegalType(getTextContent(xpath, doc,
"//cac:ContractingParty/cac:ContractingPartyType/cbc:PartyTypeCode"));
// Organization reference to link with organizations
String orgRef = getTextContent(xpath, doc,
"//cac:ContractingParty/cac:Party/cac:PartyIdentification/cbc:ID");
// Buyer details will be populated from organizations
}
private void parseProcurementProject(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
// Project title
document.setProjectTitle(getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cbc:Name"));
// Project description
document.setProjectDescription(getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cbc:Description"));
// Internal reference
document.setInternalReference(getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cbc:ID"));
// Contract nature
String contractNature = getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cbc:ProcurementTypeCode");
document.setContractNature(mapContractNature(contractNature));
// CPV codes
List<String> cpvCodes = getTextContents(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cac:MainCommodityClassification/cbc:ItemClassificationCode");
cpvCodes.addAll(getTextContents(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cac:AdditionalCommodityClassification/cbc:ItemClassificationCode"));
document.setCpvCodes(cpvCodes.toArray(new String[0]));
// Location - country and NUTS codes
document.setBuyerCountryCode(getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cac:RealizedLocation/cac:Address/cac:Country/cbc:IdentificationCode"));
document.setBuyerNutsCode(getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode"));
document.setBuyerCity(getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:ProcurementProject/cac:RealizedLocation/cac:Address/cbc:CityName"));
// All NUTS codes from project and lots
List<String> nutsCodes = getTextContents(xpath, doc,
"//cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode");
document.setNutsCodes(nutsCodes.stream().distinct().toArray(String[]::new));
}
private void parseTenderingProcess(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
// Procedure type
String procedureCode = getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:TenderingProcess/cbc:ProcedureCode");
document.setProcedureType(mapProcedureType(procedureCode));
// Lot distribution
String maxLotsAwarded = getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:TenderingTerms/cac:LotDistribution/cbc:MaximumLotsAwardedNumeric");
if (maxLotsAwarded != null) {
document.setMaxLotsAwarded(Integer.parseInt(maxLotsAwarded));
}
String maxLotsSubmitted = getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cac:TenderingTerms/cac:LotDistribution/cbc:MaximumLotsSubmittedNumeric");
if (maxLotsSubmitted != null) {
document.setMaxLotsSubmitted(Integer.parseInt(maxLotsSubmitted));
}
}
private void parseOrganizations(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
NodeList orgNodes = (NodeList) xpath.evaluate(
"//efac:Organizations/efac:Organization", doc, XPathConstants.NODESET);
boolean buyerInfoSet = false;
for (int i = 0; i < orgNodes.getLength(); i++) {
Node orgNode = orgNodes.item(i);
Organization org = Organization.builder().build();
// Organization reference
org.setOrgReference(getTextContent(xpath, orgNode, ".//cac:PartyIdentification/cbc:ID"));
// Name
org.setName(getTextContent(xpath, orgNode, ".//cac:PartyName/cbc:Name"));
// Company ID
org.setCompanyId(getTextContent(xpath, orgNode, ".//cac:PartyLegalEntity/cbc:CompanyID"));
// Address
org.setStreetName(getTextContent(xpath, orgNode, ".//cac:PostalAddress/cbc:StreetName"));
org.setCity(getTextContent(xpath, orgNode, ".//cac:PostalAddress/cbc:CityName"));
org.setPostalCode(getTextContent(xpath, orgNode, ".//cac:PostalAddress/cbc:PostalZone"));
org.setNutsCode(getTextContent(xpath, orgNode, ".//cac:PostalAddress/cbc:CountrySubentityCode"));
org.setCountryCode(getTextContent(xpath, orgNode, ".//cac:PostalAddress/cac:Country/cbc:IdentificationCode"));
// Contact
org.setWebsiteUri(getTextContent(xpath, orgNode, ".//cbc:WebsiteURI"));
org.setEmail(getTextContent(xpath, orgNode, ".//cac:Contact/cbc:ElectronicMail"));
org.setPhone(getTextContent(xpath, orgNode, ".//cac:Contact/cbc:Telephone"));
document.addOrganization(org);
// Set buyer info from first organization (typically ORG-0001)
if (!buyerInfoSet && "ORG-0001".equals(org.getOrgReference())) {
document.setBuyerName(org.getName());
if (document.getBuyerCountryCode() == null) {
document.setBuyerCountryCode(org.getCountryCode());
}
if (document.getBuyerCity() == null) {
document.setBuyerCity(org.getCity());
}
document.setBuyerPostalCode(org.getPostalCode());
if (document.getBuyerNutsCode() == null) {
document.setBuyerNutsCode(org.getNutsCode());
}
buyerInfoSet = true;
}
}
}
private void parseLots(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
NodeList lotNodes = (NodeList) xpath.evaluate(
"//cac:ProcurementProjectLot", doc, XPathConstants.NODESET);
document.setTotalLots(lotNodes.getLength());
for (int i = 0; i < lotNodes.getLength(); i++) {
Node lotNode = lotNodes.item(i);
ProcurementLot lot = ProcurementLot.builder().build();
// Lot ID
lot.setLotId(getTextContent(xpath, lotNode, "cbc:ID"));
// Internal ID
lot.setInternalId(getTextContent(xpath, lotNode, "cac:ProcurementProject/cbc:ID"));
// Title and description
lot.setTitle(getTextContent(xpath, lotNode, "cac:ProcurementProject/cbc:Name"));
lot.setDescription(getTextContent(xpath, lotNode, "cac:ProcurementProject/cbc:Description"));
// CPV codes for this lot
List<String> lotCpvCodes = new ArrayList<>();
NodeList cpvNodes = (NodeList) xpath.evaluate(
".//cac:MainCommodityClassification/cbc:ItemClassificationCode",
lotNode, XPathConstants.NODESET);
for (int j = 0; j < cpvNodes.getLength(); j++) {
lotCpvCodes.add(cpvNodes.item(j).getTextContent());
}
lot.setCpvCodes(lotCpvCodes.toArray(new String[0]));
// NUTS codes for this lot
List<String> lotNutsCodes = new ArrayList<>();
NodeList nutsNodes = (NodeList) xpath.evaluate(
".//cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode",
lotNode, XPathConstants.NODESET);
for (int j = 0; j < nutsNodes.getLength(); j++) {
lotNutsCodes.add(nutsNodes.item(j).getTextContent());
}
lot.setNutsCodes(lotNutsCodes.toArray(new String[0]));
// Duration
String durationValue = getTextContent(xpath, lotNode,
"cac:ProcurementProject/cac:PlannedPeriod/cbc:DurationMeasure");
if (durationValue != null) {
try {
lot.setDurationValue(Double.parseDouble(durationValue));
} catch (NumberFormatException e) {
log.warn("Invalid duration value '{}' in lot {}, skipping", durationValue, lot.getLotId());
}
}
lot.setDurationUnit(getAttributeValue(xpath, lotNode,
"cac:ProcurementProject/cac:PlannedPeriod/cbc:DurationMeasure", "unitCode"));
// Submission deadline
String endDate = getTextContent(xpath, lotNode,
"cac:TenderingProcess/cac:TenderSubmissionDeadlinePeriod/cbc:EndDate");
String endTime = getTextContent(xpath, lotNode,
"cac:TenderingProcess/cac:TenderSubmissionDeadlinePeriod/cbc:EndTime");
if (endDate != null) {
lot.setSubmissionDeadline(parseDateTime(endDate, endTime));
// Set document-level deadline from first lot if not set
if (document.getSubmissionDeadline() == null) {
document.setSubmissionDeadline(lot.getSubmissionDeadline());
}
}
// EU funded
String euFunded = getTextContent(xpath, lotNode,
"cac:TenderingTerms/cbc:FundingProgramCode");
lot.setEuFunded(euFunded != null && !euFunded.contains("no-eu-funds"));
document.addLot(lot);
}
// Check if any lot is EU funded
document.setEuFunded(document.getLots().stream().anyMatch(l -> Boolean.TRUE.equals(l.getEuFunded())));
}
private void parsePublication(Document doc, XPath xpath, ProcurementDocument document) throws XPathExpressionException {
// Publication ID (OJS notice ID)
document.setPublicationId(getTextContent(xpath, doc,
"//efac:Publication/efbc:NoticePublicationID"));
// OJS ID (gazette ID)
document.setOjsId(getTextContent(xpath, doc,
"//efac:Publication/efbc:GazetteID"));
// Publication date
String pubDate = getTextContent(xpath, doc,
"//efac:Publication/efbc:PublicationDate");
if (pubDate != null) {
document.setPublicationDate(parseDate(pubDate));
}
// Fallback to requested publication date
if (document.getPublicationDate() == null) {
String requestedPubDate = getTextContent(xpath, doc,
"/*[local-name()='ContractNotice']/cbc:RequestedPublicationDate");
if (requestedPubDate != null) {
document.setPublicationDate(parseDate(requestedPubDate));
}
}
}
/**
* Generate a textual representation for vectorization.
*/
private String generateTextContent(ProcurementDocument document) {
StringBuilder sb = new StringBuilder();
// Title (most important)
if (document.getProjectTitle() != null) {
sb.append("Title: ").append(document.getProjectTitle()).append("\n\n");
}
// Description
if (document.getProjectDescription() != null) {
sb.append("Description: ").append(document.getProjectDescription()).append("\n\n");
}
// Buyer information
if (document.getBuyerName() != null) {
sb.append("Contracting Authority: ").append(document.getBuyerName());
if (document.getBuyerCity() != null) {
sb.append(", ").append(document.getBuyerCity());
}
if (document.getBuyerCountryCode() != null) {
sb.append(" (").append(document.getBuyerCountryCode()).append(")");
}
sb.append("\n\n");
}
// Contract type and procedure
if (document.getContractNature() != null) {
sb.append("Contract Type: ").append(document.getContractNature()).append("\n");
}
if (document.getProcedureType() != null) {
sb.append("Procedure: ").append(document.getProcedureType()).append("\n");
}
// CPV classification
if (document.getCpvCodes() != null && document.getCpvCodes().length > 0) {
sb.append("CPV Codes: ").append(String.join(", ", document.getCpvCodes())).append("\n");
}
// Lot information
if (document.getLots() != null && !document.getLots().isEmpty()) {
sb.append("\nLots (").append(document.getLots().size()).append("):\n");
for (ProcurementLot lot : document.getLots()) {
if (lot.getTitle() != null) {
sb.append("- ").append(lot.getLotId()).append(": ").append(lot.getTitle());
if (lot.getDescription() != null && !lot.getDescription().equals(lot.getTitle())) {
sb.append(" - ").append(lot.getDescription());
}
sb.append("\n");
}
}
}
return sb.toString().trim();
}
// Helper methods
private String getTextContent(XPath xpath, Object item, String expression) throws XPathExpressionException {
Node node = (Node) xpath.evaluate(expression, item, XPathConstants.NODE);
return node != null ? node.getTextContent().trim() : null;
}
private List<String> getTextContents(XPath xpath, Object item, String expression) throws XPathExpressionException {
List<String> results = new ArrayList<>();
NodeList nodes = (NodeList) xpath.evaluate(expression, item, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
String text = nodes.item(i).getTextContent().trim();
if (!text.isEmpty()) {
results.add(text);
}
}
return results;
}
private String getAttributeValue(XPath xpath, Object item, String expression, String attrName) throws XPathExpressionException {
Node node = (Node) xpath.evaluate(expression, item, XPathConstants.NODE);
if (node instanceof Element) {
return ((Element) node).getAttribute(attrName);
}
return null;
}
private LocalDate parseDate(String dateStr) {
if (dateStr == null || dateStr.isEmpty()) return null;
try {
// Handle various date formats
dateStr = dateStr.trim();
// Handle datetime with dash separator (e.g. "2025-04-23-03:00")
// Extract only the date part (first 10 characters: YYYY-MM-DD)
if (dateStr.matches("\\d{4}-\\d{2}-\\d{2}-\\d{2}:\\d{2}.*")) {
dateStr = dateStr.substring(0, 10);
}
if (dateStr.contains("+")) {
dateStr = dateStr.substring(0, dateStr.indexOf("+"));
}
if (dateStr.endsWith("Z")) {
dateStr = dateStr.substring(0, dateStr.length() - 1);
}
return LocalDate.parse(dateStr);
} catch (DateTimeParseException e) {
log.warn("Failed to parse date: {} . Error: {}", dateStr, e.getMessage());
return null;
}
}
private LocalTime parseTime(String timeStr) {
if (timeStr == null || timeStr.isEmpty()) return null;
try {
timeStr = timeStr.trim();
// Handle time with offset (e.g. "12:00:00-03:00")
// Extract only the time part (first 8 characters: HH:mm:ss)
if (timeStr.matches("\\d{2}:\\d{2}:\\d{2}[+-]\\d{2}:\\d{2}")) {
timeStr = timeStr.substring(0, 8);
}
if (timeStr.contains("+")) {
timeStr = timeStr.substring(0, timeStr.indexOf("+"));
}
if (timeStr.endsWith("Z")) {
timeStr = timeStr.substring(0, timeStr.length() - 1);
}
return LocalTime.parse(timeStr);
} catch (DateTimeParseException e) {
log.warn("Failed to parse time: {} . Error: {}", timeStr, e.getMessage());
return null;
}
}
private OffsetDateTime parseDateTime(String dateStr, String timeStr) {
LocalDate date = parseDate(dateStr);
if (date == null) return null;
LocalTime time = timeStr != null ? parseTime(timeStr) : LocalTime.MIDNIGHT;
if (time == null) time = LocalTime.MIDNIGHT;
// Parse a positive timezone offset if present in the date string; negative
// offsets are indistinguishable from the date's own dashes and fall back to UTC
ZoneOffset offset = ZoneOffset.UTC;
if (dateStr != null && dateStr.contains("+")) {
try {
String offsetStr = dateStr.substring(dateStr.indexOf("+"));
offset = ZoneOffset.of(offsetStr);
} catch (Exception e) {
// Default to UTC
}
}
return OffsetDateTime.of(date, time, offset);
}
private NoticeType mapNoticeType(String code) {
if (code == null) return NoticeType.OTHER;
return switch (code.toLowerCase()) {
case "cn-standard", "cn-social", "cn-defence" -> NoticeType.CONTRACT_NOTICE;
case "pin-only", "pin-rtl", "pin-cfc-standard" -> NoticeType.PRIOR_INFORMATION_NOTICE;
case "can-standard", "can-social", "can-modif" -> NoticeType.CONTRACT_AWARD_NOTICE;
default -> NoticeType.OTHER;
};
}
private ContractNature mapContractNature(String code) {
if (code == null) return ContractNature.UNKNOWN;
return switch (code.toLowerCase()) {
case "supplies" -> ContractNature.SUPPLIES;
case "services" -> ContractNature.SERVICES;
case "works" -> ContractNature.WORKS;
case "mixed" -> ContractNature.MIXED;
default -> ContractNature.UNKNOWN;
};
}
private ProcedureType mapProcedureType(String code) {
if (code == null) return ProcedureType.OTHER;
return switch (code.toLowerCase()) {
case "open" -> ProcedureType.OPEN;
case "restricted" -> ProcedureType.RESTRICTED;
case "comp-dial" -> ProcedureType.COMPETITIVE_DIALOGUE;
case "innovation" -> ProcedureType.INNOVATION_PARTNERSHIP;
case "neg-wo-pub" -> ProcedureType.NEGOTIATED_WITHOUT_PUBLICATION;
case "neg-w-pub" -> ProcedureType.NEGOTIATED_WITH_PUBLICATION;
default -> ProcedureType.OTHER;
};
}
private NamespaceContext createNamespaceContext() {
return new NamespaceContext() {
@Override
public String getNamespaceURI(String prefix) {
return switch (prefix) {
case "cn" -> NS_CN;
case "cac" -> NS_CAC;
case "cbc" -> NS_CBC;
case "efac" -> NS_EFAC;
case "efbc" -> NS_EFBC;
case "efext" -> NS_EFEXT;
case "ext" -> NS_EXT;
default -> javax.xml.XMLConstants.NULL_NS_URI; // contract: unknown prefixes map to the null namespace, not null
};
}
@Override
public String getPrefix(String namespaceURI) {
return null;
}
@Override
public Iterator<String> getPrefixes(String namespaceURI) {
// NamespaceContext contract requires an empty iterator rather than null
return java.util.Collections.emptyIterator();
}
};
}
/**
* Exception thrown when XML parsing fails.
*/
public static class XmlParsingException extends RuntimeException {
public XmlParsingException(String message, Throwable cause) {
super(message, cause);
}
}
}
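The offset handling in `parseDateTime` can be exercised in isolation. A minimal, self-contained sketch of the same steps (the input string below is a hypothetical TED-style value, not taken from the codebase):

```java
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;

public class DateTimeDemo {
    public static void main(String[] args) {
        // Hypothetical date string with a trailing positive offset, as handled above
        String dateStr = "2024-03-15+02:00";
        LocalDate date = LocalDate.parse(dateStr.substring(0, 10));
        // Same offset extraction as parseDateTime: take everything from '+' onwards
        ZoneOffset offset = dateStr.contains("+")
                ? ZoneOffset.of(dateStr.substring(dateStr.indexOf('+')))
                : ZoneOffset.UTC;
        OffsetDateTime dt = OffsetDateTime.of(date, LocalTime.MIDNIGHT, offset);
        System.out.println(dt); // 2024-03-15T00:00+02:00
    }
}
```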

@@ -0,0 +1,87 @@
package at.procon.ted.service.attachment;
import java.util.List;
import java.util.Set;
/**
* Interface for attachment content extractors.
* Each implementation handles a specific file format (PDF, ZIP, etc.).
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
public interface AttachmentExtractor {
/**
* Returns the set of file extensions this extractor can handle.
* Extensions should be lowercase without the dot (e.g., "pdf", "zip").
*/
Set<String> getSupportedExtensions();
/**
* Returns the set of MIME types this extractor can handle.
*/
Set<String> getSupportedMimeTypes();
/**
* Check if this extractor can handle the given file.
*
* @param filename the filename
* @param contentType the MIME content type
* @return true if this extractor can process the file
*/
boolean canHandle(String filename, String contentType);
/**
* Extract content from the attachment.
*
* @param data the raw file data
* @param filename the original filename
* @param contentType the MIME content type
* @return extraction result containing text and/or child attachments
*/
ExtractionResult extract(byte[] data, String filename, String contentType);
/**
* Result of content extraction.
*/
record ExtractionResult(
/**
* Extracted text content (for PDF, etc.).
*/
String extractedText,
/**
* Child attachments (for ZIP files).
*/
List<ChildAttachment> childAttachments,
/**
* Whether extraction was successful.
*/
boolean success,
/**
* Error message if extraction failed.
*/
String errorMessage
) {
public static ExtractionResult success(String text) {
return new ExtractionResult(text, List.of(), true, null);
}
public static ExtractionResult successWithChildren(List<ChildAttachment> children) {
return new ExtractionResult(null, children, true, null);
}
public static ExtractionResult failure(String errorMessage) {
return new ExtractionResult(null, List.of(), false, errorMessage);
}
}
/**
* Represents a child attachment extracted from a container (e.g., ZIP).
*/
record ChildAttachment(
String filename,
String contentType,
byte[] data,
String pathInArchive
) {}
}
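Callers of this interface typically walk the list of extractors and take the first one whose `canHandle` matches (see `findExtractor` in the processing service). A self-contained sketch of that first-match dispatch; the `Handler` interface below is an illustrative stand-in, not part of the codebase:

```java
import java.util.List;

public class DispatchDemo {
    // Minimal stand-in mirroring AttachmentExtractor.canHandle (illustrative only)
    interface Handler {
        boolean canHandle(String filename);
        String name();
    }

    public static void main(String[] args) {
        List<Handler> handlers = List.of(
            new Handler() {
                public boolean canHandle(String f) { return f != null && f.toLowerCase().endsWith(".pdf"); }
                public String name() { return "pdf"; }
            },
            new Handler() {
                public boolean canHandle(String f) { return f != null && f.toLowerCase().endsWith(".zip"); }
                public String name() { return "zip"; }
            }
        );
        for (String file : List.of("Report.PDF", "data.zip", "notes.txt")) {
            // First-match wins; unknown types fall through to "none"
            Handler h = handlers.stream().filter(x -> x.canHandle(file)).findFirst().orElse(null);
            System.out.println(file + " -> " + (h != null ? h.name() : "none"));
        }
    }
}
```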

@@ -0,0 +1,237 @@
package at.procon.ted.service.attachment;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.ProcessedAttachment;
import at.procon.ted.model.entity.ProcessedAttachment.ProcessingStatus;
import at.procon.ted.repository.ProcessedAttachmentRepository;
import at.procon.ted.util.HashUtils;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.File;
import java.io.FileOutputStream;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
* Main service for processing mail attachments.
* Handles idempotency via content hash and delegates to format-specific extractors.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class AttachmentProcessingService {
private final ProcessedAttachmentRepository attachmentRepository;
private final List<AttachmentExtractor> extractors;
private final TedProcessorProperties properties;
/**
* Process an attachment with idempotency check.
* Returns the processed attachment entity or null if it's a duplicate.
*
* @param data raw attachment data
* @param filename original filename
* @param contentType MIME content type
* @param mailSubject subject of the source email
* @param mailFrom sender of the source email
* @param parentHash hash of parent attachment (for ZIP-extracted files)
* @return processing result with entity and child attachments
*/
@Transactional
public ProcessingResult processAttachment(byte[] data, String filename, String contentType,
String mailSubject, String mailFrom, String parentHash) {
// Calculate content hash for idempotency
String contentHash = HashUtils.computeSha256(data);
log.debug("Processing attachment: filename='{}', contentType='{}', hash={}",
filename, contentType, contentHash);
// Check if already processed
Optional<ProcessedAttachment> existing = attachmentRepository.findByContentHash(contentHash);
if (existing.isPresent()) {
ProcessedAttachment existingAttachment = existing.get();
log.info("Attachment already processed (duplicate): filename='{}', hash={}, status={}",
filename, contentHash, existingAttachment.getProcessingStatus());
return ProcessingResult.duplicate(existingAttachment);
}
// Determine file type from extension
String fileType = extractFileType(filename);
// Create entity
ProcessedAttachment attachment = ProcessedAttachment.builder()
.contentHash(contentHash)
.originalFilename(filename)
.fileType(fileType)
.contentType(contentType)
.fileSize((long) data.length)
.processingStatus(ProcessingStatus.PROCESSING)
.mailSubject(mailSubject)
.mailFrom(mailFrom)
.parentHash(parentHash)
.receivedAt(LocalDateTime.now())
.build();
// Save immediately to mark as being processed
attachment = attachmentRepository.save(attachment);
try {
// Save attachment to disk
String savedPath = saveAttachmentToDisk(data, filename, contentHash);
attachment.setSavedPath(savedPath);
// Find appropriate extractor
AttachmentExtractor extractor = findExtractor(filename, contentType);
List<AttachmentExtractor.ChildAttachment> childAttachments = new ArrayList<>();
if (extractor != null) {
log.debug("Using extractor {} for file '{}'",
extractor.getClass().getSimpleName(), filename);
AttachmentExtractor.ExtractionResult result = extractor.extract(data, filename, contentType);
if (result.success()) {
if (result.extractedText() != null && !result.extractedText().isBlank()) {
attachment.setExtractedText(result.extractedText());
log.debug("Extracted {} characters of text from '{}'",
result.extractedText().length(), filename);
}
if (result.childAttachments() != null && !result.childAttachments().isEmpty()) {
childAttachments.addAll(result.childAttachments());
attachment.setChildCount(childAttachments.size());
log.debug("Extracted {} child attachments from '{}'",
childAttachments.size(), filename);
}
} else {
attachment.setErrorMessage(result.errorMessage());
log.warn("Extraction failed for '{}': {}", filename, result.errorMessage());
}
} else {
log.debug("No extractor available for file type '{}' ({})", fileType, contentType);
}
// Mark as completed
attachment.setProcessingStatus(ProcessingStatus.COMPLETED);
attachment.setProcessedAt(LocalDateTime.now());
attachment = attachmentRepository.save(attachment);
log.info("Successfully processed attachment: filename='{}', hash={}, extractedText={}, children={}",
filename, contentHash,
attachment.getExtractedText() != null ? attachment.getExtractedText().length() + " chars" : "none",
childAttachments.size());
return ProcessingResult.success(attachment, childAttachments);
} catch (Exception e) {
log.error("Failed to process attachment '{}': {}", filename, e.getMessage(), e);
attachment.setProcessingStatus(ProcessingStatus.FAILED);
attachment.setErrorMessage(e.getMessage());
attachment.setProcessedAt(LocalDateTime.now());
attachmentRepository.save(attachment);
return ProcessingResult.failure(attachment, e.getMessage());
}
}
/**
* Find an extractor that can handle the given file.
*/
private AttachmentExtractor findExtractor(String filename, String contentType) {
for (AttachmentExtractor extractor : extractors) {
if (extractor.canHandle(filename, contentType)) {
return extractor;
}
}
return null;
}
/**
* Extract file type/extension from filename.
*/
private String extractFileType(String filename) {
if (filename == null) {
return "unknown";
}
int lastDot = filename.lastIndexOf('.');
if (lastDot > 0 && lastDot < filename.length() - 1) {
return filename.substring(lastDot + 1).toLowerCase();
}
return "unknown";
}
/**
* Save attachment to disk with hash-based naming.
*/
private String saveAttachmentToDisk(byte[] data, String filename, String contentHash) throws Exception {
String outputDir = properties.getMail().getAttachmentOutputDirectory();
File dir = new File(outputDir);
if (!dir.exists()) {
dir.mkdirs();
}
// Create filename with timestamp and hash for uniqueness
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
String safeFilename = sanitizeFilename(filename);
String shortHash = contentHash.substring(0, 8);
String outputFilename = timestamp + "_" + shortHash + "_" + safeFilename;
File outputFile = new File(dir, outputFilename);
try (FileOutputStream fos = new FileOutputStream(outputFile)) {
fos.write(data);
}
log.debug("Saved attachment to: {}", outputFile.getAbsolutePath());
return outputFile.getAbsolutePath();
}
/**
* Sanitize filename for filesystem safety.
*/
private String sanitizeFilename(String filename) {
if (filename == null) {
return "unnamed";
}
return filename.replaceAll("[\\\\/:*?\"<>|]", "_");
}
/**
* Result of attachment processing.
*/
public record ProcessingResult(
ProcessedAttachment attachment,
List<AttachmentExtractor.ChildAttachment> childAttachments,
boolean isDuplicate,
boolean isSuccess,
String errorMessage
) {
public static ProcessingResult success(ProcessedAttachment attachment,
List<AttachmentExtractor.ChildAttachment> children) {
return new ProcessingResult(attachment, children, false, true, null);
}
public static ProcessingResult duplicate(ProcessedAttachment attachment) {
return new ProcessingResult(attachment, List.of(), true, true, null);
}
public static ProcessingResult failure(ProcessedAttachment attachment, String error) {
return new ProcessingResult(attachment, List.of(), false, false, error);
}
public boolean hasChildren() {
return childAttachments != null && !childAttachments.isEmpty();
}
}
}
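The hash-based idempotency check above reduces to "compute SHA-256, look it up, process only if absent". A self-contained sketch with a `HashSet` standing in for the `findByContentHash` repository lookup (filenames and byte contents are made up):

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.HashSet;
import java.util.Set;

public class IdempotencyDemo {
    static String sha256(byte[] data) throws Exception {
        byte[] h = MessageDigest.getInstance("SHA-256").digest(data);
        StringBuilder sb = new StringBuilder();
        for (byte b : h) sb.append(String.format("%02x", b));
        return sb.toString();
    }

    public static void main(String[] args) throws Exception {
        // Stand-in for the findByContentHash lookup: a set of already-seen hashes
        Set<String> seen = new HashSet<>();
        byte[][] attachments = {
            "invoice.pdf bytes".getBytes(StandardCharsets.UTF_8),
            "report.pdf bytes".getBytes(StandardCharsets.UTF_8),
            "invoice.pdf bytes".getBytes(StandardCharsets.UTF_8) // same content, different mail
        };
        for (byte[] data : attachments) {
            String hash = sha256(data);
            // add() returns false when the hash was already present -> duplicate
            System.out.println(seen.add(hash)
                    ? "processed " + hash.substring(0, 8)
                    : "duplicate " + hash.substring(0, 8));
        }
    }
}
```

Note that deduplication keys on content, not filename: the third attachment is rejected even though it could arrive under any name.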

@@ -0,0 +1,115 @@
package at.procon.ted.service.attachment;
import lombok.extern.slf4j.Slf4j;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.stereotype.Service;
import java.util.Set;
/**
* Service for extracting text content from PDF files.
* Uses Apache PDFBox for PDF parsing and text extraction.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@Slf4j
public class PdfExtractionService implements AttachmentExtractor {
private static final Set<String> SUPPORTED_EXTENSIONS = Set.of("pdf");
private static final Set<String> SUPPORTED_MIME_TYPES = Set.of(
"application/pdf",
"application/x-pdf"
);
@Override
public Set<String> getSupportedExtensions() {
return SUPPORTED_EXTENSIONS;
}
@Override
public Set<String> getSupportedMimeTypes() {
return SUPPORTED_MIME_TYPES;
}
@Override
public boolean canHandle(String filename, String contentType) {
if (filename != null) {
String lowerFilename = filename.toLowerCase();
if (SUPPORTED_EXTENSIONS.stream().anyMatch(ext -> lowerFilename.endsWith("." + ext))) {
return true;
}
}
if (contentType != null) {
String lowerContentType = contentType.toLowerCase().split(";")[0].trim();
return SUPPORTED_MIME_TYPES.contains(lowerContentType);
}
return false;
}
@Override
public ExtractionResult extract(byte[] data, String filename, String contentType) {
if (data == null || data.length == 0) {
return ExtractionResult.failure("Empty PDF data");
}
log.debug("Extracting text from PDF: {} ({} bytes)", filename, data.length);
try (PDDocument document = Loader.loadPDF(data)) {
// Check if document is encrypted
if (document.isEncrypted()) {
log.warn("PDF is encrypted, attempting to decrypt with empty password: {}", filename);
try {
document.setAllSecurityToBeRemoved(true);
} catch (Exception e) {
return ExtractionResult.failure("PDF is encrypted and cannot be decrypted: " + e.getMessage());
}
}
PDFTextStripper stripper = new PDFTextStripper();
stripper.setSortByPosition(true);
String text = stripper.getText(document);
// Clean up extracted text
text = cleanExtractedText(text);
int pageCount = document.getNumberOfPages();
log.info("Successfully extracted {} characters from {} pages of PDF: {}",
text.length(), pageCount, filename);
return ExtractionResult.success(text);
} catch (Exception e) {
log.error("Failed to extract text from PDF '{}': {}", filename, e.getMessage(), e);
return ExtractionResult.failure("PDF extraction failed: " + e.getMessage());
}
}
/**
* Clean up extracted text by removing excessive whitespace and normalizing line breaks.
*/
private String cleanExtractedText(String text) {
if (text == null) {
return "";
}
// Normalize line breaks
text = text.replaceAll("\r\n", "\n");
text = text.replaceAll("\r", "\n");
// Remove excessive blank lines (more than 2 consecutive)
text = text.replaceAll("\n{3,}", "\n\n");
// Remove trailing whitespace from each line
text = text.replaceAll("[ \t]+\n", "\n");
// Trim leading/trailing whitespace
text = text.trim();
return text;
}
}
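The regex pipeline in `cleanExtractedText` can be tried standalone. A small sketch running the same four replacements over a made-up messy input:

```java
public class CleanTextDemo {
    public static void main(String[] args) {
        // Same replacement pipeline as cleanExtractedText above
        String text = "Line1   \r\nLine2\r\r\n\n\nLine3\n";
        text = text.replaceAll("\r\n", "\n")     // normalize CRLF
                   .replaceAll("\r", "\n")       // normalize bare CR
                   .replaceAll("\n{3,}", "\n\n") // collapse runs of blank lines
                   .replaceAll("[ \t]+\n", "\n") // strip trailing whitespace per line
                   .trim();
        System.out.println(text);
    }
}
```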

@@ -0,0 +1,234 @@
package at.procon.ted.service.attachment;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
* Service for extracting files from ZIP archives.
* Extracts all contained files as child attachments for recursive processing.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Service
@Slf4j
public class ZipExtractionService implements AttachmentExtractor {
private static final Set<String> SUPPORTED_EXTENSIONS = Set.of("zip");
private static final Set<String> SUPPORTED_MIME_TYPES = Set.of(
"application/zip",
"application/x-zip",
"application/x-zip-compressed",
"application/octet-stream" // Often used for ZIP files
);
// Security limits
private static final long MAX_TOTAL_SIZE = 500 * 1024 * 1024; // 500 MB total extracted size
private static final long MAX_SINGLE_FILE_SIZE = 100 * 1024 * 1024; // 100 MB per file
private static final int MAX_FILES = 1000; // Maximum number of files in archive
private static final int MAX_PATH_LENGTH = 500; // Maximum path length
@Override
public Set<String> getSupportedExtensions() {
return SUPPORTED_EXTENSIONS;
}
@Override
public Set<String> getSupportedMimeTypes() {
return SUPPORTED_MIME_TYPES;
}
@Override
public boolean canHandle(String filename, String contentType) {
if (filename != null) {
String lowerFilename = filename.toLowerCase();
if (SUPPORTED_EXTENSIONS.stream().anyMatch(ext -> lowerFilename.endsWith("." + ext))) {
return true;
}
}
// Only use MIME type if it's explicitly zip, not application/octet-stream
if (contentType != null) {
String lowerContentType = contentType.toLowerCase().split(";")[0].trim();
if (lowerContentType.contains("zip")) {
return true;
}
}
return false;
}
@Override
public ExtractionResult extract(byte[] data, String filename, String contentType) {
if (data == null || data.length == 0) {
return ExtractionResult.failure("Empty ZIP data");
}
log.debug("Extracting files from ZIP: {} ({} bytes)", filename, data.length);
List<ChildAttachment> children = new ArrayList<>();
long totalExtractedSize = 0;
int fileCount = 0;
try (ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(data))) {
ZipEntry entry;
while ((entry = zis.getNextEntry()) != null) {
// Security check: skip directories
if (entry.isDirectory()) {
zis.closeEntry();
continue;
}
String entryName = entry.getName();
// Security check: path traversal protection
if (entryName.contains("..") || entryName.startsWith("/") || entryName.startsWith("\\")) {
log.warn("Skipping potentially malicious ZIP entry: {}", entryName);
zis.closeEntry();
continue;
}
// Security check: path length
if (entryName.length() > MAX_PATH_LENGTH) {
log.warn("Skipping ZIP entry with too long path: {}", entryName.substring(0, 100) + "...");
zis.closeEntry();
continue;
}
// Security check: maximum files
if (fileCount >= MAX_FILES) {
log.warn("ZIP file contains too many files, stopping at {} files", MAX_FILES);
break;
}
// Read entry content
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[8192];
int len;
long entrySize = 0;
while ((len = zis.read(buffer)) > 0) {
entrySize += len;
// Security check: single file size
if (entrySize > MAX_SINGLE_FILE_SIZE) {
log.warn("Skipping ZIP entry exceeding max file size: {} (> {} MB)",
entryName, MAX_SINGLE_FILE_SIZE / 1024 / 1024);
break;
}
// Security check: total extracted size (zip bomb protection)
if (totalExtractedSize + entrySize > MAX_TOTAL_SIZE) {
log.warn("ZIP extraction stopped: total extracted size exceeds limit ({} MB)",
MAX_TOTAL_SIZE / 1024 / 1024);
return ExtractionResult.successWithChildren(children);
}
baos.write(buffer, 0, len);
}
if (entrySize > MAX_SINGLE_FILE_SIZE) {
zis.closeEntry();
continue;
}
byte[] entryData = baos.toByteArray();
totalExtractedSize += entryData.length;
fileCount++;
// Determine content type from filename
String childContentType = guessContentType(entryName);
// Extract just the filename from the path
String childFilename = extractFilename(entryName);
ChildAttachment child = new ChildAttachment(
childFilename,
childContentType,
entryData,
entryName
);
children.add(child);
log.debug("Extracted from ZIP: {} ({} bytes, type={})",
entryName, entryData.length, childContentType);
zis.closeEntry();
}
log.info("Successfully extracted {} files ({} bytes total) from ZIP: {}",
children.size(), totalExtractedSize, filename);
return ExtractionResult.successWithChildren(children);
} catch (Exception e) {
log.error("Failed to extract ZIP '{}': {}", filename, e.getMessage(), e);
return ExtractionResult.failure("ZIP extraction failed: " + e.getMessage());
}
}
/**
* Guess the MIME content type from a filename.
*/
private String guessContentType(String filename) {
if (filename == null) {
return "application/octet-stream";
}
String lowerFilename = filename.toLowerCase();
// Common types
if (lowerFilename.endsWith(".pdf")) {
return "application/pdf";
} else if (lowerFilename.endsWith(".xml")) {
return "application/xml";
} else if (lowerFilename.endsWith(".zip")) {
return "application/zip";
} else if (lowerFilename.endsWith(".txt")) {
return "text/plain";
} else if (lowerFilename.endsWith(".html") || lowerFilename.endsWith(".htm")) {
return "text/html";
} else if (lowerFilename.endsWith(".json")) {
return "application/json";
} else if (lowerFilename.endsWith(".doc")) {
return "application/msword";
} else if (lowerFilename.endsWith(".docx")) {
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
} else if (lowerFilename.endsWith(".xls")) {
return "application/vnd.ms-excel";
} else if (lowerFilename.endsWith(".xlsx")) {
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
} else if (lowerFilename.endsWith(".png")) {
return "image/png";
} else if (lowerFilename.endsWith(".jpg") || lowerFilename.endsWith(".jpeg")) {
return "image/jpeg";
}
// Try to guess from URLConnection
String guessed = URLConnection.guessContentTypeFromName(filename);
return guessed != null ? guessed : "application/octet-stream";
}
/**
* Extract just the filename from a path (handles both / and \ separators).
*/
private String extractFilename(String path) {
if (path == null) {
return "unnamed";
}
int lastSlash = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
if (lastSlash >= 0 && lastSlash < path.length() - 1) {
return path.substring(lastSlash + 1);
}
return path;
}
}
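The path-traversal guard in the extraction loop is a simple predicate over the entry name. A self-contained sketch using the same checks (the entry names are made-up examples):

```java
import java.util.List;

public class ZipEntryCheckDemo {
    public static void main(String[] args) {
        // Same entry-name checks as the extract() loop above
        for (String name : List.of("docs/readme.txt", "../../etc/passwd", "/abs/file", "ok.pdf")) {
            boolean malicious = name.contains("..") || name.startsWith("/") || name.startsWith("\\");
            System.out.println(name + " -> " + (malicious ? "skipped" : "extracted"));
        }
    }
}
```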

@@ -0,0 +1,104 @@
package at.procon.ted.startup;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.core.annotation.Order;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
/**
* Startup runner that fixes the organization table schema if needed.
* This is a workaround for Flyway V2 migration not being applied automatically.
*
* Extends VARCHAR fields to handle long TED data.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@Order(1) // Run before other startup runners
@RequiredArgsConstructor
@Slf4j
public class OrganizationSchemaFixRunner implements ApplicationRunner {
private final JdbcTemplate jdbcTemplate;
@Override
public void run(ApplicationArguments args) throws Exception {
log.info("Checking organization table schema...");
try {
// Check if schema fix is needed by trying to query column types
String checkSql = """
SELECT column_name, character_maximum_length, data_type
FROM information_schema.columns
WHERE table_schema = 'ted'
AND table_name = 'organization'
AND column_name IN ('postal_code', 'company_id', 'name')
ORDER BY column_name
""";
var columnInfo = jdbcTemplate.queryForList(checkSql);
boolean needsFix = false;
for (var row : columnInfo) {
String columnName = (String) row.get("column_name");
Integer maxLength = (Integer) row.get("character_maximum_length");
String dataType = (String) row.get("data_type");
log.debug("Column {}: type={}, max_length={}", columnName, dataType, maxLength);
// Check if any field is still too small
if ("postal_code".equals(columnName) && maxLength != null && maxLength < 255) {
needsFix = true;
log.warn("Column postal_code is too small: {} chars, needs 255", maxLength);
}
if ("company_id".equals(columnName) && maxLength != null && maxLength < 255) {
needsFix = true;
log.warn("Column company_id is too small: {} chars, needs 255", maxLength);
}
}
if (needsFix) {
log.warn("Organization table schema needs fixing - applying migration...");
applySchemaFix();
log.info("Organization table schema fixed successfully!");
} else {
log.info("Organization table schema is up to date");
}
} catch (Exception e) {
log.error("Failed to check/fix organization table schema: {}", e.getMessage(), e);
throw e;
}
}
private void applySchemaFix() {
log.info("Applying schema fix to ted.organization table...");
// Apply all column type changes
// Use TEXT for fields that can be extremely long
String[] alterStatements = {
"ALTER TABLE ted.organization ALTER COLUMN postal_code TYPE TEXT",
"ALTER TABLE ted.organization ALTER COLUMN street_name TYPE TEXT",
"ALTER TABLE ted.organization ALTER COLUMN city TYPE TEXT", // Some cities have very long names
"ALTER TABLE ted.organization ALTER COLUMN phone TYPE VARCHAR(100)",
"ALTER TABLE ted.organization ALTER COLUMN org_reference TYPE VARCHAR(100)",
"ALTER TABLE ted.organization ALTER COLUMN role TYPE VARCHAR(100)",
"ALTER TABLE ted.organization ALTER COLUMN company_id TYPE TEXT", // Can be very long
"ALTER TABLE ted.organization ALTER COLUMN name TYPE TEXT"
};
for (String sql : alterStatements) {
try {
jdbcTemplate.execute(sql);
log.debug("Executed: {}", sql);
} catch (Exception e) {
log.warn("Failed to execute {}: {} (may already be applied)", sql, e.getMessage());
}
}
log.info("Schema fix applied successfully");
}
}

@@ -0,0 +1,112 @@
package at.procon.ted.startup;
import at.procon.ted.config.TedProcessorProperties;
import at.procon.ted.model.entity.VectorizationStatus;
import at.procon.ted.repository.ProcurementDocumentRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.camel.ProducerTemplate;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.UUID;
/**
* Startup runner that processes all pending and failed vectorizations on application start.
*
* This ensures that any documents that were saved but not yet vectorized
* or failed during vectorization (e.g., due to service restart or embedding service issues)
* are immediately queued for (re-)vectorization.
*
* Memory efficient: Only loads document IDs, not full entities.
* Processes in batches to avoid holding database connections for too long.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Component
@RequiredArgsConstructor
@Slf4j
public class VectorizationStartupRunner implements ApplicationRunner {
private final ProcurementDocumentRepository documentRepository;
private final ProducerTemplate producerTemplate;
private final TedProcessorProperties properties;
private static final int BATCH_SIZE = 1000;
@Override
public void run(ApplicationArguments args) throws Exception {
if (!properties.getVectorization().isEnabled()) {
log.info("Vectorization is disabled, skipping startup processing");
return;
}
log.info("Checking for pending and failed vectorizations on startup...");
try {
int successCount = 0;
// Process PENDING documents first (higher priority)
successCount += processDocumentsByStatus(VectorizationStatus.PENDING, "PENDING");
// Then process FAILED documents (retry)
successCount += processDocumentsByStatus(VectorizationStatus.FAILED, "FAILED");
if (successCount == 0) {
log.info("No pending or failed vectorizations found");
return;
}
// Per-document queue failures are logged inside processDocumentsByStatus
log.info("Startup vectorization processing completed: {} documents queued for vectorization", successCount);
} catch (Exception e) {
log.error("Error during startup vectorization processing: {}", e.getMessage(), e);
}
}
/**
* Process documents by status in batches to avoid connection leaks.
*/
private int processDocumentsByStatus(VectorizationStatus status, String statusName) {
int successCount = 0;
int page = 0;
List<UUID> documentIds;
do {
// Load batch of document IDs (memory efficient - only IDs, not full entities)
Pageable pageable = PageRequest.of(page, BATCH_SIZE);
documentIds = documentRepository.findIdsByVectorizationStatus(status, pageable);
if (documentIds.isEmpty()) {
break;
}
if (page == 0) {
log.info("Processing documents with status {} in batches of {} (first batch: {} ids)...",
statusName, BATCH_SIZE, documentIds.size());
}
// Queue each document for vectorization
for (UUID documentId : documentIds) {
try {
producerTemplate.sendBodyAndHeader("direct:vectorize", null, "documentId", documentId);
successCount++;
} catch (Exception e) {
log.warn("Failed to queue {} document {} for vectorization: {}",
statusName, documentId, e.getMessage());
}
}
page++;
} while (documentIds.size() == BATCH_SIZE);
return successCount;
}
}
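The do/while pagination above (keep fetching while a batch comes back full, stop on the first short batch) can be sketched with an in-memory list standing in for the repository query:

```java
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class BatchingDemo {
    static final int BATCH_SIZE = 1000;

    // Stand-in for the repository page query: returns ids [page*BATCH_SIZE, ...)
    static List<Integer> fetchPage(List<Integer> all, int page) {
        int from = page * BATCH_SIZE;
        if (from >= all.size()) return List.of();
        return all.subList(from, Math.min(from + BATCH_SIZE, all.size()));
    }

    public static void main(String[] args) {
        List<Integer> ids = IntStream.range(0, 2500).boxed().collect(Collectors.toList());
        int page = 0, queued = 0;
        List<Integer> batch;
        do {
            batch = fetchPage(ids, page);
            queued += batch.size();
            page++;
        } while (batch.size() == BATCH_SIZE); // a short batch means we reached the end
        System.out.println(queued + " ids in " + page + " pages");
    }
}
```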

@@ -0,0 +1,82 @@
package at.procon.ted.util;
import lombok.extern.slf4j.Slf4j;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
* Utility class for computing document hashes.
* Uses SHA-256 for generating unique document identifiers.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
@Slf4j
public final class HashUtils {
private static final char[] HEX_ARRAY = "0123456789abcdef".toCharArray();
private HashUtils() {
// Utility class, no instantiation
}
/**
* Compute SHA-256 hash of the given content.
*
* @param content The content to hash
* @return Lowercase hex-encoded hash string (64 characters)
*/
public static String computeSha256(String content) {
try {
MessageDigest digest = MessageDigest.getInstance("SHA-256");
byte[] hashBytes = digest.digest(content.getBytes(StandardCharsets.UTF_8));
return bytesToHex(hashBytes);
} catch (NoSuchAlgorithmException e) {
// SHA-256 is always available in Java
throw new RuntimeException("SHA-256 algorithm not available", e);
}
}
/**
* Compute SHA-256 hash of the given byte array.
*
* @param content The content to hash
* @return Lowercase hex-encoded hash string (64 characters)
*/
public static String computeSha256(byte[] content) {
try {
MessageDigest digest = MessageDigest.getInstance("SHA-256");
byte[] hashBytes = digest.digest(content);
return bytesToHex(hashBytes);
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("SHA-256 algorithm not available", e);
}
}
/**
* Convert byte array to lowercase hexadecimal string.
*/
private static String bytesToHex(byte[] bytes) {
char[] hexChars = new char[bytes.length * 2];
for (int i = 0; i < bytes.length; i++) {
int v = bytes[i] & 0xFF;
hexChars[i * 2] = HEX_ARRAY[v >>> 4];
hexChars[i * 2 + 1] = HEX_ARRAY[v & 0x0F];
}
return new String(hexChars);
}
/**
* Validate a hash string format.
*
* @param hash The hash to validate
* @return true if valid SHA-256 hex string
*/
public static boolean isValidSha256(String hash) {
if (hash == null || hash.length() != 64) {
return false;
}
return hash.matches("^[a-f0-9]{64}$");
}
}
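A quick way to sanity-check the digest-and-hex convention: SHA-256 of the empty string is a well-known constant, so a standalone run of the same two steps should print it:

```java
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;

public class Sha256Demo {
    public static void main(String[] args) throws Exception {
        // Same algorithm and lowercase hex encoding as HashUtils.computeSha256 above
        MessageDigest digest = MessageDigest.getInstance("SHA-256");
        byte[] hash = digest.digest("".getBytes(StandardCharsets.UTF_8));
        StringBuilder hex = new StringBuilder();
        for (byte b : hash) hex.append(String.format("%02x", b));
        // Well-known SHA-256 of the empty string
        System.out.println(hex); // e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
    }
}
```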

@@ -0,0 +1,86 @@
package at.procon.ted.util;
import lombok.extern.slf4j.Slf4j;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
/**
* Utility to inspect database objects that depend on ENUM types.
*/
@Slf4j
public class InspectDatabase {
private static final String DB_URL = "jdbc:postgresql://94.130.218.54:5432/Sales";
private static final String DB_USER = System.getenv().getOrDefault("DB_USER", "postgres");
// Read the password from the environment rather than hardcoding credentials in source control
private static final String DB_PASSWORD = System.getenv("DB_PASSWORD");
public static void main(String[] args) {
try (Connection conn = DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD)) {
log.info("Connected to database");
// Check for views
log.info("\n=== VIEWS ===");
try (Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(
"SELECT schemaname, viewname FROM pg_views WHERE schemaname ILIKE 'ted'")) {
while (rs.next()) {
log.info("View: {}.{}", rs.getString(1), rs.getString(2));
}
}
// Check for functions
log.info("\n=== FUNCTIONS ===");
try (Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(
"SELECT n.nspname, p.proname FROM pg_proc p " +
"JOIN pg_namespace n ON p.pronamespace = n.oid " +
"WHERE n.nspname ILIKE 'ted'")) {
while (rs.next()) {
log.info("Function: {}.{}", rs.getString(1), rs.getString(2));
}
}
// Check for indexes
log.info("\n=== INDEXES ===");
try (Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(
"SELECT schemaname, tablename, indexname FROM pg_indexes WHERE schemaname ILIKE 'ted'")) {
while (rs.next()) {
log.info("Index: {}.{} on table {}", rs.getString(1), rs.getString(3), rs.getString(2));
}
}
// Check column types
log.info("\n=== ENUM COLUMNS ===");
try (Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(
"SELECT table_schema, table_name, column_name, udt_name " +
"FROM information_schema.columns " +
"WHERE table_schema ILIKE 'ted' AND table_name = 'procurement_document' " +
"AND column_name IN ('notice_type', 'contract_nature', 'procedure_type', 'vectorization_status')")) {
while (rs.next()) {
log.info("Column: {}.{}.{} -> Type: {}",
rs.getString(1), rs.getString(2), rs.getString(3), rs.getString(4));
}
}
// Check for ENUM types
log.info("\n=== ENUM TYPES ===");
try (Statement stmt = conn.createStatement();
ResultSet rs = stmt.executeQuery(
"SELECT n.nspname, t.typname FROM pg_type t " +
"JOIN pg_namespace n ON t.typnamespace = n.oid " +
"WHERE n.nspname ILIKE 'ted' AND t.typtype = 'e'")) {
while (rs.next()) {
log.info("ENUM Type: {}.{}", rs.getString(1), rs.getString(2));
}
}
} catch (Exception e) {
log.error("Error: {}", e.getMessage(), e);
}
}
}

@ -0,0 +1,234 @@
# TED Procurement Document Processor Configuration
# Author: Martin.Schweitzer@procon.co.at and claude.ai
server:
port: 8888
servlet:
context-path: /api
spring:
application:
name: ted-procurement-processor
datasource:
url: jdbc:postgresql://localhost:32333/RELM
username: ${DB_USERNAME:postgres}
password: ${DB_PASSWORD:pwd}
driver-class-name: org.postgresql.Driver
hikari:
maximum-pool-size: 5
minimum-idle: 2
connection-timeout: 30000
idle-timeout: 300000
max-lifetime: 900000
leak-detection-threshold: 120000 # 2 minutes - increased to avoid false positives with batch processing
jpa:
hibernate:
ddl-auto: none
show-sql: false
open-in-view: false
properties:
hibernate:
format_sql: true
default_schema: TED
jdbc:
batch_size: 25 # Match chunk size for optimal batch processing
order_inserts: true
order_updates: true
flyway:
enabled: true
locations: classpath:db/migration
baseline-on-migrate: true
create-schemas: true
schemas: TED
default-schema: TED
# Apache Camel Configuration
camel:
springboot:
main-run-controller: true
health:
enabled: true
      # Less strict health checks for file consumers
consumers-enabled: false
# Custom Application Properties
ted:
# Directory configuration for file processing
input:
# Base directory for watching incoming TED XML files
directory: ${TED_INPUT_DIR:D:/ted.europe/extracted}
# File pattern to match (recursive scanning)
pattern: "**/*.xml"
# Move processed files to this directory
processed-directory: ${TED_PROCESSED_DIR:.processed}
# Move failed files to this directory
error-directory: ${TED_ERROR_DIR:.error}
# Polling interval in milliseconds
poll-interval: 5000
# Maximum messages per poll (reduced to prevent memory issues)
max-messages-per-poll: 10
# Schema validation configuration
schema:
# Enable/disable XSD validation
enabled: true
# Path to eForms SDK schemas (from Maven dependency or custom location)
path: classpath:schemas/maindoc/UBL-ContractNotice-2.3.xsd
# Vectorization configuration
vectorization:
# Enable/disable async vectorization
enabled: true
# Use external HTTP API instead of subprocess
use-http-api: true
# Embedding service URL
api-url: http://localhost:8001
# Model name for sentence-transformers
model-name: intfloat/multilingual-e5-large
# Vector dimensions (must match model output)
dimensions: 1024
# Batch size for vectorization
batch-size: 16
# Thread pool size for async processing
thread-pool-size: 4
# Maximum text length for vectorization (characters)
max-text-length: 8192
# HTTP connection timeout (milliseconds)
connect-timeout: 10000
# HTTP socket/read timeout (milliseconds)
socket-timeout: 60000
# Maximum retries on connection failure
max-retries: 5
# Search configuration
search:
# Default page size for search results
default-page-size: 20
# Maximum page size
max-page-size: 100
# Similarity threshold for vector search (0.0 - 1.0)
similarity-threshold: 0.7
# TED Daily Package Download configuration
download:
# Enable/disable automatic package download
enabled: true
# Base URL for TED Daily Packages
base-url: https://ted.europa.eu/packages/daily/
# Download directory for tar.gz files
download-directory: D:/ted.europe/downloads
# Extract directory for XML files
extract-directory: D:/ted.europe/extracted
# Start year for downloads
start-year: 2015
# Max consecutive 404 errors before stopping
max-consecutive-404: 4
# Polling interval (milliseconds) - 2 minutes
poll-interval: 120000
# Download timeout (milliseconds) - 5 minutes
download-timeout: 300000
# Max concurrent downloads
max-concurrent-downloads: 2
    # Delay between downloads (milliseconds) for rate limiting - 3 seconds
delay-between-downloads: 3000
# Delete tar.gz after extraction
delete-after-extraction: true
# Prioritize current year first
prioritize-current-year: false
# IMAP Mail configuration
mail:
# Enable/disable mail processing
enabled: true
# IMAP server hostname
host: host
# IMAP server port (993 for IMAPS)
port: 993
# Mail account username (email address)
username: ${MAIL_USERNAME:}
# Mail account password
password: ${MAIL_PASSWORD:}
# Use SSL/TLS connection
ssl: true
# Mail folder to read from
folder-name: INBOX
# Delete messages after processing
delete: false
# Mark messages as seen after processing (false = peek mode, don't mark as read)
seen: false
# Only process unseen messages
unseen: true
# Polling delay in milliseconds (1 minute)
delay: 60000
# Max messages per poll
max-messages-per-poll: 10
# Output directory for processed attachments
attachment-output-directory: D:/ted.europe/mail-attachments
# Enable/disable MIME file input processing
mime-input-enabled: true
# Input directory for MIME files (.eml)
mime-input-directory: D:/ted.europe/mime-input
# File pattern for MIME files (regex)
mime-input-pattern: .*\\.eml
# Polling interval for MIME input directory (milliseconds)
mime-input-poll-interval: 10000
# Solution Brief processing configuration
solution-brief:
# Enable/disable Solution Brief processing
enabled: true
# Input directory for Solution Brief PDF files
input-directory: C:/work/SolutionBrief
# Output directory for Excel result files (relative to input or absolute)
result-directory: ./result
# Number of top similar documents to include
top-k: 20
# Minimum similarity threshold (0.0-1.0)
similarity-threshold: 0.5
# Polling interval in milliseconds (30 seconds)
poll-interval: 30000
# File pattern for PDF files (regex)
file-pattern: .*\\.pdf
# Process files only once (idempotent)
idempotent: true
# Idempotent repository file path
idempotent-repository: ./solution-brief-processed.dat
# Data cleanup configuration
cleanup:
# Enable automatic cleanup of old documents
enabled: false
# Retention period in years (default: 10)
retention-years: 10
# Cron expression for cleanup schedule (default: daily at 2 AM)
cron: "0 0 2 * * *"
# Actuator endpoints
management:
endpoints:
web:
exposure:
include: health,info,metrics,camel
endpoint:
health:
show-details: when-authorized
# OpenAPI documentation
springdoc:
api-docs:
path: /v3/api-docs
swagger-ui:
path: /swagger-ui.html
operations-sorter: method
# Logging configuration
logging:
level:
at.procon.ted: INFO
at.procon.ted.camel.SolutionBriefRoute: INFO
org.apache.camel: INFO
org.hibernate.SQL: WARN
org.hibernate.type.descriptor.sql: WARN

@ -0,0 +1,240 @@
# TED Procurement Document Processor Configuration
# Author: Martin.Schweitzer@procon.co.at and claude.ai
server:
port: 8888
servlet:
context-path: /api
spring:
application:
name: ted-procurement-processor
datasource:
url: jdbc:postgresql://94.130.218.54:32333/RELM
username: ${DB_USERNAME:postgres}
password: ${DB_PASSWORD:PDmXRx0Rbk9OFOn9qO5Gm/mPCfqW8zwbZ+/YIU1lySc=}
driver-class-name: org.postgresql.Driver
hikari:
maximum-pool-size: 5
minimum-idle: 2
connection-timeout: 30000
idle-timeout: 300000
max-lifetime: 900000
leak-detection-threshold: 120000 # 2 minutes - increased to avoid false positives with batch processing
jpa:
hibernate:
ddl-auto: none
show-sql: false
open-in-view: false
properties:
hibernate:
format_sql: true
default_schema: TED
jdbc:
batch_size: 25 # Match chunk size for optimal batch processing
order_inserts: true
order_updates: true
flyway:
enabled: true
locations: classpath:db/migration
baseline-on-migrate: true
create-schemas: true
schemas: TED
default-schema: TED
# Apache Camel Configuration
camel:
springboot:
main-run-controller: true
health:
enabled: true
      # Less strict health checks for file consumers
consumers-enabled: false
# Custom Application Properties
ted:
# Directory configuration for file processing
input:
# Base directory for watching incoming TED XML files
directory: ${TED_INPUT_DIR:/ted.europe/extracted}
# File pattern to match (recursive scanning)
pattern: "**/*.xml"
# Move processed files to this directory
processed-directory: ${TED_PROCESSED_DIR:.processed}
# Move failed files to this directory
error-directory: ${TED_ERROR_DIR:.error}
# Polling interval in milliseconds
poll-interval: 5000
# Maximum messages per poll (reduced to prevent memory issues)
max-messages-per-poll: 10
# Schema validation configuration
schema:
# Enable/disable XSD validation
enabled: true
# Path to eForms SDK schemas (from Maven dependency or custom location)
path: classpath:schemas/maindoc/UBL-ContractNotice-2.3.xsd
# Vectorization configuration
vectorization:
# Enable/disable async vectorization
enabled: false
# Use external HTTP API instead of subprocess
use-http-api: true
# Embedding service URL
api-url: http://172.20.240.18:8001
# Model name for sentence-transformers
model-name: intfloat/multilingual-e5-large
# Vector dimensions (must match model output)
dimensions: 1024
# Batch size for vectorization
batch-size: 16
# Thread pool size for async processing
thread-pool-size: 4
# Maximum text length for vectorization (characters)
max-text-length: 8192
# HTTP connection timeout (milliseconds)
connect-timeout: 10000
# HTTP socket/read timeout (milliseconds)
socket-timeout: 60000
# Maximum retries on connection failure
max-retries: 5
# Search configuration
search:
# Default page size for search results
default-page-size: 20
# Maximum page size
max-page-size: 100
# Similarity threshold for vector search (0.0 - 1.0)
similarity-threshold: 0.7
# TED Daily Package Download configuration
download:
# Enable/disable automatic package download
enabled: true
# Base URL for TED Daily Packages
base-url: https://ted.europa.eu/packages/daily/
# Download directory for tar.gz files
download-directory: /ted.europe/downloads
# Extract directory for XML files
extract-directory: /ted.europe/extracted
# Start year for downloads
start-year: 2023
# Max consecutive 404 errors before stopping
max-consecutive-404: 4
# Polling interval (milliseconds) - 2 minutes
poll-interval: 120000
# Retry interval for tail NOT_FOUND packages - 6 hours
not-found-retry-interval: 21600000
# Grace period after year end before a previous-year tail 404 is treated as final
previous-year-grace-period-days: 30
# Keep retrying current-year tail 404 packages indefinitely
retry-current-year-not-found-indefinitely: true
# Download timeout (milliseconds) - 5 minutes
download-timeout: 300000
# Max concurrent downloads
max-concurrent-downloads: 2
    # Delay between downloads (milliseconds) for rate limiting - 3 seconds
delay-between-downloads: 3000
# Delete tar.gz after extraction
delete-after-extraction: true
# Prioritize current year first
prioritize-current-year: false
# IMAP Mail configuration
mail:
# Enable/disable mail processing
enabled: true
# IMAP server hostname
host: mail.mymagenta.business
# IMAP server port (993 for IMAPS)
port: 993
# Mail account username (email address)
username: archiv@procon.co.at
# Mail account password
password: ${MAIL_PASSWORD:worasigg}
# Use SSL/TLS connection
ssl: true
# Mail folder to read from
folder-name: INBOX
# Delete messages after processing
delete: false
# Mark messages as seen after processing (false = peek mode, don't mark as read)
seen: false
# Only process unseen messages
unseen: true
# Polling delay in milliseconds (1 minute)
delay: 60000
# Max messages per poll
max-messages-per-poll: 10
# Output directory for processed attachments
attachment-output-directory: D:/ted.europe/mail-attachments
# Enable/disable MIME file input processing
mime-input-enabled: true
# Input directory for MIME files (.eml)
mime-input-directory: D:/ted.europe/mime-input
# File pattern for MIME files (regex)
mime-input-pattern: .*\\.eml
# Polling interval for MIME input directory (milliseconds)
mime-input-poll-interval: 10000
# Solution Brief processing configuration
solution-brief:
# Enable/disable Solution Brief processing
enabled: true
# Input directory for Solution Brief PDF files
input-directory: C:/work/SolutionBrief
# Output directory for Excel result files (relative to input or absolute)
result-directory: ./result
# Number of top similar documents to include
top-k: 20
# Minimum similarity threshold (0.0-1.0)
similarity-threshold: 0.5
# Polling interval in milliseconds (30 seconds)
poll-interval: 30000
# File pattern for PDF files (regex)
file-pattern: .*\\.pdf
# Process files only once (idempotent)
idempotent: true
# Idempotent repository file path
idempotent-repository: ./solution-brief-processed.dat
# Data cleanup configuration
cleanup:
# Enable automatic cleanup of old documents
enabled: false
# Retention period in years (default: 10)
retention-years: 10
# Cron expression for cleanup schedule (default: daily at 2 AM)
cron: "0 0 2 * * *"
# Actuator endpoints
management:
endpoints:
web:
exposure:
include: health,info,metrics,camel
endpoint:
health:
show-details: when-authorized
# OpenAPI documentation
springdoc:
api-docs:
path: /v3/api-docs
swagger-ui:
path: /swagger-ui.html
operations-sorter: method
# Logging configuration
logging:
level:
at.procon.ted: INFO
at.procon.ted.camel.SolutionBriefRoute: INFO
org.apache.camel: INFO
org.hibernate.SQL: WARN
org.hibernate.type.descriptor.sql: WARN

@ -0,0 +1,12 @@
______ _ _ _ __ _ _
| ____| | | | |/ / | | | |
| |__ _ _ | | | ' / _ __ _____ _| | ___ __| | __ _ ___
| __| | | || | | < | '_ \ / _ \ \ /\ / / |/ _ \/ _` |/ _` |/ _ \
| | | |_| || | | . \| | | | (_) \ V V /| | __/ (_| | (_| | __/
|_| \__,_||_|_|_|\_\_| |_|\___/ \_/\_/ |_|\___|\__,_|\__, |\___|
__/ |
|___/
TED Procurement Processor :: ${spring-boot.version}
(c) procon.co.at

@ -0,0 +1,428 @@
-- TED Procurement Document Database Schema
-- Author: Martin.Schweitzer@procon.co.at and claude.ai
-- Description: PostgreSQL schema for storing EU eForms procurement notices with vector search support
-- Create TED schema if it doesn't exist
CREATE SCHEMA IF NOT EXISTS TED;
-- Set search path to use TED schema
SET search_path TO TED;
-- Enable required PostgreSQL extensions (if the role has sufficient privileges)
-- If the extensions cannot be created here, a DBA must create them manually
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS pgcrypto SCHEMA public;
EXCEPTION
WHEN insufficient_privilege THEN
RAISE NOTICE 'Skipping pgcrypto extension creation (insufficient privileges)';
WHEN duplicate_object THEN
RAISE NOTICE 'Extension pgcrypto already exists';
END
$$;
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS vector SCHEMA public;
EXCEPTION
WHEN insufficient_privilege THEN
RAISE NOTICE 'Skipping vector extension creation (insufficient privileges)';
WHEN duplicate_object THEN
RAISE NOTICE 'Extension vector already exists';
WHEN undefined_file THEN
RAISE WARNING 'Extension vector not available - install pgvector on the database server';
END
$$;
DO $$
BEGIN
CREATE EXTENSION IF NOT EXISTS pg_trgm SCHEMA public;
EXCEPTION
WHEN insufficient_privilege THEN
RAISE NOTICE 'Skipping pg_trgm extension creation (insufficient privileges)';
WHEN duplicate_object THEN
RAISE NOTICE 'Extension pg_trgm already exists';
END
$$;
-- Enum types for notice classifications
CREATE TYPE notice_type AS ENUM (
'CONTRACT_NOTICE', -- cn-standard, cn-social, etc.
'PRIOR_INFORMATION_NOTICE', -- pin-*
'CONTRACT_AWARD_NOTICE', -- can-*
'MODIFICATION_NOTICE', -- mod-*
'OTHER'
);
CREATE TYPE contract_nature AS ENUM (
'SUPPLIES',
'SERVICES',
'WORKS',
'MIXED',
'UNKNOWN'
);
CREATE TYPE procedure_type AS ENUM (
'OPEN',
'RESTRICTED',
'COMPETITIVE_DIALOGUE',
'INNOVATION_PARTNERSHIP',
'NEGOTIATED_WITHOUT_PUBLICATION',
'NEGOTIATED_WITH_PUBLICATION',
'OTHER'
);
CREATE TYPE vectorization_status AS ENUM (
'PENDING',
'PROCESSING',
'COMPLETED',
'FAILED',
'SKIPPED'
);
-- Main procurement document table
CREATE TABLE procurement_document (
-- Primary key using UUID
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Document hash for idempotent processing (SHA-256 of XML content)
document_hash VARCHAR(64) NOT NULL UNIQUE,
-- TED/eForms identifiers
notice_id VARCHAR(100), -- e.g., "9f87fd31-2c94-45cb-92ca-a3f876252149"
publication_id VARCHAR(50), -- e.g., "00786665-2025"
ojs_id VARCHAR(20), -- e.g., "229/2025" (Official Journal Supplement)
contract_folder_id VARCHAR(100), -- Contract folder grouping
-- Document metadata
notice_type notice_type NOT NULL DEFAULT 'OTHER',
notice_subtype_code VARCHAR(10), -- e.g., "16" for Contract Notice Standard
sdk_version VARCHAR(20), -- e.g., "eforms-sdk-1.13"
ubl_version VARCHAR(10), -- e.g., "2.3"
language_code VARCHAR(10), -- Primary language (e.g., "POL")
-- Timestamps from document
issue_date DATE,
issue_time TIME,
publication_date DATE,
submission_deadline TIMESTAMP WITH TIME ZONE,
-- Contracting authority information
buyer_name TEXT,
buyer_country_code VARCHAR(10), -- ISO 3166-1 alpha-3 (e.g., "POL")
buyer_city VARCHAR(255),
buyer_postal_code VARCHAR(20),
buyer_nuts_code VARCHAR(10), -- NUTS region code (e.g., "PL415")
buyer_activity_type VARCHAR(50), -- e.g., "health", "defence"
buyer_legal_type VARCHAR(50), -- e.g., "body-pl"
-- Procurement project information
project_title TEXT,
project_description TEXT,
internal_reference VARCHAR(100), -- Buyer's internal reference
contract_nature contract_nature NOT NULL DEFAULT 'UNKNOWN',
procedure_type procedure_type DEFAULT 'OTHER',
-- Classification
cpv_codes VARCHAR(100)[], -- Common Procurement Vocabulary codes
nuts_codes VARCHAR(20)[], -- All NUTS codes for delivery locations
-- Financial information (if available)
estimated_value DECIMAL(20, 2),
estimated_value_currency VARCHAR(3),
-- Lot information
total_lots INTEGER DEFAULT 0,
max_lots_awarded INTEGER,
max_lots_submitted INTEGER,
-- Legal basis
regulatory_domain VARCHAR(50), -- e.g., "32014L0024" (EU directive reference)
eu_funded BOOLEAN DEFAULT FALSE,
-- Textual representation for vectorization
-- Contains extracted and normalized text content for semantic search
text_content TEXT,
-- Original XML document stored in native PostgreSQL XML type
xml_document XML NOT NULL,
-- Vector embedding for semantic search (1024 dimensions for multilingual-e5-large)
content_vector vector(1024),
-- Vectorization tracking
vectorization_status vectorization_status DEFAULT 'PENDING',
vectorization_error TEXT,
vectorized_at TIMESTAMP WITH TIME ZONE,
-- Processing metadata
source_filename VARCHAR(500),
source_path TEXT,
file_size_bytes BIGINT,
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
processing_duration_ms INTEGER
);
-- Table for storing lot details (denormalized for search performance)
CREATE TABLE procurement_lot (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
document_id UUID NOT NULL REFERENCES procurement_document(id) ON DELETE CASCADE,
lot_id VARCHAR(50) NOT NULL, -- e.g., "LOT-0001"
internal_id VARCHAR(100), -- Buyer's internal lot reference
title TEXT,
description TEXT,
cpv_codes VARCHAR(100)[],
nuts_codes VARCHAR(20)[],
estimated_value DECIMAL(20, 2),
estimated_value_currency VARCHAR(3),
duration_value INTEGER,
duration_unit VARCHAR(20), -- e.g., "MONTH", "DAY"
submission_deadline TIMESTAMP WITH TIME ZONE,
eu_funded BOOLEAN DEFAULT FALSE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(document_id, lot_id)
);
-- Table for organizations mentioned in notices
CREATE TABLE organization (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
document_id UUID NOT NULL REFERENCES procurement_document(id) ON DELETE CASCADE,
org_reference VARCHAR(100), -- Internal reference (e.g., "ORG-0001")
role VARCHAR(100), -- e.g., "buyer", "review-body", "ted-esen"
name TEXT, -- Full organization name
company_id TEXT, -- Tax/registration ID (can be very long)
country_code VARCHAR(10),
city TEXT, -- City name (can be extremely long)
postal_code TEXT, -- Address/postal code (can contain full addresses)
street_name TEXT, -- Street address
nuts_code VARCHAR(10),
website_uri TEXT,
email VARCHAR(255),
phone VARCHAR(100), -- International phone numbers
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
UNIQUE(document_id, org_reference)
);
-- Processing log for tracking and debugging
CREATE TABLE processing_log (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
document_id UUID REFERENCES procurement_document(id) ON DELETE SET NULL,
document_hash VARCHAR(64),
event_type VARCHAR(50) NOT NULL, -- e.g., "RECEIVED", "VALIDATED", "PARSED", "STORED", "VECTORIZED", "ERROR"
event_status VARCHAR(20) NOT NULL, -- e.g., "SUCCESS", "FAILURE", "SKIPPED"
message TEXT,
error_details TEXT,
source_filename VARCHAR(500),
duration_ms INTEGER,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- TED Daily Package Tracking Table
CREATE TABLE ted_daily_package (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Package identifier (YYYYSSSSS format, e.g., 202400001)
package_identifier VARCHAR(20) NOT NULL UNIQUE,
-- Year and serial number
year INTEGER NOT NULL,
serial_number INTEGER NOT NULL,
-- Download information
download_url VARCHAR(500) NOT NULL,
file_hash VARCHAR(64), -- SHA-256 hash for idempotency
-- Processing statistics
xml_file_count INTEGER,
processed_count INTEGER DEFAULT 0,
failed_count INTEGER DEFAULT 0,
-- Status
download_status VARCHAR(30) NOT NULL DEFAULT 'PENDING', -- PENDING, DOWNLOADING, DOWNLOADED, PROCESSING, COMPLETED, FAILED, NOT_FOUND
error_message TEXT,
-- Timestamps
downloaded_at TIMESTAMP WITH TIME ZONE,
processed_at TIMESTAMP WITH TIME ZONE,
-- Performance metrics
download_duration_ms BIGINT,
processing_duration_ms BIGINT,
-- Audit fields
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
-- Unique constraint on year + serial
UNIQUE(year, serial_number)
);
-- Indexes for efficient querying
-- Hash lookup for idempotent processing (most critical)
CREATE INDEX idx_doc_hash ON procurement_document(document_hash);
-- Publication/notice ID lookups
CREATE INDEX idx_doc_publication_id ON procurement_document(publication_id);
CREATE INDEX idx_doc_notice_id ON procurement_document(notice_id);
-- Date range queries
CREATE INDEX idx_doc_publication_date ON procurement_document(publication_date);
CREATE INDEX idx_doc_issue_date ON procurement_document(issue_date);
CREATE INDEX idx_doc_submission_deadline ON procurement_document(submission_deadline);
-- Geographic searches
CREATE INDEX idx_doc_buyer_country ON procurement_document(buyer_country_code);
CREATE INDEX idx_doc_buyer_nuts ON procurement_document(buyer_nuts_code);
CREATE INDEX idx_doc_nuts_codes ON procurement_document USING GIN(nuts_codes);
-- Classification searches
CREATE INDEX idx_doc_notice_type ON procurement_document(notice_type);
CREATE INDEX idx_doc_contract_nature ON procurement_document(contract_nature);
CREATE INDEX idx_doc_procedure_type ON procurement_document(procedure_type);
CREATE INDEX idx_doc_cpv_codes ON procurement_document USING GIN(cpv_codes);
-- Full-text search on textual content
CREATE INDEX idx_doc_text_content_trgm ON procurement_document USING GIN(text_content gin_trgm_ops);
-- Vector similarity search using IVFFlat index (efficient for approximate nearest neighbor)
-- Lists parameter: sqrt(number_of_vectors) for optimal performance
CREATE INDEX idx_doc_vector ON procurement_document USING ivfflat (content_vector vector_cosine_ops) WITH (lists = 100);
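-- Note (illustrative, not part of the original schema): IVFFlat is an approximate index,
-- so recall can be tuned per session by probing more lists at query time, e.g.:
--   SET ivfflat.probes = 10;  -- default is 1; higher values improve recall at the cost of speed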
-- Vectorization status for async processing
CREATE INDEX idx_doc_vectorization_status ON procurement_document(vectorization_status) WHERE vectorization_status IN ('PENDING', 'PROCESSING');
-- Lot indexes
CREATE INDEX idx_lot_document ON procurement_lot(document_id);
CREATE INDEX idx_lot_cpv_codes ON procurement_lot USING GIN(cpv_codes);
-- Organization indexes
CREATE INDEX idx_org_document ON organization(document_id);
CREATE INDEX idx_org_country ON organization(country_code);
-- Processing log indexes
CREATE INDEX idx_log_document ON processing_log(document_id);
CREATE INDEX idx_log_created ON processing_log(created_at);
CREATE INDEX idx_log_event_type ON processing_log(event_type);
-- TED daily package indexes
CREATE INDEX idx_package_identifier ON ted_daily_package(package_identifier);
CREATE INDEX idx_package_year_serial ON ted_daily_package(year, serial_number);
CREATE INDEX idx_package_status ON ted_daily_package(download_status);
CREATE INDEX idx_package_downloaded_at ON ted_daily_package(downloaded_at);
-- Trigger to update updated_at timestamp
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_procurement_document_updated_at
BEFORE UPDATE ON procurement_document
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_ted_daily_package_updated_at
BEFORE UPDATE ON ted_daily_package
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- Helper function for semantic search with filtering
CREATE OR REPLACE FUNCTION search_documents_semantic(
query_vector vector(1024),
similarity_threshold FLOAT DEFAULT 0.7,
country_filter VARCHAR(10) DEFAULT NULL,
notice_type_filter notice_type DEFAULT NULL,
contract_nature_filter contract_nature DEFAULT NULL,
cpv_prefix VARCHAR(20) DEFAULT NULL,
date_from DATE DEFAULT NULL,
date_to DATE DEFAULT NULL,
result_limit INTEGER DEFAULT 20
)
RETURNS TABLE (
id UUID,
publication_id VARCHAR(50),
project_title TEXT,
buyer_name TEXT,
buyer_country_code VARCHAR(10),
publication_date DATE,
similarity FLOAT
) AS $$
BEGIN
RETURN QUERY
SELECT
pd.id,
pd.publication_id,
pd.project_title,
pd.buyer_name,
pd.buyer_country_code,
pd.publication_date,
1 - (pd.content_vector <=> query_vector) AS similarity
FROM procurement_document pd
WHERE
pd.content_vector IS NOT NULL
AND (1 - (pd.content_vector <=> query_vector)) >= similarity_threshold
AND (country_filter IS NULL OR pd.buyer_country_code = country_filter)
AND (notice_type_filter IS NULL OR pd.notice_type = notice_type_filter)
AND (contract_nature_filter IS NULL OR pd.contract_nature = contract_nature_filter)
AND (cpv_prefix IS NULL OR EXISTS (
SELECT 1 FROM unnest(pd.cpv_codes) code WHERE code LIKE cpv_prefix || '%'
))
AND (date_from IS NULL OR pd.publication_date >= date_from)
AND (date_to IS NULL OR pd.publication_date <= date_to)
ORDER BY similarity DESC
LIMIT result_limit;
END;
$$ LANGUAGE plpgsql;
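-- Example usage (illustrative only; kept commented out so the migration stays side-effect free):
-- SELECT * FROM search_documents_semantic(
--     '[0.1, 0.2, ...]'::vector(1024),   -- query embedding from the embedding service
--     0.7,                -- similarity threshold
--     'POL',              -- buyer country filter
--     'CONTRACT_NOTICE',  -- notice type filter
--     NULL, NULL,         -- contract nature filter, CPV prefix
--     NULL, NULL,         -- publication date range
--     10                  -- result limit
-- );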
-- Statistics view for monitoring
CREATE VIEW document_statistics AS
SELECT
COUNT(*) AS total_documents,
COUNT(*) FILTER (WHERE vectorization_status = 'COMPLETED') AS vectorized_documents,
COUNT(*) FILTER (WHERE vectorization_status = 'PENDING') AS pending_vectorization,
COUNT(*) FILTER (WHERE vectorization_status = 'FAILED') AS failed_vectorization,
COUNT(DISTINCT buyer_country_code) AS unique_countries,
COUNT(DISTINCT notice_type) AS notice_types,
MIN(publication_date) AS earliest_publication,
MAX(publication_date) AS latest_publication,
AVG(total_lots) AS avg_lots_per_notice,
SUM(total_lots) AS total_lots
FROM procurement_document;
COMMENT ON TABLE procurement_document IS 'Main table storing EU eForms procurement notices from TED';
COMMENT ON COLUMN procurement_document.document_hash IS 'SHA-256 hash of XML content for idempotent processing';
COMMENT ON COLUMN procurement_document.content_vector IS '1024-dimensional vector from multilingual-e5-large model';
COMMENT ON COLUMN procurement_document.text_content IS 'Normalized text for vectorization and full-text search';
COMMENT ON TABLE ted_daily_package IS 'Tracking table for TED daily package downloads';
COMMENT ON COLUMN ted_daily_package.package_identifier IS 'Unique package identifier in YYYYSSSSS format';
COMMENT ON COLUMN ted_daily_package.file_hash IS 'SHA-256 hash for idempotency checking';
COMMENT ON COLUMN ted_daily_package.download_status IS 'Current status: PENDING, DOWNLOADING, DOWNLOADED, PROCESSING, COMPLETED, FAILED, NOT_FOUND';
COMMENT ON COLUMN organization.postal_code IS 'Postal code or ZIP code. Stored as TEXT to handle multi-line addresses from TED data.';

@ -0,0 +1,28 @@
-- Extend VARCHAR fields in organization table to handle longer values from TED data
-- Author: Martin.Schweitzer@procon.co.at and claude.ai
-- Extend postal_code (was VARCHAR(50), sometimes contains full addresses)
ALTER TABLE ted.organization ALTER COLUMN postal_code TYPE TEXT;
-- Extend street_name (was VARCHAR(50), sometimes very long)
ALTER TABLE ted.organization ALTER COLUMN street_name TYPE TEXT;
-- Extend city (was VARCHAR(100), can be extremely long - some have >255 chars)
ALTER TABLE ted.organization ALTER COLUMN city TYPE TEXT;
-- Extend phone (was VARCHAR(50), international numbers can be longer)
ALTER TABLE ted.organization ALTER COLUMN phone TYPE VARCHAR(100);
-- Extend org_reference (was VARCHAR(50), sometimes longer internal references)
ALTER TABLE ted.organization ALTER COLUMN org_reference TYPE VARCHAR(100);
-- Extend role (was VARCHAR(50), enum-like field but allow more space)
ALTER TABLE ted.organization ALTER COLUMN role TYPE VARCHAR(100);
-- Extend company_id (was VARCHAR(100), can contain very long registration info)
-- Example: 'KRS pod numerem: 0000070678, REGON: 016134981, NIP: 9521822413' (67 chars)
ALTER TABLE ted.organization ALTER COLUMN company_id TYPE TEXT;
-- name was already TEXT in V1; re-assert the type here as a safeguard
-- Contains full organization names, which can be very long
ALTER TABLE ted.organization ALTER COLUMN name TYPE TEXT;

@ -0,0 +1,72 @@
-- Migration: V3__add_processed_attachment_table.sql
-- Author: Martin.Schweitzer@procon.co.at and claude.ai
-- Description: Add table for tracking processed mail attachments with idempotency support
-- Create processed_attachment table for tracking mail attachments
CREATE TABLE IF NOT EXISTS ted.processed_attachment (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
-- Content hash for idempotent processing (SHA-256, 64 hex chars)
content_hash VARCHAR(64) NOT NULL UNIQUE,
-- File metadata
original_filename VARCHAR(500) NOT NULL,
file_type VARCHAR(50),
content_type VARCHAR(255),
file_size BIGINT,
-- Processing status
processing_status VARCHAR(20) NOT NULL DEFAULT 'PENDING',
-- Extracted content
extracted_text TEXT,
-- Storage path
saved_path VARCHAR(1000),
-- Source email metadata
mail_subject VARCHAR(500),
mail_from VARCHAR(500),
-- Parent reference (for files extracted from ZIP)
parent_hash VARCHAR(64),
-- Error handling
error_message TEXT,
-- Child count (for ZIP files)
child_count INTEGER,
-- Timestamps
received_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
processed_at TIMESTAMP,
-- Constraints
CONSTRAINT chk_processing_status CHECK (processing_status IN ('PENDING', 'PROCESSING', 'COMPLETED', 'FAILED', 'DUPLICATE'))
);
-- Index on content_hash for fast idempotency lookups
CREATE UNIQUE INDEX IF NOT EXISTS idx_processed_attachment_hash
ON ted.processed_attachment(content_hash);
-- Index on processing_status for finding pending/failed items
CREATE INDEX IF NOT EXISTS idx_processed_attachment_status
ON ted.processed_attachment(processing_status);
-- Index on file_type for filtering by type
CREATE INDEX IF NOT EXISTS idx_processed_attachment_type
ON ted.processed_attachment(file_type);
-- Index on parent_hash for finding children of ZIP files
CREATE INDEX IF NOT EXISTS idx_processed_attachment_parent
ON ted.processed_attachment(parent_hash);
-- Index on received_at for chronological queries
CREATE INDEX IF NOT EXISTS idx_processed_attachment_received
ON ted.processed_attachment(received_at DESC);
-- Comment on table
COMMENT ON TABLE ted.processed_attachment IS 'Tracks processed mail attachments with idempotency via content hash';
COMMENT ON COLUMN ted.processed_attachment.content_hash IS 'SHA-256 hash of file content for duplicate detection';
COMMENT ON COLUMN ted.processed_attachment.parent_hash IS 'Reference to parent attachment (for files extracted from ZIP)';
COMMENT ON COLUMN ted.processed_attachment.extracted_text IS 'Text content extracted from PDF and other document types';

@ -0,0 +1,199 @@
package at.procon.ted.service;
import at.procon.ted.model.entity.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.DisplayName;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import static org.junit.jupiter.api.Assertions.*;
/**
* Unit tests for XmlParserService.
*
* @author Martin.Schweitzer@procon.co.at and claude.ai
*/
class XmlParserServiceTest {
private XmlParserService parserService;
@BeforeEach
void setUp() {
parserService = new XmlParserService();
}
@Test
@DisplayName("Should parse contract notice XML and extract basic metadata")
void testParseContractNotice() {
// Sample minimal eForms XML
String xml = """
<?xml version="1.0" encoding="UTF-8"?>
<ContractNotice xmlns="urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
<cbc:UBLVersionID>2.3</cbc:UBLVersionID>
<cbc:CustomizationID>eforms-sdk-1.13</cbc:CustomizationID>
<cbc:ID schemeName="notice-id">test-notice-123</cbc:ID>
<cbc:IssueDate>2025-01-15</cbc:IssueDate>
<cbc:NoticeTypeCode listName="competition">cn-standard</cbc:NoticeTypeCode>
<cbc:NoticeLanguageCode>ENG</cbc:NoticeLanguageCode>
<cac:ProcurementProject>
<cbc:Name languageID="ENG">Test Procurement Project</cbc:Name>
<cbc:Description languageID="ENG">This is a test description</cbc:Description>
<cbc:ProcurementTypeCode listName="contract-nature">supplies</cbc:ProcurementTypeCode>
</cac:ProcurementProject>
<cac:TenderingProcess>
<cbc:ProcedureCode listName="procurement-procedure-type">open</cbc:ProcedureCode>
</cac:TenderingProcess>
</ContractNotice>
""";
ProcurementDocument document = parserService.parseDocument(xml);
assertNotNull(document);
assertEquals("2.3", document.getUblVersion());
assertEquals("eforms-sdk-1.13", document.getSdkVersion());
assertEquals("test-notice-123", document.getNoticeId());
assertEquals(NoticeType.CONTRACT_NOTICE, document.getNoticeType());
assertEquals("ENG", document.getLanguageCode());
assertEquals("Test Procurement Project", document.getProjectTitle());
assertEquals("This is a test description", document.getProjectDescription());
assertEquals(ContractNature.SUPPLIES, document.getContractNature());
assertEquals(ProcedureType.OPEN, document.getProcedureType());
}
@Test
@DisplayName("Should extract CPV codes from procurement project")
void testParseCpvCodes() {
String xml = """
<?xml version="1.0" encoding="UTF-8"?>
<ContractNotice xmlns="urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
<cbc:UBLVersionID>2.3</cbc:UBLVersionID>
<cac:ProcurementProject>
<cbc:Name languageID="ENG">Medical Supplies</cbc:Name>
<cbc:ProcurementTypeCode>supplies</cbc:ProcurementTypeCode>
<cac:MainCommodityClassification>
<cbc:ItemClassificationCode listName="cpv">33140000</cbc:ItemClassificationCode>
</cac:MainCommodityClassification>
<cac:AdditionalCommodityClassification>
<cbc:ItemClassificationCode listName="cpv">33141000</cbc:ItemClassificationCode>
</cac:AdditionalCommodityClassification>
</cac:ProcurementProject>
</ContractNotice>
""";
ProcurementDocument document = parserService.parseDocument(xml);
assertNotNull(document.getCpvCodes());
assertEquals(2, document.getCpvCodes().length);
assertEquals("33140000", document.getCpvCodes()[0]);
assertEquals("33141000", document.getCpvCodes()[1]);
}
@Test
@DisplayName("Should generate text content for vectorization")
void testTextContentGeneration() {
String xml = """
<?xml version="1.0" encoding="UTF-8"?>
<ContractNotice xmlns="urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
<cbc:UBLVersionID>2.3</cbc:UBLVersionID>
<cac:ProcurementProject>
<cbc:Name languageID="ENG">Hospital Equipment Procurement</cbc:Name>
<cbc:Description languageID="ENG">Procurement of medical imaging equipment</cbc:Description>
<cbc:ProcurementTypeCode>supplies</cbc:ProcurementTypeCode>
</cac:ProcurementProject>
</ContractNotice>
""";
ProcurementDocument document = parserService.parseDocument(xml);
assertNotNull(document.getTextContent());
assertTrue(document.getTextContent().contains("Hospital Equipment Procurement"));
assertTrue(document.getTextContent().contains("medical imaging equipment"));
}
@Test
@DisplayName("Should handle missing optional fields gracefully")
void testMissingOptionalFields() {
String xml = """
<?xml version="1.0" encoding="UTF-8"?>
<ContractNotice xmlns="urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.3</cbc:UBLVersionID>
</ContractNotice>
""";
ProcurementDocument document = parserService.parseDocument(xml);
assertNotNull(document);
assertEquals("2.3", document.getUblVersion());
assertNull(document.getProjectTitle());
assertNull(document.getPublicationId());
assertEquals(NoticeType.OTHER, document.getNoticeType());
}
@Test
@DisplayName("Should throw exception for invalid XML")
void testInvalidXml() {
String invalidXml = "This is not XML";
assertThrows(XmlParserService.XmlParsingException.class, () -> {
parserService.parseDocument(invalidXml);
});
}
@Test
@DisplayName("Should store original XML document")
void testXmlDocumentStorage() {
String xml = """
<?xml version="1.0" encoding="UTF-8"?>
<ContractNotice xmlns="urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2">
<cbc:UBLVersionID>2.3</cbc:UBLVersionID>
</ContractNotice>
""";
ProcurementDocument document = parserService.parseDocument(xml);
assertNotNull(document.getXmlDocument());
assertEquals(xml, document.getXmlDocument());
}
@Test
@DisplayName("Should map contract nature correctly")
void testContractNatureMapping() {
String[] natures = {"supplies", "services", "works", "mixed", "unknown"};
ContractNature[] expected = {
ContractNature.SUPPLIES,
ContractNature.SERVICES,
ContractNature.WORKS,
ContractNature.MIXED,
ContractNature.UNKNOWN
};
for (int i = 0; i < natures.length; i++) {
String xml = String.format("""
<?xml version="1.0" encoding="UTF-8"?>
<ContractNotice xmlns="urn:oasis:names:specification:ubl:schema:xsd:ContractNotice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2">
<cbc:UBLVersionID>2.3</cbc:UBLVersionID>
<cac:ProcurementProject>
<cbc:ProcurementTypeCode>%s</cbc:ProcurementTypeCode>
</cac:ProcurementProject>
</ContractNotice>
""", natures[i]);
ProcurementDocument document = parserService.parseDocument(xml);
assertEquals(expected[i], document.getContractNature(),
"Failed for nature: " + natures[i]);
}
}
}

@ -0,0 +1,9 @@
@echo off
REM TED Procurement Processor Startup Script
REM Increases Java heap size to 8GB
echo Starting TED Procurement Processor with 8GB heap...
java -Xms4g -Xmx8g -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -jar target\ted-procurement-processor-1.0.0-SNAPSHOT.jar
pause

@ -0,0 +1,7 @@
#!/bin/bash
# TED Procurement Processor Startup Script
# Increases Java heap size to 4GB
echo "Starting TED Procurement Processor with 4GB heap..."
java -Xms2g -Xmx4g -jar target/ted-procurement-processor-1.0.0-SNAPSHOT.jar

Binary file not shown.