"""Pydantic schemas for clustering requests, responses, and run metadata."""

from __future__ import annotations

from enum import Enum
from typing import Any
from uuid import UUID

from pydantic import BaseModel, ConfigDict, Field


class ClusteringAlgorithm(str, Enum):
    """Identifiers of the clustering algorithms a request can select."""

    KMEANS = "KMEANS"
    MINI_BATCH_KMEANS = "MINI_BATCH_KMEANS"
    DBSCAN = "DBSCAN"
    HDBSCAN = "HDBSCAN"
    AGGLOMERATIVE = "AGGLOMERATIVE"


class ReductionMethod(str, Enum):
    """Identifiers of the reduction step; ``NONE`` is the models' default."""

    NONE = "NONE"
    PCA = "PCA"
    UMAP = "UMAP"


class PythonClusteringItem(BaseModel):
    """One item to cluster: an embedding id, optional owner ids, and a vector."""

    embeddingId: UUID
    documentId: UUID | None = None
    representationId: UUID | None = None
    vector: list[float]


class PythonClusteringRequest(BaseModel):
    """Clustering request: algorithm, parameters, reduction settings, items.

    ``use_enum_values=True`` makes validated enum fields hold their plain
    string values instead of enum members.
    """

    model_config = ConfigDict(use_enum_values=True)

    algorithm: ClusteringAlgorithm
    parameters: dict[str, Any] = Field(default_factory=dict)
    # Pydantic does not validate defaults unless asked. Without
    # validate_default the default would remain an enum member while an
    # explicitly supplied value is stored as a plain string.
    reductionMethod: ReductionMethod = Field(
        default=ReductionMethod.NONE,
        validate_default=True,
    )
    reductionDimensions: int | None = None
    items: list[PythonClusteringItem]


class PythonRunExecutionRequest(BaseModel):
    """Request that carries only the id of a run."""

    runId: UUID


class PythonCluster(BaseModel):
    """Summary of one cluster: its label, item count, and noise flag."""

    clusterLabel: int
    itemCount: int
    noiseCluster: bool = False


class PythonAssignment(BaseModel):
    """Assignment of one embedding to a cluster label, with optional scores."""

    embeddingId: UUID
    documentId: UUID | None = None
    representationId: UUID | None = None
    clusterLabel: int
    distanceToCentroid: float | None = None
    membershipScore: float | None = None
    noise: bool = False


class PythonClusteringResponse(BaseModel):
    """Clustering result: cluster summaries, assignments, and a noise count."""

    clusters: list[PythonCluster]
    assignments: list[PythonAssignment]
    noiseCount: int


class RunMetadata(BaseModel):
    """Configuration recorded for a run: algorithm, parameters, reduction, selection.

    ``use_enum_values=True`` makes validated enum fields hold their plain
    string values instead of enum members.
    """

    model_config = ConfigDict(use_enum_values=True)

    runId: UUID
    algorithm: ClusteringAlgorithm
    parameters: dict[str, Any] = Field(default_factory=dict)
    # Validate the default so it is stored as the string "NONE", the same
    # representation an explicitly supplied value gets under use_enum_values.
    reductionMethod: ReductionMethod = Field(
        default=ReductionMethod.NONE,
        validate_default=True,
    )
    reductionDimensions: int | None = None
    selection: dict[str, Any] = Field(default_factory=dict)