76 lines
1.8 KiB
Python
76 lines
1.8 KiB
Python
from __future__ import annotations
|
|
|
|
from enum import Enum
|
|
from typing import Any
|
|
from uuid import UUID
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
class ClusteringAlgorithm(str, Enum):
    """Closed set of clustering algorithm names.

    The ``str`` mixin makes every member an actual ``str`` instance that
    compares equal to its value, and each value is spelled exactly like the
    member name.
    """

    KMEANS = "KMEANS"
    MINI_BATCH_KMEANS = "MINI_BATCH_KMEANS"
    DBSCAN = "DBSCAN"
    HDBSCAN = "HDBSCAN"
    AGGLOMERATIVE = "AGGLOMERATIVE"
|
|
|
|
|
|
class ReductionMethod(str, Enum):
    """Closed set of reduction method names.

    The ``str`` mixin makes every member an actual ``str`` instance that
    compares equal to its value, and each value is spelled exactly like the
    member name.
    """

    # Presumably means "skip the reduction step" -- confirm with the consumer.
    NONE = "NONE"
    PCA = "PCA"
    UMAP = "UMAP"
|
|
|
|
|
|
class PythonClusteringItem(BaseModel):
    """One entry of a clustering request's ``items`` list.

    ``embeddingId`` and ``vector`` are required; ``documentId`` and
    ``representationId`` are optional and default to ``None``.
    """

    embeddingId: UUID
    # Optional identifiers; presumably they tie the embedding back to the
    # document / representation it came from -- confirm with the producer.
    documentId: UUID | None = None
    representationId: UUID | None = None
    # List of floats; no length constraint is declared on this model.
    vector: list[float]
|
|
|
|
|
|
class PythonClusteringRequest(BaseModel):
    """Clustering request: algorithm, its parameters, reduction and items.

    ``algorithm`` and ``items`` are required. ``parameters`` defaults to a
    fresh empty dict per instance, ``reductionMethod`` to ``NONE`` and
    ``reductionDimensions`` to ``None``.

    Because ``use_enum_values=True`` is set, enum-typed fields hold the
    enum's plain string value on the instance, not the ``Enum`` member.
    """

    algorithm: ClusteringAlgorithm
    # Free-form options; keys and value types are not constrained here.
    parameters: dict[str, Any] = Field(default_factory=dict)
    # pydantic applies ``use_enum_values`` during validation and does not
    # validate defaults unless asked to. Without ``validate_default=True`` an
    # omitted field would keep the ``ReductionMethod`` member while a supplied
    # one becomes a plain ``str``.
    reductionMethod: ReductionMethod = Field(
        default=ReductionMethod.NONE,
        validate_default=True,
    )
    reductionDimensions: int | None = None
    items: list[PythonClusteringItem]

    model_config = ConfigDict(use_enum_values=True)
|
|
|
|
|
|
class PythonRunExecutionRequest(BaseModel):
    """Model whose only field is the required ``runId`` UUID."""

    runId: UUID
|
|
|
|
|
|
class PythonCluster(BaseModel):
    """Per-cluster entry of a clustering response's ``clusters`` list.

    ``clusterLabel`` and ``itemCount`` are required integers;
    ``noiseCluster`` defaults to ``False``.
    """

    clusterLabel: int
    itemCount: int
    # Presumably marks the cluster that collects noise points -- confirm
    # with the code that builds the response.
    noiseCluster: bool = False
|
|
|
|
|
|
class PythonAssignment(BaseModel):
    """Per-item entry of a clustering response's ``assignments`` list.

    ``embeddingId`` and ``clusterLabel`` are required. The remaining
    identifiers and both scores default to ``None``; ``noise`` defaults to
    ``False``.
    """

    embeddingId: UUID
    documentId: UUID | None = None
    representationId: UUID | None = None
    clusterLabel: int
    # Optional metrics; which algorithms populate them is not visible in this
    # file -- confirm with the code that builds assignments.
    distanceToCentroid: float | None = None
    membershipScore: float | None = None
    noise: bool = False
|
|
|
|
|
|
class PythonClusteringResponse(BaseModel):
    """Clustering result: cluster entries, item assignments, noise count.

    All three fields are required; none has a default.
    """

    clusters: list[PythonCluster]
    assignments: list[PythonAssignment]
    # Not cross-checked against ``assignments`` by this model.
    noiseCount: int
|
|
|
|
|
|
class RunMetadata(BaseModel):
    """Configuration recorded for a run, keyed by ``runId``.

    ``runId`` and ``algorithm`` are required. ``parameters`` and
    ``selection`` each default to a fresh empty dict per instance,
    ``reductionMethod`` to ``NONE`` and ``reductionDimensions`` to ``None``.

    Because ``use_enum_values=True`` is set, enum-typed fields hold the
    enum's plain string value on the instance, not the ``Enum`` member.
    """

    runId: UUID
    algorithm: ClusteringAlgorithm
    # Free-form options; keys and value types are not constrained here.
    parameters: dict[str, Any] = Field(default_factory=dict)
    # pydantic applies ``use_enum_values`` during validation and does not
    # validate defaults unless asked to. Without ``validate_default=True`` an
    # omitted field would keep the ``ReductionMethod`` member while a supplied
    # one becomes a plain ``str``.
    reductionMethod: ReductionMethod = Field(
        default=ReductionMethod.NONE,
        validate_default=True,
    )
    reductionDimensions: int | None = None
    # Free-form mapping; its schema is not visible in this file.
    selection: dict[str, Any] = Field(default_factory=dict)

    model_config = ConfigDict(use_enum_values=True)
|