# Source code for autorag.schemas
"""Pydantic request/response and entity models for the RAG pipeline.
These models double as the on-the-wire schema for the HTTP API
(:mod:`autorag.api`) and as the in-process value types passed between
the embedder, store, retriever, and generator.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any
from pydantic import BaseModel, Field
[docs]
class Document(BaseModel):
    """A single source document as ingested, prior to being chunked."""

    id: str
    source: str
    text: str
    # default_factory builds a fresh empty dict whenever metadata is omitted.
    metadata: dict[str, Any] = Field(default_factory=dict)
[docs]
class Chunk(BaseModel):
    """One retrieval-sized slice of a :class:`Document`.

    The ``embedding`` field starts out as ``None``; it is populated by
    :class:`~autorag.embed.Embedder` once the chunk has been embedded.
    """

    id: str
    doc_id: str
    text: str
    # default_factory builds a fresh empty dict whenever metadata is omitted.
    metadata: dict[str, Any] = Field(default_factory=dict)
    embedding: list[float] | None = Field(default=None)
[docs]
class Retrieved(BaseModel):
    """Pairs a :class:`Chunk` with the similarity score a vector-store search gave it."""

    chunk: Chunk
    score: float
[docs]
class QueryRequest(BaseModel):
    """Payload accepted by ``POST /query``."""

    question: str
    # Optional; defaults to None when the client omits it.
    # NOTE(review): no lower bound is enforced on top_k in this model --
    # confirm the consumer handles zero or negative values.
    top_k: int | None = Field(default=None)
[docs]
class QueryResponse(BaseModel):
    """Payload returned by ``POST /query``: the generated answer and its sources."""

    answer: str
    sources: list[Retrieved]
[docs]
class IngestRequest(BaseModel):
    """Payload accepted by ``POST /ingest``: the filesystem paths to ingest."""

    # Each entry may be given either as a plain string or as a Path.
    paths: list[str | Path]
[docs]
class IngestResponse(BaseModel):
    """Payload returned by ``POST /ingest``: document and chunk counts."""

    # presumably the number of documents ingested -- verify against the API handler
    ingested: int
    # presumably the number of chunks produced -- verify against the API handler
    chunks: int