Source code for autorag.schemas

"""Pydantic request/response and entity models for the RAG pipeline.

These models double as the on-the-wire schema for the HTTP API
(:mod:`autorag.api`) and as the in-process value types passed between
the embedder, store, retriever, and generator.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

from pydantic import BaseModel, Field


class Document(BaseModel):
    """A source document as ingested, prior to being split into chunks.

    ``metadata`` is optional; when omitted, each instance receives its own
    new, empty dictionary.
    """

    # Identifier of this document.
    id: str
    # Where the document came from (presumably a path or URL -- confirm
    # against the ingest code).
    source: str
    # Full text content of the document.
    text: str
    # Free-form key/value annotations; built per instance by the factory so
    # instances never share one dict object.
    metadata: dict[str, Any] = Field(default_factory=dict)
class Chunk(BaseModel):
    """A retrieval-sized slice of a :class:`Document`.

    A chunk starts out with ``embedding`` set to ``None``. The vector is
    attached later by :class:`~autorag.embed.Embedder`; until that has
    happened the field stays ``None``.
    """

    # Identifier of this chunk.
    id: str
    # Identifier linking back to a document (presumably ``Document.id`` of
    # the parent -- confirm against the chunker).
    doc_id: str
    # Text content of the chunk.
    text: str
    # Free-form key/value annotations; a new empty dict per instance when
    # not supplied.
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Embedding vector, or ``None`` while the chunk is not yet embedded.
    embedding: list[float] | None = None
class Retrieved(BaseModel):
    """One vector-store search hit: a chunk paired with its similarity score."""

    # The chunk that matched the search.
    chunk: Chunk
    # Similarity score reported for ``chunk`` by the search.
    score: float
class QueryRequest(BaseModel):
    """Payload accepted by ``POST /query``."""

    # The question to be answered.
    question: str
    # Optional result-count setting; defaults to ``None`` when omitted.
    # NOTE(review): how ``None`` is resolved to an actual count is decided
    # by the consumer and is not visible in this module.
    top_k: int | None = None
class QueryResponse(BaseModel):
    """Payload returned by ``POST /query``.

    Carries the generated answer together with the sources it came with.
    """

    # The generated answer text.
    answer: str
    # The scored chunks reported as sources for ``answer``.
    sources: list[Retrieved]
class IngestRequest(BaseModel):
    """Payload accepted by ``POST /ingest``.

    Lists the filesystem paths that should be ingested.
    """

    # Paths to ingest; each entry may be a plain string or a ``Path``.
    paths: list[str | Path]
class IngestResponse(BaseModel):
    """Payload returned by ``POST /ingest``.

    Reports how many documents and how many chunks the request produced.
    """

    # Count of documents ingested.
    ingested: int
    # Count of chunks.
    chunks: int