# Source code for autorag.store

"""Vector-store façade for the RAG pipeline.

Defines the :class:`VectorStore` interface that
:class:`~autorag.retrieve.Retriever` calls into. Concrete backends
(in-memory, Chroma, etc.) implement the four primitives; the topic-side
Chroma collection used by the ``/viz`` page lives separately in
:mod:`autorag.chroma_store`.
"""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from autorag.schemas import Chunk, Retrieved



# [docs]
class VectorStore:
    """Abstract embedding-vector store.

    Subclasses provide a backend-specific implementation of each method.
    The interface is intentionally thin: the orchestration layer
    (:class:`~autorag.core.AutoRAG`) handles batching, persistence
    cadence, and tenant separation.

    The base class is a plain class (it does not use :mod:`abc`), so it
    can still be instantiated; each of the four primitives raises
    :class:`NotImplementedError` until a subclass overrides it.
    """

    def add(self, chunks: list[Chunk]) -> None:
        """Insert chunks (with populated ``embedding`` fields) into the store.

        :param chunks: Chunks to insert.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        # Name the concrete class so a missing override is easy to locate.
        raise NotImplementedError(
            f"{type(self).__name__} does not implement add()"
        )

    def search(self, query_embedding: list[float], top_k: int) -> list[Retrieved]:
        """Return the ``top_k`` nearest chunks to ``query_embedding``.

        :param query_embedding: Embedding vector of the query.
        :param top_k: Number of nearest chunks requested.
        :returns: The retrieved results, as produced by the backend.
        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement search()"
        )

    def persist(self) -> None:
        """Flush in-memory state to durable storage.

        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement persist()"
        )

    def load(self) -> None:
        """Restore previously persisted state.

        :raises NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement load()"
        )





# [docs]
class InMemoryStore(VectorStore):
    """Simple, non-persistent reference implementation.

    Stores chunks in a Python list. Useful for tests and small demos;
    not suitable for production retrieval workloads.
    """

    def __init__(self) -> None:
        self._chunks: list[Chunk] = []


    # [docs]
    def add(self, chunks: list[Chunk]) -> None:
        """Append chunks; no embedding-index structure is built."""
        self._chunks.extend(chunks)