# Source code for autorag.types

"""Public typed-dict shapes for the audio→topics pipeline.

Kept dependency-free so SDK consumers can reference these types without
forcing the optional `[audio]` / `[diarize]` extras (langchain, whisper,
pyannote) to be importable.
"""

from __future__ import annotations

from typing import TypedDict



# [docs]
class WordSpan(TypedDict, total=False):
    """A single transcribed word with its timing and attribution.

    Declared with ``total=False``, so every key is optional as far as a
    type checker is concerned.

    Keys:

    * ``w`` -- the word itself.
    * ``s`` / ``e`` -- start / end of the word, in seconds.
    * ``segment_id`` -- id of the Whisper segment the word belongs to.
    * ``speaker`` -- string id assigned by diarization; ``"0"`` when
      diarization is disabled.
    """

    # Word text.
    w: str
    # Start and end offsets, in seconds.
    s: float
    e: float
    # Whisper segment id.
    segment_id: str
    # Diarization speaker id ("0" when diarization is disabled).
    speaker: str




# [docs]
class TopicDict(TypedDict, total=False):
    """A node of the L0/L1/L2 topic tree.

    Declared with ``total=False``, so every key is optional as far as a
    type checker is concerned. The type is recursive: ``children`` holds
    further ``TopicDict`` nodes.
    """

    # Heading and summary text for this topic.
    title: str
    summary: str
    # NOTE(review): presumably start/end in seconds, mirroring the ``s``/``e``
    # keys of ``WordSpan`` -- confirm against the topic generator.
    s: float
    e: float
    # Nested sub-topics (same shape as this node).
    children: list[TopicDict]




# [docs]
class TopicTree(TypedDict):
    """Wrapper around the list of topic nodes.

    This is the shape returned by
    :meth:`autorag.core.AutoRAG.generate_topics`. The single key
    ``topics`` is required (the class uses the default ``total=True``).
    """

    # Topic nodes held by the tree; each may nest further via ``children``.
    topics: list[TopicDict]




# [docs]
class TranscriptionResult(TypedDict):
    """Transcript and topic tree bundled together.

    This is the output of ``build_agent``. Both keys are required (the
    class uses the default ``total=True``).
    """

    # Word-level transcript, one ``WordSpan`` per word.
    transcription: list[WordSpan]
    # Topic tree that accompanies the transcript.
    topics: TopicTree