# Source code for autorag.types
"""Public typed-dict shapes for the audio→topics pipeline.
Kept dependency-free so SDK consumers can reference these types without
forcing the optional `[audio]` / `[diarize]` extras (langchain, whisper,
pyannote) to be importable.
"""
from __future__ import annotations
from typing import TypedDict
[docs]
class WordSpan(TypedDict, total=False):
    """A single word produced by the transcription pipeline.

    Declared with ``total=False``, so type checkers treat every key as
    optional.

    Keys:
        w: The word itself.
        s: Start time, in seconds.
        e: End time, in seconds.
        segment_id: Whisper segment id.
        speaker: String id assigned by diarization; ``"0"`` when
            diarization is disabled.
    """

    # Word text and its time span (seconds).
    w: str
    s: float
    e: float

    # Provenance: Whisper segment id and diarization speaker id.
    segment_id: str
    speaker: str
[docs]
class TopicDict(TypedDict, total=False):
    """A node of the L0/L1/L2 topic tree.

    All keys are optional (``total=False``). ``children`` holds further
    ``TopicDict`` nodes, which makes the structure recursive.
    """

    title: str
    summary: str

    # NOTE(review): presumably start/end times in seconds — confirm with producer.
    s: float
    e: float

    # Nested nodes; the self-reference works because annotations are
    # postponed via ``from __future__ import annotations``.
    children: list[TopicDict]
[docs]
class TopicTree(TypedDict):
    """Wrapper dict returned by :meth:`autorag.core.AutoRAG.generate_topics`.

    Its single, required ``topics`` key holds a list of ``TopicDict`` nodes.
    """

    topics: list[TopicDict]
[docs]
class TranscriptionResult(TypedDict):
    """Transcript and topics bundled together; the output of ``build_agent``.

    Both keys are required.
    """

    # Word-level transcript entries.
    transcription: list[WordSpan]

    # Topic container for the same result.
    topics: TopicTree