← back to invincible-jha__aumai-toolretrieval

Function bodies 13 total

All specs Real LLM only Function bodies

_load_index function · python · L17-L28 (12 LOC)

src/aumai_toolretrieval/cli.py

def _load_index() -> ToolIndex:
    """Rebuild a ToolIndex from the persisted index file.

    Returns:
        An empty index when the index file does not exist. Otherwise an index
        holding every stored entry, validated as a ``ToolRecord``, with
        ``build_index`` already called on it.
    """
    index = ToolIndex()
    if not _DEFAULT_INDEX_FILE.exists():
        return index

    payload = _DEFAULT_INDEX_FILE.read_text(encoding="utf-8")
    entries: list[dict[str, object]] = json.loads(payload)
    for entry in entries:
        index.add_tool(ToolRecord.model_validate(entry))

    # Build once after all records are registered.
    index.build_index()
    return index

index_cmd function · python · L51-L78 (28 LOC)

src/aumai_toolretrieval/cli.py

def index_cmd(tools_dir: str) -> None:
    """Build or update the tool index from JSON definition files.

    Each JSON file in TOOLS_DIR must represent a ToolRecord (tool_id, name,
    description, tags, capabilities).
    """
    # NOTE(review): presumably a click command whose docstring doubles as the
    # --help text, so the docstring is intentionally left as-is -- confirm.

    # Sort the matches so files are processed in a stable, platform-independent
    # order. Directory enumeration order is otherwise up to the filesystem,
    # which made it unpredictable which file won when two files declared the
    # same tool_id (add_tool replaces an existing entry).
    tool_files = sorted(Path(tools_dir).glob("*.json"))

    if not tool_files:
        click.echo(f"No JSON files found in {tools_dir}.", err=True)
        return

    index = ToolIndex()
    loaded_count = 0

    for tool_file in tool_files:
        # Best-effort load: a file that cannot be read, parsed or validated is
        # reported on stderr and skipped; the remaining files are still indexed.
        try:
            raw = json.loads(tool_file.read_text(encoding="utf-8"))
            tool = ToolRecord.model_validate(raw)
            index.add_tool(tool)
            loaded_count += 1
        except Exception as exc:  # noqa: BLE001
            click.echo(f"  Skipping {tool_file.name}: {exc}", err=True)

    # Build once over the full corpus, then persist the result.
    index.build_index()
    _save_index(index)
    click.echo(f"Indexed {loaded_count} tool(s) from {tools_dir}.")

search_cmd function · python · L102-L137 (36 LOC)

src/aumai_toolretrieval/cli.py

def search_cmd(
    query: str, top_k: int, tags: tuple[str, ...], output_format: str
) -> None:
    """Search the tool index for tools matching a natural-language query."""
    index = _load_index()

    search_query = SearchQuery(
        query_text=query,
        tags_filter=list(tags) if tags else None,
        top_k=top_k,
    )
    results = index.search(search_query)

    if not results:
        click.echo("No matching tools found.")
        return

    if output_format == "json":
        data = [
            {
                "rank": r.rank,
                "tool_id": r.tool.tool_id,
                "name": r.tool.name,
                "score": round(r.score, 4),
                "description": r.tool.description,
            }
            for r in results
        ]
        click.echo(json.dumps(data, indent=2))
    else:
        for result in results:
            click.echo(
                f"  [{result.rank}] {result.tool.name} "
                f"(score={result.score:.4f})\n"

SimpleEmbedder.fit method · python · L38-L57 (20 LOC)

src/aumai_toolretrieval/core.py

    def fit(self, documents: list[str]) -> None:
        """Build the vocabulary and IDF weights from *documents*.

        All fitted state is replaced on every call, so refitting on a new
        corpus never leaves IDF entries behind for words that are no longer
        part of the vocabulary.

        Args:
            documents: Corpus of text strings.
        """
        self._num_docs = len(documents)

        # Document frequency: a token counts at most once per document.
        doc_freq: Counter[str] = Counter()
        for doc in documents:
            doc_freq.update(set(_tokenize(doc)))

        # Sorting gives every word a stable position in the vector.
        vocab = sorted(doc_freq)
        self._vocab = vocab
        self._vocab_index = {word: idx for idx, word in enumerate(vocab)}

        # Smooth IDF: log((N+1) / (df+1)) + 1.  The mapping is rebuilt from
        # scratch instead of being updated in place so that words seen only in
        # an earlier fit are dropped.
        smoothed_total = self._num_docs + 1
        self._idf = {
            word: math.log(smoothed_total / (df + 1)) + 1.0
            for word, df in doc_freq.items()
        }

SimpleEmbedder.embed method · python · L59-L90 (32 LOC)

src/aumai_toolretrieval/core.py

    def embed(self, text: str) -> list[float]:
        """Encode *text* as a unit-normalised TF-IDF vector.

        Args:
            text: Text to encode.

        Returns:
            A dense float vector with dimensionality equal to the vocabulary size.
            Returns an empty list when the vocabulary has not been fitted yet,
            and an all-zero vector when *text* produces no tokens.
        """
        # No fitted vocabulary: there is no dimensionality to honour, so the
        # result is an empty list rather than a zero vector.
        if not self._vocab:
            return []

        tokens = _tokenize(text)
        if not tokens:
            return [0.0] * len(self._vocab)

        tf: Counter[str] = Counter(tokens)
        vector: list[float] = [0.0] * len(self._vocab)

        for token, count in tf.items():
            idx = self._vocab_index.get(token)
            # Tokens that are not in the fitted vocabulary are ignored.
            if idx is not None:
                idf_weight = self._idf.get(token, 1.0)
                # Term frequency is relative to every token in *text*,
                # including the out-of-vocabulary ones.
                vector[idx] = (count / len(tokens)) * idf_weight

        # L2 normalise
        magnitude = math.sqrt(sum(v * v for v in vector))
        # A zero magnitude (no in-vocabulary token) leaves the vector all-zero.
        if magnitude > 0:
            vector = [v / magnitude for v in vector]
        # NOTE(review): the listing ends here; the statement returning `vector`
        # is not visible in this view -- confirm it is present in the file.

CosineSimilarity.compute method · python · L97-L128 (32 LOC)

src/aumai_toolretrieval/core.py

    def compute(vec_a: list[float], vec_b: list[float]) -> float:
        """Return the cosine similarity between *vec_a* and *vec_b*.

        Uses numpy for efficient dot product and norm computation.

        Args:
            vec_a: First vector.
            vec_b: Second vector.  Must have the same length as *vec_a*.

        Returns:
            A value in ``[-1, 1]``.  Returns 0.0 for zero-length or empty vectors.

        Raises:
            ValueError: When the vectors have different lengths.
        """
        if len(vec_a) != len(vec_b):
            raise ValueError(
                f"Vector length mismatch: {len(vec_a)} vs {len(vec_b)}"
            )
        if not vec_a:
            return 0.0

        arr_a: np.ndarray[tuple[int], np.dtype[np.float64]] = np.array(vec_a, dtype=np.float64)
        arr_b: np.ndarray[tuple[int], np.dtype[np.float64]] = np.array(vec_b, dtype=np.float64)

        norm_a = float(np.linalg.norm(arr_a))
        norm_b = float(np.linalg.norm(arr_b

ToolIndex.add_tool method · python · L143-L153 (11 LOC)

src/aumai_toolretrieval/core.py

    def add_tool(self, tool: ToolRecord) -> None:
        """Register *tool* under its ``tool_id``, replacing any existing entry.

        Args:
            tool: The tool record to store.

        Note:
            Only the record is stored here; no embedding is computed. Call
            :meth:`build_index` afterwards so that searches do not run
            against stale vectors.
        """
        key = tool.tool_id
        self._tools[key] = tool

Repobility (the analyzer behind this table) · https://repobility.com

ToolIndex.build_index method · python · L155-L173 (19 LOC)

src/aumai_toolretrieval/core.py

    def build_index(self) -> None:
        """Refit the embedder on all tool documents and update embeddings.

        Call this method after adding or removing tools to ensure all
        similarity scores reflect the current corpus. When the index holds no
        tools the method returns without touching the embedder.
        """
        tools = list(self._tools.values())
        if not tools:
            return

        # Build each tool's document text exactly once; the same strings are
        # used both to fit the embedder and to compute the embeddings.
        documents = [self._tool_document(tool) for tool in tools]
        self._embedder.fit(documents)

        # Store a copy of every tool carrying its freshly computed embedding.
        for tool, document in zip(tools, documents):
            embedding = self._embedder.embed(document)
            self._tools[tool.tool_id] = tool.model_copy(
                update={"embedding": embedding}
            )

ToolIndex.search method · python · L175-L212 (38 LOC)

src/aumai_toolretrieval/core.py

    def search(self, query: SearchQuery) -> list[SearchResult]:
        """Perform semantic similarity search against the tool index.

        Args:
            query: Search parameters.

        Returns:
            Up to ``query.top_k`` :class:`~aumai_toolretrieval.models.SearchResult`
            objects sorted by descending similarity score.
        """
        query_embedding = self._embedder.embed(query.query_text)
        if not query_embedding:
            return []

        candidates = list(self._tools.values())

        if query.tags_filter:
            filter_set = {tag.lower() for tag in query.tags_filter}
            candidates = [
                tool
                for tool in candidates
                if filter_set.intersection(t.lower() for t in tool.tags)
            ]

        scored: list[tuple[float, ToolRecord]] = []
        for tool in candidates:
            if tool.embedding is None:
                continue
            score = self._similarity.compute(query

ToolIndex.search_by_tags method · python · L214-L228 (15 LOC)

src/aumai_toolretrieval/core.py

    def search_by_tags(self, tags: list[str]) -> list[ToolRecord]:
        """Find every tool that carries at least one of *tags*.

        Matching is case-insensitive: the requested tags and each tool's own
        tags are lower-cased before being compared.

        Args:
            tags: Tag strings to look for.

        Returns:
            The matching :class:`~aumai_toolretrieval.models.ToolRecord`
            objects, in the order they are stored; empty when nothing matches.
        """
        wanted = {tag.lower() for tag in tags}
        matches: list[ToolRecord] = []
        for tool in self._tools.values():
            tool_tags = (t.lower() for t in tool.tags)
            if not wanted.isdisjoint(tool_tags):
                matches.append(tool)
        return matches

ToolIndex.get_tool method · python · L230-L239 (10 LOC)

src/aumai_toolretrieval/core.py

    def get_tool(self, tool_id: str) -> ToolRecord | None:
        """Look up a single tool by its identifier.

        Args:
            tool_id: Identifier the tool was registered under.

        Returns:
            The stored :class:`~aumai_toolretrieval.models.ToolRecord`, or
            *None* when no tool is registered under *tool_id*.
        """
        record = self._tools.get(tool_id)
        return record

ToolIndex.get_all_tools method · python · L241-L247 (7 LOC)

src/aumai_toolretrieval/core.py

    def get_all_tools(self) -> list[ToolRecord]:
        """List every tool currently held by the index.

        Returns:
            A new list of :class:`~aumai_toolretrieval.models.ToolRecord`
            objects; changing the list does not change the index itself.
        """
        return [*self._tools.values()]

ToolIndex._tool_document method · python · L253-L258 (6 LOC)

src/aumai_toolretrieval/core.py

    def _tool_document(self, tool: ToolRecord) -> str:
        """Combine all searchable text fields of a tool into a single string."""
        parts = [tool.name, tool.description]
        parts.extend(tool.tags)
        parts.extend(tool.capabilities)
        return " ".join(parts)