Function bodies 13 total
_load_index function · python · L17-L28 (12 LOC)src/aumai_toolretrieval/cli.py
def _load_index() -> ToolIndex:
    """Rebuild the tool index from the default on-disk index file.

    Returns:
        A ToolIndex holding every persisted record with its embeddings
        rebuilt, or an empty ToolIndex when the index file does not exist.
    """
    restored = ToolIndex()
    if not _DEFAULT_INDEX_FILE.exists():
        # Nothing has been persisted yet: hand back the empty index.
        return restored

    payload: list[dict[str, object]] = json.loads(
        _DEFAULT_INDEX_FILE.read_text(encoding="utf-8")
    )
    for item in payload:
        restored.add_tool(ToolRecord.model_validate(item))
    # Embeddings are recomputed from the loaded records.
    restored.build_index()
    return restored


# index_cmd function · python · L51-L78 (28 LOC) src/aumai_toolretrieval/cli.py
def index_cmd(tools_dir: str) -> None:
    """Build or update the tool index from JSON definition files.

    Each JSON file in TOOLS_DIR must represent a ToolRecord (tool_id, name,
    description, tags, capabilities).
    """
    tools_path = Path(tools_dir)
    # Directory listing order is filesystem-dependent; sort so that the skip
    # messages and the winner among duplicate tool_ids are reproducible.
    tool_files = sorted(tools_path.glob("*.json"))
    if not tool_files:
        click.echo(f"No JSON files found in {tools_dir}.", err=True)
        return

    index = ToolIndex()
    loaded_count = 0
    for tool_file in tool_files:
        try:
            raw = json.loads(tool_file.read_text(encoding="utf-8"))
            tool = ToolRecord.model_validate(raw)
            index.add_tool(tool)
            loaded_count += 1
        except Exception as exc:  # noqa: BLE001
            # Best effort: one unreadable or invalid file is reported on
            # stderr and skipped instead of aborting the whole run.
            click.echo(f" Skipping {tool_file.name}: {exc}", err=True)

    # Embeddings are only computed once every record has been registered.
    index.build_index()
    _save_index(index)
    click.echo(f"Indexed {loaded_count} tool(s) from {tools_dir}.")


# search_cmd function · python · L102-L137 (36 LOC) src/aumai_toolretrieval/cli.py
def search_cmd(
query: str, top_k: int, tags: tuple[str, ...], output_format: str
) -> None:
"""Search the tool index for tools matching a natural-language query."""
index = _load_index()
search_query = SearchQuery(
query_text=query,
tags_filter=list(tags) if tags else None,
top_k=top_k,
)
results = index.search(search_query)
if not results:
click.echo("No matching tools found.")
return
if output_format == "json":
data = [
{
"rank": r.rank,
"tool_id": r.tool.tool_id,
"name": r.tool.name,
"score": round(r.score, 4),
"description": r.tool.description,
}
for r in results
]
click.echo(json.dumps(data, indent=2))
else:
for result in results:
click.echo(
f" [{result.rank}] {result.tool.name} "
f"(score={result.score:.4f})\n"SimpleEmbedder.fit method · python · L38-L57 (20 LOC)src/aumai_toolretrieval/core.py
def fit(self, documents: list[str]) -> None:
    """Build the vocabulary and IDF weights from *documents*.

    Every call replaces the vocabulary, the term-to-index map and the IDF
    table, so weights computed for an earlier corpus never linger after a
    refit.

    Args:
        documents: Corpus of text strings.
    """
    self._num_docs = len(documents)

    doc_freq: Counter[str] = Counter()
    for doc in documents:
        # set() so a term counts at most once per document.
        doc_freq.update(set(_tokenize(doc)))

    # Sorted vocabulary keeps the vector column order stable across runs.
    vocab = sorted(doc_freq)
    self._vocab = vocab
    self._vocab_index = {word: idx for idx, word in enumerate(vocab)}

    # Compute smooth IDF: log((N+1) / (df+1)) + 1
    # A fresh dict is assigned (rather than updating in place) to drop
    # entries for terms that are no longer part of the corpus.
    smoothed_total = self._num_docs + 1
    self._idf = {
        word: math.log(smoothed_total / (df + 1)) + 1.0
        for word, df in doc_freq.items()
    }


# SimpleEmbedder.embed method · python · L59-L90 (32 LOC) src/aumai_toolretrieval/core.py
def embed(self, text: str) -> list[float]:
"""Encode *text* as a unit-normalised TF-IDF vector.
Args:
text: Text to encode.
Returns:
A dense float vector with dimensionality equal to the vocabulary size.
Returns a zero vector when the vocabulary has not been fitted yet.
"""
if not self._vocab:
return []
tokens = _tokenize(text)
if not tokens:
return [0.0] * len(self._vocab)
tf: Counter[str] = Counter(tokens)
vector: list[float] = [0.0] * len(self._vocab)
for token, count in tf.items():
idx = self._vocab_index.get(token)
if idx is not None:
idf_weight = self._idf.get(token, 1.0)
vector[idx] = (count / len(tokens)) * idf_weight
# L2 normalise
magnitude = math.sqrt(sum(v * v for v in vector))
if magnitude > 0:
vector = [v / magnitude for v in vector]
CosineSimilarity.compute method · python · L97-L128 (32 LOC)src/aumai_toolretrieval/core.py
def compute(vec_a: list[float], vec_b: list[float]) -> float:
"""Return the cosine similarity between *vec_a* and *vec_b*.
Uses numpy for efficient dot product and norm computation.
Args:
vec_a: First vector.
vec_b: Second vector. Must have the same length as *vec_a*.
Returns:
A value in ``[-1, 1]``. Returns 0.0 for zero-length or empty vectors.
Raises:
ValueError: When the vectors have different lengths.
"""
if len(vec_a) != len(vec_b):
raise ValueError(
f"Vector length mismatch: {len(vec_a)} vs {len(vec_b)}"
)
if not vec_a:
return 0.0
arr_a: np.ndarray[tuple[int], np.dtype[np.float64]] = np.array(vec_a, dtype=np.float64)
arr_b: np.ndarray[tuple[int], np.dtype[np.float64]] = np.array(vec_b, dtype=np.float64)
norm_a = float(np.linalg.norm(arr_a))
norm_b = float(np.linalg.norm(arr_bToolIndex.add_tool method · python · L143-L153 (11 LOC)src/aumai_toolretrieval/core.py
def add_tool(self, tool: ToolRecord) -> None:
    """Register *tool* under its ``tool_id``, replacing any previous entry.

    Args:
        tool: The tool record to store.

    Note:
        Embeddings are not refreshed here. Call :meth:`build_index` once
        the tools have been added; otherwise searches may use stale vectors.
    """
    registry = self._tools
    key = tool.tool_id
    registry[key] = tool


# Repobility (the analyzer behind this table) · https://repobility.com
ToolIndex.build_index method · python · L155-L173 (19 LOC)src/aumai_toolretrieval/core.py
def build_index(self) -> None:
    """Refit the embedder on all tool documents and update embeddings.

    Call this method after adding or removing tools to ensure all
    similarity scores reflect the current corpus. When the index holds no
    tools the method returns without touching the embedder.
    """
    tools = list(self._tools.values())
    if not tools:
        return

    # Build each tool's document once; the same strings are used both to
    # fit the embedder and to embed the tools.
    documents = [self._tool_document(tool) for tool in tools]
    self._embedder.fit(documents)

    # Re-embed every tool against the freshly fitted vocabulary.
    for tool, document in zip(tools, documents):
        embedding = self._embedder.embed(document)
        # Store the copy carrying the new embedding under the same id.
        self._tools[tool.tool_id] = tool.model_copy(
            update={"embedding": embedding}
        )


# ToolIndex.search method · python · L175-L212 (38 LOC) src/aumai_toolretrieval/core.py
def search(self, query: SearchQuery) -> list[SearchResult]:
"""Perform semantic similarity search against the tool index.
Args:
query: Search parameters.
Returns:
Up to ``query.top_k`` :class:`~aumai_toolretrieval.models.SearchResult`
objects sorted by descending similarity score.
"""
query_embedding = self._embedder.embed(query.query_text)
if not query_embedding:
return []
candidates = list(self._tools.values())
if query.tags_filter:
filter_set = {tag.lower() for tag in query.tags_filter}
candidates = [
tool
for tool in candidates
if filter_set.intersection(t.lower() for t in tool.tags)
]
scored: list[tuple[float, ToolRecord]] = []
for tool in candidates:
if tool.embedding is None:
continue
score = self._similarity.compute(queryToolIndex.search_by_tags method · python · L214-L228 (15 LOC)src/aumai_toolretrieval/core.py
def search_by_tags(self, tags: list[str]) -> list[ToolRecord]:
    """Collect every indexed tool that shares at least one tag with *tags*.

    Tags are compared case-insensitively.

    Args:
        tags: Tag strings to look for.

    Returns:
        The matching :class:`~aumai_toolretrieval.models.ToolRecord`
        objects, in the order they are stored; empty when nothing matches.
    """
    wanted = {tag.lower() for tag in tags}
    matches: list[ToolRecord] = []
    for candidate in self._tools.values():
        own_tags = {tag.lower() for tag in candidate.tags}
        # A non-empty overlap means at least one requested tag is present.
        if wanted & own_tags:
            matches.append(candidate)
    return matches


# ToolIndex.get_tool method · python · L230-L239 (10 LOC) src/aumai_toolretrieval/core.py
def get_tool(self, tool_id: str) -> ToolRecord | None:
    """Look up a single tool by its identifier.

    Args:
        tool_id: Identifier the tool is stored under.

    Returns:
        The stored :class:`~aumai_toolretrieval.models.ToolRecord`, or
        *None* when no tool has that identifier.
    """
    found = self._tools.get(tool_id, None)
    return found


# ToolIndex.get_all_tools method · python · L241-L247 (7 LOC) src/aumai_toolretrieval/core.py
def get_all_tools(self) -> list[ToolRecord]:
    """List every tool currently held by the index.

    Returns:
        A new list of :class:`~aumai_toolretrieval.models.ToolRecord`
        objects; mutating the list does not change the index.
    """
    return [*self._tools.values()]


# ToolIndex._tool_document method · python · L253-L258 (6 LOC) src/aumai_toolretrieval/core.py
def _tool_document(self, tool: ToolRecord) -> str:
    """Flatten a tool's searchable fields into one space-separated string.

    The name comes first, followed by the description, the tags and the
    capabilities.
    """
    fields = [tool.name, tool.description, *tool.tags, *tool.capabilities]
    return " ".join(fields)