← back to invincible-jha__aumai-toolcanon

Function bodies 21 total

canonicalize function · python · L40-L60 (21 LOC)

src/aumai_toolcanon/cli.py

def canonicalize(
    input_file: Path, source_format: str | None, output: Path | None
) -> None:
    """Canonicalize a tool definition to the AumAI Tool IR."""
    # NOTE: the docstring above is kept verbatim; if this function is
    # registered as a Click command it doubles as the user-facing help text.
    from aumai_toolcanon.core import Canonicalizer

    # Load the tool definition from disk as UTF-8 encoded JSON.
    raw_text = input_file.read_text(encoding="utf-8")
    tool_def: dict[str, object] = json.loads(raw_text)

    # A missing or empty format name is forwarded as None, which the
    # canonicalizer treats as a request for auto-detection.
    fmt: SourceFormat | None = None
    if source_format:
        fmt = SourceFormat(source_format)

    result = Canonicalizer().canonicalize(tool_def, fmt)

    # Warnings go to stderr so they never pollute JSON written to stdout.
    for warning in result.warnings:
        click.echo(f"Warning: {warning}", err=True)

    output_json = json.dumps(result.tool.model_dump(mode="json"), indent=2)

    # Without an output path the canonical JSON is printed instead of saved.
    if not output:
        click.echo(output_json)
        return

    output.write_text(output_json, encoding="utf-8")
    click.echo(f"Canonical tool written to {output}")

emit function · python · L83-L110 (28 LOC)

src/aumai_toolcanon/cli.py

def emit(input_file: Path, target: str, output: Path | None) -> None:
    """Emit a canonical tool definition to a target format."""
    # NOTE: the docstring above is kept verbatim; if this function is
    # registered as a Click command it doubles as the user-facing help text.
    from aumai_toolcanon.emitter import (
        emit_anthropic,
        emit_json_schema,
        emit_mcp,
        emit_openai,
    )
    from aumai_toolcanon.models import CanonicalTool

    # Read the canonical IR (UTF-8 JSON) and validate it into a model first,
    # so a malformed file fails before any emitter is selected.
    raw: dict[str, object] = json.loads(input_file.read_text(encoding="utf-8"))
    tool = CanonicalTool.model_validate(raw)

    # Target name -> emitter function. An unknown target raises KeyError.
    emitters = {
        "openai": emit_openai,
        "anthropic": emit_anthropic,
        "mcp": emit_mcp,
        "json-schema": emit_json_schema,
    }
    emit_fn = emitters[target]
    output_json = json.dumps(emit_fn(tool), indent=2)

    # Without an output path the emitted JSON is printed instead of saved.
    if not output:
        click.echo(output_json)
        return

    output.write_text(output_json, encoding="utf-8")
    click.echo(f"Emitted {target} tool written to {output}")

detect function · python · L127-L143 (17 LOC)

src/aumai_toolcanon/cli.py

def detect(input_file: Path, verbose: bool) -> None:
    """Detect the source format of a tool definition file."""
    # NOTE: the docstring above is kept verbatim; if this function is
    # registered as a Click command it doubles as the user-facing help text.
    from aumai_toolcanon.core import FormatDetector

    raw_text = input_file.read_text(encoding="utf-8")
    tool_def: dict[str, object] = json.loads(raw_text)

    detector = FormatDetector()
    best_guess = detector.detect(tool_def)
    click.echo(f"Detected format: {best_guess.value}")

    if verbose:
        # Compute the scores before printing the header so a failure in
        # confidence() leaves no dangling heading behind.
        scores = detector.confidence(tool_def)
        click.echo("\nConfidence scores:")
        # Highest score first; ties keep the order confidence() produced.
        ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        for fmt, score in ranked:
            click.echo(f"  {fmt.value:12s}: {score:.0%}")

    # Explicitly terminate with a success status.
    sys.exit(0)

FormatDetector.__init__ method · python · L24-L30 (7 LOC)

src/aumai_toolcanon/core.py

    def __init__(self) -> None:
        """Register one parser instance per known format, in priority order."""
        # List order matters: detect() returns the format of the first parser
        # whose can_parse() accepts the definition.
        registered = (
            (SourceFormat.openai, OpenAIParser),
            (SourceFormat.anthropic, AnthropicParser),
            (SourceFormat.mcp, MCPParser),
            (SourceFormat.langchain, LangChainParser),
        )
        self._parsers = [(fmt, parser_cls()) for fmt, parser_cls in registered]

FormatDetector.detect method · python · L32-L45 (14 LOC)

src/aumai_toolcanon/core.py

    def detect(self, tool_def: dict[str, Any]) -> SourceFormat:
        """Return the most likely SourceFormat for the given dict.

        Detection priority:
        1. OpenAI  — ``type=="function"`` wrapper or ``parameters`` key
        2. Anthropic — ``input_schema`` key
        3. MCP — ``inputSchema`` key
        4. LangChain — ``args_schema`` / ``schema`` key
        5. Raw — fallback
        """
        for fmt, parser in self._parsers:
            if parser.can_parse(tool_def):
                return fmt
        return SourceFormat.raw

FormatDetector.confidence method · python · L47-L80 (34 LOC)

src/aumai_toolcanon/core.py

    def confidence(self, tool_def: dict[str, Any]) -> dict[SourceFormat, float]:
        """Return a confidence score (0–1) for each known format."""
        scores: dict[SourceFormat, float] = {}

        # OpenAI signals
        openai_score = 0.0
        if tool_def.get("type") == "function" and "function" in tool_def:
            openai_score = 1.0
        elif "parameters" in tool_def and "name" in tool_def:
            openai_score = 0.7
        scores[SourceFormat.openai] = openai_score

        # Anthropic signals
        anthropic_score = 0.0
        if "input_schema" in tool_def and "name" in tool_def:
            anthropic_score = 1.0
        scores[SourceFormat.anthropic] = anthropic_score

        # MCP signals
        mcp_score = 0.0
        if "inputSchema" in tool_def and "name" in tool_def:
            mcp_score = 1.0
        scores[SourceFormat.mcp] = mcp_score

        # LangChain signals
        lc_score = 0.0
        if "args_schema" in tool_def or "schema" in tool

Canonicalizer.__init__ method · python · L86-L93 (8 LOC)

src/aumai_toolcanon/core.py

    def __init__(self) -> None:
        """Create the format detector and the format-to-parser lookup table."""
        self._detector = FormatDetector()

        # SourceFormat.raw deliberately has no entry: canonicalize() treats a
        # missing parser as the signal to use the heuristic raw passthrough.
        parser_table: dict[SourceFormat, Any] = {}
        parser_table[SourceFormat.openai] = OpenAIParser()
        parser_table[SourceFormat.anthropic] = AnthropicParser()
        parser_table[SourceFormat.mcp] = MCPParser()
        parser_table[SourceFormat.langchain] = LangChainParser()
        self._parsers = parser_table

About: code-quality intelligence by Repobility · https://repobility.com

Canonicalizer.canonicalize method · python · L95-L139 (45 LOC)

src/aumai_toolcanon/core.py

    def canonicalize(
        self,
        tool_def: dict[str, Any],
        source_format: SourceFormat | None = None,
    ) -> CanonicalizationResult:
        """Canonicalize a tool definition dict.

        If ``source_format`` is None, auto-detection is used.
        Returns a CanonicalizationResult with the canonical tool and any warnings.
        """
        warnings: list[str] = []

        if source_format is None:
            detected = self._detector.detect(tool_def)
            if detected == SourceFormat.raw:
                warnings.append(
                    "Could not detect source format; using raw passthrough."
                )
        else:
            detected = source_format

        parser = self._parsers.get(detected)
        if parser is None:
            # Raw format — best-effort passthrough
            tool = self._raw_canonicalize(tool_def)
            warnings.append(
                "No parser for 'raw' format; extracted fields by heuristic."

Canonicalizer._raw_canonicalize method · python · L141-L167 (27 LOC)

src/aumai_toolcanon/core.py

    def _raw_canonicalize(self, tool_def: dict[str, Any]) -> CanonicalTool:
        """Build a ``CanonicalTool`` from a definition in an unknown format.

        Best-effort heuristics, none of which raise on odd-shaped input:

        * name — first truthy value among ``name``, ``title`` and the
          ``name`` nested under ``function``; empty string if none is found.
        * description — the ``description`` value, or an empty string when
          the key is absent or explicitly null.
        * inputs — first truthy value among ``parameters``, ``input_schema``,
          ``inputSchema`` and ``schema``; empty dict if none is found.

        Args:
            tool_def: The raw tool definition as parsed from JSON.

        Returns:
            A ``CanonicalTool`` tagged ``SourceFormat.raw`` with default
            capabilities, empty outputs, and ``tool_def`` preserved as the
            original definition.
        """
        # "function" can be absent, null, or a non-dict value in arbitrary
        # input; only a dict can carry a nested name, so guard before .get().
        function_block = tool_def.get("function")
        nested_name: Any = ""
        if isinstance(function_block, dict):
            nested_name = function_block.get("name", "")

        name: Any = tool_def.get("name") or tool_def.get("title") or nested_name
        if name is None:
            # An explicit null must not be stringified to the text "None".
            name = ""

        description: Any = tool_def.get("description")
        if description is None:
            description = ""

        # Try to find a parameters-like key, in order of preference.
        inputs: dict[str, Any] = (
            tool_def.get("parameters")
            or tool_def.get("input_schema")
            or tool_def.get("inputSchema")
            or tool_def.get("schema")
            or {}
        )

        return CanonicalTool(
            name=str(name),
            description=str(description),
            capabilities=ToolCapability(),
            inputs=inputs,
            outputs={},
            source_format=SourceFormat.raw,
            original_definition=tool_def,
        )

emit_openai function · python · L10-L28 (19 LOC)

src/aumai_toolcanon/emitter.py

def emit_openai(tool: CanonicalTool) -> dict[str, Any]:
    """Render ``tool`` as an OpenAI function-tool definition.

    The result has the shape
    ``{"type": "function", "function": {"name": ..., "description": ...,
    "parameters": {...}}}``.

    Empty inputs are replaced by an empty object schema, and a schema that
    lacks a ``"type"`` key is copied with ``"type": "object"`` placed first.
    The tool's own ``inputs`` dict is never mutated.
    """
    declared_inputs = tool.inputs
    schema = declared_inputs if declared_inputs else {"type": "object", "properties": {}}
    if "type" not in schema:
        # Build a new dict so the caller's schema stays untouched.
        schema = {"type": "object", **schema}

    function_spec = {
        "name": tool.name,
        "description": tool.description,
        "parameters": schema,
    }
    return {"type": "function", "function": function_spec}

emit_anthropic function · python · L31-L45 (15 LOC)

src/aumai_toolcanon/emitter.py

def emit_anthropic(tool: CanonicalTool) -> dict[str, Any]:
    """Render ``tool`` as an Anthropic tool definition.

    The result has the shape
    ``{"name": ..., "description": ..., "input_schema": {...}}``.

    Empty inputs are replaced by an empty object schema, and a schema that
    lacks a ``"type"`` key is copied with ``"type": "object"`` placed first.
    """
    if tool.inputs:
        schema = tool.inputs
    else:
        schema = {"type": "object", "properties": {}}

    if "type" not in schema:
        # Copy rather than mutate so the tool's own inputs are left alone.
        schema = {"type": "object", **schema}

    emitted: dict[str, Any] = {"name": tool.name, "description": tool.description}
    emitted["input_schema"] = schema
    return emitted

emit_mcp function · python · L48-L62 (15 LOC)

src/aumai_toolcanon/emitter.py

def emit_mcp(tool: CanonicalTool) -> dict[str, Any]:
    """Render ``tool`` as an MCP tool definition.

    The result has the shape
    ``{"name": ..., "description": ..., "inputSchema": {...}}`` (note the
    camelCase schema key).

    Empty inputs are replaced by an empty object schema, and a schema that
    lacks a ``"type"`` key is copied with ``"type": "object"`` placed first.
    """
    fallback_schema = {"type": "object", "properties": {}}
    schema = tool.inputs or fallback_schema

    # Copy rather than mutate when the type marker has to be added.
    needs_type = "type" not in schema
    final_schema = {"type": "object", **schema} if needs_type else schema

    return {
        "name": tool.name,
        "description": tool.description,
        "inputSchema": final_schema,
    }

emit_json_schema function · python · L65-L99 (35 LOC)

src/aumai_toolcanon/emitter.py

def emit_json_schema(tool: CanonicalTool) -> dict[str, Any]:
    """Emit a CanonicalTool as a standalone JSON Schema document.

    The resulting schema describes the tool's *input* interface in standard
    JSON Schema Draft 7 / 2019-09 format, with metadata in ``$defs``.
    """
    inputs = tool.inputs or {"type": "object", "properties": {}}
    base: dict[str, Any] = {
        "$schema": "https://json-schema.org/draft/2019-09/schema",
        "title": tool.name,
        "description": tool.description,
    }
    base.update(inputs)

    # Inject outputs as an extension annotation if available
    if tool.outputs:
        base["x-outputs"] = tool.outputs

    # Inject capability metadata as extensions
    base["x-capabilities"] = {
        "action": tool.capabilities.action,
        "domain": tool.capabilities.domain,
        "side_effects": tool.capabilities.side_effects,
        "idempotent": tool.capabilities.idempotent,
        "cost_estimate": tool.capabilities.cost_estimate,

AnthropicParser.parse method · python · L14-L34 (21 LOC)

src/aumai_toolcanon/parsers/anthropic.py

    def parse(self, tool_def: dict[str, Any]) -> CanonicalTool:
        """Convert an Anthropic-style tool dict into a ``CanonicalTool``.

        Expected input shape:
        ``{"name": ..., "description": ..., "input_schema": {...}}``

        Missing ``name`` / ``description`` default to empty strings and a
        missing ``input_schema`` to an empty dict. Capabilities are inferred
        from the name and description; outputs are left empty.
        """
        tool_name: str = tool_def.get("name", "")
        summary: str = tool_def.get("description", "")
        schema: dict[str, Any] = tool_def.get("input_schema", {})

        return CanonicalTool(
            name=tool_name,
            description=summary,
            capabilities=_infer_capabilities(tool_name, summary),
            inputs=schema,
            outputs={},
            source_format=SourceFormat.anthropic,
            original_definition=tool_def,
        )

LangChainParser.parse method · python · L25-L59 (35 LOC)

src/aumai_toolcanon/parsers/langchain.py

    def parse(self, tool_def: dict[str, Any]) -> CanonicalTool:
        """Parse a LangChain tool definition dict."""
        name: str = tool_def.get("name", tool_def.get("title", ""))
        description: str = tool_def.get("description", "")

        # Try to extract parameter schema from various LangChain representations
        inputs: dict[str, Any] = {}

        if "args_schema" in tool_def:
            # Pydantic model schema dumped as dict
            schema = tool_def["args_schema"]
            inputs = self._extract_schema(schema)
        elif "schema" in tool_def:
            inputs = self._extract_schema(tool_def["schema"])
        elif "parameters" in tool_def:
            inputs = tool_def["parameters"]
        elif "properties" in tool_def:
            # Direct JSON Schema object
            inputs = {
                "type": "object",
                "properties": tool_def["properties"],
                "required": tool_def.get("required", []),
            }

        c

LangChainParser._extract_schema method · python · L61-L79 (19 LOC)

src/aumai_toolcanon/parsers/langchain.py

    def _extract_schema(self, schema: dict[str, Any]) -> dict[str, Any]:
        """Normalize a Pydantic/JSON schema to a JSON Schema object."""
        if schema.get("type") == "object" or "properties" in schema:
            return {
                "type": "object",
                "properties": schema.get("properties", {}),
                "required": schema.get("required", []),
            }
        # Pydantic v2 model_json_schema format
        if "model_fields" in schema:
            properties: dict[str, Any] = {}
            required: list[str] = []
            for field_name, field_info in schema["model_fields"].items():
                properties[field_name] = {"type": "string"}  # conservative default
                if field_info.get("is_required", True):
                    required.append(field_name)
            return {"type": "object", "properties": properties, "required": required}

        return schema

LangChainParser.can_parse method · python · L81-L91 (11 LOC)

src/aumai_toolcanon/parsers/langchain.py

    def can_parse(self, tool_def: dict[str, Any]) -> bool:
        """Return True if this dict looks like a LangChain tool definition."""
        return (
            "args_schema" in tool_def
            or "schema" in tool_def
            or (
                "name" in tool_def
                and "description" in tool_def
                and "properties" in tool_def
            )
        )

MCPParser.parse method · python · L14-L37 (24 LOC)

src/aumai_toolcanon/parsers/mcp.py

    def parse(self, tool_def: dict[str, Any]) -> CanonicalTool:
        """Convert an MCP-style tool dict into a ``CanonicalTool``.

        Expected input shape:
        ``{"name": ..., "description": ..., "inputSchema": {...}}``

        Missing ``name`` / ``description`` default to empty strings.
        Capabilities are inferred from the name and description; outputs are
        left empty.
        """
        tool_name: str = tool_def.get("name", "")
        summary: str = tool_def.get("description", "")

        # MCP spells the key in camelCase; the snake_case spelling is accepted
        # as a fallback, and an empty schema is used when neither is present.
        schema: dict[str, Any]
        if "inputSchema" in tool_def:
            schema = tool_def["inputSchema"]
        else:
            schema = tool_def.get("input_schema", {})

        return CanonicalTool(
            name=tool_name,
            description=summary,
            capabilities=_infer_capabilities(tool_name, summary),
            inputs=schema,
            outputs={},
            source_format=SourceFormat.mcp,
            original_definition=tool_def,
        )

OpenAIParser.parse method · python · L13-L41 (29 LOC)

src/aumai_toolcanon/parsers/openai.py

    def parse(self, tool_def: dict[str, Any]) -> CanonicalTool:
        """Parse OpenAI tool definition.

        Handles:
        - Wrapped: ``{"type": "function", "function": {...}}``
        - Legacy function call: ``{"name": ..., "parameters": {...}}``
        """
        # Unwrap tool wrapper
        if tool_def.get("type") == "function" and "function" in tool_def:
            func: dict[str, Any] = tool_def["function"]
        else:
            func = tool_def

        name: str = func.get("name", "")
        description: str = func.get("description", "")
        parameters: dict[str, Any] = func.get("parameters", {})

        # Infer capabilities from name / description
        capabilities = _infer_capabilities(name, description)

        return CanonicalTool(
            name=name,
            description=description,
            capabilities=capabilities,
            inputs=parameters,
            outputs={},
            source_format=SourceFormat.openai,
            original

OpenAIParser.can_parse method · python · L43-L49 (7 LOC)

src/aumai_toolcanon/parsers/openai.py

    def can_parse(self, tool_def: dict[str, Any]) -> bool:
        """Return True if this dict looks like an OpenAI tool definition."""
        if tool_def.get("type") == "function" and "function" in tool_def:
            return True
        if "name" in tool_def and "parameters" in tool_def:
            return True
        return False

_infer_capabilities function · python · L52-L91 (40 LOC)

src/aumai_toolcanon/parsers/openai.py

def _infer_capabilities(name: str, description: str) -> ToolCapability:
    """Heuristically infer tool capabilities from name and description text."""
    text = (name + " " + description).lower()

    side_effect_verbs = {
        "write", "create", "delete", "update", "post", "send", "save", "remove"
    }
    read_verbs = {"read", "get", "fetch", "list", "search", "query", "find"}

    has_side_effects = any(v in text for v in side_effect_verbs)
    action = "write" if has_side_effects else "read"
    for verb in read_verbs:
        if verb in text:
            action = verb
            break

    domain_map = {
        "file": "filesystem",
        "web": "web",
        "search": "web",
        "database": "database",
        "sql": "database",
        "code": "code",
        "email": "email",
        "http": "web",
        "api": "web",
    }
    domain = "general"
    for keyword, dom in domain_map.items():
        if keyword in text:
            domain = dom
            break