Function bodies 178 total

get_hitl_queue function · python · L28-L55 (28 LOC)

harness/hitl/router.py

async def get_hitl_queue(
    request: Request,
    tenant: TenantConfig = Depends(verify_api_key),
    rail: str = Query(default="all"),
    tenant_filter: str = Query(default="all", alias="tenant"),
    since: str = Query(default="24h"),
    hide_reviewed: bool = Query(default=False),
) -> JSONResponse:
    """Return the HITL review queue, priority-sorted.

    Items are sorted by review urgency: closest-to-threshold flagged traces
    appear first; already-reviewed items sort to the bottom.

    Query parameters:
        rail: Filter by triggering rail name, or 'all' (default).
        tenant: Filter by tenant ID, or 'all' (default).
        since: Time window — ISO8601 timestamp or shorthand like '24h', '7d'.
        hide_reviewed: If true, exclude traces that have corrections.
    """
    since_ts = _resolve_since(since)
    trace_store = request.app.state.trace_store
    results = await trace_store.query_hitl_queue(
        since=since_ts,
        rail_filter=rail,
        tenant

submit_correction function · python · L59-L72 (14 LOC)

harness/hitl/router.py

async def submit_correction(
    request: Request,
    body: CorrectionRequest,
    tenant: TenantConfig = Depends(verify_api_key),
) -> JSONResponse:
    """Record a reviewer's correction for a flagged trace.

    The validated request body is dumped to a plain dict and handed to the
    application's trace store; the reply echoes the request ID back.

    The API contract is that ``action`` is one of approve, reject, or edit,
    and that for 'edit' the ``edited_response`` carries the corrected text.
    NOTE(review): neither action validation nor PII redaction happens in this
    handler — presumably they live in ``CorrectionRequest`` and
    ``write_correction`` respectively; confirm before relying on it.

    Args:
        request: Incoming request; used to reach ``app.state.trace_store``.
        body: Parsed correction payload.
        tenant: Resolved by ``verify_api_key``; present only to enforce
            authentication (the value is not read here).

    Returns:
        JSON object with ``status`` set to "ok" and the ``request_id``.
    """
    store = request.app.state.trace_store
    correction = body.model_dump()
    await store.write_correction(correction)

    payload = {"status": "ok", "request_id": body.request_id}
    return JSONResponse(content=payload)

_extract_triggering_rail_inline function · python · L15-L35 (21 LOC)

harness/hitl/ui.py

def _extract_triggering_rail_inline(guardrail_decisions: dict) -> str | None:
    """Return the rail name from the all_results entry closest to threshold.

    Mirrors harness.traces.store._extract_triggering_rail for inline use
    without importing the async store module.
    """
    if isinstance(guardrail_decisions, list):
        all_results = guardrail_decisions
    else:
        all_results = guardrail_decisions.get("all_results", [])
    best: str | None = None
    best_distance = float("inf")
    for result in all_results:
        score = result.get("score", 0)
        threshold = result.get("threshold", 1.0)
        if score > 0:
            distance = threshold - score
            if distance < best_distance:
                best_distance = distance
                best = result.get("rail_name") or result.get("rail")
    return best

_action_taken function · python · L38-L62 (25 LOC)

harness/hitl/ui.py

def _action_taken(item: dict) -> str:
    """Derive human-readable action from guardrail_decisions dict."""
    gd = item.get("guardrail_decisions") or {}
    if isinstance(gd, str):
        try:
            gd = json.loads(gd)
        except (json.JSONDecodeError, TypeError):
            gd = {}
    status_code = item.get("status_code", 200)
    refusal_event = item.get("refusal_event", 0)
    # guardrail_decisions may be a list (from trace JSON) or a dict with 'all_results'
    if isinstance(gd, list):
        all_results = gd
    else:
        all_results = gd.get("all_results", [])
    # Any result with score > threshold means it was blocked/steered
    has_cai = item.get("cai_critique") is not None
    if refusal_event:
        return "blocked"
    if has_cai:
        return "critiqued"
    if status_code in (200, None):
        if any(r.get("score", 0) > r.get("threshold", 1.0) for r in all_results):
            return "blocked"
    return "allowed"

build_ui function · python · L65-L414 (350 LOC)

harness/hitl/ui.py

def build_ui(api_url: str, api_key: str):  # -> gr.Blocks
    """Build and return a Gradio Blocks dashboard for HITL review.

    Args:
        api_url: Base URL of the harness API (e.g. "http://localhost:8080").
        api_key: Bearer token for the harness API.

    Returns:
        gr.Blocks instance (not yet launched — caller calls .launch()).
    """
    import gradio as gr  # imported here so module loads without gradio installed

    client = httpx.Client(
        base_url=api_url,
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=30.0,
    )

    # -----------------------------------------------------------------------
    # Callback implementations
    # -----------------------------------------------------------------------

    def refresh_queue(rail: str, tenant: str, time_range: str, hide_rev: bool) -> list[list[Any]]:
        """Fetch queue from API and return formatted dataframe rows."""
        try:
            resp = client.get(
                "/

lifespan function · python · L23-L79 (57 LOC)

harness/main.py

async def lifespan(app: FastAPI):
    """App lifespan: load tenants, create HTTP client pool, initialize rate limiter and trace store."""
    # Load tenant config
    tenants_path = os.path.join(_CONFIG_DIR, "tenants.yaml")
    app.state.tenants = load_tenants(tenants_path)

    # Shared async HTTP client for proxying to LiteLLM
    app.state.http_client = httpx.AsyncClient(
        base_url=_LITELLM_BASE,
        timeout=httpx.Timeout(120.0),
        limits=httpx.Limits(max_connections=50, max_keepalive_connections=20),
    )

    # In-memory rate limiter (per-tenant RPM + TPM)
    app.state.rate_limiter = SlidingWindowLimiter()

    # Trace store — initialize SQLite schema (WAL mode, indexes)
    os.makedirs(_DATA_DIR, exist_ok=True)
    db_path = os.path.join(_DATA_DIR, "traces.db")
    app.state.trace_store = TraceStore(db_path=db_path)
    await app.state.trace_store.init_db()

    # Eagerly import PII redactor so AnalyzerEngine loads at startup
    import harness.pii.redactor  #

probe function · python · L104-L106 (3 LOC)

harness/main.py

async def probe(tenant: Annotated[TenantConfig, Depends(verify_api_key)]):
    """Echo the authenticated tenant's identity.

    Used by tests to confirm that the API-key dependency resolves to the
    expected tenant.

    Returns:
        Dict with the tenant's ``tenant_id`` and its ``bypass`` flag.
    """
    identity = {
        "tenant_id": tenant.tenant_id,
        "bypass": tenant.bypass,
    }
    return identity

Want this analysis on your repo? https://repobility.com/scan/

_regex_redact function · python · L98-L102 (5 LOC)

harness/pii/redactor.py

def _regex_redact(text: str) -> str:
    """Run every (pattern, replacement) pair in _REGEX_PATTERNS over *text*.

    Substitutions are applied sequentially in the order the pairs are
    listed, each one operating on the output of the previous one.

    Returns:
        The text after all substitutions.
    """
    redacted = text
    for compiled, token in _REGEX_PATTERNS:
        redacted = compiled.sub(token, redacted)
    return redacted

redact function · python · L105-L132 (28 LOC)

harness/pii/redactor.py

def redact(text: str, strictness: str = "balanced") -> str:
    """Redact PII from text using a regex pre-pass and Presidio NER.

    Args:
        text: Input text, potentially containing PII.
        strictness: One of "strict", "balanced", or "minimal".
                    Controls which Presidio entity types are detected.

    Returns:
        Text with PII replaced by typed tokens such as [EMAIL], [PHONE],
        [SSN], [CREDIT_CARD], [NAME], [ADDRESS], [REDACTED].
    """
    # Step 1: Regex pre-pass — catches structured PII regardless of spaCy model
    text = _regex_redact(text)

    # Step 2: Presidio NER pass — catches unstructured PII (names, addresses, etc.)
    entities = STRICTNESS_ENTITIES.get(strictness, STRICTNESS_ENTITIES["balanced"])
    results = _analyzer.analyze(text=text, entities=entities, language="en")

    if not results:
        return text

    anonymized = _anonymizer.anonymize(
        text=text,
        analyzer_results=results,
        operators=_OPERA

suggest_tuning function · python · L17-L49 (33 LOC)

harness/proxy/admin.py

async def suggest_tuning(
    request: Request,
    tenant: TenantConfig = Depends(verify_api_key),
    since: str = Query(default="24h", description="Time window: ISO8601 timestamp or shorthand like '24h', '7d'"),
):
    """Trigger on-demand tuning analysis based on trace history.

    Returns ranked threshold + principle tuning suggestions as both
    human-readable report and machine-readable YAML diffs.
    """
    from harness.critique.analyzer import analyze_traces

    # Resolve shorthand time strings
    since_ts = _resolve_since(since)

    trace_store = request.app.state.trace_store
    http_client = request.app.state.http_client
    critique_engine = getattr(request.app.state, "critique_engine", None)
    constitution = critique_engine.constitution if critique_engine else None

    if constitution is None:
        return JSONResponse(
            content={"error": "Constitutional AI not configured"},
            status_code=503,
        )

    result = await analyze_traces(

_resolve_since function · python · L52-L62 (11 LOC)

harness/proxy/admin.py

def _resolve_since(since: str) -> str:
    """Convert shorthand like '24h', '7d' to ISO8601 timestamp."""
    now = datetime.now(timezone.utc)
    if since.endswith("h"):
        hours = int(since[:-1])
        return (now - timedelta(hours=hours)).isoformat()
    elif since.endswith("d"):
        days = int(since[:-1])
        return (now - timedelta(days=days)).isoformat()
    else:
        return since  # Assume ISO8601 already

chat_completions function · python · L28-L233 (206 LOC)

harness/proxy/litellm.py

async def chat_completions(
    request: Request,
    tenant: TenantConfig = Depends(verify_api_key),
) -> JSONResponse:
    """Proxy /v1/chat/completions to LiteLLM with auth, rate limiting, guardrails, and tracing.

    Pipeline:
    1. RPM check (pre-request)
    2. TPM check (checks previous request's accumulated tokens)
    3. Read request body
    4. Guardrail pipeline (skip for bypass tenants):
       a. Unicode normalize
       b. Input rails
       c. If blocked: return refusal or soft-steer to LiteLLM
    5. Proxy body to LiteLLM
    6. Record TPM for this response (gates the next request)
    7. Output rails (skip for bypass tenants)
    8. PII redact + trace write in background after response sent
    """
    rate_limiter = request.app.state.rate_limiter

    # 1. RPM check
    try:
        await rate_limiter.check_rpm(tenant.tenant_id, tenant.rpm_limit)
    except RateLimitExceeded as exc:
        logger.warning("429 RPM: tenant=%s limit=%d detail=%s", tenant.tenant_id, te

_write_trace function · python · L236-L295 (60 LOC)

harness/proxy/litellm.py

async def _write_trace(
    app,
    request_id: str,
    tenant: TenantConfig,
    body: dict,
    response_data: dict,
    latency_ms: int,
    status_code: int,
    guardrail_decisions=None,
    is_refusal: bool = False,
    cai_critique=None,
) -> None:
    """Extract, PII-redact, and write a trace record to SQLite.

    Called as a BackgroundTask after the response has been sent to the client.
    Raw PII never persists — redaction happens before any SQLite write.
    """
    # Extract prompt from messages (join all content fields)
    messages = body.get("messages", [])
    prompt_parts: list[str] = []
    for msg in messages:
        content = msg.get("content")
        if isinstance(content, str):
            prompt_parts.append(content)
        elif isinstance(content, list):
            # Multi-modal content: extract text parts
            for part in content:
                if isinstance(part, dict) and part.get("type") == "text":
                    prompt_parts.append(par

RateLimitExceeded class · python · L9-L14 (6 LOC)