Function bodies 96 total

estimate_tokens function · python · L23-L25 (3 LOC)

scripts/prepare_dataset.py

def estimate_tokens(text):
    """Approximate the number of tokens in ``text``.

    Applies the rough heuristic of about four characters per token and
    rounds down, so inputs shorter than four characters yield 0.
    """
    chars_per_token = 4
    char_count = len(text)
    return char_count // chars_per_token

download_openorca_sample function · python · L27-L93 (67 LOC)

scripts/prepare_dataset.py

def download_openorca_sample(num_samples=500):
    """Download and process OpenOrca dataset"""
    print("Downloading OpenOrca dataset from HuggingFace...")

    # Use the 1M-GPT4-Augmented subset (smaller, high quality)
    url = "https://huggingface.co/datasets/Open-Orca/OpenOrca/resolve/main/1M-GPT4-Augmented.parquet"

    # Download the parquet file
    print(f"Fetching {url}")
    print("(This may take a few minutes for the first download...)")

    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()

        # Save to temporary file
        temp_file = Path("/tmp/openorca.parquet")
        with open(temp_file, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Downloaded to {temp_file}")

        # Read the parquet file
        print("Reading parquet file...")
        df = pd.read_parquet(temp_file)

        print(f"Dataset contains {len(df)} examples")
        print(f

generate_synthetic_fallback function · python · L95-L134 (40 LOC)

scripts/prepare_dataset.py

def generate_synthetic_fallback(num_samples):
    """Generate synthetic examples if download fails"""
    print("Using synthetic fallback dataset")

    examples = [
        # Short Q&A
        {"prompt": "What is the difference between a violin and a fiddle?", "expected_tokens": 100},
        {"prompt": "How many planets are in our solar system?", "expected_tokens": 50},
        {"prompt": "What causes rain?", "expected_tokens": 150},
        {"prompt": "Explain TCP vs UDP in networking.", "expected_tokens": 200},
        {"prompt": "What is the Python GIL?", "expected_tokens": 250},

        # Code generation
        {"prompt": "Write a Python function to find the nth Fibonacci number using dynamic programming.", "expected_tokens": 200},
        {"prompt": "Create a SQL query to find the second highest salary from an employees table.", "expected_tokens": 150},
        {"prompt": "Implement a binary search tree in Python with insert and search methods.", "expected_tokens": 400},

save_dataset function · python · L136-L141 (6 LOC)

scripts/prepare_dataset.py

def save_dataset(prompts, filename):
    """Write ``prompts`` to ``filename`` as JSON Lines.

    Every element of ``prompts`` is serialized with ``json.dumps`` and placed
    on its own line. The file is opened in write mode, so existing content is
    replaced. A one-line summary is printed once the file has been written.
    """
    with open(filename, 'w') as out:
        out.writelines(json.dumps(entry) + '\n' for entry in prompts)
    print(f"Saved {len(prompts)} prompts to {filename}")

main function · python · L143-L177 (35 LOC)

scripts/prepare_dataset.py

def main():
    import argparse

    parser = argparse.ArgumentParser(description='Download and prepare OpenOrca dataset for benchmarking')
    parser.add_argument('--samples', type=int, default=10000, help='Number of samples to generate (default: 10000)')
    parser.add_argument('--output-dir', type=str, default='examples/prompts', help='Output directory (default: examples/prompts)')
    args = parser.parse_args()

    # Download and prepare dataset
    prompts = download_openorca_sample(args.samples)

    # Shuffle for variety
    random.shuffle(prompts)

    # Create output directory if needed
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save dataset
    output_path = output_dir / f"openorca-{args.samples}.jsonl"
    save_dataset(prompts, str(output_path))

    print("\nDataset statistics:")
    token_counts = [p['max_tokens'] for p in prompts]
    print(f"   Total prompts: {len(prompts)}")
    print(f"   Average max_tokens: {sum(to

start_server function · rust · L6-L15 (10 LOC)

src/admin.rs

/// Serves the admin HTTP endpoints on `addr`.
///
/// The routes `/metrics`, `/metrics.json`, `/vars` and `/vars.json` are tried
/// in that order. The returned future completes only when the warp server's
/// `run` future does.
pub async fn start_server(addr: SocketAddr) {
    info!("Starting metrics server on {}", addr);

    let prometheus = metrics_endpoint();
    let prometheus_json = metrics_json_endpoint();
    let vars = vars_endpoint();
    let vars_json = vars_json_endpoint();
    let routes = prometheus.or(prometheus_json).or(vars).or(vars_json);

    let server = warp::serve(routes);
    server.run(addr).await;
}

metrics_endpoint function · rust · L18-L23 (6 LOC)

src/admin.rs

/// Builds the `GET /metrics` route, answered by `prometheus_metrics`.
fn metrics_endpoint() -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone
{
    let path = warp::path!("metrics");
    let get_only = path.and(warp::get());
    get_only.and_then(prometheus_metrics)
}

metrics_json_endpoint function · rust · L26-L31 (6 LOC)

src/admin.rs

/// Builds the `GET /metrics.json` route, answered by `json_metrics`.
fn metrics_json_endpoint()
-> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
    let path = warp::path!("metrics.json");
    let get_only = path.and(warp::get());
    get_only.and_then(json_metrics)
}

vars_endpoint function · rust · L34-L36 (3 LOC)

src/admin.rs

/// Builds the `GET /vars` route, answered by `human_metrics`.
fn vars_endpoint() -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
    let path = warp::path!("vars");
    let get_only = path.and(warp::get());
    get_only.and_then(human_metrics)
}

vars_json_endpoint function · rust · L39-L44 (6 LOC)

src/admin.rs

/// Builds the `GET /vars.json` route, answered by `json_metrics`
/// (the same handler that serves `/metrics.json`).
fn vars_json_endpoint()
-> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone {
    let path = warp::path!("vars.json");
    let get_only = path.and(warp::get());
    get_only.and_then(json_metrics)
}

prometheus_metrics function · rust · L45-L115 (71 LOC)

src/admin.rs

async fn prometheus_metrics() -> Result<impl warp::Reply, Infallible> {
    use metriken::Value;
    use std::time::{SystemTime, UNIX_EPOCH};

    let mut lines = Vec::new();
    let timestamp = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("system time should be after UNIX epoch")
        .as_millis();

    for metric in &metriken::metrics() {
        let name = metric.name().replace('/', "_");

        match metric.value() {
            Some(Value::Counter(value)) => {
                if let Some(description) = metric.description() {
                    lines.push(format!(
                        "# TYPE {} counter\n# HELP {} {}\n{} {} {}",
                        name, name, description, name, value, timestamp
                    ));
                } else {
                    lines.push(format!(
                        "# TYPE {} counter\n{} {} {}",
                        name, name, value, timestamp
                    ));
                }
            }

json_metrics function · rust · L116-L153 (38 LOC)

src/admin.rs

async fn json_metrics() -> Result<impl warp::Reply, Infallible> {
    use metriken::Value;
    use serde_json::json;

    let mut metrics = serde_json::Map::new();

    for metric in &metriken::metrics() {
        let name = metric.name();

        match metric.value() {
            Some(Value::Counter(value)) => {
                metrics.insert(name.to_string(), json!(value));
            }
            Some(Value::Gauge(value)) => {
                metrics.insert(name.to_string(), json!(value));
            }
            Some(Value::Other(other)) => {
                // Handle histograms
                if let Some(histogram) = other.downcast_ref::<metriken::AtomicHistogram>()
                    && let Some(loaded) = histogram.load()
                {
                    // Export common percentiles
                    let percentiles = [50.0, 90.0, 95.0, 99.0, 99.9];
                    if let Ok(Some(values)) = loaded.percentiles(&percentiles) {
                        for (percen

human_metrics function · rust · L154-L200 (47 LOC)

src/admin.rs

async fn human_metrics() -> Result<impl warp::Reply, Infallible> {
    use metriken::Value;

    let mut lines = Vec::new();

    for metric in &metriken::metrics() {
        let name = metric.name();

        match metric.value() {
            Some(Value::Counter(value)) => {
                lines.push(format!("{}: {}", name, value));
            }
            Some(Value::Gauge(value)) => {
                lines.push(format!("{}: {}", name, value));
            }
            Some(Value::Other(other)) => {
                // Handle histograms
                if let Some(histogram) = other.downcast_ref::<metriken::AtomicHistogram>()
                    && let Some(loaded) = histogram.load()
                {
                    // Export common percentiles
                    let percentiles = [50.0, 90.0, 95.0, 99.0, 99.9];
                    if let Ok(Some(values)) = loaded.percentiles(&percentiles) {
                        for (percentile, bucket) in values.iter() {

new function · rust · L96-L166 (71 LOC)

src/benchmark.rs

    pub async fn new(mut config: Config) -> Result<Self> {
        // Initialize metrics
        Metrics::init();

        // Wait for server to be ready if timeout is set (> 0)
        // This is optional and useful when starting servers that need time to load models
        if config.endpoint.health_check_timeout > 0 {
            crate::client::check_server_ready(
                &config.endpoint.base_url,
                config.endpoint.api_key.as_deref(),
                Duration::from_secs(config.endpoint.health_check_timeout),
                Duration::from_secs(config.endpoint.health_check_interval),
            )
            .await?;
        }

        // Detect model from server if not provided
        let model = if let Some(model) = config.endpoint.model.clone() {
            model
        } else {
            info!("Model not specified, querying server for available models");
            let detected = crate::client::detect_model(
                &config.endpoint.base_url,

load_prompts function · rust · L167-L186 (20 LOC)

src/benchmark.rs

    /// Loads prompts from a file containing one JSON-encoded `Prompt` per line.
    ///
    /// Lines that are empty after trimming are ignored. A line that fails to
    /// deserialize is reported with `warn!` and skipped, so one malformed entry
    /// does not abort the load.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or a line cannot be read.
    async fn load_prompts(path: &Path) -> Result<Vec<Prompt>> {
        let mut lines = BufReader::new(File::open(path).await?).lines();
        let mut prompts = Vec::new();

        loop {
            let Some(raw) = lines.next_line().await? else {
                break;
            };

            if raw.trim().is_empty() {
                continue;
            }

            match serde_json::from_str::<Prompt>(&raw) {
                Err(e) => warn!("Failed to parse prompt line: {}", e),
                Ok(prompt) => prompts.push(prompt),
            }
        }

        Ok(prompts)
    }

All rows scored by the Repobility analyzer (https://repobility.com)

run function · rust · L221-L285 (65 LOC)

src/benchmark.rs

    pub async fn run(&self) -> Result<()> {
        let report_builder = ReportBuilder::new().with_config(self.config.clone());
        let start_instant = Instant::now();

        debug!("Starting benchmark run");

        // Set running flag
        crate::metrics::RUNNING.store(true, std::sync::atomic::Ordering::Relaxed);

        // Create notification for warmup completion
        let warmup_complete = Arc::new(tokio::sync::Notify::new());

        // Spawn periodic stats output task (unless in quiet mode or JSON to stdout)
        let json_to_stdout = matches!(self.config.output.format, crate::config::OutputFormat::Json)
            && self.config.output.file.is_none();
        let stats_handle = if !self.config.output.quiet && !json_to_stdout {
            let config = self.config.clone();
            let warmup_notify = Arc::clone(&warmup_complete);
            Some(tokio::spawn(async move {
                crate::stats::periodic_stats(config, warmup_notify).await;

run_concurrent_mode_internal function · rust · L286-L547 (262 LOC)

src/benchmark.rs

    async fn run_concurrent_mode_internal(
        &self,
        start_instant: Instant,
        warmup_complete: Arc<tokio::sync::Notify>,
    ) -> Result<Duration> {
        info!(
            "Running in concurrent mode with {} workers",
            self.config.load.concurrent_requests
        );

        // Create semaphore for concurrency control
        let semaphore = Arc::new(Semaphore::new(self.config.load.concurrent_requests));

        // Determine test duration or request count
        let (total_requests, duration_limit) = if let Some(total) = self.config.load.total_requests
        {
            (Some(total), None)
        } else if let Some(duration_secs) = self.config.load.duration_seconds {
            (None, Some(Duration::from_secs(duration_secs)))
        } else {
            anyhow::bail!("Either total_requests or duration_seconds must be specified");
        };

        // Calculate warmup parameters
        let warmup_count = self.config.load.warmup_requests.un

run_qps_mode_internal function · rust · L548-L776 (229 LOC)

src/benchmark.rs

    async fn run_qps_mode_internal(
        &self,
        start_instant: Instant,
        warmup_complete: Arc<tokio::sync::Notify>,
    ) -> Result<Duration> {
        let qps = self
            .config
            .load
            .qps
            .expect("QPS must be specified for fixed_qps mode");

        // Create request distribution
        let distribution = RequestDistribution::new(&self.config.load.arrival_distribution, qps);

        info!(
            "Running in fixed QPS mode: {} requests/second, {} distribution, max {} in-flight",
            qps,
            distribution.distribution_name(),
            self.config.load.concurrent_requests
        );

        // Calculate warmup parameters
        let warmup_count = self.config.load.warmup_requests.unwrap_or(0);
        let warmup_duration = self.config.load.warmup_duration.map(Duration::from_secs);

        // Log warmup info
        if warmup_count > 0 {
            info!("Starting warmup phase: {} requests", warm

generate_report function · rust · L777-L800 (24 LOC)

src/benchmark.rs

    async fn generate_report(&self, report_builder: ReportBuilder) -> Result<()> {
        match &self.config.output.format {
            crate::config::OutputFormat::Console => {
                report_builder.print_console_report()?;
            }
            crate::config::OutputFormat::Json => {
                let report = report_builder.build()?;
                let json = serde_json::to_string_pretty(&report)?;

                if let Some(file_path) = &self.config.output.file {
                    // Writing to file - show brief summary to console
                    tokio::fs::write(file_path, json).await?;
                    if !self.config.output.quiet {
                        self.print_brief_summary(&report)?;
                    }
                } else {
                    // Writing JSON to stdout - this is for piping
                    println!("{}", json);
                }
            }
        }
        Ok(())
    }

execute_request function · rust · L801-L912 (112 LOC)

src/benchmark.rs

    async fn execute_request(
        client: Arc<OpenAIClient>,
        tokenizer: Arc<Tokenizer>,
        prompt: Prompt,
        index: usize,
        is_warmup: bool,
    ) -> Result<()> {
        debug!("Executing request {} (warmup: {})", index, is_warmup);

        let request_start = Instant::now();

        // Only record metrics if not in warmup phase
        if !is_warmup {
            Metrics::record_request_sent();
        }

        // Add per-request cache-busting to ensure every request is unique
        let cache_bust_prompt = format!("[req-{}] {}", index, prompt.prompt);
        let request = client.create_request(&cache_bust_prompt, prompt.max_tokens);

        match client.chat_completion_stream(request).await {
            Ok(mut stream) => {
                // Consume the stream to measure TTFT and total time
                let mut total_content = String::new();

                while let Some(chunk) = stream.next_chunk().await? {
                    for choice

print_brief_summary function · rust · L913-L1003 (91 LOC)

src/benchmark.rs

    fn print_brief_summary(&self, report: &crate::report::BenchmarkReport) -> Result<()> {
        use chrono::Utc;

        let now = Utc::now();
        let timestamp = now.to_rfc3339_opts(chrono::SecondsFormat::Millis, false);

        println!();
        println!("{}", timestamp);
        println!("{} -----", timestamp);
        println!("{} Benchmark Complete", timestamp);
        println!(
            "{} Duration: {:.1}s",
            timestamp,
            report.duration.as_secs_f64()
        );
        println!(
            "{} Requests: Sent: {}",
            timestamp, report.summary.total_requests
        );
        println!(
            "{} Responses: Received: {} Ok: {} Err: {} Success: {:.2}%",
            timestamp,
            report.summary.successful_requests + report.summary.failed_requests,
            report.summary.successful_requests,
            report.summary.failed_requests,
            report.summary.success_rate * 100.0
        );

        // Error breakd

new function · rust · L174-L194 (21 LOC)

src/client.rs

    pub fn new(config: ClientConfig) -> Result<Self> {
        let client = Client::builder()
            .timeout(config.timeout)
            .pool_max_idle_per_host(config.pool_size) // Match concurrency for optimal connection reuse
            .pool_idle_timeout(Duration::from_secs(300)) // Keep connections alive for 5 minutes
            .tcp_keepalive(Duration::from_secs(60)) // TCP keep-alive every 60 seconds
            .http2_keep_alive_interval(Duration::from_secs(30)) // HTTP/2 keep-alive
            .http2_keep_alive_timeout(Duration::from_secs(20))
            .http2_keep_alive_while_idle(true) // Send keep-alive even when idle
            .build()?;

        Ok(Self {
            client,
            base_url: config.base_url,
            api_key: config.api_key,
            model: config.model,
            max_retries: config.max_retries,
            retry_initial_delay_ms: config.retry_initial_delay_ms,
            retry_max_delay_ms: config.retry_max_delay_ms,
        })

chat_completion function · rust · L195-L218 (24 LOC)

src/client.rs

    /// Sends `request` to `{base_url}/chat/completions` and returns the parsed
    /// response body.
    ///
    /// An `Authorization: Bearer …` header is attached when an API key is set.
    ///
    /// # Errors
    ///
    /// Fails if the request cannot be sent, if the server answers with a
    /// non-success status (the status and response body are included in the
    /// error), or if the body does not deserialize as `ChatCompletionResponse`.
    pub async fn chat_completion(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<ChatCompletionResponse> {
        let endpoint = format!("{}/chat/completions", self.base_url);
        let builder = self.client.post(&endpoint).json(&request);

        let builder = match &self.api_key {
            Some(api_key) => builder.header("Authorization", format!("Bearer {}", api_key)),
            None => builder,
        };

        let response = builder.send().await?;
        let status = response.status();

        if !status.is_success() {
            let text = response.text().await?;
            anyhow::bail!("API request failed with status {}: {}", status, text);
        }

        let completion = response.json::<ChatCompletionResponse>().await?;
        Ok(completion)
    }

Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)

create_request function · rust · L219-L231 (13 LOC)

src/client.rs

    /// Builds a non-streaming chat completion request holding a single `user`
    /// message with `prompt` as its content.
    ///
    /// The model is taken from this client, `max_tokens` is passed through
    /// unchanged, and `temperature` is left unset.
    pub fn create_request(&self, prompt: &str, max_tokens: Option<u32>) -> ChatCompletionRequest {
        let model = self.model.clone();
        let user_message = Message {
            role: String::from("user"),
            content: prompt.to_owned(),
        };

        ChatCompletionRequest {
            model,
            messages: vec![user_message],
            max_tokens,
            temperature: None,
            stream: Some(false),
        }
    }

chat_completion_stream function · rust · L234-L275 (42 LOC)

src/client.rs

    pub async fn chat_completion_stream(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<StreamResponse> {
        let mut attempt = 0;

        loop {
            match self.chat_completion_stream_internal(request.clone()).await {
                Ok(stream) => {
                    if attempt > 0 {
                        log::debug!("Request succeeded after {} retries", attempt);
                    }
                    return Ok(stream);
                }
                Err(e) => {
                    // Check if we should retry
                    if attempt < self.max_retries && Self::is_retriable_error(&e) {
                        // Record retry in metrics
                        crate::metrics::Metrics::record_retry();

                        let delay = self.calculate_backoff_delay(attempt);
                        log::debug!(
                            "Request failed (attempt {}/{}): {}. Retrying in {:?}",
                            attempt + 1,

chat_completion_stream_internal function · rust · L278-L359 (82 LOC)

src/client.rs

    async fn chat_completion_stream_internal(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<StreamResponse> {
        let mut request = request;
        request.stream = Some(true);

        let url = format!("{}/chat/completions", self.base_url);

        let mut req = self
            .client
            .post(&url)
            .json(&request)
            .header("Connection", "keep-alive"); // Ensure HTTP/1.1 keep-alive

        if let Some(api_key) = &self.api_key {
            req = req.header("Authorization", format!("Bearer {}", api_key));
        }

        let start_time = Instant::now();

        // Send request and handle connection errors
        let response = match req.send().await {
            Ok(resp) => resp,
            Err(e) => {
                if e.is_connect() {
                    return Err(ClientError::Connection(e.to_string()).into());
                } else if e.is_timeout() {
                    return Err(ClientError::Timeout(Dura

is_retriable_error function · rust · L362-L377 (16 LOC)

src/client.rs

    fn is_retriable_error(error: &anyhow::Error) -> bool {
        if let Some(client_error) = error.downcast_ref::<ClientError>() {
            match client_error {
                ClientError::Connection(_) => true,   // Network issues
                ClientError::Timeout(_) => true,      // Timeout
                ClientError::Http5xx { .. } => true,  // Server errors
                ClientError::Http4xx { .. } => false, // Client errors (don't retry)
                ClientError::Parse(_) => false,       // Parse errors (don't retry)
                ClientError::Other(_) => false,       // Unknown errors (don't retry)
            }
        } else {
            // For non-ClientError types, check the error message
            let err_str = error.to_string().to_lowercase();
            err_str.contains("timeout") || err_str.contains("connection")
        }
    }

calculate_backoff_delay function · rust · L380-L393 (14 LOC)

src/client.rs

    /// Computes the delay to wait before retry number `attempt` (0-based).
    ///
    /// The delay is `retry_initial_delay_ms * 2^attempt`, capped at
    /// `retry_max_delay_ms`, then scaled by a random jitter factor in
    /// `[0.5, 1.0]`.
    ///
    /// The exponential term uses saturating arithmetic. A plain
    /// `2_u64.pow(attempt)` or the following multiplication overflows for
    /// large `attempt`, which panics in debug builds and wraps (possibly to a
    /// zero delay) in release builds. Saturating keeps the result at or above
    /// the cap, so the cap still applies.
    fn calculate_backoff_delay(&self, attempt: u32) -> Duration {
        let growth = 2_u64.saturating_pow(attempt);
        let base_delay_ms = self.retry_initial_delay_ms.saturating_mul(growth);
        let max_delay_ms = self.retry_max_delay_ms;

        // Cap at max delay
        let capped_delay_ms = base_delay_ms.min(max_delay_ms);

        // Add jitter: random value between 50% and 100% of the capped delay
        let mut rng = rand::thread_rng();
        let jitter_factor = rng.gen_range(0.5..=1.0);
        let jittered_delay_ms = (capped_delay_ms as f64 * jitter_factor) as u64;

        Duration::from_millis(jittered_delay_ms)
    }

next_chunk function · rust · L406-L459 (54 LOC)

src/client.rs

    pub async fn next_chunk(&mut self) -> Result<Option<ChatCompletionChunk>> {
        loop {
            let bytes = self.response.chunk().await?;

            // If no more data from server, stream is done
            if bytes.is_none() {
                return Ok(None);
            }

            let data = bytes.unwrap();

            // Parse SSE format
            let text = String::from_utf8_lossy(&data);
            for line in text.lines() {
                if let Some(json_str) = line.strip_prefix("data: ") {
                    if json_str == "[DONE]" {
                        return Ok(None);
                    }

                    if let Ok(chunk) = serde_json::from_str::<ChatCompletionChunk>(json_str) {
                        // Check if this chunk contains content
                        let has_content = chunk.choices.iter().any(|c| c.delta.content.is_some());

                        if has_content {
                            let now = Instant::now();

time_to_first_token function · rust · L460-L463 (4 LOC)

src/client.rs

    pub fn time_to_first_token(&self) -> Option<Duration> {
        self.first_token_time
    }

total_duration function · rust · L464-L467 (4 LOC)

src/client.rs

    pub fn total_duration(&self) -> Duration {
        self.start_time.elapsed()
    }

Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/

inter_token_latencies function · rust · L468-L471 (4 LOC)

src/client.rs

    /// Borrows the inter-token latencies recorded so far as a slice.
    pub fn inter_token_latencies(&self) -> &[Duration] {
        &self.inter_token_latencies[..]
    }

check_server_ready function · rust · L495-L571 (77 LOC)

src/client.rs

pub async fn check_server_ready(
    base_url: &str,
    api_key: Option<&str>,
    total_timeout: Duration,
    retry_interval: Duration,
) -> Result<()> {
    let start_time = Instant::now();
    let mut attempt = 0;

    log::info!("Waiting for server to be ready at {}...", base_url);

    loop {
        attempt += 1;

        log::debug!(
            "Server readiness check attempt {}: GET {}/models",
            attempt,
            base_url
        );

        // Try to list models with a short timeout per request
        match tokio::time::timeout(
            Duration::from_secs(10),
            list_models(base_url, api_key, Duration::from_secs(10)),
        )
        .await
        {
            Ok(Ok(models)) => {
                log::info!(
                    "Server is ready ({} model{} available after {:.1}s)",
                    models.len(),
                    if models.len() == 1 { "" } else { "s" },
                    start_time.elapsed().as_secs_f64()

list_models function · rust · L574-L608 (35 LOC)

src/client.rs

pub async fn list_models(
    base_url: &str,
    api_key: Option<&str>,
    timeout: Duration,
) -> Result<Vec<Model>> {
    let client = Client::builder().timeout(timeout).build()?;

    let url = format!("{}/models", base_url);
    let mut req = client.get(&url);

    if let Some(key) = api_key {
        req = req.header("Authorization", format!("Bearer {}", key));
    }

    let response = req
        .send()
        .await
        .map_err(|e| anyhow::anyhow!("Failed to query models endpoint: {}", e))?;

    if !response.status().is_success() {
        let status = response.status();
        let text = response
            .text()
            .await
            .unwrap_or_else(|_| "Unable to read response".to_string());
        anyhow::bail!("Models endpoint returned {}: {}", status, text);
    }

    let models_response: ModelsResponse = response
        .json()
        .await
        .map_err(|e| anyhow::anyhow!("Failed to parse models response: {}", e))?;

    Ok(models_respons

detect_model function · rust · L611-L644 (34 LOC)

src/client.rs

pub async fn detect_model(
    base_url: &str,
    api_key: Option<&str>,
    timeout: Duration,
) -> Result<String> {
    let models = list_models(base_url, api_key, timeout).await?;

    if models.is_empty() {
        anyhow::bail!("No models available from server at {}/models", base_url);
    }

    // Return the first model (raw name for API requests)
    let raw_model = models[0].id.clone();
    let normalized_model = normalize_model_name(&raw_model);

    if models.len() > 1 {
        log::info!("Found {} models, using: {}", models.len(), normalized_model);
        log::debug!(
            "Available models: {:?}",
            models.iter().map(|m| &m.id).collect::<Vec<_>>()
        );
    } else if raw_model != normalized_model {
        log::info!(
            "Detected model: {} (server reports as: {})",
            normalized_model,
            raw_model
        );
    } else {
        log::info!("Detected model: {}", raw_model);
    }

    // Return raw model name for API re

normalize_model_name function · rust · L665-L743 (79 LOC)

src/client.rs

fn normalize_model_name(model: &str) -> String {
    let is_file_path = model.contains('/') || model.contains('\\');
    let is_gguf = model.ends_with(".gguf");

    // If it looks like a file path, extract just the filename
    // Handle both Unix (/) and Windows (\) path separators
    let name = if is_file_path {
        // Try forward slash first, then backslash
        let from_forward = model.rsplit('/').next();
        let from_backward = model.rsplit('\\').next();

        // Use whichever gives us the shortest result (more specific)
        match (from_forward, from_backward) {
            (Some(f), Some(b)) => {
                if f.len() <= b.len() {
                    f
                } else {
                    b
                }
            }
            (Some(f), None) => f,
            (None, Some(b)) => b,
            (None, None) => model,
        }
    } else {
        model
    };

    // Remove .gguf extension if present
    let name = name.strip_suffix(".gguf"

test_normalize_model_name function · rust · L750-L789 (40 LOC)

src/client.rs

    fn test_normalize_model_name() {
        // llama.cpp full path - F16 has no underscores, dot is not a version
        assert_eq!(
            normalize_model_name("/mnt/llm-models/GGUF/Qwen/Qwen3-4B/Qwen3-4B.F16.gguf"),
            "qwen3-4b-f16"
        );

        // Windows path with version number and quantization format
        assert_eq!(
            normalize_model_name("C:\\Models\\llama-3.1-8b-q4_k_m.gguf"),
            "llama-3.1-8b-q4_k_m"
        );

        // GGUF with version number and quantization format
        assert_eq!(
            normalize_model_name("Mistral-7B-Instruct-v0.3-Q5_K_M.gguf"),
            "mistral-7b-instruct-v0.3-q5_k_m"
        );

        // Regular model name (OpenAI style) - preserve dots
        assert_eq!(normalize_model_name("gpt-3.5-turbo"), "gpt-3.5-turbo");

        // Model with underscores (non-GGUF) - normalize underscores only
        assert_eq!(
            normalize_model_name("llama_3_1_8b_instruct"),
            "llama-3-1-8b

parse_args function · rust · L13-L15 (3 LOC)

src/cli.rs

    pub fn parse_args() -> Self {
        Cli::parse()
    }

to_level_filter function · rust · L114-L122 (9 LOC)

src/config.rs

    pub fn to_level_filter(&self) -> log::LevelFilter {
        match self {
            LogLevel::Error => log::LevelFilter::Error,
            LogLevel::Warn => log::LevelFilter::Warn,
            LogLevel::Info => log::LevelFilter::Info,
            LogLevel::Debug => log::LevelFilter::Debug,
            LogLevel::Trace => log::LevelFilter::Trace,
        }
    }

default function · rust · L153-L158 (6 LOC)

src/config.rs

    /// Builds the value with `listen` and `enabled` taken from
    /// `default_admin_listen` and `default_admin_enabled`.
    fn default() -> Self {
        let listen = default_admin_listen();
        let enabled = default_admin_enabled();
        Self { listen, enabled }
    }

default function · rust · L162-L166 (5 LOC)

src/config.rs

    /// Builds the value with `worker_threads` taken from
    /// `default_worker_threads`.
    fn default() -> Self {
        let worker_threads = default_worker_threads();
        Self { worker_threads }
    }

default function · rust · L170-L175 (6 LOC)

src/config.rs

    /// Builds the value with `level` taken from `default_log_level` and an
    /// empty `filter` list.
    fn default() -> Self {
        let level = default_log_level();
        Self {
            level,
            filter: vec![],
        }
    }

default_timeout function · rust · L177-L180 (4 LOC)

src/config.rs

/// Default value for the timeout setting: 60.
// NOTE(review): presumably seconds — confirm where the value is converted to a `Duration`.
fn default_timeout() -> u64 {
    const DEFAULT_TIMEOUT: u64 = 60;
    DEFAULT_TIMEOUT
}

default_retry_initial_delay_ms function · rust · L181-L184 (4 LOC)

src/config.rs

/// Default initial retry delay, in milliseconds: 100.
fn default_retry_initial_delay_ms() -> u64 {
    const INITIAL_DELAY_MS: u64 = 100;
    INITIAL_DELAY_MS
}

default_retry_max_delay_ms function · rust · L185-L188 (4 LOC)

src/config.rs

/// Default upper bound on the retry delay, in milliseconds: 10 000 (10 seconds).
fn default_retry_max_delay_ms() -> u64 {
    const MAX_DELAY_MS: u64 = 10_000;
    MAX_DELAY_MS
}

default_health_check_timeout function · rust · L189-L192 (4 LOC)

src/config.rs

/// Default health-check timeout: 0, which leaves the health check disabled.
fn default_health_check_timeout() -> u64 {
    const DISABLED: u64 = 0;
    DISABLED
}

default_health_check_interval function · rust · L193-L196 (4 LOC)

src/config.rs

/// Default interval between health-check attempts: 5 seconds.
fn default_health_check_interval() -> u64 {
    const INTERVAL_SECS: u64 = 5;
    INTERVAL_SECS
}

All rows scored by the Repobility analyzer (https://repobility.com)

default_concurrent_requests function · rust · L197-L200 (4 LOC)

src/config.rs

/// Default number of concurrent requests: 10.
fn default_concurrent_requests() -> usize {
    const CONCURRENT_REQUESTS: usize = 10;
    CONCURRENT_REQUESTS
}

default_output_format function · rust · L201-L204 (4 LOC)

src/config.rs

/// Default output format: `OutputFormat::Console`.
fn default_output_format() -> OutputFormat {
    const DEFAULT_FORMAT: OutputFormat = OutputFormat::Console;
    DEFAULT_FORMAT
}

default_worker_threads function · rust · L205-L208 (4 LOC)

src/config.rs

/// Default worker thread count: whatever `num_cpus::get()` reports for this
/// machine.
fn default_worker_threads() -> usize {
    let detected_cpus = num_cpus::get();
    detected_cpus
}

page 1 / 2 · next ›