← back to hivellm__transmutation

Function bodies 424 total

All specs Real LLM only Function bodies
detect_column_positions function · rust · L205-L237 (33 LOC)
src/engines/table_detector.rs
    fn detect_column_positions(&self, lines: &[&str]) -> Option<Vec<usize>> {
        // Find positions where words consistently start
        let mut position_votes: std::collections::HashMap<usize, usize> =
            std::collections::HashMap::new();

        for line in lines {
            let mut in_word = false;
            for (pos, ch) in line.chars().enumerate() {
                if ch.is_whitespace() {
                    in_word = false;
                } else if !in_word {
                    *position_votes.entry(pos).or_insert(0) += 1;
                    in_word = true;
                }
            }
        }

        // Keep positions that appear in most lines
        let threshold = (lines.len() as f32 * 0.7) as usize;
        let mut positions: Vec<usize> = position_votes
            .into_iter()
            .filter(|(_, votes)| *votes >= threshold)
            .map(|(pos, _)| pos)
            .collect();

        positions.sort_unstable();

        if positions.le
line_matches_columns function · rust · L240-L258 (19 LOC)
src/engines/table_detector.rs
    fn line_matches_columns(&self, line: &str, positions: &[usize]) -> bool {
        // Decides whether `line` is laid out like a row of the table whose
        // columns start at the character offsets in `positions`.
        //
        // Each word start in `line` that lies within 2 characters of any
        // column offset counts as one match; the line is accepted when the
        // match count reaches at least half of the column count.
        let mut matches: usize = 0;
        let mut in_word = false;

        for (pos, ch) in line.chars().enumerate() {
            if ch.is_whitespace() {
                in_word = false;
            } else if !in_word {
                // Only the first character of a word is a candidate column start.
                if positions.iter().any(|&col_pos| pos.abs_diff(col_pos) <= 2) {
                    matches += 1;
                }
                in_word = true;
            }
        }

        // At least 50% of columns should match. Doubling `matches` instead of
        // halving `positions.len()` avoids integer-division flooring, which
        // previously let 1 match out of 3 columns (or 0 out of 1) pass.
        matches * 2 >= positions.len()
    }
parse_aligned_table function · rust · L261-L308 (48 LOC)
src/engines/table_detector.rs
    fn parse_aligned_table(
        &self,
        lines: &[&str],
        column_positions: &[usize],
    ) -> Option<DetectedTable> {
        let mut rows = Vec::new();

        for line in lines {
            let mut cells = Vec::new();
            let line_chars: Vec<char> = line.chars().collect();

            for i in 0..column_positions.len() {
                let start = column_positions[i].min(line_chars.len());
                let end = column_positions
                    .get(i + 1)
                    .copied()
                    .unwrap_or(line_chars.len())
                    .min(line_chars.len());

                if start < line_chars.len() {
                    let cell: String = line_chars[start..end].iter().collect();
                    cells.push(cell.trim().to_string());
                }
            }

            if !cells.is_empty() {
                rows.push(cells);
            }
        }

        if rows.is_empty() {
            return None;
        }

 
detect_tab_separated_tables function · rust · L311-L328 (18 LOC)
src/engines/table_detector.rs
    fn detect_tab_separated_tables(&self, text: &str) -> Vec<DetectedTable> {
        // Scans `text` line by line and collects every run of lines that both
        // `find_tab_table_end` and `parse_tab_table` accept as a table.
        let lines: Vec<&str> = text.lines().collect();
        let mut found = Vec::new();
        let mut cursor = 0;

        while cursor < lines.len() {
            // `last` is the index of the table's final line relative to `cursor`.
            let parsed = self.find_tab_table_end(&lines[cursor..]).and_then(|last| {
                self.parse_tab_table(&lines[cursor..=cursor + last])
                    .map(|table| (table, last))
            });

            match parsed {
                Some((table, last)) => {
                    found.push(table);
                    // Resume right after the lines consumed by this table.
                    cursor += last + 1;
                }
                // No table starts here; try the next line.
                None => cursor += 1,
            }
        }

        found
    }
find_tab_table_end function · rust · L331-L353 (23 LOC)
src/engines/table_detector.rs
    fn find_tab_table_end(&self, lines: &[&str]) -> Option<usize> {
        // Returns the index (relative to `lines`) of the last line of a
        // tab-separated table that starts at `lines[0]`, or `None` when the
        // first line has too few tabs or the run of table-like lines is
        // shorter than `self.min_rows`.
        let expected_tabs = lines.first()?.matches('\t').count();

        // N columns need N - 1 tabs. `saturating_sub` keeps a configured
        // minimum of 0 from underflowing.
        if expected_tabs < self.min_columns.saturating_sub(1) {
            return None;
        }

        // A line belongs to the table while its tab count is within one of
        // the first line's. `abs_diff` avoids the `expected_tabs - 1`
        // underflow the range comparison had when the first line has no tabs.
        let run_len = lines
            .iter()
            .take_while(|line| line.matches('\t').count().abs_diff(expected_tabs) <= 1)
            .count();

        // The first line always matches itself, so `run_len` is at least 1;
        // `checked_sub` just keeps that invariant panic-free.
        let end = run_len.checked_sub(1)?;

        if end >= self.min_rows.saturating_sub(1) {
            Some(end)
        } else {
            None
        }
    }
parse_tab_table function · rust · L356-L375 (20 LOC)
src/engines/table_detector.rs
    fn parse_tab_table(&self, lines: &[&str]) -> Option<DetectedTable> {
        // Splits each line on tabs into trimmed cells, drops rows whose cells
        // are all empty, and wraps what remains in a `DetectedTable`.
        let mut rows: Vec<Vec<String>> = Vec::new();

        for line in lines {
            let cells: Vec<String> = line
                .split('\t')
                .map(|field| field.trim().to_string())
                .collect();

            // A row with no non-empty cell carries no data.
            if cells.iter().all(String::is_empty) {
                continue;
            }
            rows.push(cells);
        }

        if rows.len() < self.min_rows {
            return None;
        }

        // Rows may be ragged; the widest row defines the column count.
        let column_count = rows.iter().map(Vec::len).max().unwrap_or(0);

        let table = DetectedTable {
            rows,
            column_count,
            has_header: true, // Assume first row is header for TSV
            confidence: 0.85,
        };
        Some(table)
    }
test_pipe_delimited_table function · rust · L389-L402 (14 LOC)
src/engines/table_detector.rs
    fn test_pipe_delimited_table() {
        // Pipe-delimited input: a header row, a `---` separator row and two
        // data rows, three cells each.
        let markdown = r"
| Name | Age | City |
| --- | --- | --- |
| Alice | 30 | NYC |
| Bob | 25 | LA |
";
        let detector = TableDetector::new();

        let found = detector.detect_tables(markdown);

        assert!(!found.is_empty());
        let first = &found[0];
        assert_eq!(first.column_count, 3);
        assert!(first.has_header);
    }
Source: Repobility analyzer · https://repobility.com
test_tab_separated_table function · rust · L405-L413 (9 LOC)
src/engines/table_detector.rs
    fn test_tab_separated_table() {
        // Three lines, each with three tab-delimited fields.
        let tsv = "Name\tAge\tCity\nAlice\t30\tNYC\nBob\t25\tLA";
        let detector = TableDetector::new();

        let found = detector.detect_tables(tsv);

        assert!(!found.is_empty());
        // NOTE(review): every row has 2 tabs (3 fields), yet the expected
        // column_count is 2. Presumably this reflects how `detect_tables`
        // picks or counts the first table — confirm against `detect_tables`.
        assert_eq!(found[0].column_count, 2);
    }
test_no_table function · rust · L416-L422 (7 LOC)
src/engines/table_detector.rs
    fn test_no_table() {
        // A single sentence of prose must not be reported as a table.
        let prose = "This is just regular text with no table structure.";
        let detector = TableDetector::new();

        let found = detector.detect_tables(prose);

        assert!(found.is_empty());
    }
conversion_failed function · rust · L81-L86 (6 LOC)
src/error.rs
    pub fn conversion_failed<S: Into<String>>(reason: S) -> Self {
        // Builds a `ConversionFailed` error carrying only a textual reason;
        // no underlying source error is attached.
        let reason: String = reason.into();

        Self::ConversionFailed {
            source: None,
            reason,
        }
    }
conversion_failed_with_source function · rust · L89-L97 (9 LOC)
src/error.rs
    pub fn conversion_failed_with_source<S: Into<String>, E>(reason: S, source: E) -> Self
    where
        E: std::error::Error + Send + Sync + 'static,
    {
        // Builds a `ConversionFailed` error that keeps the originating error
        // boxed in `source` alongside the textual reason.
        let reason: String = reason.into();

        Self::ConversionFailed {
            source: Some(Box::new(source)),
            reason,
        }
    }
engine_error function · rust · L100-L106 (7 LOC)
src/error.rs
    pub fn engine_error<S1: Into<String>, S2: Into<String>>(engine: S1, message: S2) -> Self {
        // Builds an `EngineError` from an engine name and a message; no
        // underlying source error is attached.
        let engine: String = engine.into();
        let message: String = message.into();

        Self::EngineError {
            source: None,
            engine,
            message,
        }
    }
engine_error_with_source function · rust · L109-L122 (14 LOC)
src/error.rs
    pub fn engine_error_with_source<S1: Into<String>, S2: Into<String>, E>(
        engine: S1,
        message: S2,
        source: E,
    ) -> Self
    where
        E: std::error::Error + Send + Sync + 'static,
    {
        // Builds an `EngineError` that keeps the originating error boxed in
        // `source` alongside the engine name and message.
        let engine: String = engine.into();
        let message: String = message.into();

        Self::EngineError {
            source: Some(Box::new(source)),
            engine,
            message,
        }
    }
is_recoverable function · rust · L125-L130 (6 LOC)
src/error.rs
    pub fn is_recoverable(&self) -> bool {
        // Only timeouts, network errors and cache errors are reported as
        // recoverable; every other variant is not.
        match self {
            Self::Timeout(_) | Self::NetworkError(_) | Self::CacheError(_) => true,
            _ => false,
        }
    }
from function · rust · L152-L157 (6 LOC)
src/error.rs
    fn from(err: zip::result::ZipError) -> Self {
        // A zip failure is surfaced as an `IoError` of kind `Other` whose
        // payload is the zip error's display text.
        let description = err.to_string();
        let io_error = std::io::Error::new(std::io::ErrorKind::Other, description);

        TransmutationError::IoError(io_error)
    }
Repobility · severity-and-effort ranking · https://repobility.com
test_is_recoverable function · rust · L177-L183 (7 LOC)
src/error.rs
    fn test_is_recoverable() {
        // A timeout is expected to be recoverable...
        let one_second = std::time::Duration::from_secs(1);
        let timeout = TransmutationError::Timeout(one_second);
        assert!(timeout.is_recoverable());

        // ...whereas an unsupported format is not.
        let unsupported = TransmutationError::UnsupportedFormat("test".to_string());
        assert!(!unsupported.is_recoverable());
    }
default function · rust · L75-L81 (7 LOC)
src/lib.rs
    fn default() -> Self {
        // Defaults: caching enabled, parallelism equal to the value reported
        // by `num_cpus::get()`, and a 300-second timeout.
        let max_parallel = num_cpus::get();
        let timeout = std::time::Duration::from_secs(300);

        Self {
            enable_cache: true,
            max_parallel,
            timeout,
        }
    }
new function · rust · L123-L129 (7 LOC)
src/lib.rs
    pub fn new(input: std::path::PathBuf) -> Self {
        // Starts with default conversion options and no explicit output
        // format for the given input path.
        let options = ConversionOptions::default();

        Self {
            input,
            options,
            output_format: None,
        }
    }
execute function · rust · L144-L299 (156 LOC)
src/lib.rs
    pub async fn execute(self) -> Result<ConversionResult> {
        use crate::utils::detect_format;

        // Detect input format
        let input_format = detect_format(&self.input).await?;

        // Get output format (default to Markdown)
        let output_format = self.output_format.unwrap_or(OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        });

        // Select appropriate converter

        // Core formats (always enabled)
        if input_format == FileFormat::Pdf {
            use crate::converters::pdf::PdfConverter;
            let converter = PdfConverter::new();
            return converter
                .convert(&self.input, output_format, self.options)
                .await;
        }

        if input_format == FileFormat::Html {
            use crate::converters::html::HtmlConverter;
            let converter = HtmlConverter::new();
            return converter
                .convert(&self.input, output_fo
match_cells function · rust · L33-L84 (52 LOC)
src/ml/cell_matching.rs
    pub fn match_cells(
        &self,
        table_cells: &[TableCell],
        text_cells: &[TextCell],
    ) -> Result<Vec<MatchedCell>> {
        let mut matched = Vec::new();

        for table_cell in table_cells {
            let table_bbox = self.table_cell_to_bbox(table_cell);

            // Find all text cells that overlap with this table cell
            let mut matching_texts = Vec::new();

            for text_cell in text_cells {
                let iou = table_bbox.intersection_over_union(&text_cell.bbox);

                if iou >= self.iou_threshold {
                    matching_texts.push((text_cell, iou));
                }
            }

            // Sort by position (top-to-bottom, left-to-right) then by IoU
            matching_texts.sort_by(|a, b| {
                let y_cmp = a.0.bbox.t.partial_cmp(&b.0.bbox.t).unwrap();
                if y_cmp == std::cmp::Ordering::Equal {
                    a.0.bbox.l.partial_cmp(&b.0.bbox.l).unwrap()
                }
table_cell_to_bbox function · rust · L87-L96 (10 LOC)
src/ml/cell_matching.rs
    fn table_cell_to_bbox(&self, cell: &TableCell) -> BoundingBox {
        // Widens the cell's four bbox coordinates to f64, keeping their order,
        // and tags the resulting box with a top-left coordinate origin.
        let (x0, y0, x1, y1) = cell.bbox;
        let [x0, y0, x1, y1] = [x0, y0, x1, y1].map(f64::from);
        let origin = crate::document::types_extended::CoordOrigin::TopLeft;

        BoundingBox::new(x0, y0, x1, y1, origin)
    }
to_table_data function · rust · L135-L166 (32 LOC)
src/ml/cell_matching.rs
    pub fn to_table_data(cells: Vec<MatchedCell>) -> crate::document::types::TableData {
        // Groups matched cells into a row-indexed grid. Cells are appended to
        // their row in input order; column indices only feed `num_cols`.
        if cells.is_empty() {
            return crate::document::types::TableData {
                grid: Vec::new(),
                num_rows: 0,
                num_cols: 0,
            };
        }

        // The table extent is the furthest row/column any cell reaches,
        // spans included.
        let row_extent = |c: &MatchedCell| c.row + c.row_span;
        let col_extent = |c: &MatchedCell| c.col + c.col_span;
        let num_rows = cells.iter().map(row_extent).max().unwrap_or(0);
        let num_cols = cells.iter().map(col_extent).max().unwrap_or(0);

        let mut grid = vec![Vec::new(); num_rows];

        for cell in cells {
            // `get_mut` yields a row only when `cell.row` is inside the grid,
            // so out-of-range cells are skipped.
            if let Some(row) = grid.get_mut(cell.row) {
                row.push(crate::document::types::TableCell {
                    text: cell.text,
                    row_span: cell.row_span,
                    col_span: cell.col_span,
                });
            }
        }

        crate::document::types::TableData {
            grid,
            num_rows,
            num_cols,
        }
    }
test_cell_matcher_basic function · rust · L175-L202 (28 LOC)
src/ml/cell_matching.rs
    fn test_cell_matcher_basic() {
        // One header cell covering (0, 0)-(10, 10)...
        let header_cell = TableCell {
            row: 0,
            col: 0,
            row_span: 1,
            col_span: 1,
            bbox: (0.0, 0.0, 10.0, 10.0),
            is_header: true,
        };

        // ...and one text fragment lying inside it.
        let fragment = TextCell {
            index: 0,
            text: "Cell A".to_string(),
            bbox: BoundingBox::new(1.0, 1.0, 9.0, 9.0, CoordOrigin::TopLeft),
            font_name: None,
            font_size: None,
            confidence: 1.0,
            from_ocr: false,
        };

        let table_cells = vec![header_cell];
        let text_cells = vec![fragment];

        let matcher = CellMatcher::new();
        let matched = matcher.match_cells(&table_cells, &text_cells).unwrap();

        // The single table cell picks up the fragment's text and keeps its
        // header flag.
        assert_eq!(matched.len(), 1);
        let only = &matched[0];
        assert_eq!(only.text, "Cell A");
        assert!(only.is_header);
    }
Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
test_cell_matcher_multiple_texts function · rust · L205-L242 (38 LOC)
src/ml/cell_matching.rs
    fn test_cell_matcher_multiple_texts() {
        let matcher = CellMatcher::new();

        let table_cells = vec![TableCell {
            row: 0,
            col: 0,
            row_span: 1,
            col_span: 1,
            bbox: (0.0, 0.0, 20.0, 10.0),
            is_header: false,
        }];

        let text_cells = vec![
            TextCell {
                index: 0,
                text: "Part 1".to_string(),
                bbox: BoundingBox::new(1.0, 1.0, 8.0, 9.0, CoordOrigin::TopLeft),
                font_name: None,
                font_size: None,
                confidence: 1.0,
                from_ocr: false,
            },
            TextCell {
                index: 1,
                text: "Part 2".to_string(),
                bbox: BoundingBox::new(12.0, 1.0, 18.0, 9.0, CoordOrigin::TopLeft),
                font_name: None,
                font_size: None,
                confidence: 1.0,
                from_ocr: false,
            },
        ];

      
test_to_table_data function · rust · L245-L283 (39 LOC)
src/ml/cell_matching.rs
    fn test_to_table_data() {
        let cells = vec![
            MatchedCell {
                row: 0,
                col: 0,
                row_span: 1,
                col_span: 1,
                text: "A".to_string(),
                is_header: true,
                confidence: 0.9,
            },
            MatchedCell {
                row: 0,
                col: 1,
                row_span: 1,
                col_span: 1,
                text: "B".to_string(),
                is_header: true,
                confidence: 0.9,
            },
            MatchedCell {
                row: 1,
                col: 0,
                row_span: 1,
                col_span: 1,
                text: "1".to_string(),
                is_header: false,
                confidence: 0.8,
            },
        ];

        let table_data = MatchedCell::to_table_data(cells);

        assert_eq!(table_data.num_rows, 2);
        assert_eq!(table_data.num_cols, 2);
        assert_eq!(table_d
new function · rust · L71-L98 (28 LOC)
src/ml/layout_model.rs
    pub fn new<P: AsRef<Path>>(model_path: P) -> Result<Self> {
        // Loads the layout model from `model_path`.
        //
        // With the `docling-ffi` feature enabled, an ONNX session is built
        // from the file; a load failure is reported as an `EngineError` for
        // the "layout-model" engine. Without the feature, construction always
        // fails with an `EngineError` saying the feature is not enabled.
        let model_path = model_path.as_ref().to_path_buf();

        #[cfg(feature = "docling-ffi")]
        {
            let session = SessionBuilder::new()?
                .with_intra_threads(4)?
                .commit_from_file(&model_path)
                .map_err(|e| TransmutationError::EngineError {
                    engine: "layout-model".to_string(),
                    message: format!("Failed to load ONNX model: {e}"),
                    source: None,
                })?;

            Ok(Self {
                session,
                model_path,
            })
        }

        #[cfg(not(feature = "docling-ffi"))]
        {
            // `EngineError` is a struct-like variant (engine / message /
            // source), so it must be built with field syntax; the previous
            // tuple-call form could not compile.
            Err(TransmutationError::EngineError {
                engine: "layout-model".to_string(),
                message: "docling-ffi feature not enabled".to_string(),
                source: None,
            })
        }
    }
run_inference function · rust · L102-L120 (19 LOC)
src/ml/layout_model.rs
    fn run_inference(&mut self, input: &Array4<f32>) -> Result<Vec<DetectedRegion>> {
        // Feeds `input` through the session and hands the first output's
        // shape and data to `post_process_output_from_data`.

        // The tensor is built from an owned (shape, data) pair rather than
        // from the ndarray directly.
        let dims = input.shape().to_vec();
        let values: Vec<f32> = input.iter().copied().collect();
        let input_tensor = Tensor::from_array((dims, values))?;

        // Copy the output out inside an inner scope so the mutable borrow of
        // `self.session` has ended before post-processing borrows `self`.
        let (raw_data, raw_shape) = {
            let outputs = self.session.run(ort::inputs![input_tensor])?;
            let first_output = &outputs[0];
            let (out_shape, out_data) = first_output.try_extract_tensor::<f32>()?;
            (out_data.to_vec(), out_shape.to_vec())
        };

        self.post_process_output_from_data(&raw_shape, &raw_data)
    }
post_process_output_from_data function · rust · L123-L167 (45 LOC)
src/ml/layout_model.rs
    fn post_process_output_from_data(
        &self,
        shape: &[i64],
        data: &[f32],
    ) -> Result<Vec<DetectedRegion>> {
        // Extract segmentation masks from ONNX output
        // Output format: [batch, num_classes, height, width]
        if shape.len() != 4 {
            return Err(crate::TransmutationError::EngineError {
                engine: "layout-model".to_string(),
                message: format!("Expected 4D output tensor, got {}D", shape.len()),
                source: None,
            });
        }

        let num_classes = shape[1] as usize;
        let height = shape[2] as usize;
        let width = shape[3] as usize;

        // Reconstruct ndarray from shape and data for easier manipulation
        use ndarray::Array4;
        let masks_array = Array4::from_shape_vec((1, num_classes, height, width), data.to_vec())
            .map_err(|e| crate::TransmutationError::EngineError {
                engine: "layout-model".to_string(),
              
mask_to_regions function · rust · L171-L210 (40 LOC)
src/ml/layout_model.rs
    fn mask_to_regions(
        &self,
        mask: &ndarray::ArrayView2<f32>,
        class_id: usize,
        width: usize,
        height: usize,
    ) -> Result<Vec<DetectedRegion>> {
        let threshold = 0.5; // Confidence threshold
        let mut regions = Vec::new();

        // Simple threshold-based approach
        // For production, use connected components algorithm
        let mut visited = vec![vec![false; width]; height];

        for y in 0..height {
            for x in 0..width {
                if mask[[y, x]] > threshold && !visited[y][x] {
                    // Start a new region
                    let bbox =
                        self.flood_fill_bbox(mask, &mut visited, x, y, width, height, threshold);

                    if let Some((x0, y0, x1, y1)) = bbox {
                        // Map class_id to LayoutLabel
                        if let Some(label) = self.class_id_to_label(class_id) {
                            // Calculate confidence (average o
flood_fill_bbox function · rust · L214-L264 (51 LOC)
src/ml/layout_model.rs
    fn flood_fill_bbox(
        &self,
        mask: &ndarray::ArrayView2<f32>,
        visited: &mut Vec<Vec<bool>>,
        start_x: usize,
        start_y: usize,
        width: usize,
        height: usize,
        threshold: f32,
    ) -> Option<(usize, usize, usize, usize)> {
        let mut stack = vec![(start_x, start_y)];
        let mut min_x = start_x;
        let mut min_y = start_y;
        let mut max_x = start_x;
        let mut max_y = start_y;

        while let Some((x, y)) = stack.pop() {
            if x >= width || y >= height || visited[y][x] || mask[[y, x]] <= threshold {
                continue;
            }

            visited[y][x] = true;

            // Update bounding box
            min_x = min_x.min(x);
            min_y = min_y.min(y);
            max_x = max_x.max(x);
            max_y = max_y.max(y);

            // Add neighbors (4-connectivity)
            if x > 0 {
                stack.push((x - 1, y));
            }
            if x + 1 < widt
calculate_region_confidence function · rust · L268-L289 (22 LOC)
src/ml/layout_model.rs
    fn calculate_region_confidence(
        &self,
        mask: &ndarray::ArrayView2<f32>,
        x0: usize,
        y0: usize,
        x1: usize,
        y1: usize,
    ) -> f32 {
        // Mean mask value over the inclusive box (x0, y0)-(x1, y1). Positions
        // outside the mask are ignored; returns 0.0 when none fall inside.
        let mut total = 0.0;
        let mut samples = 0;

        for row in y0..=y1 {
            for col in x0..=x1 {
                // The mask is indexed [row, column].
                let inside = row < mask.shape()[0] && col < mask.shape()[1];
                if !inside {
                    continue;
                }
                total += mask[[row, col]];
                samples += 1;
            }
        }

        if samples == 0 {
            return 0.0;
        }
        total / samples as f32
    }
If a scraper extracted this row, it came from Repobility (https://repobility.com)
apply_nms function · rust · L293-L325 (33 LOC)
src/ml/layout_model.rs
    fn apply_nms(
        &self,
        mut regions: Vec<DetectedRegion>,
        iou_threshold: f32,
    ) -> Result<Vec<DetectedRegion>> {
        // Greedy non-maximum suppression: visit regions from highest to
        // lowest confidence and discard any later region whose IoU with an
        // already-kept region exceeds `iou_threshold`. The kept regions are
        // returned in descending confidence order.

        // Sort by confidence (descending). `total_cmp` gives a total order,
        // so a NaN confidence no longer panics the way
        // `partial_cmp(..).unwrap()` did; NaN entries are ranked last so they
        // can never suppress a region with a real score.
        regions.sort_by(|a, b| {
            a.confidence
                .is_nan()
                .cmp(&b.confidence.is_nan())
                .then_with(|| b.confidence.total_cmp(&a.confidence))
        });

        let mut suppressed = vec![false; regions.len()];

        for i in 0..regions.len() {
            if suppressed[i] {
                continue;
            }

            // Region `i` survives; suppress lower-ranked regions overlapping it.
            for j in (i + 1)..regions.len() {
                if suppressed[j] {
                    continue;
                }

                let iou = self.calculate_iou(&regions[i].bbox, &regions[j].bbox);
                if iou > iou_threshold {
                    suppressed[j] = true;
                }
            }
        }

        // A region's flag is final once the scan reaches it, so the survivors
        // can be moved out in one pass instead of cloning each one.
        let keep = regions
            .into_iter()
            .zip(suppressed)
            .filter_map(|(region, is_suppressed)| {
                if is_suppressed {
                    None
                } else {
                    Some(region)
                }
            })
            .collect();

        Ok(keep)
    }
class_id_to_label function · rust · L360-L376 (17 LOC)
src/ml/layout_model.rs
    fn class_id_to_label(&self, class_id: usize) -> Option<LayoutLabel> {
        // Maps class indices 0..=11 to their `LayoutLabel`; any other index
        // yields `None`.
        let label = match class_id {
            0 => LayoutLabel::Text,
            1 => LayoutLabel::Title,
            2 => LayoutLabel::SectionHeader,
            3 => LayoutLabel::ListItem,
            4 => LayoutLabel::Caption,
            5 => LayoutLabel::Footnote,
            6 => LayoutLabel::PageHeader,
            7 => LayoutLabel::PageFooter,
            8 => LayoutLabel::Table,
            9 => LayoutLabel::Figure,
            10 => LayoutLabel::Formula,
            11 => LayoutLabel::Code,
            // Unknown class
            _ => return None,
        };

        Some(label)
    }
predict function · rust · L383-L398 (16 LOC)
src/ml/layout_model.rs
    fn predict(&mut self, input: &Self::Input) -> Result<Self::Output> {
        // Preprocesses the image, runs the model on the resulting tensor and
        // returns the detected regions together with the input's dimensions.
        let model_input = preprocessing::preprocess_for_layout(input)?;
        let regions = self.run_inference(&model_input)?;

        // Page size is taken from the input image itself, not the tensor.
        let (page_width, page_height) = input.dimensions();

        let prediction = LayoutPrediction {
            regions,
            page_width,
            page_height,
        };
        Ok(prediction)
    }
new function · rust · L29-L36 (8 LOC)
src/ml/model_cache.rs
    fn new() -> Self {
        // An empty cache: no models loaded and no paths remembered.
        Self {
            layout_model: None,
            layout_model_path: None,
            table_model: None,
            table_model_path: None,
        }
    }
get_or_load_layout_model function · rust · L42-L68 (27 LOC)
src/ml/model_cache.rs
    fn get_or_load_layout_model(&mut self, model_path: PathBuf) -> Option<Arc<Mutex<LayoutModel>>> {
        // Check if already cached and path matches
        if let Some(ref cached_path) = self.layout_model_path {
            if *cached_path == model_path {
                if let Some(ref model) = self.layout_model {
                    eprintln!("📦 Using cached LayoutModel");
                    return Some(Arc::clone(model));
                }
            }
        }

        // Load new model
        eprintln!("🔄 Loading LayoutModel from {}", model_path.display());
        match LayoutModel::new(&model_path) {
            Ok(model) => {
                let arc_model = Arc::new(Mutex::new(model));
                self.layout_model = Some(Arc::clone(&arc_model));
                self.layout_model_path = Some(model_path);
                eprintln!("✅ LayoutModel loaded and cached");
                Some(arc_model)
            }
            Err(e) => {
                eprintln!("❌ Fa
get_or_load_table_model function · rust · L71-L103 (33 LOC)
src/ml/model_cache.rs
    fn get_or_load_table_model(
        &mut self,
        model_path: PathBuf,
    ) -> Option<Arc<Mutex<TableStructureModel>>> {
        // Check if already cached and path matches
        if let Some(ref cached_path) = self.table_model_path {
            if *cached_path == model_path {
                if let Some(ref model) = self.table_model {
                    eprintln!("📦 Using cached TableStructureModel");
                    return Some(Arc::clone(model));
                }
            }
        }

        // Load new model
        eprintln!(
            "🔄 Loading TableStructureModel from {}",
            model_path.display()
        );
        match TableStructureModel::new(&model_path, 1.0) {
            Ok(model) => {
                let arc_model = Arc::new(Mutex::new(model));
                self.table_model = Some(Arc::clone(&arc_model));
                self.table_model_path = Some(model_path);
                eprintln!("✅ TableStructureModel loaded and cached");
    
clear function · rust · L106-L112 (7 LOC)
src/ml/model_cache.rs
    fn clear(&mut self) {
        // Releases both cached model handles, then forgets the paths they
        // were loaded from, and logs the reset to stderr.
        drop(self.layout_model.take());
        drop(self.table_model.take());
        drop(self.layout_model_path.take());
        drop(self.table_model_path.take());

        eprintln!("🗑️  Model cache cleared");
    }
get_layout_model function · rust · L116-L121 (6 LOC)
src/ml/model_cache.rs
pub fn get_layout_model(model_path: PathBuf) -> Option<Arc<Mutex<LayoutModel>>> {
    // Returns `None` when the global cache lock cannot be acquired;
    // otherwise delegates to the cache's load-or-reuse logic.
    let mut cache = MODEL_CACHE.lock().ok()?;

    cache.get_or_load_layout_model(model_path)
}
Source: Repobility analyzer · https://repobility.com
has_cached_layout_model function · rust · L136-L142 (7 LOC)
src/ml/model_cache.rs
pub fn has_cached_layout_model() -> bool {
    // True only when the cache lock is acquired and a layout model is
    // currently stored; a failed lock is reported as "not cached".
    match MODEL_CACHE.lock() {
        Ok(cache) => cache.layout_model.is_some(),
        Err(_) => false,
    }
}
has_cached_table_model function · rust · L145-L151 (7 LOC)
src/ml/model_cache.rs
pub fn has_cached_table_model() -> bool {
    // True only when the cache lock is acquired and a table model is
    // currently stored; a failed lock is reported as "not cached".
    match MODEL_CACHE.lock() {
        Ok(cache) => cache.table_model.is_some(),
        Err(_) => false,
    }
}
new function · rust · L24-L35 (12 LOC)
src/ml/model_manager.rs
    pub fn new() -> Result<Self> {
        // Resolve the cache directory and make sure it exists on disk before
        // anything else; either step failing aborts construction.
        let cache_dir = Self::default_cache_dir()?;
        fs::create_dir_all(&cache_dir)?;

        // Search locations, highest priority first.
        let search_paths = Self::build_search_paths()?;

        let manager = Self {
            cache_dir,
            search_paths,
        };
        Ok(manager)
    }
default_cache_dir function · rust · L38-L52 (15 LOC)
src/ml/model_manager.rs
    fn default_cache_dir() -> Result<PathBuf> {
        // Prefers `<cache dir>/transmutation_models`; when `dirs` reports no
        // cache directory, falls back to `<home>/.cache/transmutation_models`
        // and errors only if the home directory is unknown too.
        const MODELS_SUBDIR: &str = "transmutation_models";

        match dirs::cache_dir() {
            Some(cache_root) => Ok(cache_root.join(MODELS_SUBDIR)),
            None => {
                // Fallback to home directory
                let home = dirs::home_dir().ok_or_else(|| {
                    TransmutationError::IoError(std::io::Error::new(
                        std::io::ErrorKind::NotFound,
                        "Home directory not found",
                    ))
                })?;

                Ok(home.join(".cache").join(MODELS_SUBDIR))
            }
        }
    }
build_search_paths function · rust · L55-L80 (26 LOC)
src/ml/model_manager.rs
    fn build_search_paths() -> Result<Vec<PathBuf>> {
        // Assembles the model search locations, highest priority first.
        // Sources that cannot be resolved are left out, except the final
        // cache directory, whose failure is propagated.
        let mut paths = Vec::new();

        // 1. Environment variable (highest priority)
        paths.extend(env::var("TRANSMUTATION_MODELS_DIR").ok().map(PathBuf::from));

        // 2. Project models/ directory (for development)
        if let Ok(cwd) = env::current_dir() {
            paths.extend([
                cwd.join("models"),
                cwd.join("transmutation").join("models"),
            ]);
        }

        // 3. Executable directory (for deployment)
        let beside_exe = env::current_exe()
            .ok()
            .and_then(|exe| exe.parent().map(|dir| dir.join("models")));
        paths.extend(beside_exe);

        // 4. System cache (lowest priority)
        let cache_dir = Self::default_cache_dir()?;
        paths.push(cache_dir);

        Ok(paths)
    }
load_or_download function · rust · L84-L102 (19 LOC)
src/ml/model_manager.rs
    pub fn load_or_download(&self, model_name: &str) -> Option<PathBuf> {
        // Returns the first `<search path>/<model_name>` that exists on disk.
        // Nothing is downloaded here: when no candidate exists, the searched
        // locations are printed to stderr and `None` is returned.
        let located = self
            .search_paths
            .iter()
            .map(|dir| dir.join(model_name))
            .find(|candidate| candidate.exists());

        if let Some(model_path) = located {
            eprintln!("✅ Found {} at {}", model_name, model_path.display());
            return Some(model_path);
        }

        eprintln!("⚠️  Model {model_name} not found in any search path");
        eprintln!("   Searched:");
        self.search_paths
            .iter()
            .for_each(|dir| eprintln!("     - {}", dir.display()));
        eprintln!("   To export models, run: python scripts/export_onnx_models.py");

        None
    }
get_all_models function · rust · L128-L136 (9 LOC)
src/ml/model_manager.rs
    pub fn get_all_models(&self) -> Option<ModelPaths> {
        // The layout model is mandatory: without it the result is `None` and
        // the table model is not looked up. The table model is optional and
        // stored as whatever the lookup returns.
        self.load_or_download(LAYOUT_MODEL_NAME)
            .map(|layout_model| ModelPaths {
                layout_model,
                table_model: self.load_or_download(TABLE_STRUCTURE_MODEL_NAME),
            })
    }
download_model function · rust · L139-L145 (7 LOC)
src/ml/model_manager.rs
    pub async fn download_model(&self, _model_name: &str, _repo_id: &str) -> Result<PathBuf> {
        // TODO: Implement actual download from HuggingFace
        // Until then both arguments are ignored and the call always fails,
        // telling the user to place the models manually.
        let message = String::from(
            "Automatic model download not yet implemented. Please manually place ONNX models in models/ directory",
        );

        Err(TransmutationError::UnsupportedFormat(message))
    }
Repobility · severity-and-effort ranking · https://repobility.com
preprocess_for_layout function · rust · L27-L38 (12 LOC)
src/ml/preprocessing.rs
pub fn preprocess_for_layout(image: &DynamicImage) -> Result<Array4<f32>> {
    // Pipeline: resize (with padding) to the square layout-model input size,
    // convert to 8-bit RGB, then turn the pixels into a tensor normalized
    // with the ImageNet mean and standard deviation.
    let padded = resize_with_padding(image, LAYOUT_MODEL_SIZE, LAYOUT_MODEL_SIZE)?;
    let rgb = padded.to_rgb8();

    image_to_tensor(&rgb, &IMAGENET_MEAN, &IMAGENET_STD)
}
resize_with_padding function · rust · L41-L76 (36 LOC)
src/ml/preprocessing.rs
fn resize_with_padding(
    image: &DynamicImage,
    target_width: u32,
    target_height: u32,
) -> Result<DynamicImage> {
    let (width, height) = image.dimensions();
    let aspect_ratio = width as f32 / height as f32;
    let target_aspect_ratio = target_width as f32 / target_height as f32;

    let (new_width, new_height) = if aspect_ratio > target_aspect_ratio {
        // Width is limiting factor
        (target_width, (target_width as f32 / aspect_ratio) as u32)
    } else {
        // Height is limiting factor
        ((target_height as f32 * aspect_ratio) as u32, target_height)
    };

    // Resize image
    let resized = image.resize_exact(new_width, new_height, image::imageops::FilterType::Lanczos3);

    // Create canvas with padding
    let mut canvas = DynamicImage::new_rgb8(target_width, target_height);

    // Center the image
    let x_offset = (target_width - new_width) / 2;
    let y_offset = (target_height - new_height) / 2;

    image::imageops::overlay(
      
image_to_tensor function · rust · L79-L101 (23 LOC)
src/ml/preprocessing.rs
fn image_to_tensor(
    image: &ImageBuffer<Rgb<u8>, Vec<u8>>,
    mean: &[f32; 3],
    std: &[f32; 3],
) -> Result<Array4<f32>> {
    // Produces a (1, 3, height, width) tensor: each channel value is scaled
    // to [0, 1] and then normalized as (value - mean) / std for that channel.
    let (width, height) = image.dimensions();
    let (cols, rows) = (width as usize, height as usize);
    let mut tensor = Array4::<f32>::zeros((1, 3, rows, cols));

    // Convert from HWC to CHW and normalize
    for row in 0..height {
        for col in 0..width {
            let pixel = image.get_pixel(col, row);

            for (channel, (&channel_mean, &channel_std)) in mean.iter().zip(std.iter()).enumerate()
            {
                let scaled = f32::from(pixel[channel]) / 255.0; // [0, 1]
                tensor[[0, channel, row as usize, col as usize]] =
                    (scaled - channel_mean) / channel_std;
            }
        }
    }

    Ok(tensor)
}
‹ prevpage 7 / 9next ›