Function bodies 424 total
detect_column_positions function · rust · L205-L237 (33 LOC)src/engines/table_detector.rs
fn detect_column_positions(&self, lines: &[&str]) -> Option<Vec<usize>> {
// Find positions where words consistently start
let mut position_votes: std::collections::HashMap<usize, usize> =
std::collections::HashMap::new();
for line in lines {
let mut in_word = false;
for (pos, ch) in line.chars().enumerate() {
if ch.is_whitespace() {
in_word = false;
} else if !in_word {
*position_votes.entry(pos).or_insert(0) += 1;
in_word = true;
}
}
}
// Keep positions that appear in most lines
let threshold = (lines.len() as f32 * 0.7) as usize;
let mut positions: Vec<usize> = position_votes
.into_iter()
.filter(|(_, votes)| *votes >= threshold)
.map(|(pos, _)| pos)
.collect();
positions.sort_unstable();
if positions.leline_matches_columns function · rust · L240-L258 (19 LOC)src/engines/table_detector.rs
fn line_matches_columns(&self, line: &str, positions: &[usize]) -> bool {
let mut matches = 0;
let mut in_word = false;
for (pos, ch) in line.chars().enumerate() {
if ch.is_whitespace() {
in_word = false;
} else if !in_word {
// Check if position is near any column position (within 2 chars)
if positions.iter().any(|&col_pos| pos.abs_diff(col_pos) <= 2) {
matches += 1;
}
in_word = true;
}
}
// At least 50% of columns should match
matches >= positions.len() / 2
}parse_aligned_table function · rust · L261-L308 (48 LOC)src/engines/table_detector.rs
fn parse_aligned_table(
&self,
lines: &[&str],
column_positions: &[usize],
) -> Option<DetectedTable> {
let mut rows = Vec::new();
for line in lines {
let mut cells = Vec::new();
let line_chars: Vec<char> = line.chars().collect();
for i in 0..column_positions.len() {
let start = column_positions[i].min(line_chars.len());
let end = column_positions
.get(i + 1)
.copied()
.unwrap_or(line_chars.len())
.min(line_chars.len());
if start < line_chars.len() {
let cell: String = line_chars[start..end].iter().collect();
cells.push(cell.trim().to_string());
}
}
if !cells.is_empty() {
rows.push(cells);
}
}
if rows.is_empty() {
return None;
}
detect_tab_separated_tables function · rust · L311-L328 (18 LOC)src/engines/table_detector.rs
fn detect_tab_separated_tables(&self, text: &str) -> Vec<DetectedTable> {
let mut tables = Vec::new();
let lines: Vec<&str> = text.lines().collect();
let mut i = 0;
while i < lines.len() {
if let Some(table_end) = self.find_tab_table_end(&lines[i..]) {
if let Some(table) = self.parse_tab_table(&lines[i..=i + table_end]) {
tables.push(table);
i += table_end + 1;
continue;
}
}
i += 1;
}
tables
}find_tab_table_end function · rust · L331-L353 (23 LOC)src/engines/table_detector.rs
fn find_tab_table_end(&self, lines: &[&str]) -> Option<usize> {
let mut end = 0;
let expected_tabs = lines.first()?.matches('\t').count();
if expected_tabs < self.min_columns - 1 {
return None;
}
for (i, line) in lines.iter().enumerate() {
let tab_count = line.matches('\t').count();
if tab_count >= expected_tabs - 1 && tab_count <= expected_tabs + 1 {
end = i;
} else if i > 0 {
break;
}
}
if end >= self.min_rows - 1 {
Some(end)
} else {
None
}
}parse_tab_table function · rust · L356-L375 (20 LOC)src/engines/table_detector.rs
fn parse_tab_table(&self, lines: &[&str]) -> Option<DetectedTable> {
let rows: Vec<Vec<String>> = lines
.iter()
.map(|line| line.split('\t').map(|s| s.trim().to_string()).collect())
.filter(|row: &Vec<String>| !row.is_empty() && row.iter().any(|s| !s.is_empty()))
.collect();
if rows.len() < self.min_rows {
return None;
}
let column_count = rows.iter().map(|r| r.len()).max().unwrap_or(0);
Some(DetectedTable {
rows,
column_count,
has_header: true, // Assume first row is header for TSV
confidence: 0.85,
})
}test_pipe_delimited_table function · rust · L389-L402 (14 LOC)src/engines/table_detector.rs
fn test_pipe_delimited_table() {
let detector = TableDetector::new();
let text = r"
| Name | Age | City |
| --- | --- | --- |
| Alice | 30 | NYC |
| Bob | 25 | LA |
";
let tables = detector.detect_tables(text);
assert!(!tables.is_empty());
assert_eq!(tables[0].column_count, 3);
assert!(tables[0].has_header);
}Source: Repobility analyzer · https://repobility.com
test_tab_separated_table function · rust · L405-L413 (9 LOC)src/engines/table_detector.rs
fn test_tab_separated_table() {
let detector = TableDetector::new();
let text = "Name\tAge\tCity\nAlice\t30\tNYC\nBob\t25\tLA";
let tables = detector.detect_tables(text);
assert!(!tables.is_empty());
// Tab count = columns - 1, so 2 tabs = 3 columns, but detector counts tabs
assert_eq!(tables[0].column_count, 2);
}test_no_table function · rust · L416-L422 (7 LOC)src/engines/table_detector.rs
fn test_no_table() {
let detector = TableDetector::new();
let text = "This is just regular text with no table structure.";
let tables = detector.detect_tables(text);
assert!(tables.is_empty());
}conversion_failed function · rust · L81-L86 (6 LOC)src/error.rs
pub fn conversion_failed<S: Into<String>>(reason: S) -> Self {
Self::ConversionFailed {
reason: reason.into(),
source: None,
}
}conversion_failed_with_source function · rust · L89-L97 (9 LOC)src/error.rs
pub fn conversion_failed_with_source<S: Into<String>, E>(reason: S, source: E) -> Self
where
E: std::error::Error + Send + Sync + 'static,
{
Self::ConversionFailed {
reason: reason.into(),
source: Some(Box::new(source)),
}
}engine_error function · rust · L100-L106 (7 LOC)src/error.rs
pub fn engine_error<S1: Into<String>, S2: Into<String>>(engine: S1, message: S2) -> Self {
Self::EngineError {
engine: engine.into(),
message: message.into(),
source: None,
}
}engine_error_with_source function · rust · L109-L122 (14 LOC)src/error.rs
pub fn engine_error_with_source<S1: Into<String>, S2: Into<String>, E>(
engine: S1,
message: S2,
source: E,
) -> Self
where
E: std::error::Error + Send + Sync + 'static,
{
Self::EngineError {
engine: engine.into(),
message: message.into(),
source: Some(Box::new(source)),
}
}is_recoverable function · rust · L125-L130 (6 LOC)src/error.rs
pub fn is_recoverable(&self) -> bool {
matches!(
self,
Self::Timeout(_) | Self::NetworkError(_) | Self::CacheError(_)
)
}from function · rust · L152-L157 (6 LOC)src/error.rs
fn from(err: zip::result::ZipError) -> Self {
TransmutationError::IoError(std::io::Error::new(
std::io::ErrorKind::Other,
err.to_string(),
))
}Repobility · severity-and-effort ranking · https://repobility.com
test_is_recoverable function · rust · L177-L183 (7 LOC)src/error.rs
fn test_is_recoverable() {
let err = TransmutationError::Timeout(std::time::Duration::from_secs(1));
assert!(err.is_recoverable());
let err = TransmutationError::UnsupportedFormat("test".to_string());
assert!(!err.is_recoverable());
}default function · rust · L75-L81 (7 LOC)src/lib.rs
fn default() -> Self {
Self {
enable_cache: true,
max_parallel: num_cpus::get(),
timeout: std::time::Duration::from_secs(300),
}
}new function · rust · L123-L129 (7 LOC)src/lib.rs
pub fn new(input: std::path::PathBuf) -> Self {
Self {
input,
output_format: None,
options: ConversionOptions::default(),
}
}execute function · rust · L144-L299 (156 LOC)src/lib.rs
pub async fn execute(self) -> Result<ConversionResult> {
use crate::utils::detect_format;
// Detect input format
let input_format = detect_format(&self.input).await?;
// Get output format (default to Markdown)
let output_format = self.output_format.unwrap_or(OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
});
// Select appropriate converter
// Core formats (always enabled)
if input_format == FileFormat::Pdf {
use crate::converters::pdf::PdfConverter;
let converter = PdfConverter::new();
return converter
.convert(&self.input, output_format, self.options)
.await;
}
if input_format == FileFormat::Html {
use crate::converters::html::HtmlConverter;
let converter = HtmlConverter::new();
return converter
.convert(&self.input, output_fomatch_cells function · rust · L33-L84 (52 LOC)src/ml/cell_matching.rs
pub fn match_cells(
&self,
table_cells: &[TableCell],
text_cells: &[TextCell],
) -> Result<Vec<MatchedCell>> {
let mut matched = Vec::new();
for table_cell in table_cells {
let table_bbox = self.table_cell_to_bbox(table_cell);
// Find all text cells that overlap with this table cell
let mut matching_texts = Vec::new();
for text_cell in text_cells {
let iou = table_bbox.intersection_over_union(&text_cell.bbox);
if iou >= self.iou_threshold {
matching_texts.push((text_cell, iou));
}
}
// Sort by position (top-to-bottom, left-to-right) then by IoU
matching_texts.sort_by(|a, b| {
let y_cmp = a.0.bbox.t.partial_cmp(&b.0.bbox.t).unwrap();
if y_cmp == std::cmp::Ordering::Equal {
a.0.bbox.l.partial_cmp(&b.0.bbox.l).unwrap()
}table_cell_to_bbox function · rust · L87-L96 (10 LOC)src/ml/cell_matching.rs
fn table_cell_to_bbox(&self, cell: &TableCell) -> BoundingBox {
let (x0, y0, x1, y1) = cell.bbox;
BoundingBox::new(
f64::from(x0),
f64::from(y0),
f64::from(x1),
f64::from(y1),
crate::document::types_extended::CoordOrigin::TopLeft,
)
}to_table_data function · rust · L135-L166 (32 LOC)src/ml/cell_matching.rs
pub fn to_table_data(cells: Vec<MatchedCell>) -> crate::document::types::TableData {
if cells.is_empty() {
return crate::document::types::TableData {
num_rows: 0,
num_cols: 0,
grid: Vec::new(),
};
}
// Find dimensions
let num_rows = cells.iter().map(|c| c.row + c.row_span).max().unwrap_or(0);
let num_cols = cells.iter().map(|c| c.col + c.col_span).max().unwrap_or(0);
// Build grid
let mut grid = vec![Vec::new(); num_rows];
for cell in cells {
if cell.row < num_rows {
grid[cell.row].push(crate::document::types::TableCell {
text: cell.text,
row_span: cell.row_span,
col_span: cell.col_span,
});
}
}
crate::document::types::TableData {
num_rows,
num_cols,
grid,
}
}test_cell_matcher_basic function · rust · L175-L202 (28 LOC)src/ml/cell_matching.rs
fn test_cell_matcher_basic() {
let matcher = CellMatcher::new();
let table_cells = vec![TableCell {
row: 0,
col: 0,
row_span: 1,
col_span: 1,
bbox: (0.0, 0.0, 10.0, 10.0),
is_header: true,
}];
let text_cells = vec![TextCell {
index: 0,
text: "Cell A".to_string(),
bbox: BoundingBox::new(1.0, 1.0, 9.0, 9.0, CoordOrigin::TopLeft),
font_name: None,
font_size: None,
confidence: 1.0,
from_ocr: false,
}];
let matched = matcher.match_cells(&table_cells, &text_cells).unwrap();
assert_eq!(matched.len(), 1);
assert_eq!(matched[0].text, "Cell A");
assert!(matched[0].is_header);
}Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
test_cell_matcher_multiple_texts function · rust · L205-L242 (38 LOC)src/ml/cell_matching.rs
fn test_cell_matcher_multiple_texts() {
let matcher = CellMatcher::new();
let table_cells = vec![TableCell {
row: 0,
col: 0,
row_span: 1,
col_span: 1,
bbox: (0.0, 0.0, 20.0, 10.0),
is_header: false,
}];
let text_cells = vec![
TextCell {
index: 0,
text: "Part 1".to_string(),
bbox: BoundingBox::new(1.0, 1.0, 8.0, 9.0, CoordOrigin::TopLeft),
font_name: None,
font_size: None,
confidence: 1.0,
from_ocr: false,
},
TextCell {
index: 1,
text: "Part 2".to_string(),
bbox: BoundingBox::new(12.0, 1.0, 18.0, 9.0, CoordOrigin::TopLeft),
font_name: None,
font_size: None,
confidence: 1.0,
from_ocr: false,
},
];
test_to_table_data function · rust · L245-L283 (39 LOC)src/ml/cell_matching.rs
fn test_to_table_data() {
let cells = vec![
MatchedCell {
row: 0,
col: 0,
row_span: 1,
col_span: 1,
text: "A".to_string(),
is_header: true,
confidence: 0.9,
},
MatchedCell {
row: 0,
col: 1,
row_span: 1,
col_span: 1,
text: "B".to_string(),
is_header: true,
confidence: 0.9,
},
MatchedCell {
row: 1,
col: 0,
row_span: 1,
col_span: 1,
text: "1".to_string(),
is_header: false,
confidence: 0.8,
},
];
let table_data = MatchedCell::to_table_data(cells);
assert_eq!(table_data.num_rows, 2);
assert_eq!(table_data.num_cols, 2);
assert_eq!(table_dnew function · rust · L71-L98 (28 LOC)src/ml/layout_model.rs
pub fn new<P: AsRef<Path>>(model_path: P) -> Result<Self> {
let model_path = model_path.as_ref().to_path_buf();
#[cfg(feature = "docling-ffi")]
{
let session = SessionBuilder::new()?
.with_intra_threads(4)?
.commit_from_file(&model_path)
.map_err(|e| TransmutationError::EngineError {
engine: "layout-model".to_string(),
message: format!("Failed to load ONNX model: {e}"),
source: None,
})?;
Ok(Self {
session,
model_path,
})
}
#[cfg(not(feature = "docling-ffi"))]
{
Err(TransmutationError::EngineError(
"layout-model".to_string(),
"docling-ffi feature not enabled".to_string(),
))
}
}run_inference function · rust · L102-L120 (19 LOC)src/ml/layout_model.rs
fn run_inference(&mut self, input: &Array4<f32>) -> Result<Vec<DetectedRegion>> {
// Convert ndarray to ONNX tensor (ort v2 API)
// Extract shape and data as Vec for compatibility with OwnedTensorArrayData
let shape = input.shape().to_vec();
let data = input.iter().copied().collect::<Vec<f32>>();
let input_tensor = Tensor::from_array((shape, data))?;
// Run inference (ort v2 requires mutable session)
// Extract outputs in a separate scope to end mutable borrow
let (output_data, output_shape) = {
let outputs = self.session.run(ort::inputs![input_tensor])?;
let output_value = &outputs[0];
let (shape, data) = output_value.try_extract_tensor::<f32>()?;
(data.to_vec(), shape.to_vec())
};
// Now process with immutable borrow
self.post_process_output_from_data(&output_shape, &output_data)
}post_process_output_from_data function · rust · L123-L167 (45 LOC)src/ml/layout_model.rs
fn post_process_output_from_data(
&self,
shape: &[i64],
data: &[f32],
) -> Result<Vec<DetectedRegion>> {
// Extract segmentation masks from ONNX output
// Output format: [batch, num_classes, height, width]
if shape.len() != 4 {
return Err(crate::TransmutationError::EngineError {
engine: "layout-model".to_string(),
message: format!("Expected 4D output tensor, got {}D", shape.len()),
source: None,
});
}
let num_classes = shape[1] as usize;
let height = shape[2] as usize;
let width = shape[3] as usize;
// Reconstruct ndarray from shape and data for easier manipulation
use ndarray::Array4;
let masks_array = Array4::from_shape_vec((1, num_classes, height, width), data.to_vec())
.map_err(|e| crate::TransmutationError::EngineError {
engine: "layout-model".to_string(),
mask_to_regions function · rust · L171-L210 (40 LOC)src/ml/layout_model.rs
fn mask_to_regions(
&self,
mask: &ndarray::ArrayView2<f32>,
class_id: usize,
width: usize,
height: usize,
) -> Result<Vec<DetectedRegion>> {
let threshold = 0.5; // Confidence threshold
let mut regions = Vec::new();
// Simple threshold-based approach
// For production, use connected components algorithm
let mut visited = vec![vec![false; width]; height];
for y in 0..height {
for x in 0..width {
if mask[[y, x]] > threshold && !visited[y][x] {
// Start a new region
let bbox =
self.flood_fill_bbox(mask, &mut visited, x, y, width, height, threshold);
if let Some((x0, y0, x1, y1)) = bbox {
// Map class_id to LayoutLabel
if let Some(label) = self.class_id_to_label(class_id) {
// Calculate confidence (average oflood_fill_bbox function · rust · L214-L264 (51 LOC)src/ml/layout_model.rs
fn flood_fill_bbox(
&self,
mask: &ndarray::ArrayView2<f32>,
visited: &mut Vec<Vec<bool>>,
start_x: usize,
start_y: usize,
width: usize,
height: usize,
threshold: f32,
) -> Option<(usize, usize, usize, usize)> {
let mut stack = vec![(start_x, start_y)];
let mut min_x = start_x;
let mut min_y = start_y;
let mut max_x = start_x;
let mut max_y = start_y;
while let Some((x, y)) = stack.pop() {
if x >= width || y >= height || visited[y][x] || mask[[y, x]] <= threshold {
continue;
}
visited[y][x] = true;
// Update bounding box
min_x = min_x.min(x);
min_y = min_y.min(y);
max_x = max_x.max(x);
max_y = max_y.max(y);
// Add neighbors (4-connectivity)
if x > 0 {
stack.push((x - 1, y));
}
if x + 1 < widtcalculate_region_confidence function · rust · L268-L289 (22 LOC)src/ml/layout_model.rs
fn calculate_region_confidence(
&self,
mask: &ndarray::ArrayView2<f32>,
x0: usize,
y0: usize,
x1: usize,
y1: usize,
) -> f32 {
let mut sum = 0.0;
let mut count = 0;
for y in y0..=y1 {
for x in x0..=x1 {
if y < mask.shape()[0] && x < mask.shape()[1] {
sum += mask[[y, x]];
count += 1;
}
}
}
if count > 0 { sum / count as f32 } else { 0.0 }
}If a scraper extracted this row, it came from Repobility (https://repobility.com)
apply_nms function · rust · L293-L325 (33 LOC)src/ml/layout_model.rs
fn apply_nms(
&self,
mut regions: Vec<DetectedRegion>,
iou_threshold: f32,
) -> Result<Vec<DetectedRegion>> {
// Sort by confidence (descending)
regions.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
let mut keep = Vec::new();
let mut suppressed = vec![false; regions.len()];
for i in 0..regions.len() {
if suppressed[i] {
continue;
}
keep.push(regions[i].clone());
// Suppress overlapping regions
for j in (i + 1)..regions.len() {
if suppressed[j] {
continue;
}
let iou = self.calculate_iou(®ions[i].bbox, ®ions[j].bbox);
if iou > iou_threshold {
suppressed[j] = true;
}
}
}
Ok(keep)
}class_id_to_label function · rust · L360-L376 (17 LOC)src/ml/layout_model.rs
fn class_id_to_label(&self, class_id: usize) -> Option<LayoutLabel> {
match class_id {
0 => Some(LayoutLabel::Text),
1 => Some(LayoutLabel::Title),
2 => Some(LayoutLabel::SectionHeader),
3 => Some(LayoutLabel::ListItem),
4 => Some(LayoutLabel::Caption),
5 => Some(LayoutLabel::Footnote),
6 => Some(LayoutLabel::PageHeader),
7 => Some(LayoutLabel::PageFooter),
8 => Some(LayoutLabel::Table),
9 => Some(LayoutLabel::Figure),
10 => Some(LayoutLabel::Formula),
11 => Some(LayoutLabel::Code),
_ => None, // Unknown class
}
}predict function · rust · L383-L398 (16 LOC)src/ml/layout_model.rs
fn predict(&mut self, input: &Self::Input) -> Result<Self::Output> {
// Preprocess image
let tensor = preprocessing::preprocess_for_layout(input)?;
// Run inference
let regions = self.run_inference(&tensor)?;
let (width, height) = input.dimensions();
Ok(LayoutPrediction {
regions,
page_width: width,
page_height: height,
})
}new function · rust · L29-L36 (8 LOC)src/ml/model_cache.rs
fn new() -> Self {
Self {
layout_model: None,
table_model: None,
layout_model_path: None,
table_model_path: None,
}
}get_or_load_layout_model function · rust · L42-L68 (27 LOC)src/ml/model_cache.rs
fn get_or_load_layout_model(&mut self, model_path: PathBuf) -> Option<Arc<Mutex<LayoutModel>>> {
// Check if already cached and path matches
if let Some(ref cached_path) = self.layout_model_path {
if *cached_path == model_path {
if let Some(ref model) = self.layout_model {
eprintln!("📦 Using cached LayoutModel");
return Some(Arc::clone(model));
}
}
}
// Load new model
eprintln!("🔄 Loading LayoutModel from {}", model_path.display());
match LayoutModel::new(&model_path) {
Ok(model) => {
let arc_model = Arc::new(Mutex::new(model));
self.layout_model = Some(Arc::clone(&arc_model));
self.layout_model_path = Some(model_path);
eprintln!("✅ LayoutModel loaded and cached");
Some(arc_model)
}
Err(e) => {
eprintln!("❌ Faget_or_load_table_model function · rust · L71-L103 (33 LOC)src/ml/model_cache.rs
fn get_or_load_table_model(
&mut self,
model_path: PathBuf,
) -> Option<Arc<Mutex<TableStructureModel>>> {
// Check if already cached and path matches
if let Some(ref cached_path) = self.table_model_path {
if *cached_path == model_path {
if let Some(ref model) = self.table_model {
eprintln!("📦 Using cached TableStructureModel");
return Some(Arc::clone(model));
}
}
}
// Load new model
eprintln!(
"🔄 Loading TableStructureModel from {}",
model_path.display()
);
match TableStructureModel::new(&model_path, 1.0) {
Ok(model) => {
let arc_model = Arc::new(Mutex::new(model));
self.table_model = Some(Arc::clone(&arc_model));
self.table_model_path = Some(model_path);
eprintln!("✅ TableStructureModel loaded and cached");
clear function · rust · L106-L112 (7 LOC)src/ml/model_cache.rs
fn clear(&mut self) {
self.layout_model = None;
self.table_model = None;
self.layout_model_path = None;
self.table_model_path = None;
eprintln!("🗑️ Model cache cleared");
}get_layout_model function · rust · L116-L121 (6 LOC)src/ml/model_cache.rs
pub fn get_layout_model(model_path: PathBuf) -> Option<Arc<Mutex<LayoutModel>>> {
MODEL_CACHE
.lock()
.ok()?
.get_or_load_layout_model(model_path)
}Source: Repobility analyzer · https://repobility.com
has_cached_layout_model function · rust · L136-L142 (7 LOC)src/ml/model_cache.rs
pub fn has_cached_layout_model() -> bool {
MODEL_CACHE
.lock()
.ok()
.and_then(|cache| cache.layout_model.as_ref().map(|_| true))
.unwrap_or(false)
}has_cached_table_model function · rust · L145-L151 (7 LOC)src/ml/model_cache.rs
pub fn has_cached_table_model() -> bool {
MODEL_CACHE
.lock()
.ok()
.and_then(|cache| cache.table_model.as_ref().map(|_| true))
.unwrap_or(false)
}new function · rust · L24-L35 (12 LOC)src/ml/model_manager.rs
pub fn new() -> Result<Self> {
let cache_dir = Self::default_cache_dir()?;
fs::create_dir_all(&cache_dir)?;
// Build search paths in priority order
let search_paths = Self::build_search_paths()?;
Ok(Self {
cache_dir,
search_paths,
})
}default_cache_dir function · rust · L38-L52 (15 LOC)src/ml/model_manager.rs
fn default_cache_dir() -> Result<PathBuf> {
if let Some(cache_dir) = dirs::cache_dir() {
return Ok(cache_dir.join("transmutation_models"));
}
// Fallback to home directory
let home = dirs::home_dir().ok_or_else(|| {
TransmutationError::IoError(std::io::Error::new(
std::io::ErrorKind::NotFound,
"Home directory not found",
))
})?;
Ok(home.join(".cache").join("transmutation_models"))
}build_search_paths function · rust · L55-L80 (26 LOC)src/ml/model_manager.rs
fn build_search_paths() -> Result<Vec<PathBuf>> {
let mut paths = Vec::new();
// 1. Environment variable (highest priority)
if let Ok(env_path) = env::var("TRANSMUTATION_MODELS_DIR") {
paths.push(PathBuf::from(env_path));
}
// 2. Project models/ directory (for development)
if let Ok(current_dir) = env::current_dir() {
paths.push(current_dir.join("models"));
paths.push(current_dir.join("transmutation").join("models"));
}
// 3. Executable directory (for deployment)
if let Ok(exe_path) = env::current_exe() {
if let Some(exe_dir) = exe_path.parent() {
paths.push(exe_dir.join("models"));
}
}
// 4. System cache (lowest priority)
paths.push(Self::default_cache_dir()?);
Ok(paths)
}load_or_download function · rust · L84-L102 (19 LOC)src/ml/model_manager.rs
pub fn load_or_download(&self, model_name: &str) -> Option<PathBuf> {
// Try all search paths
for search_path in &self.search_paths {
let model_path = search_path.join(model_name);
if model_path.exists() {
eprintln!("✅ Found {} at {}", model_name, model_path.display());
return Some(model_path);
}
}
eprintln!("⚠️ Model {model_name} not found in any search path");
eprintln!(" Searched:");
for path in &self.search_paths {
eprintln!(" - {}", path.display());
}
eprintln!(" To export models, run: python scripts/export_onnx_models.py");
None
}get_all_models function · rust · L128-L136 (9 LOC)src/ml/model_manager.rs
pub fn get_all_models(&self) -> Option<ModelPaths> {
let layout_model = self.load_or_download(LAYOUT_MODEL_NAME)?;
let table_model = self.load_or_download(TABLE_STRUCTURE_MODEL_NAME);
Some(ModelPaths {
layout_model,
table_model,
})
}download_model function · rust · L139-L145 (7 LOC)src/ml/model_manager.rs
pub async fn download_model(&self, _model_name: &str, _repo_id: &str) -> Result<PathBuf> {
// TODO: Implement actual download from HuggingFace
// For now, return error indicating manual download needed
Err(TransmutationError::UnsupportedFormat(
"Automatic model download not yet implemented. Please manually place ONNX models in models/ directory".to_string()
))
}Repobility · severity-and-effort ranking · https://repobility.com
preprocess_for_layout function · rust · L27-L38 (12 LOC)src/ml/preprocessing.rs
pub fn preprocess_for_layout(image: &DynamicImage) -> Result<Array4<f32>> {
// Resize to model input size
let resized = resize_with_padding(image, LAYOUT_MODEL_SIZE, LAYOUT_MODEL_SIZE)?;
// Convert to RGB if needed
let rgb_image = resized.to_rgb8();
// Convert to ndarray and normalize
let tensor = image_to_tensor(&rgb_image, &IMAGENET_MEAN, &IMAGENET_STD)?;
Ok(tensor)
}resize_with_padding function · rust · L41-L76 (36 LOC)src/ml/preprocessing.rs
fn resize_with_padding(
image: &DynamicImage,
target_width: u32,
target_height: u32,
) -> Result<DynamicImage> {
let (width, height) = image.dimensions();
let aspect_ratio = width as f32 / height as f32;
let target_aspect_ratio = target_width as f32 / target_height as f32;
let (new_width, new_height) = if aspect_ratio > target_aspect_ratio {
// Width is limiting factor
(target_width, (target_width as f32 / aspect_ratio) as u32)
} else {
// Height is limiting factor
((target_height as f32 * aspect_ratio) as u32, target_height)
};
// Resize image
let resized = image.resize_exact(new_width, new_height, image::imageops::FilterType::Lanczos3);
// Create canvas with padding
let mut canvas = DynamicImage::new_rgb8(target_width, target_height);
// Center the image
let x_offset = (target_width - new_width) / 2;
let y_offset = (target_height - new_height) / 2;
image::imageops::overlay(
image_to_tensor function · rust · L79-L101 (23 LOC)src/ml/preprocessing.rs
/// Converts an 8-bit RGB image into a normalised `[1, 3, height, width]`
/// tensor.
///
/// Each channel value is scaled to `[0, 1]` by dividing by 255, then
/// normalised as `(value - mean[c]) / std[c]`.
fn image_to_tensor(
    image: &ImageBuffer<Rgb<u8>, Vec<u8>>,
    mean: &[f32; 3],
    std: &[f32; 3],
) -> Result<Array4<f32>> {
    let (width, height) = image.dimensions();
    let mut tensor = Array4::<f32>::zeros((1, 3, height as usize, width as usize));

    // Convert from HWC to CHW and normalize
    for y in 0..height {
        let row = y as usize;
        for x in 0..width {
            let col = x as usize;
            let pixel = image.get_pixel(x, y);
            for (channel, (&channel_mean, &channel_std)) in mean.iter().zip(std.iter()).enumerate()
            {
                let scaled = f32::from(pixel[channel]) / 255.0; // [0, 1]
                tensor[[0, channel, row, col]] = (scaled - channel_mean) / channel_std;
            }
        }
    }

    Ok(tensor)
}