Function bodies 424 total
main function · rust · L5-L201 (197 LOC)build.rs
fn main() {
// Embed icon and metadata into Windows executable (only for CLI binary builds)
// When used as a library dependency, skip resource compilation to avoid
// duplicate resource errors (see: https://github.com/hivellm/transmutation/issues/3)
#[cfg(all(target_os = "windows", feature = "cli"))]
{
let mut res = winres::WindowsResource::new();
res.set_icon("assets/icon.ico");
res.set("ProductName", "Transmutation");
res.set(
"FileDescription",
"High-performance document conversion engine for AI/LLM embeddings",
);
res.set("CompanyName", "HiveLLM Team");
res.set("LegalCopyright", "Copyright (c) 2025 HiveLLM Team");
res.set("OriginalFilename", "transmutation.exe");
if let Err(e) = res.compile() {
eprintln!("Warning: Failed to compile Windows resources: {e}");
}
}
// Check for optional external dependencies
check_external_dependencies()command_exists function · rust · L283-L293 (11 LOC)build.rs
/// Reports whether `cmd` can be resolved by the platform's lookup utility.
///
/// Runs `where` on Windows and `which` everywhere else. If the lookup
/// utility itself cannot be launched, the command is reported as missing.
fn command_exists(cmd: &str) -> bool {
    let locator = if cfg!(target_os = "windows") {
        "where"
    } else {
        "which"
    };
    match Command::new(locator).arg(cmd).output() {
        Ok(lookup) => lookup.status.success(),
        Err(_) => false,
    }
}
// get_install_command function · rust · L297-L330 (34 LOC)build.rs
fn get_install_command(tool: &str) -> String {
#[cfg(target_os = "linux")]
{
match tool {
"poppler-utils" => "sudo apt-get install poppler-utils".to_string(),
"libreoffice" => "sudo apt-get install libreoffice".to_string(),
"tesseract" => "sudo apt-get install tesseract-ocr".to_string(),
"ffmpeg" => "sudo apt-get install ffmpeg".to_string(),
_ => format!("sudo apt-get install {tool}"),
}
}
#[cfg(target_os = "macos")]
{
match tool {
"poppler-utils" => "brew install poppler".to_string(),
"libreoffice" => "brew install --cask libreoffice".to_string(),
"tesseract" => "brew install tesseract".to_string(),
"ffmpeg" => "brew install ffmpeg".to_string(),
_ => format!("brew install {tool}"),
}
}
#[cfg(target_os = "windows")]
{
match tool {
"poppler-utils" => "choco install poppler".to_string(get_quick_install_all function · rust · L333-L348 (16 LOC)build.rs
/// Returns the hint line naming the bundled "install everything" script for
/// the platform this code is compiled for.
///
/// The original used one `#[cfg(target_os = ...)]` block per platform, which
/// leaves the function body empty (and therefore failing to compile) on any
/// target other than Linux, macOS or Windows. Selecting with `cfg!` keeps
/// every branch type-checked on every target and allows a fallback.
fn get_quick_install_all() -> &'static str {
    if cfg!(target_os = "linux") {
        " ./install/install-deps-linux.sh"
    } else if cfg!(target_os = "macos") {
        " ./install/install-deps-macos.sh"
    } else if cfg!(target_os = "windows") {
        " .\\install\\install-deps-windows.ps1 (or .bat)"
    } else {
        // No install script is listed here for other platforms.
        " (no install script available for this platform; install the tools manually)"
    }
}
// docling_open_pdf function · cpp · L34-L60 (27 LOC)cpp/docling_ffi.cpp
/**
 * Create a document object for `pdf_path` and hand it back through `out_handle`.
 *
 * Only the path and the parser configuration are recorded here; nothing in
 * this function reads the PDF itself.
 *
 * @param pdf_path    Path of the PDF; must not be null.
 * @param out_handle  Receives the new document on success and is set to
 *                    nullptr on failure; must not be null.
 * @return DOCLING_OK on success, DOCLING_ERROR_INVALID_PDF when an argument
 *         is null, DOCLING_ERROR_PARSE_FAILED when setup throws.
 */
DoclingError docling_open_pdf(const char* pdf_path, DoclingDocumentHandle* out_handle) {
    std::cerr << "[FFI] docling_open_pdf called with: " << (pdf_path ? pdf_path : "NULL") << std::endl;
    if (!pdf_path || !out_handle) {
        set_last_error("Invalid arguments");
        std::cerr << "[FFI] ERROR: Invalid arguments" << std::endl;
        return DOCLING_ERROR_INVALID_PDF;
    }
    // Never leave the caller holding an indeterminate handle on failure.
    *out_handle = nullptr;
    // Declared outside the try block so the handlers can release it: the
    // string/JSON assignments below may throw after the allocation succeeded.
    DoclingDocument* doc = nullptr;
    try {
        doc = new DoclingDocument();
        doc->pdf_path = pdf_path;
        // Initialize docling-parse config
        doc->config["data"] = nlohmann::json::object();
        doc->config["files"]["pdf"]["filename"] = pdf_path;
        std::cerr << "[FFI] PDF opened successfully" << std::endl;
        // Publish the handle last, once nothing else can throw.
        *out_handle = doc;
        return DOCLING_OK;
    } catch (const std::exception& e) {
        delete doc;
        set_last_error(std::string("Failed to open PDF: ") + e.what());
        return DOCLING_ERROR_PARSE_FAILED;
    } catch (...) {
        // Keep non-standard exceptions from propagating out to the caller.
        delete doc;
        set_last_error("Failed to open PDF: unknown error");
        return DOCLING_ERROR_PARSE_FAILED;
    }
}
// docling_close_pdf function · cpp · L61-L75 (15 LOC)cpp/docling_ffi.cpp
/**
 * Destroy the document behind `handle`.
 *
 * @param handle  Document to destroy; a null handle is rejected.
 * @return DOCLING_OK once the document is deleted,
 *         DOCLING_ERROR_INVALID_PDF for a null handle,
 *         DOCLING_ERROR_PARSE_FAILED if destruction throws.
 */
DoclingError docling_close_pdf(DoclingDocumentHandle handle) {
    if (handle == nullptr) {
        return DOCLING_ERROR_INVALID_PDF;
    }
    try {
        delete static_cast<DoclingDocument*>(handle);
    } catch (const std::exception& ex) {
        set_last_error(std::string("Failed to close PDF: ") + ex.what());
        return DOCLING_ERROR_PARSE_FAILED;
    }
    return DOCLING_OK;
}
// docling_get_page_count function · cpp · L76-L92 (17 LOC)cpp/docling_ffi.cpp
/**
 * Report the number of pages of a document.
 *
 * NOTE: page counting is not implemented yet; the reported count is always 1.
 *
 * @param handle     Document handle; currently only checked for null.
 * @param out_count  Receives the page count on success; must not be null.
 * @return DOCLING_OK on success, DOCLING_ERROR_INVALID_PDF when an argument
 *         is null, DOCLING_ERROR_PARSE_FAILED if an exception is caught.
 */
DoclingError docling_get_page_count(DoclingDocumentHandle handle, int* out_count) {
    if (!handle || !out_count) {
        // Record a message so the last-error text is not stale or empty.
        set_last_error("Invalid arguments");
        return DOCLING_ERROR_INVALID_PDF;
    }
    try {
        // Parse PDF to get page count (mock for now)
        *out_count = 1; // TODO: Implement page count extraction
        return DOCLING_OK;
    } catch (const std::exception& e) {
        set_last_error(std::string("Failed to get page count: ") + e.what());
        return DOCLING_ERROR_PARSE_FAILED;
    }
}
// Repobility (the analyzer behind this table) · https://repobility.com
docling_get_page function · cpp · L93-L120 (28 LOC)cpp/docling_ffi.cpp
/**
 * Allocate a page structure for `page_num`.
 *
 * NOTE: cell extraction is not implemented yet. The returned page carries the
 * requested page number, US Letter dimensions (612 x 792) and no cells.
 * The structure is allocated with malloc().
 *
 * @param handle    Document handle; currently only checked for null.
 * @param page_num  Page number stored verbatim in the result.
 * @param out_page  Receives the new page on success and is set to nullptr on
 *                  failure; must not be null.
 * @return DOCLING_OK on success, DOCLING_ERROR_INVALID_PDF when an argument
 *         is null, DOCLING_ERROR_PARSE_FAILED on allocation failure or a
 *         caught exception.
 */
DoclingError docling_get_page(DoclingDocumentHandle handle, int page_num, DoclingPage** out_page) {
    if (!handle || !out_page) {
        set_last_error("Invalid arguments");
        return DOCLING_ERROR_INVALID_PDF;
    }
    // Give the caller a well-defined value on every failure path.
    *out_page = nullptr;
    try {
        // Allocate page structure
        DoclingPage* page = static_cast<DoclingPage*>(malloc(sizeof(DoclingPage)));
        if (!page) {
            set_last_error("Failed to get page: out of memory");
            return DOCLING_ERROR_PARSE_FAILED;
        }
        page->page_number = page_num;
        page->width = 612.0f;  // Default US Letter width
        page->height = 792.0f; // Default US Letter height
        page->cell_count = 0;
        page->cells = nullptr;
        // TODO: Parse PDF and extract cells for this page
        *out_page = page;
        return DOCLING_OK;
    } catch (const std::exception& e) {
        set_last_error(std::string("Failed to get page: ") + e.what());
        return DOCLING_ERROR_PARSE_FAILED;
    }
}
// docling_free_page function · cpp · L121-L134 (14 LOC)cpp/docling_ffi.cpp
/**
 * Release a page together with the text and font-name buffers of its cells.
 *
 * A null `page` is accepted and treated as already freed.
 *
 * @param page  Page to release, or null.
 * @return Always DOCLING_OK.
 */
DoclingError docling_free_page(DoclingPage* page) {
    if (page == nullptr) {
        return DOCLING_OK;
    }
    if (page->cells != nullptr) {
        // Free the per-cell buffers before the cell array itself.
        for (size_t idx = 0; idx < page->cell_count; ++idx) {
            auto& cell = page->cells[idx];
            free((void*)cell.text);
            free((void*)cell.font_name);
        }
        free(page->cells);
    }
    free(page);
    return DOCLING_OK;
}
// docling_export_markdown function · cpp · L135-L210 (76 LOC)cpp/docling_ffi.cpp
DoclingError docling_export_markdown(DoclingDocumentHandle handle, char** out_markdown) {
if (!handle || !out_markdown) {
return DOCLING_ERROR_INVALID_PDF;
}
try {
auto doc = static_cast<DoclingDocument*>(handle);
// Set the resources directory BEFORE creating the parser
std::filesystem::path root_path(ROOT_PATH);
std::filesystem::path resources_path = root_path / "docling_parse" / "pdf_resources_v2";
resources_path = std::filesystem::absolute(resources_path);
if (!std::filesystem::exists(resources_path)) {
std::cerr << "[FFI] ERROR: Resources path does not exist: " << resources_path << std::endl;
set_last_error("Resources path does not exist: " + resources_path.string());
return DOCLING_ERROR_PARSE_FAILED;
}
// Set the resources directory globally using resource_utils
std::cerr << "[FFI] Setting resources directory: " << resoudocling_free_string function · cpp · L211-L217 (7 LOC)cpp/docling_ffi.cpp
/**
 * Release a string with free().
 *
 * @param str  String to release; null is allowed.
 * @return Always DOCLING_OK.
 */
DoclingError docling_free_string(char* str) {
    // free() is defined to do nothing for a null pointer, so no guard is needed.
    free(str);
    return DOCLING_OK;
}
// docling_export_markdown function · cpp · L42-L47 (6 LOC)cpp/docling_ffi_stub.cpp
/**
 * Stub export: always fails with a message pointing at the full FFI build.
 *
 * The message is stored in `g_error` and, when `out_markdown` is non-null,
 * a STRDUP'd copy of it is written there.
 *
 * @param handle        Ignored by the stub.
 * @param out_markdown  Receives a copy of the stub message; may be null.
 * @return Always DOCLING_ERROR_PARSE_FAILED.
 */
DoclingError docling_export_markdown(DoclingDocumentHandle handle, char** out_markdown) {
    (void)handle; // unused in the stub build
    g_error = "Stub: Use full FFI build for docling-parse functionality";
    // The original wrote through out_markdown unconditionally and crashed on
    // a null pointer.
    if (out_markdown) {
        *out_markdown = STRDUP(g_error.c_str());
    }
    return DOCLING_ERROR_PARSE_FAILED;
}
// main function · rust · L10-L85 (76 LOC)examples/advanced_options.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let converter = Converter::new()?;
// Configure conversion options
let options = ConversionOptions {
// Output control
split_pages: true,
optimize_for_llm: true,
max_chunk_size: 1024, // Tokens per chunk
// Quality settings
image_quality: ImageQuality::High,
dpi: 300,
ocr_language: "eng".to_string(),
// Processing options
preserve_layout: true,
extract_tables: true,
extract_images: true,
include_metadata: true,
// Optimization
compression_level: 9,
remove_headers_footers: true,
remove_watermarks: false,
normalize_whitespace: true,
// Feature flags
use_ffi: false,
use_precision_mode: false,
};
println!("Converting with advanced options...");
let split_pages = options.split_pages;
// Convert with custom options
let result = main function · rust · L9-L48 (40 LOC)examples/basic_conversion.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize the converter
let converter = Converter::new()?;
println!("Converting PDF to Markdown...");
// Convert PDF to Markdown
let result = converter
.convert("document.pdf") // Replace with your PDF file
.to(OutputFormat::Markdown {
split_pages: false, // Combine all pages into one document
optimize_for_llm: true, // Optimize output for LLM processing
})
.execute()
.await?;
// Display conversion statistics
println!("\n✅ Conversion complete!");
println!("📄 Input file: {:?}", result.input_path);
println!("📝 Pages processed: {}", result.statistics.pages_processed);
println!(
"📊 Tables extracted: {}",
result.statistics.tables_extracted
);
println!("⏱️ Duration: {:?}", result.statistics.duration);
println!(
"📏 Input size: {} bytes",
result.statistics.input_size_bytes
main function · rust · L13-L103 (91 LOC)examples/batch_processing.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let converter = Converter::new()?;
// List of files to process
let input_files = vec![
"document1.pdf",
"document2.pdf",
"document3.pdf",
// Add more files as needed
];
println!(
"Starting batch conversion of {} files...\n",
input_files.len()
);
// Create conversion tasks
let tasks: Vec<_> = input_files
.into_iter()
.map(|file| {
let converter = Converter::new().unwrap();
let file = file.to_string();
async move {
println!("🔄 Processing: {}", file);
let output_file = PathBuf::from(&file).with_extension("md");
let result = converter
.convert(&file)
.to(OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
})
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
main function · rust · L15-L122 (108 LOC)examples/pdf_conversion.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize logging
tracing_subscriber::fmt::init();
// Get PDF path from command line
let args: Vec<String> = std::env::args().collect();
if args.len() < 2 {
eprintln!("Usage: {} <pdf-file>", args[0]);
eprintln!("Example: {} document.pdf", args[0]);
std::process::exit(1);
}
let pdf_path = &args[1];
println!("Converting PDF: {}", pdf_path);
// Create converter
let converter = Converter::new()?;
// Example 1: Basic conversion to Markdown
println!("\n=== Example 1: Basic PDF → Markdown ===");
let result = converter
.convert(pdf_path)
.to(OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
})
.execute()
.await?;
println!("✓ Converted {} pages", result.page_count());
println!(" Input size: {} bytes", result.input_size());
println!(" Output size: {} bytes"new function · rust · L25-L35 (11 LOC)src/batch.rs
pub fn new() -> Self {
Self {
files: Vec::new(),
output_format: OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
},
options: ConversionOptions::default(),
parallel_jobs: num_cpus::get(),
}
}add_files function · rust · L44-L49 (6 LOC)src/batch.rs
pub fn add_files<P: AsRef<Path>>(mut self, paths: &[P]) -> Self {
for path in paths {
self.files.push(path.as_ref().to_path_buf());
}
self
}execute function · rust · L70-L142 (73 LOC)src/batch.rs
pub async fn execute(self) -> Result<BatchResult> {
let start_time = Instant::now();
let total_files = self.files.len();
eprintln!("🚀 Starting batch conversion...");
eprintln!(" Files: {}", total_files);
eprintln!(" Concurrent jobs: {}", self.parallel_jobs);
eprintln!(" Output format: {:?}", self.output_format);
eprintln!();
let output_format = self.output_format.clone();
let options = self.options.clone();
// Process files concurrently using Tokio
let mut tasks = Vec::new();
for file in self.files {
let output_format = output_format.clone();
let options = options.clone();
let task = tokio::spawn(async move {
let result = match Converter::new() {
Ok(converter) => {
converter
.convert(&file)
.to(output_format)
test_batch_result_creation function · rust · L162-L172 (11 LOC)src/batch.rs
fn test_batch_result_creation() {
let result = BatchResult {
total_files: 10,
successes: vec![],
failures: vec![],
total_time: std::time::Duration::from_secs(1),
};
assert_eq!(result.total_files, 10);
assert_eq!(result.successes.len(), 0);
assert_eq!(result.failures.len(), 0);
}success_rate function · rust · L190-L196 (7 LOC)src/batch.rs
pub fn success_rate(&self) -> f64 {
if self.total_files == 0 {
0.0
} else {
(self.successes.len() as f64 / self.total_files as f64) * 100.0
}
}total_pages function · rust · L199-L204 (6 LOC)src/batch.rs
pub fn total_pages(&self) -> usize {
self.successes
.iter()
.map(|(_, result)| result.metadata.page_count)
.sum()
}pages_per_second function · rust · L207-L216 (10 LOC)src/batch.rs
pub fn pages_per_second(&self) -> f64 {
let total_pages = self.total_pages() as f64;
let total_secs = self.total_time.as_secs_f64();
if total_secs > 0.0 {
total_pages / total_secs
} else {
0.0
}
}Repobility · open methodology · https://repobility.com/research/
save_all function · rust · L219-L244 (26 LOC)src/batch.rs
pub async fn save_all<P: AsRef<Path>>(&self, output_dir: P) -> Result<()> {
let output_dir = output_dir.as_ref();
tokio::fs::create_dir_all(output_dir).await?;
for (input_path, result) in &self.successes {
let filename = input_path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("output");
let extension = match result.output_format {
OutputFormat::Markdown { .. } => "md",
OutputFormat::Json { .. } => "json",
OutputFormat::Image { .. } => "png",
_ => "txt",
};
let output_path = output_dir.join(format!("{}.{}", filename, extension));
if let Some(output) = result.content.first() {
tokio::fs::write(&output_path, &output.data).await?;
}
}
Ok(())
}list_zip_files function · rust · L41-L55 (15 LOC)src/converters/archive.rs
async fn list_zip_files(&self, archive_path: &Path) -> Result<Vec<(String, u64)>> {
let data = fs::read(archive_path).await?;
let cursor = Cursor::new(data);
let mut archive = ZipArchive::new(cursor)?;
let mut files = Vec::new();
for i in 0..archive.len() {
let file = archive.by_index(i)?;
if !file.is_dir() {
files.push((file.name().to_string(), file.size()));
}
}
Ok(files)
}list_tar_files function · rust · L59-L93 (35 LOC)src/converters/archive.rs
async fn list_tar_files(
&self,
archive_path: &Path,
is_gzipped: bool,
) -> Result<Vec<(String, u64)>> {
let data = fs::read(archive_path).await?;
let cursor = Cursor::new(data);
let mut files = Vec::new();
if is_gzipped {
let decoder = GzDecoder::new(cursor);
let mut archive = TarArchive::new(decoder);
for entry in archive.entries()? {
let entry = entry?;
let path = entry.path()?;
if !entry.header().entry_type().is_dir() {
files.push((path.display().to_string(), entry.header().size()?));
}
}
} else {
let mut archive = TarArchive::new(cursor);
for entry in archive.entries()? {
let entry = entry?;
let path = entry.path()?;
if !entry.header().entry_type().is_dir() {
files.push((path.display().list_archive_files function · rust · L96-L112 (17 LOC)src/converters/archive.rs
async fn list_archive_files(
&self,
archive_path: &Path,
format: FileFormat,
) -> Result<Vec<(String, u64)>> {
match format {
FileFormat::Zip => self.list_zip_files(archive_path).await,
#[cfg(feature = "archives-extended")]
FileFormat::Tar => self.list_tar_files(archive_path, false).await,
#[cfg(feature = "archives-extended")]
FileFormat::TarGz => self.list_tar_files(archive_path, true).await,
_ => Err(crate::TransmutationError::UnsupportedFormat(format!(
"Archive format {:?} not yet supported",
format
))),
}
}output_formats function · rust · L224-L236 (13 LOC)src/converters/archive.rs
fn output_formats(&self) -> Vec<OutputFormat> {
vec![
OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
},
OutputFormat::Json {
structured: true,
include_metadata: true,
},
]
}convert function · rust · L237-L324 (88 LOC)src/converters/archive.rs
async fn convert(
&self,
input: &Path,
output_format: OutputFormat,
_options: ConversionOptions,
) -> Result<ConversionResult> {
let archive_name = input
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("archive");
// Detect archive format
let input_format = file_detect::detect_format(input).await?;
eprintln!("🔄 Archive Processing (Pure Rust)");
eprintln!(
" Archive ({:?}) → List Files → {:?}",
input_format, output_format
);
eprintln!();
// List files in archive
let files = self.list_archive_files(input, input_format).await?;
eprintln!("📦 Found {} files in archive", files.len());
// Convert to requested format
let output_data = match output_format {
OutputFormat::Markdown { .. } => {
eprintln!("📝 Generating Markdown index...");
let markdown =metadata function · rust · L325-L333 (9 LOC)src/converters/archive.rs
fn metadata(&self) -> ConverterMetadata {
ConverterMetadata {
name: "Archive Converter".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
description: "Archive to document index converter (ZIP, TAR, 7Z)".to_string(),
external_deps: vec![],
}
}check_whisper function · rust · L34-L57 (24 LOC)src/converters/audio.rs
/// Reports whether a Whisper executable appears to be available.
///
/// First tries to launch `whisper --help` by name; if that cannot be
/// started, checks whether any of the usual installation paths exists.
fn check_whisper() -> bool {
    // Try whisper in PATH
    let launches = Command::new("whisper").arg("--help").output().is_ok();
    if launches {
        return true;
    }
    // Try common installation paths
    let home = std::env::var("HOME").unwrap_or_default();
    let candidates = [
        format!("{}/.local/bin/whisper", home),
        "/usr/local/bin/whisper".to_string(),
        "/usr/bin/whisper".to_string(),
    ];
    candidates
        .iter()
        .any(|candidate| std::path::Path::new(candidate).exists())
}
// All rows scored by the Repobility analyzer (https://repobility.com)
get_whisper_cmd function · rust · L60-L79 (20 LOC)src/converters/audio.rs
/// Picks the command used to invoke Whisper.
///
/// Returns the first of the usual installation paths that exists on disk,
/// or the bare name `whisper` when none of them does.
fn get_whisper_cmd() -> String {
    let home = std::env::var("HOME").unwrap_or_default();
    // Try common paths
    let candidates = [
        format!("{}/.local/bin/whisper", home),
        "/usr/local/bin/whisper".to_string(),
        "/usr/bin/whisper".to_string(),
    ];
    candidates
        .iter()
        .find(|candidate| std::path::Path::new(candidate.as_str()).exists())
        .cloned()
        .unwrap_or_else(|| "whisper".to_string())
}
// transcribe_audio function · rust · L82-L137 (56 LOC)src/converters/audio.rs
async fn transcribe_audio(&self, audio_path: &Path, language: Option<&str>) -> Result<String> {
if !Self::check_whisper() {
return Err(crate::TransmutationError::conversion_failed(
"Whisper not found. Install: pip install openai-whisper (or pipx install openai-whisper)",
));
}
// Use Whisper CLI for transcription
let whisper_cmd = Self::get_whisper_cmd();
let mut cmd = Command::new(&whisper_cmd);
cmd.arg(audio_path);
cmd.arg("--model").arg("base"); // Use base model (fast, good quality)
cmd.arg("--output_format").arg("txt");
cmd.arg("--output_dir").arg("/tmp");
if let Some(lang) = language {
cmd.arg("--language").arg(lang);
}
eprintln!("📝 Running Whisper transcription...");
let output = cmd.output().map_err(|e| {
crate::TransmutationError::conversion_failed(&format!(
"Whisper execution failed: {}",audio_to_markdown function · rust · L140-L155 (16 LOC)src/converters/audio.rs
async fn audio_to_markdown(&self, audio_path: &Path, language: Option<&str>) -> Result<String> {
let transcript = self.transcribe_audio(audio_path, language).await?;
let mut markdown = String::new();
markdown.push_str("# Audio Transcription\n\n");
if let Some(lang) = language {
markdown.push_str(&format!("**Language**: {}\n\n", lang));
}
markdown.push_str("## Transcript\n\n");
markdown.push_str(&transcript);
markdown.push('\n');
Ok(markdown)
}supported_formats function · rust · L166-L174 (9 LOC)src/converters/audio.rs
fn supported_formats(&self) -> Vec<FileFormat> {
vec![
FileFormat::Mp3,
FileFormat::Wav,
FileFormat::M4a,
FileFormat::Ogg,
FileFormat::Flac,
]
}output_formats function · rust · L175-L187 (13 LOC)src/converters/audio.rs
fn output_formats(&self) -> Vec<OutputFormat> {
vec![
OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
},
OutputFormat::Json {
structured: true,
include_metadata: true,
},
]
}convert function · rust · L188-L263 (76 LOC)src/converters/audio.rs
async fn convert(
&self,
input: &Path,
output_format: OutputFormat,
_options: ConversionOptions,
) -> Result<ConversionResult> {
eprintln!("🔄 Audio Transcription (Whisper)");
eprintln!(" Audio → Whisper → {:?}", output_format);
eprintln!();
let language = None; // Auto-detect (can be made configurable)
// Convert audio to text
let markdown = self.audio_to_markdown(input, language).await?;
// Convert to requested format
let output_data = match output_format {
OutputFormat::Markdown { .. } => {
eprintln!("✅ Transcription complete!");
markdown.into_bytes()
}
OutputFormat::Json { .. } => {
eprintln!("📝 Converting to JSON...");
let json = serde_json::json!({
"transcription": {
"text": markdown,
"language": language.unmetadata function · rust · L264-L272 (9 LOC)src/converters/audio.rs
fn metadata(&self) -> ConverterMetadata {
ConverterMetadata {
name: "Audio Transcription Converter".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
description: "Audio to text converter using Whisper ASR".to_string(),
external_deps: vec!["whisper".to_string(), "ffmpeg".to_string()],
}
}csv_to_markdown function · rust · L36-L75 (40 LOC)src/converters/csv.rs
fn csv_to_markdown(&self, csv: &str) -> String {
let lines: Vec<&str> = csv.lines().filter(|l| !l.trim().is_empty()).collect();
if lines.is_empty() {
return "# Empty File\n".to_string();
}
let mut markdown = String::new();
markdown.push_str("# Data Table\n\n");
for (idx, line) in lines.iter().enumerate() {
let cells: Vec<&str> = line.split(self.delimiter).collect();
// Header row
if idx == 0 {
markdown.push('|');
for cell in &cells {
markdown.push_str(&format!(" {} |", cell.trim()));
}
markdown.push('\n');
// Separator
markdown.push('|');
for _ in &cells {
markdown.push_str("---|");
}
markdown.push('\n');
} else {
// Data rows
markdown.push('|');
Repobility (the analyzer behind this table) · https://repobility.com
csv_to_json function · rust · L78-L116 (39 LOC)src/converters/csv.rs
fn csv_to_json(&self, csv: &str) -> Result<String> {
let lines: Vec<&str> = csv.lines().filter(|l| !l.trim().is_empty()).collect();
if lines.is_empty() {
return Ok(serde_json::json!({"data": []}).to_string());
}
// First line is headers
let headers: Vec<String> = lines[0]
.split(self.delimiter)
.map(|h| h.trim().to_string())
.collect();
// Remaining lines are data
let mut rows = Vec::new();
for line in &lines[1..] {
let cells: Vec<&str> = line.split(self.delimiter).collect();
let mut row = serde_json::Map::new();
for (idx, cell) in cells.iter().enumerate() {
if idx < headers.len() {
row.insert(
headers[idx].clone(),
serde_json::Value::String(cell.trim().to_string()),
);
}
}
rows.push(row);
output_formats function · rust · L130-L142 (13 LOC)src/converters/csv.rs
fn output_formats(&self) -> Vec<OutputFormat> {
vec![
OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
},
OutputFormat::Json {
structured: true,
include_metadata: true,
},
]
}convert function · rust · L143-L218 (76 LOC)src/converters/csv.rs
async fn convert(
&self,
input: &Path,
output_format: OutputFormat,
_options: ConversionOptions,
) -> Result<ConversionResult> {
eprintln!("🔄 CSV/TSV Conversion (Pure Rust)");
eprintln!(" CSV → Parsing → {:?}", output_format);
eprintln!();
// Read CSV file
let csv_content = fs::read_to_string(input).await?;
// Convert to requested format
let output_data = match output_format {
OutputFormat::Markdown { .. } => {
eprintln!("📝 Converting to Markdown table...");
let markdown = self.csv_to_markdown(&csv_content);
markdown.into_bytes()
}
OutputFormat::Json { .. } => {
eprintln!("📝 Converting to JSON...");
let json = self.csv_to_json(&csv_content)?;
json.into_bytes()
}
_ => {
return Err(crate::TransmutationError::Unsupportedmetadata function · rust · L219-L227 (9 LOC)src/converters/csv.rs
fn metadata(&self) -> ConverterMetadata {
ConverterMetadata {
name: "CSV/TSV Converter".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
description: "CSV/TSV to Markdown tables and JSON (pure Rust)".to_string(),
external_deps: vec![],
}
}test_csv_converter_creation function · rust · L235-L240 (6 LOC)src/converters/csv.rs
fn test_csv_converter_creation() {
let converter = CsvConverter::new();
let formats = converter.supported_formats();
assert!(formats.contains(&FileFormat::Csv));
assert!(formats.contains(&FileFormat::Tsv));
}test_csv_to_markdown_basic function · rust · L243-L249 (7 LOC)src/converters/csv.rs
fn test_csv_to_markdown_basic() {
let converter = CsvConverter::new();
let csv = "Name,Age\nAlice,30\nBob,25";
let result = converter.csv_to_markdown(csv);
assert!(result.contains("Name"));
assert!(result.contains("Alice"));
}convert_to_images function · rust · L32-L39 (8 LOC)src/converters/docx.rs
async fn convert_to_images(
&self,
path: &Path,
format: crate::types::ImageFormat,
_quality: u8,
dpi: u32,
_options: &ConversionOptions,
) -> Result<Vec<ConversionOutput>> {convert_to_markdown function · rust · L196-L286 (91 LOC)src/converters/docx.rs
async fn convert_to_markdown(
&self,
path: &Path,
options: &ConversionOptions,
) -> Result<Vec<ConversionOutput>> {
eprintln!("📄 Reading DOCX file with docx-rs...");
// Read DOCX file
let file_data = tokio::fs::read(path).await?;
// Parse DOCX using docx-rs
let docx = docx_rs::read_docx(&file_data).map_err(|e| {
crate::TransmutationError::engine_error(
"docx-rs",
format!("Failed to parse DOCX: {:?}", e),
)
})?;
eprintln!("✓ DOCX parsed successfully");
// Extract all paragraphs first
let mut all_paragraphs = Vec::new();
for child in &docx.document.children {
let text = self.extract_text_from_child(child);
if !text.is_empty() {
all_paragraphs.push(text);
}
}
// If split_pages enabled, divide into chunks (10-15 paragraphs per "page")
if optionCitation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
extract_text_from_child function · rust · L290-L298 (9 LOC)src/converters/docx.rs
fn extract_text_from_child(&self, child: &docx_rs::DocumentChild) -> String {
use docx_rs::DocumentChild;
match child {
DocumentChild::Paragraph(para) => self.extract_paragraph_text(para),
DocumentChild::Table(table) => self.extract_table_text(table),
_ => String::new(),
}
}extract_paragraph_text function · rust · L302-L318 (17 LOC)src/converters/docx.rs
fn extract_paragraph_text(&self, para: &docx_rs::Paragraph) -> String {
use docx_rs::ParagraphChild;
let mut text = String::new();
for child in ¶.children {
if let ParagraphChild::Run(run) = child {
for run_child in &run.children {
if let docx_rs::RunChild::Text(t) = run_child {
text.push_str(&t.text);
}
}
}
}
text.trim().to_string()
}output_formats function · rust · L340-L346 (7 LOC)src/converters/docx.rs
fn output_formats(&self) -> Vec<OutputFormat> {
vec![OutputFormat::Markdown {
split_pages: false,
optimize_for_llm: true,
}]
}page 1 / 9next ›