← back to hivellm__transmutation

Function bodies 424 total

All specs Real LLM only Function bodies
main function · rust · L5-L201 (197 LOC)
build.rs
fn main() {
    // Embed icon and metadata into Windows executable (only for CLI binary builds)
    // When used as a library dependency, skip resource compilation to avoid
    // duplicate resource errors (see: https://github.com/hivellm/transmutation/issues/3)
    #[cfg(all(target_os = "windows", feature = "cli"))]
    {
        let mut res = winres::WindowsResource::new();
        res.set_icon("assets/icon.ico");
        res.set("ProductName", "Transmutation");
        res.set(
            "FileDescription",
            "High-performance document conversion engine for AI/LLM embeddings",
        );
        res.set("CompanyName", "HiveLLM Team");
        res.set("LegalCopyright", "Copyright (c) 2025 HiveLLM Team");
        res.set("OriginalFilename", "transmutation.exe");
        if let Err(e) = res.compile() {
            eprintln!("Warning: Failed to compile Windows resources: {e}");
        }
    }

    // Check for optional external dependencies
    check_external_dependencies()
command_exists function · rust · L283-L293 (11 LOC)
build.rs
/// Reports whether the platform's locator program can find `cmd`.
///
/// The lookup is delegated to `where` on Windows and `which` everywhere else. The answer is
/// `true` only when that program runs and exits successfully; failing to launch it at all
/// counts as "not found".
fn command_exists(cmd: &str) -> bool {
    let locator = if cfg!(target_os = "windows") {
        "where"
    } else {
        "which"
    };

    match Command::new(locator).arg(cmd).output() {
        Ok(finished) => finished.status.success(),
        Err(_) => false,
    }
}
get_install_command function · rust · L297-L330 (34 LOC)
build.rs
fn get_install_command(tool: &str) -> String {
    #[cfg(target_os = "linux")]
    {
        match tool {
            "poppler-utils" => "sudo apt-get install poppler-utils".to_string(),
            "libreoffice" => "sudo apt-get install libreoffice".to_string(),
            "tesseract" => "sudo apt-get install tesseract-ocr".to_string(),
            "ffmpeg" => "sudo apt-get install ffmpeg".to_string(),
            _ => format!("sudo apt-get install {tool}"),
        }
    }

    #[cfg(target_os = "macos")]
    {
        match tool {
            "poppler-utils" => "brew install poppler".to_string(),
            "libreoffice" => "brew install --cask libreoffice".to_string(),
            "tesseract" => "brew install tesseract".to_string(),
            "ffmpeg" => "brew install ffmpeg".to_string(),
            _ => format!("brew install {tool}"),
        }
    }

    #[cfg(target_os = "windows")]
    {
        match tool {
            "poppler-utils" => "choco install poppler".to_string(
get_quick_install_all function · rust · L333-L348 (16 LOC)
build.rs
/// Returns the indented command line of the bundled script that installs all optional tools.
///
/// The script is selected from the operating system this code is compiled for. Previously
/// every branch was gated behind `#[cfg(target_os = ...)]` for Linux, macOS and Windows only,
/// so on any other OS the function body was empty and failed to type-check. Those targets now
/// receive a generic hint instead.
fn get_quick_install_all() -> &'static str {
    if cfg!(target_os = "linux") {
        "   ./install/install-deps-linux.sh"
    } else if cfg!(target_os = "macos") {
        "   ./install/install-deps-macos.sh"
    } else if cfg!(target_os = "windows") {
        "   .\\install\\install-deps-windows.ps1 (or .bat)"
    } else {
        // No install script is known for this platform.
        "   (no install script for this platform; install the tools manually)"
    }
}
docling_open_pdf function · cpp · L34-L60 (27 LOC)
cpp/docling_ffi.cpp
// Creates a DoclingDocument for `pdf_path` and returns it through `out_handle`.
//
// The document records the path and a docling-parse config naming the file. Nothing in this
// function reads the PDF itself.
//
// Returns:
//   DOCLING_OK                 - `*out_handle` now owns a new document.
//   DOCLING_ERROR_INVALID_PDF  - `pdf_path` or `out_handle` is null.
//   DOCLING_ERROR_PARSE_FAILED - an exception was thrown while building the document; the
//                                partially built document is destroyed and `*out_handle` is
//                                left untouched.
DoclingError docling_open_pdf(const char* pdf_path, DoclingDocumentHandle* out_handle) {
    std::cerr << "[FFI] docling_open_pdf called with: " << (pdf_path ? pdf_path : "NULL") << std::endl;

    if (!pdf_path || !out_handle) {
        set_last_error("Invalid arguments");
        std::cerr << "[FFI] ERROR: Invalid arguments" << std::endl;
        return DOCLING_ERROR_INVALID_PDF;
    }

    // Declared outside the try block so the handlers can free it: any statement after the
    // allocation may throw, and the document would otherwise leak.
    DoclingDocument* doc = nullptr;

    try {
        doc = new DoclingDocument();
        doc->pdf_path = pdf_path;

        // Initialize docling-parse config
        doc->config["data"] = nlohmann::json::object();
        doc->config["files"]["pdf"]["filename"] = pdf_path;

        std::cerr << "[FFI] PDF opened successfully" << std::endl;

        // Publish the handle last, once nothing that can throw remains.
        *out_handle = doc;
        return DOCLING_OK;

    } catch (const std::exception& e) {
        delete doc;
        set_last_error(std::string("Failed to open PDF: ") + e.what());
        return DOCLING_ERROR_PARSE_FAILED;
    } catch (...) {
        // Non-standard exceptions are reported the same way instead of escaping the function.
        delete doc;
        set_last_error("Failed to open PDF: unknown error");
        return DOCLING_ERROR_PARSE_FAILED;
    }
}
docling_close_pdf function · cpp · L61-L75 (15 LOC)
cpp/docling_ffi.cpp
// Destroys the DoclingDocument behind `handle`.
//
// Returns DOCLING_ERROR_INVALID_PDF for a null handle (the last-error text is not updated in
// that case), DOCLING_OK after the document has been deleted, and DOCLING_ERROR_PARSE_FAILED
// with the last-error text set if deletion throws a std::exception.
DoclingError docling_close_pdf(DoclingDocumentHandle handle) {
    if (handle) {
        try {
            delete static_cast<DoclingDocument*>(handle);
            return DOCLING_OK;
        } catch (const std::exception& e) {
            set_last_error(std::string("Failed to close PDF: ") + e.what());
            return DOCLING_ERROR_PARSE_FAILED;
        }
    }

    return DOCLING_ERROR_INVALID_PDF;
}
docling_get_page_count function · cpp · L76-L92 (17 LOC)
cpp/docling_ffi.cpp
// Writes the number of pages of the document behind `handle` to `*out_count`.
//
// Placeholder implementation: the count is always 1 and the document is not consulted yet.
// Returns DOCLING_ERROR_INVALID_PDF when either pointer is null, otherwise DOCLING_OK
// (DOCLING_ERROR_PARSE_FAILED only if a std::exception is caught).
DoclingError docling_get_page_count(DoclingDocumentHandle handle, int* out_count) {
    const bool has_arguments = handle && out_count;
    if (!has_arguments) {
        return DOCLING_ERROR_INVALID_PDF;
    }

    try {
        auto document = static_cast<DoclingDocument*>(handle);
        (void)document;  // not read until real page counting is implemented

        // TODO: Implement page count extraction (mock value for now)
        *out_count = 1;
        return DOCLING_OK;
    } catch (const std::exception& e) {
        set_last_error(std::string("Failed to get page count: ") + e.what());
        return DOCLING_ERROR_PARSE_FAILED;
    }
}
Repobility (the analyzer behind this table) · https://repobility.com
docling_get_page function · cpp · L93-L120 (28 LOC)
cpp/docling_ffi.cpp
// Allocates a DoclingPage for `page_num` and returns it through `out_page`.
//
// The page is currently a placeholder: US Letter dimensions and no cells (extracting cells
// from the PDF is still a TODO). The structure is obtained with malloc(), which matches the
// free() calls made by docling_free_page().
//
// Returns:
//   DOCLING_OK                 - `*out_page` points at the new page.
//   DOCLING_ERROR_INVALID_PDF  - `handle` or `out_page` is null.
//   DOCLING_ERROR_PARSE_FAILED - allocation failed or a std::exception was caught; the
//                                last-error text describes the failure.
DoclingError docling_get_page(DoclingDocumentHandle handle, int page_num, DoclingPage** out_page) {
    if (!handle || !out_page) {
        return DOCLING_ERROR_INVALID_PDF;
    }

    try {
        // Allocate page structure
        DoclingPage* page = static_cast<DoclingPage*>(malloc(sizeof(DoclingPage)));
        if (!page) {
            // Record the cause so the stored last-error text describes this failure rather
            // than whatever an earlier call left behind.
            set_last_error("Failed to get page: out of memory");
            return DOCLING_ERROR_PARSE_FAILED;
        }

        page->page_number = page_num;
        page->width = 612.0f;  // Default US Letter width
        page->height = 792.0f; // Default US Letter height
        page->cell_count = 0;
        page->cells = nullptr;

        // TODO: Parse PDF and extract cells for this page

        *out_page = page;
        return DOCLING_OK;
    } catch (const std::exception& e) {
        set_last_error(std::string("Failed to get page: ") + e.what());
        return DOCLING_ERROR_PARSE_FAILED;
    }
}
docling_free_page function · cpp · L121-L134 (14 LOC)
cpp/docling_ffi.cpp
// Releases a page together with the strings and cell array it holds.
//
// Every piece is returned with free(): each cell's `text` and `font_name`, then the cell
// array, then the page structure. A null `page` is accepted and treated as already freed.
// Always returns DOCLING_OK.
DoclingError docling_free_page(DoclingPage* page) {
    if (page == nullptr) {
        return DOCLING_OK;
    }

    if (page->cells != nullptr) {
        for (size_t idx = 0; idx < page->cell_count; ++idx) {
            auto& cell = page->cells[idx];
            free((void*)cell.text);
            free((void*)cell.font_name);
        }
        free(page->cells);
    }

    free(page);
    return DOCLING_OK;
}
docling_export_markdown function · cpp · L135-L210 (76 LOC)
cpp/docling_ffi.cpp
DoclingError docling_export_markdown(DoclingDocumentHandle handle, char** out_markdown) {
    if (!handle || !out_markdown) {
        return DOCLING_ERROR_INVALID_PDF;
    }
    
    try {
        auto doc = static_cast<DoclingDocument*>(handle);
        
        // Set the resources directory BEFORE creating the parser
        std::filesystem::path root_path(ROOT_PATH);
        std::filesystem::path resources_path = root_path / "docling_parse" / "pdf_resources_v2";
        resources_path = std::filesystem::absolute(resources_path);
        
        if (!std::filesystem::exists(resources_path)) {
            std::cerr << "[FFI] ERROR: Resources path does not exist: " << resources_path << std::endl;
            set_last_error("Resources path does not exist: " + resources_path.string());
            return DOCLING_ERROR_PARSE_FAILED;
        }
        
        // Set the resources directory globally using resource_utils
        std::cerr << "[FFI] Setting resources directory: " << resou
docling_free_string function · cpp · L211-L217 (7 LOC)
cpp/docling_ffi.cpp
// Releases a string with free(). Passing null is allowed. Always returns DOCLING_OK.
DoclingError docling_free_string(char* str) {
    // free() is a no-op for a null pointer, so no explicit guard is required.
    free(str);
    return DOCLING_OK;
}
docling_export_markdown function · cpp · L42-L47 (6 LOC)
cpp/docling_ffi_stub.cpp
// Stub export: no document is processed.
//
// Stores an explanatory message in `g_error`, hands a duplicated copy of it back through
// `out_markdown` when that pointer is non-null, and always returns
// DOCLING_ERROR_PARSE_FAILED. `handle` is ignored.
DoclingError docling_export_markdown(DoclingDocumentHandle handle, char** out_markdown) {
    (void)handle;  // the stub never looks at the document

    g_error = "Stub: Use full FFI build for docling-parse functionality";

    // Guard the write: a null output pointer used to be dereferenced unconditionally.
    if (out_markdown) {
        *out_markdown = STRDUP(g_error.c_str());
    }
    return DOCLING_ERROR_PARSE_FAILED;
}
main function · rust · L10-L85 (76 LOC)
examples/advanced_options.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let converter = Converter::new()?;

    // Configure conversion options
    let options = ConversionOptions {
        // Output control
        split_pages: true,
        optimize_for_llm: true,
        max_chunk_size: 1024, // Tokens per chunk

        // Quality settings
        image_quality: ImageQuality::High,
        dpi: 300,
        ocr_language: "eng".to_string(),

        // Processing options
        preserve_layout: true,
        extract_tables: true,
        extract_images: true,
        include_metadata: true,

        // Optimization
        compression_level: 9,
        remove_headers_footers: true,
        remove_watermarks: false,
        normalize_whitespace: true,

        // Feature flags
        use_ffi: false,
        use_precision_mode: false,
    };

    println!("Converting with advanced options...");

    let split_pages = options.split_pages;

    // Convert with custom options
    let result = 
main function · rust · L9-L48 (40 LOC)
examples/basic_conversion.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize the converter
    let converter = Converter::new()?;

    println!("Converting PDF to Markdown...");

    // Convert PDF to Markdown
    let result = converter
        .convert("document.pdf") // Replace with your PDF file
        .to(OutputFormat::Markdown {
            split_pages: false,     // Combine all pages into one document
            optimize_for_llm: true, // Optimize output for LLM processing
        })
        .execute()
        .await?;

    // Display conversion statistics
    println!("\n✅ Conversion complete!");
    println!("📄 Input file: {:?}", result.input_path);
    println!("📝 Pages processed: {}", result.statistics.pages_processed);
    println!(
        "📊 Tables extracted: {}",
        result.statistics.tables_extracted
    );
    println!("⏱️  Duration: {:?}", result.statistics.duration);
    println!(
        "📏 Input size: {} bytes",
        result.statistics.input_size_bytes
    
main function · rust · L13-L103 (91 LOC)
examples/batch_processing.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let converter = Converter::new()?;

    // List of files to process
    let input_files = vec![
        "document1.pdf",
        "document2.pdf",
        "document3.pdf",
        // Add more files as needed
    ];

    println!(
        "Starting batch conversion of {} files...\n",
        input_files.len()
    );

    // Create conversion tasks
    let tasks: Vec<_> = input_files
        .into_iter()
        .map(|file| {
            let converter = Converter::new().unwrap();
            let file = file.to_string();

            async move {
                println!("🔄 Processing: {}", file);

                let output_file = PathBuf::from(&file).with_extension("md");

                let result = converter
                    .convert(&file)
                    .to(OutputFormat::Markdown {
                        split_pages: false,
                        optimize_for_llm: true,
                    })
                 
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
main function · rust · L15-L122 (108 LOC)
examples/pdf_conversion.rs
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Initialize logging
    tracing_subscriber::fmt::init();

    // Get PDF path from command line
    let args: Vec<String> = std::env::args().collect();
    if args.len() < 2 {
        eprintln!("Usage: {} <pdf-file>", args[0]);
        eprintln!("Example: {} document.pdf", args[0]);
        std::process::exit(1);
    }

    let pdf_path = &args[1];
    println!("Converting PDF: {}", pdf_path);

    // Create converter
    let converter = Converter::new()?;

    // Example 1: Basic conversion to Markdown
    println!("\n=== Example 1: Basic PDF → Markdown ===");
    let result = converter
        .convert(pdf_path)
        .to(OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        })
        .execute()
        .await?;

    println!("✓ Converted {} pages", result.page_count());
    println!("  Input size:  {} bytes", result.input_size());
    println!("  Output size: {} bytes"
new function · rust · L25-L35 (11 LOC)
src/batch.rs
    /// Creates an instance with no files queued.
    ///
    /// Defaults: Markdown output with `split_pages` off and `optimize_for_llm` on, default
    /// conversion options, and as many parallel jobs as `num_cpus::get()` reports.
    pub fn new() -> Self {
        let output_format = OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        };
        let parallel_jobs = num_cpus::get();

        Self {
            files: Vec::new(),
            output_format,
            options: ConversionOptions::default(),
            parallel_jobs,
        }
    }
add_files function · rust · L44-L49 (6 LOC)
src/batch.rs
    /// Appends every path in `paths` to the queued files, in order, and returns `self` so
    /// calls can be chained.
    pub fn add_files<P: AsRef<Path>>(mut self, paths: &[P]) -> Self {
        self.files
            .extend(paths.iter().map(|entry| entry.as_ref().to_path_buf()));
        self
    }
execute function · rust · L70-L142 (73 LOC)
src/batch.rs
    pub async fn execute(self) -> Result<BatchResult> {
        let start_time = Instant::now();
        let total_files = self.files.len();

        eprintln!("🚀 Starting batch conversion...");
        eprintln!("   Files: {}", total_files);
        eprintln!("   Concurrent jobs: {}", self.parallel_jobs);
        eprintln!("   Output format: {:?}", self.output_format);
        eprintln!();

        let output_format = self.output_format.clone();
        let options = self.options.clone();

        // Process files concurrently using Tokio
        let mut tasks = Vec::new();

        for file in self.files {
            let output_format = output_format.clone();
            let options = options.clone();

            let task = tokio::spawn(async move {
                let result = match Converter::new() {
                    Ok(converter) => {
                        converter
                            .convert(&file)
                            .to(output_format)
                  
test_batch_result_creation function · rust · L162-L172 (11 LOC)
src/batch.rs
    /// A `BatchResult` built by hand keeps the values it was given.
    fn test_batch_result_creation() {
        let one_second = std::time::Duration::from_secs(1);
        let result = BatchResult {
            total_files: 10,
            successes: Vec::new(),
            failures: Vec::new(),
            total_time: one_second,
        };

        assert_eq!(result.total_files, 10);
        assert!(result.successes.is_empty());
        assert!(result.failures.is_empty());
    }
success_rate function · rust · L190-L196 (7 LOC)
src/batch.rs
    /// Returns the share of files that converted successfully, as a percentage.
    ///
    /// Yields `0.0` when `total_files` is zero, which avoids dividing by zero.
    pub fn success_rate(&self) -> f64 {
        if self.total_files == 0 {
            return 0.0;
        }

        let succeeded = self.successes.len() as f64;
        let attempted = self.total_files as f64;
        (succeeded / attempted) * 100.0
    }
total_pages function · rust · L199-L204 (6 LOC)
src/batch.rs
    /// Adds up `metadata.page_count` over every successful conversion.
    pub fn total_pages(&self) -> usize {
        let mut pages: usize = 0;
        for (_, result) in &self.successes {
            pages += result.metadata.page_count;
        }
        pages
    }
pages_per_second function · rust · L207-L216 (10 LOC)
src/batch.rs
    /// Returns the throughput of the batch: total pages divided by `total_time` in seconds.
    ///
    /// Yields `0.0` when no time elapsed, so the division never happens with a zero divisor.
    pub fn pages_per_second(&self) -> f64 {
        let page_total = self.total_pages() as f64;
        let elapsed_secs = self.total_time.as_secs_f64();

        // `Duration::as_secs_f64` is never negative, so this is exactly the "not > 0" case.
        if elapsed_secs <= 0.0 {
            return 0.0;
        }
        page_total / elapsed_secs
    }
Repobility · open methodology · https://repobility.com/research/
save_all function · rust · L219-L244 (26 LOC)
src/batch.rs
    /// Writes the successful conversions into `output_dir`, creating the directory first.
    ///
    /// Each file is named after the input's file stem (`output` when the stem is missing or
    /// not valid UTF-8) plus an extension picked from the result's output format: `md`,
    /// `json`, `png`, or `txt` for anything else. Only the first entry of a result's
    /// `content` is written; results without content produce no file.
    ///
    /// # Errors
    ///
    /// Propagates the error if creating the directory or writing a file fails.
    pub async fn save_all<P: AsRef<Path>>(&self, output_dir: P) -> Result<()> {
        let target_dir = output_dir.as_ref();
        tokio::fs::create_dir_all(target_dir).await?;

        for (source, converted) in &self.successes {
            let suffix = match converted.output_format {
                OutputFormat::Markdown { .. } => "md",
                OutputFormat::Json { .. } => "json",
                OutputFormat::Image { .. } => "png",
                _ => "txt",
            };
            let stem = source
                .file_stem()
                .and_then(|name| name.to_str())
                .unwrap_or("output");
            let destination = target_dir.join(format!("{}.{}", stem, suffix));

            if let Some(first) = converted.content.first() {
                tokio::fs::write(&destination, &first.data).await?;
            }
        }

        Ok(())
    }
list_zip_files function · rust · L41-L55 (15 LOC)
src/converters/archive.rs
    /// Lists the non-directory entries of a ZIP archive as `(name, size)` pairs.
    ///
    /// The archive is read into memory in full and then opened from that buffer.
    ///
    /// # Errors
    ///
    /// Propagates failures from reading the file, opening the archive, or accessing an entry.
    async fn list_zip_files(&self, archive_path: &Path) -> Result<Vec<(String, u64)>> {
        let bytes = fs::read(archive_path).await?;
        let mut zip = ZipArchive::new(Cursor::new(bytes))?;

        let mut listing = Vec::new();
        for index in 0..zip.len() {
            let entry = zip.by_index(index)?;
            if entry.is_dir() {
                continue;
            }
            listing.push((entry.name().to_string(), entry.size()));
        }

        Ok(listing)
    }
list_tar_files function · rust · L59-L93 (35 LOC)
src/converters/archive.rs
    async fn list_tar_files(
        &self,
        archive_path: &Path,
        is_gzipped: bool,
    ) -> Result<Vec<(String, u64)>> {
        let data = fs::read(archive_path).await?;
        let cursor = Cursor::new(data);

        let mut files = Vec::new();

        if is_gzipped {
            let decoder = GzDecoder::new(cursor);
            let mut archive = TarArchive::new(decoder);

            for entry in archive.entries()? {
                let entry = entry?;
                let path = entry.path()?;
                if !entry.header().entry_type().is_dir() {
                    files.push((path.display().to_string(), entry.header().size()?));
                }
            }
        } else {
            let mut archive = TarArchive::new(cursor);

            for entry in archive.entries()? {
                let entry = entry?;
                let path = entry.path()?;
                if !entry.header().entry_type().is_dir() {
                    files.push((path.display().
list_archive_files function · rust · L96-L112 (17 LOC)
src/converters/archive.rs
    /// Lists the files of an archive by dispatching on its detected `format`.
    ///
    /// ZIP is always handled. TAR and gzipped TAR are handled only when the
    /// `archives-extended` feature is enabled. Every other format yields
    /// `TransmutationError::UnsupportedFormat`.
    async fn list_archive_files(
        &self,
        archive_path: &Path,
        format: FileFormat,
    ) -> Result<Vec<(String, u64)>> {
        match format {
            FileFormat::Zip => self.list_zip_files(archive_path).await,
            #[cfg(feature = "archives-extended")]
            FileFormat::Tar | FileFormat::TarGz => {
                let is_gzipped = matches!(format, FileFormat::TarGz);
                self.list_tar_files(archive_path, is_gzipped).await
            }
            _ => {
                let message = format!("Archive format {:?} not yet supported", format);
                Err(crate::TransmutationError::UnsupportedFormat(message))
            }
        }
    }
output_formats function · rust · L224-L236 (13 LOC)
src/converters/archive.rs
    /// Output formats this converter advertises: Markdown (pages not split, LLM-optimized)
    /// followed by structured JSON with metadata.
    fn output_formats(&self) -> Vec<OutputFormat> {
        let markdown = OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        };
        let json = OutputFormat::Json {
            structured: true,
            include_metadata: true,
        };

        vec![markdown, json]
    }
convert function · rust · L237-L324 (88 LOC)
src/converters/archive.rs
    async fn convert(
        &self,
        input: &Path,
        output_format: OutputFormat,
        _options: ConversionOptions,
    ) -> Result<ConversionResult> {
        let archive_name = input
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("archive");

        // Detect archive format
        let input_format = file_detect::detect_format(input).await?;

        eprintln!("🔄 Archive Processing (Pure Rust)");
        eprintln!(
            "   Archive ({:?}) → List Files → {:?}",
            input_format, output_format
        );
        eprintln!();

        // List files in archive
        let files = self.list_archive_files(input, input_format).await?;
        eprintln!("📦 Found {} files in archive", files.len());

        // Convert to requested format
        let output_data = match output_format {
            OutputFormat::Markdown { .. } => {
                eprintln!("📝 Generating Markdown index...");
                let markdown =
metadata function · rust · L325-L333 (9 LOC)
src/converters/archive.rs
    /// Describes this converter: display name, crate version, a short description, and an
    /// empty list of external dependencies.
    fn metadata(&self) -> ConverterMetadata {
        let name = String::from("Archive Converter");
        let version = String::from(env!("CARGO_PKG_VERSION"));
        let description = String::from("Archive to document index converter (ZIP, TAR, 7Z)");

        ConverterMetadata {
            name,
            version,
            description,
            external_deps: Vec::new(),
        }
    }
check_whisper function · rust · L34-L57 (24 LOC)
src/converters/audio.rs
    /// Reports whether a Whisper executable appears to be available.
    ///
    /// First tries to launch `whisper --help`; being able to run it at all is enough, the exit
    /// status is not inspected. Otherwise looks for the binary at `$HOME/.local/bin/whisper`,
    /// `/usr/local/bin/whisper` and `/usr/bin/whisper`. An unset `HOME` is treated as empty.
    fn check_whisper() -> bool {
        let launches = Command::new("whisper").arg("--help").output().is_ok();
        if launches {
            return true;
        }

        // Fall back to the usual installation locations.
        let home = std::env::var("HOME").unwrap_or_default();
        let candidates = [
            format!("{}/.local/bin/whisper", home),
            "/usr/local/bin/whisper".to_string(),
            "/usr/bin/whisper".to_string(),
        ];

        candidates
            .iter()
            .any(|candidate| std::path::Path::new(candidate.as_str()).exists())
    }
All rows scored by the Repobility analyzer (https://repobility.com)
get_whisper_cmd function · rust · L60-L79 (20 LOC)
src/converters/audio.rs
    /// Picks the command used to invoke Whisper.
    ///
    /// Returns the first of `$HOME/.local/bin/whisper`, `/usr/local/bin/whisper` and
    /// `/usr/bin/whisper` that exists on disk, and the bare name `whisper` when none of them
    /// does. An unset `HOME` is treated as empty.
    fn get_whisper_cmd() -> String {
        let home = std::env::var("HOME").unwrap_or_default();
        let candidates = [
            format!("{}/.local/bin/whisper", home),
            "/usr/local/bin/whisper".to_string(),
            "/usr/bin/whisper".to_string(),
        ];

        candidates
            .iter()
            .find(|candidate| std::path::Path::new(candidate.as_str()).exists())
            .cloned()
            .unwrap_or_else(|| "whisper".to_string())
    }
transcribe_audio function · rust · L82-L137 (56 LOC)
src/converters/audio.rs
    async fn transcribe_audio(&self, audio_path: &Path, language: Option<&str>) -> Result<String> {
        if !Self::check_whisper() {
            return Err(crate::TransmutationError::conversion_failed(
                "Whisper not found. Install: pip install openai-whisper (or pipx install openai-whisper)",
            ));
        }

        // Use Whisper CLI for transcription
        let whisper_cmd = Self::get_whisper_cmd();
        let mut cmd = Command::new(&whisper_cmd);
        cmd.arg(audio_path);
        cmd.arg("--model").arg("base"); // Use base model (fast, good quality)
        cmd.arg("--output_format").arg("txt");
        cmd.arg("--output_dir").arg("/tmp");

        if let Some(lang) = language {
            cmd.arg("--language").arg(lang);
        }

        eprintln!("📝 Running Whisper transcription...");
        let output = cmd.output().map_err(|e| {
            crate::TransmutationError::conversion_failed(&format!(
                "Whisper execution failed: {}",
audio_to_markdown function · rust · L140-L155 (16 LOC)
src/converters/audio.rs
    /// Transcribes `audio_path` and wraps the transcript in a small Markdown document.
    ///
    /// The document has a title, a `**Language**` line when `language` is given, and the
    /// transcript under a `## Transcript` heading, terminated by a newline.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `transcribe_audio`.
    async fn audio_to_markdown(&self, audio_path: &Path, language: Option<&str>) -> Result<String> {
        let transcript = self.transcribe_audio(audio_path, language).await?;

        let mut document = String::from("# Audio Transcription\n\n");
        if let Some(lang) = language {
            document += &format!("**Language**: {}\n\n", lang);
        }
        document += "## Transcript\n\n";
        document += &transcript;
        document.push('\n');

        Ok(document)
    }
supported_formats function · rust · L166-L174 (9 LOC)
src/converters/audio.rs
    /// Input formats this converter accepts: MP3, WAV, M4A, OGG and FLAC.
    fn supported_formats(&self) -> Vec<FileFormat> {
        Vec::from([
            FileFormat::Mp3,
            FileFormat::Wav,
            FileFormat::M4a,
            FileFormat::Ogg,
            FileFormat::Flac,
        ])
    }
output_formats function · rust · L175-L187 (13 LOC)
src/converters/audio.rs
    /// Output formats this converter advertises: Markdown (pages not split, LLM-optimized)
    /// followed by structured JSON with metadata.
    fn output_formats(&self) -> Vec<OutputFormat> {
        let mut formats = Vec::with_capacity(2);
        formats.push(OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        });
        formats.push(OutputFormat::Json {
            structured: true,
            include_metadata: true,
        });
        formats
    }
convert function · rust · L188-L263 (76 LOC)
src/converters/audio.rs
    async fn convert(
        &self,
        input: &Path,
        output_format: OutputFormat,
        _options: ConversionOptions,
    ) -> Result<ConversionResult> {
        eprintln!("🔄 Audio Transcription (Whisper)");
        eprintln!("   Audio → Whisper → {:?}", output_format);
        eprintln!();

        let language = None; // Auto-detect (can be made configurable)

        // Convert audio to text
        let markdown = self.audio_to_markdown(input, language).await?;

        // Convert to requested format
        let output_data = match output_format {
            OutputFormat::Markdown { .. } => {
                eprintln!("✅ Transcription complete!");
                markdown.into_bytes()
            }
            OutputFormat::Json { .. } => {
                eprintln!("📝 Converting to JSON...");
                let json = serde_json::json!({
                    "transcription": {
                        "text": markdown,
                        "language": language.un
metadata function · rust · L264-L272 (9 LOC)
src/converters/audio.rs
    /// Describes this converter: display name, crate version, a short description, and the
    /// external tools it lists as dependencies (`whisper` and `ffmpeg`).
    fn metadata(&self) -> ConverterMetadata {
        let external_deps = vec![String::from("whisper"), String::from("ffmpeg")];

        ConverterMetadata {
            name: String::from("Audio Transcription Converter"),
            version: String::from(env!("CARGO_PKG_VERSION")),
            description: String::from("Audio to text converter using Whisper ASR"),
            external_deps,
        }
    }
csv_to_markdown function · rust · L36-L75 (40 LOC)
src/converters/csv.rs
    fn csv_to_markdown(&self, csv: &str) -> String {
        let lines: Vec<&str> = csv.lines().filter(|l| !l.trim().is_empty()).collect();

        if lines.is_empty() {
            return "# Empty File\n".to_string();
        }

        let mut markdown = String::new();
        markdown.push_str("# Data Table\n\n");

        for (idx, line) in lines.iter().enumerate() {
            let cells: Vec<&str> = line.split(self.delimiter).collect();

            // Header row
            if idx == 0 {
                markdown.push('|');
                for cell in &cells {
                    markdown.push_str(&format!(" {} |", cell.trim()));
                }
                markdown.push('\n');

                // Separator
                markdown.push('|');
                for _ in &cells {
                    markdown.push_str("---|");
                }
                markdown.push('\n');
            } else {
                // Data rows
                markdown.push('|');
            
Repobility (the analyzer behind this table) · https://repobility.com
csv_to_json function · rust · L78-L116 (39 LOC)
src/converters/csv.rs
    fn csv_to_json(&self, csv: &str) -> Result<String> {
        let lines: Vec<&str> = csv.lines().filter(|l| !l.trim().is_empty()).collect();

        if lines.is_empty() {
            return Ok(serde_json::json!({"data": []}).to_string());
        }

        // First line is headers
        let headers: Vec<String> = lines[0]
            .split(self.delimiter)
            .map(|h| h.trim().to_string())
            .collect();

        // Remaining lines are data
        let mut rows = Vec::new();
        for line in &lines[1..] {
            let cells: Vec<&str> = line.split(self.delimiter).collect();
            let mut row = serde_json::Map::new();

            for (idx, cell) in cells.iter().enumerate() {
                if idx < headers.len() {
                    row.insert(
                        headers[idx].clone(),
                        serde_json::Value::String(cell.trim().to_string()),
                    );
                }
            }
            rows.push(row);
 
output_formats function · rust · L130-L142 (13 LOC)
src/converters/csv.rs
    /// Output formats this converter advertises: Markdown (pages not split, LLM-optimized)
    /// followed by structured JSON with metadata.
    fn output_formats(&self) -> Vec<OutputFormat> {
        let as_markdown = OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        };
        let as_json = OutputFormat::Json {
            structured: true,
            include_metadata: true,
        };

        vec![as_markdown, as_json]
    }
convert function · rust · L143-L218 (76 LOC)
src/converters/csv.rs
    async fn convert(
        &self,
        input: &Path,
        output_format: OutputFormat,
        _options: ConversionOptions,
    ) -> Result<ConversionResult> {
        eprintln!("🔄 CSV/TSV Conversion (Pure Rust)");
        eprintln!("   CSV → Parsing → {:?}", output_format);
        eprintln!();

        // Read CSV file
        let csv_content = fs::read_to_string(input).await?;

        // Convert to requested format
        let output_data = match output_format {
            OutputFormat::Markdown { .. } => {
                eprintln!("📝 Converting to Markdown table...");
                let markdown = self.csv_to_markdown(&csv_content);
                markdown.into_bytes()
            }
            OutputFormat::Json { .. } => {
                eprintln!("📝 Converting to JSON...");
                let json = self.csv_to_json(&csv_content)?;
                json.into_bytes()
            }
            _ => {
                return Err(crate::TransmutationError::Unsupported
metadata function · rust · L219-L227 (9 LOC)
src/converters/csv.rs
    /// Describes this converter: display name, crate version, a short description, and an
    /// empty list of external dependencies.
    fn metadata(&self) -> ConverterMetadata {
        let name = String::from("CSV/TSV Converter");
        let version = String::from(env!("CARGO_PKG_VERSION"));
        let description = String::from("CSV/TSV to Markdown tables and JSON (pure Rust)");

        ConverterMetadata {
            name,
            version,
            description,
            external_deps: Vec::new(),
        }
    }
test_csv_converter_creation function · rust · L235-L240 (6 LOC)
src/converters/csv.rs
    /// A freshly created converter lists both CSV and TSV among its supported formats.
    fn test_csv_converter_creation() {
        let supported = CsvConverter::new().supported_formats();

        for expected in &[FileFormat::Csv, FileFormat::Tsv] {
            assert!(supported.contains(expected));
        }
    }
test_csv_to_markdown_basic function · rust · L243-L249 (7 LOC)
src/converters/csv.rs
    /// Header and data cells of a small CSV both show up in the Markdown output.
    fn test_csv_to_markdown_basic() {
        let rendered = CsvConverter::new().csv_to_markdown("Name,Age\nAlice,30\nBob,25");

        for needle in &["Name", "Alice"] {
            assert!(rendered.contains(*needle));
        }
    }
convert_to_images function · rust · L32-L39 (8 LOC)
src/converters/docx.rs
    async fn convert_to_images(
        &self,
        path: &Path,
        format: crate::types::ImageFormat,
        _quality: u8,
        dpi: u32,
        _options: &ConversionOptions,
    ) -> Result<Vec<ConversionOutput>> {
convert_to_markdown function · rust · L196-L286 (91 LOC)
src/converters/docx.rs
    async fn convert_to_markdown(
        &self,
        path: &Path,
        options: &ConversionOptions,
    ) -> Result<Vec<ConversionOutput>> {
        eprintln!("📄 Reading DOCX file with docx-rs...");

        // Read DOCX file
        let file_data = tokio::fs::read(path).await?;

        // Parse DOCX using docx-rs
        let docx = docx_rs::read_docx(&file_data).map_err(|e| {
            crate::TransmutationError::engine_error(
                "docx-rs",
                format!("Failed to parse DOCX: {:?}", e),
            )
        })?;

        eprintln!("✓ DOCX parsed successfully");

        // Extract all paragraphs first
        let mut all_paragraphs = Vec::new();

        for child in &docx.document.children {
            let text = self.extract_text_from_child(child);
            if !text.is_empty() {
                all_paragraphs.push(text);
            }
        }

        // If split_pages enabled, divide into chunks (10-15 paragraphs per "page")
        if option
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
extract_text_from_child function · rust · L290-L298 (9 LOC)
src/converters/docx.rs
    /// Extracts the text of one top-level document element.
    ///
    /// Paragraphs and tables are handed to their dedicated extractors; every other kind of
    /// element yields an empty string.
    fn extract_text_from_child(&self, child: &docx_rs::DocumentChild) -> String {
        use docx_rs::DocumentChild;

        if let DocumentChild::Paragraph(para) = child {
            return self.extract_paragraph_text(para);
        }
        if let DocumentChild::Table(table) = child {
            return self.extract_table_text(table);
        }
        String::new()
    }
extract_paragraph_text function · rust · L302-L318 (17 LOC)
src/converters/docx.rs
    /// Concatenates the text of every run in `para` and trims the result.
    ///
    /// Only `Run` children of the paragraph are visited, and within a run only `Text`
    /// children contribute; everything else is skipped.
    fn extract_paragraph_text(&self, para: &docx_rs::Paragraph) -> String {
        use docx_rs::ParagraphChild;

        let runs = para.children.iter().filter_map(|child| match child {
            ParagraphChild::Run(run) => Some(run),
            _ => None,
        });

        let mut collected = String::new();
        for run in runs {
            for piece in &run.children {
                if let docx_rs::RunChild::Text(fragment) = piece {
                    collected.push_str(&fragment.text);
                }
            }
        }

        collected.trim().to_string()
    }
output_formats function · rust · L340-L346 (7 LOC)
src/converters/docx.rs
    /// Output formats this converter advertises: Markdown only (pages not split,
    /// LLM-optimized).
    fn output_formats(&self) -> Vec<OutputFormat> {
        let markdown = OutputFormat::Markdown {
            split_pages: false,
            optimize_for_llm: true,
        };

        vec![markdown]
    }
page 1 / 9next ›