Function bodies 424 total
test_character_normalization function · rust · L414-L419 (6 LOC)src/document/text_utils.rs
fn test_character_normalization() {
let sanitizer = TextSanitizer::new();
let text = "Price: $100⁄month — \u{201C}special\u{201D} offer";
let result = sanitizer.sanitize(text);
assert_eq!(result, "Price: $100/month - \"special\" offer");
}test_whitespace_normalization function · rust · L422-L427 (6 LOC)src/document/text_utils.rs
fn test_whitespace_normalization() {
let sanitizer = TextSanitizer::new();
let text = "Too many spaces";
let result = sanitizer.sanitize(text);
assert_eq!(result, "Too many spaces");
}test_is_likely_heading function · rust · L430-L438 (9 LOC)src/document/text_utils.rs
fn test_is_likely_heading() {
assert!(is_likely_heading("1. Introduction"));
assert!(is_likely_heading("CHAPTER 1"));
assert!(is_likely_heading("1.2.3 Methods"));
assert!(!is_likely_heading("This is a regular sentence."));
assert!(!is_likely_heading(
"This is a very long text that goes on and on and definitely should not be considered a heading because it's way too long."
));
}test_extract_section_number function · rust · L441-L451 (11 LOC)src/document/text_utils.rs
fn test_extract_section_number() {
assert_eq!(
extract_section_number("1.2.3 Methods"),
Some("1.2.3".to_string())
);
assert_eq!(
extract_section_number("1. Introduction"),
Some("1".to_string())
);
assert_eq!(extract_section_number("No number here"), None);
}test_calculate_section_level function · rust · L454-L459 (6 LOC)src/document/text_utils.rs
fn test_calculate_section_level() {
assert_eq!(calculate_section_level("1"), 1);
assert_eq!(calculate_section_level("1.2"), 2);
assert_eq!(calculate_section_level("1.2.3"), 3);
assert_eq!(calculate_section_level("1.2.3.4"), 4);
}test_ligature_normalization function · rust · L469-L474 (6 LOC)src/document/text_utils.rs
fn test_ligature_normalization() {
let sanitizer = TextSanitizer::new();
let text = "file with ligatures: ff, fi, fl";
let result = sanitizer.sanitize(text);
assert_eq!(result, "file with ligatures: ff, fi, fl");
}intersection_over_union function · rust · L49-L67 (19 LOC)src/document/types_extended.rs
pub fn intersection_over_union(&self, other: &BoundingBox) -> f64 {
let inter_l = self.l.max(other.l);
let inter_t = self.t.max(other.t);
let inter_r = self.r.min(other.r);
let inter_b = self.b.min(other.b);
if inter_r <= inter_l || inter_b <= inter_t {
return 0.0;
}
let inter_area = (inter_r - inter_l) * (inter_b - inter_t);
let union_area = self.area() + other.area() - inter_area;
if union_area > 0.0 {
inter_area / union_area
} else {
0.0
}
}Repobility — same analyzer, your code, free for public repos · /scan/
intersection_over_self function · rust · L70-L88 (19 LOC)src/document/types_extended.rs
pub fn intersection_over_self(&self, other: &BoundingBox) -> f64 {
let inter_l = self.l.max(other.l);
let inter_t = self.t.max(other.t);
let inter_r = self.r.min(other.r);
let inter_b = self.b.min(other.b);
if inter_r <= inter_l || inter_b <= inter_t {
return 0.0;
}
let inter_area = (inter_r - inter_l) * (inter_b - inter_t);
let self_area = self.area();
if self_area > 0.0 {
inter_area / self_area
} else {
0.0
}
}test_bbox_intersection function · rust · L183-L189 (7 LOC)src/document/types_extended.rs
fn test_bbox_intersection() {
let bbox1 = BoundingBox::new(0.0, 0.0, 10.0, 10.0, CoordOrigin::TopLeft);
let bbox2 = BoundingBox::new(5.0, 5.0, 15.0, 15.0, CoordOrigin::TopLeft);
let iou = bbox1.intersection_over_union(&bbox2);
assert!(iou > 0.0 && iou < 1.0);
}new function · rust · L18-L24 (7 LOC)src/document/types.rs
pub fn new(name: String) -> Self {
Self {
name,
items: Vec::new(),
items_by_ref: HashMap::new(),
}
}add_item function · rust · L25-L31 (7 LOC)src/document/types.rs
pub fn add_item(&mut self, item: DocItem) -> String {
let item_ref = format!("item_{}", self.items.len());
self.items_by_ref.insert(item_ref.clone(), self.items.len());
self.items.push(item);
item_ref
}test_add_item function · rust · L146-L156 (11 LOC)src/document/types.rs
fn test_add_item() {
let mut doc = DoclingDocument::new("test".to_string());
let item = DocItem::Paragraph(TextItem {
text: "Test".to_string(),
formatting: None,
label: DocItemLabel::Paragraph,
});
let item_ref = doc.add_item(item);
assert_eq!(doc.items.len(), 1);
assert_eq!(item_ref, "item_0");
}test_formatting_default function · rust · L159-L164 (6 LOC)src/document/types.rs
fn test_formatting_default() {
let formatting = Formatting::default();
assert!(!formatting.bold);
assert!(!formatting.italic);
assert!(!formatting.underline);
}test_table_cell_creation function · rust · L167-L175 (9 LOC)src/document/types.rs
fn test_table_cell_creation() {
let cell = TableCell {
text: "Cell".to_string(),
row_span: 1,
col_span: 1,
};
assert_eq!(cell.text, "Cell");
assert_eq!(cell.row_span, 1);
}test_section_header_item function · rust · L178-L186 (9 LOC)src/document/types.rs
fn test_section_header_item() {
let header = SectionHeaderItem {
text: "Section 1".to_string(),
level: 1,
formatting: None,
};
assert_eq!(header.text, "Section 1");
assert_eq!(header.level, 1);
}Repobility (the analyzer behind this table) · https://repobility.com
test_list_item_data function · rust · L189-L198 (10 LOC)src/document/types.rs
fn test_list_item_data() {
let list_item = ListItemData {
text: "Item 1".to_string(),
marker: "-".to_string(),
enumerated: false,
level: 0,
};
assert_eq!(list_item.text, "Item 1");
assert!(!list_item.enumerated);
}test_table_data_creation function · rust · L201-L209 (9 LOC)src/document/types.rs
fn test_table_data_creation() {
let table = TableData {
num_rows: 2,
num_cols: 3,
grid: vec![],
};
assert_eq!(table.num_rows, 2);
assert_eq!(table.num_cols, 3);
}parse_docling_json_to_markdown function · rust · L9-L88 (80 LOC)src/engines/docling_json_parser.rs
pub fn parse_docling_json_to_markdown(json_str: &str) -> Result<String> {
eprintln!(
"[JSON Parser] Starting parse, input size: {} bytes",
json_str.len()
);
let json: Value = serde_json::from_str(json_str).map_err(|e| {
eprintln!("[JSON Parser] ERROR parsing JSON: {e}");
e
})?;
eprintln!("[JSON Parser] JSON parsed successfully");
let mut markdown = String::new();
// Extract table of contents for heading detection
let toc = extract_table_of_contents(&json);
let heading_titles: HashMap<String, usize> = toc
.iter()
.map(|(title, level)| (title.to_lowercase(), *level))
.collect();
// Process each page
if let Some(pages) = json["pages"].as_array() {
for page in pages {
if let Some(page_num) = page["page_number"].as_u64() {
eprintln!("[JSON Parser] Processing page {page_num}");
}
// Extract text from lines
if let Somextract_table_of_contents function · rust · L91-L101 (11 LOC)src/engines/docling_json_parser.rs
fn extract_table_of_contents(json: &Value) -> Vec<(String, usize)> {
let mut toc = Vec::new();
if let Some(annotations) = json["annotations"].as_object() {
if let Some(toc_array) = annotations["table_of_contents"].as_array() {
extract_toc_recursive(toc_array, &mut toc);
}
}
toc
}extract_line_text function · rust · L118-L154 (37 LOC)src/engines/docling_json_parser.rs
fn extract_line_text(line: &Value, original: &Value) -> Option<String> {
// Lines contain indices that point to cells
if let Some(indices) = line["i"].as_array() {
if indices.is_empty() {
return None;
}
// Get cell data
if let Some(cells_obj) = original["cells"].as_object() {
if let Some(cell_data) = cells_obj["data"].as_array() {
let mut text = String::new();
// Extract characters from the cell range
if let (Some(start), Some(end)) =
(indices[0].as_u64(), indices.get(1).and_then(|v| v.as_u64()))
{
for idx in start as usize..end as usize {
if let Some(cell) = cell_data.get(idx) {
if let Some(cell_array) = cell.as_array() {
// Character is at index 12 in the cell array
if let Some(ch) = cell_array.get(12).should_join_line function · rust · L157-L181 (25 LOC)src/engines/docling_json_parser.rs
/// Decides whether `current` should be appended to `prev` as a continuation
/// of the same sentence.
///
/// The decision is driven by the last character of `prev`:
/// - empty `prev`: never join;
/// - `.`, `!` or `?`: sentence end, do not join;
/// - `,`, `;` or `-`: mid-sentence or word break, join;
/// - a lowercase letter: join;
/// - anything else: join only if `current` starts with a lowercase letter.
fn should_join_line(prev: &str, current: &str) -> bool {
    match prev.chars().next_back() {
        None => false,
        Some('.' | '!' | '?') => false,
        Some(',' | ';' | '-') => true,
        Some(tail) if tail.is_lowercase() => true,
        Some(_) => current.chars().next().map_or(false, char::is_lowercase),
    }
}
// test_should_join_line function · rust · L188-L196 (9 LOC)src/engines/docling_json_parser.rs
fn test_should_join_line() {
assert!(should_join_line(
"This is a sentence that continues",
"on the next line."
));
assert!(should_join_line("First part,", "second part."));
assert!(!should_join_line("First sentence.", "Second sentence."));
assert!(!should_join_line("Question?", "Next question?"));
}test_extract_toc function · rust · L199-L219 (21 LOC)src/engines/docling_json_parser.rs
fn test_extract_toc() {
let json: Value = serde_json::from_str(
r#"{
"annotations": {
"table_of_contents": [
{"title": "Introduction", "level": 0},
{"title": "Methods", "level": 0, "children": [
{"title": "Experiment 1", "level": 1}
]}
]
}
}"#,
)
.unwrap();
let toc = extract_table_of_contents(&json);
assert_eq!(toc.len(), 3);
assert_eq!(toc[0], ("Introduction".to_string(), 0));
assert_eq!(toc[1], ("Methods".to_string(), 0));
assert_eq!(toc[2], ("Experiment 1".to_string(), 1));
}If a scraper extracted this row, it came from Repobility (https://repobility.com)
open function · rust · L87-L122 (36 LOC)src/engines/docling_parse_ffi.rs
pub fn open(path: &Path) -> Result<Self> {
#[cfg(feature = "docling-ffi")]
{
let path_str = path
.to_str()
.ok_or_else(|| TransmutationError::conversion_failed("Invalid file path"))?;
let c_path = CString::new(path_str).map_err(|e| {
TransmutationError::engine_error("docling-parse", format!("Invalid path: {e}"))
})?;
let mut handle: DoclingDocumentHandle = ptr::null_mut();
unsafe {
let result = docling_open_pdf(c_path.as_ptr(), &mut handle);
if result != DoclingError::Ok {
let err_msg = CStr::from_ptr(docling_get_last_error())
.to_string_lossy()
.to_string();
return Err(TransmutationError::engine_error("docling-parse", err_msg));
}
}
Ok(Self { handle })
}
#[cfg(not(feature = "doclipage_count function · rust · L125-L150 (26 LOC)src/engines/docling_parse_ffi.rs
pub fn page_count(&self) -> Result<usize> {
#[cfg(feature = "docling-ffi")]
{
let mut count: c_int = 0;
unsafe {
let result = docling_get_page_count(self.handle, &mut count);
if result != DoclingError::Ok {
let err_msg = CStr::from_ptr(docling_get_last_error())
.to_string_lossy()
.to_string();
return Err(TransmutationError::engine_error("docling-parse", err_msg));
}
}
Ok(count as usize)
}
#[cfg(not(feature = "docling-ffi"))]
{
Err(TransmutationError::engine_error(
"docling-parse",
"Feature not enabled",
))
}
}export_markdown function · rust · L153-L182 (30 LOC)src/engines/docling_parse_ffi.rs
pub fn export_markdown(&self) -> Result<String> {
#[cfg(feature = "docling-ffi")]
{
let mut markdown_ptr: *mut c_char = ptr::null_mut();
unsafe {
let result = docling_export_markdown(self.handle, &mut markdown_ptr);
if result != DoclingError::Ok {
let err_msg = CStr::from_ptr(docling_get_last_error())
.to_string_lossy()
.to_string();
return Err(TransmutationError::engine_error("docling-parse", err_msg));
}
let markdown = CStr::from_ptr(markdown_ptr).to_string_lossy().to_string();
docling_free_string(markdown_ptr);
Ok(markdown)
}
}
#[cfg(not(feature = "docling-ffi"))]
{
Err(TransmutationError::engine_error(
"docling-parse",
"Feature not enabled",
))
}
}drop function · rust · L186-L191 (6 LOC)src/engines/docling_parse_ffi.rs
fn drop(&mut self) {
#[cfg(feature = "docling-ffi")]
unsafe {
docling_close_pdf(self.handle);
}
}get_layout_clusters function · rust · L14-L148 (135 LOC)src/engines/docling_python_bridge.rs
pub fn get_layout_clusters(pdf_path: &Path) -> Result<Vec<Cluster>> {
eprintln!("🐍 Calling Python docling for layout analysis...");
// Create a temporary Python script
let script = r#"
import sys
import json
from pathlib import Path
from docling.document_converter import DocumentConverter
try:
# Convert PDF
converter = DocumentConverter()
pdf_path = Path(sys.argv[1])
result = converter.convert(pdf_path)
# Extract layout information
clusters = []
cluster_id = 0
# Iterate through document items
for item in result.document.items:
# Get bounding box
if hasattr(item, 'prov') and hasattr(item.prov, 'bbox'):
bbox = item.prov.bbox
cluster = {{
"id": cluster_id,
"label": str(item.label) if hasattr(item, 'label') else "text",
"bbox": {{
"l": bbox.l,
"t": bbox.t,
"r": bbox.parse_label function · rust · L149-L164 (16 LOC)src/engines/docling_python_bridge.rs
fn parse_label(label_str: &str) -> DocItemLabel {
match label_str.to_lowercase().as_str() {
"title" => DocItemLabel::Title,
"section_header" | "sectionheader" => DocItemLabel::SectionHeader,
"paragraph" => DocItemLabel::Paragraph,
"list_item" | "listitem" => DocItemLabel::ListItem,
"table" => DocItemLabel::Table,
"picture" | "figure" => DocItemLabel::Picture,
"code" => DocItemLabel::Code,
"formula" => DocItemLabel::Formula,
"caption" => DocItemLabel::Caption,
"footnote" => DocItemLabel::Footnote,
_ => DocItemLabel::Text,
}
}new function · rust · L23-L29 (7 LOC)src/engines/layout_analyzer.rs
pub fn new() -> Self {
Self {
heading_font_threshold: 12.0,
paragraph_y_gap: 10.0,
base_font_size: 10.0,
}
}analyze function · rust · L32-L65 (34 LOC)src/engines/layout_analyzer.rs
pub fn analyze(&self, blocks: &[TextBlock]) -> Vec<AnalyzedBlock> {
if blocks.is_empty() {
return Vec::new();
}
let mut analyzed = Vec::new();
let mut i = 0;
while i < blocks.len() {
let block = &blocks[i];
let content = block.text.trim();
if content.is_empty() {
i += 1;
continue;
}
// Detect block type
let block_type = self.detect_block_type(block, blocks, i);
analyzed.push(AnalyzedBlock {
block_type: block_type.clone(),
content: content.to_string(),
level: self.get_heading_level(&block_type, block.font_size),
font_size: block.font_size,
y_position: block.y,
});
i += 1;
}
// Post-process to merge multi-line elements
self.merge_multiline_elements(analyzed)
}Repobility · severity-and-effort ranking · https://repobility.com
detect_block_type function · rust · L68-L113 (46 LOC)src/engines/layout_analyzer.rs
fn detect_block_type(
&self,
block: &TextBlock,
_all_blocks: &[TextBlock],
_index: usize,
) -> BlockType {
let content = block.text.trim();
// Check for formulas (high math symbol density)
if self.is_formula(content) {
return BlockType::Formula;
}
// Check for image captions
if self.is_image_caption(content) {
return BlockType::Image;
}
// Check for table content
if content.contains('|') || content.contains('\t') {
return BlockType::Table;
}
// Check for list items
if self.is_list_item(content) {
return BlockType::ListItem;
}
// Check for headings based on font size
if block.font_size > self.heading_font_threshold {
return self.classify_heading(content, block.font_size);
}
// Check for numbered sections like "1 Introduction"
if self.is_sectclassify_heading function · rust · L116-L129 (14 LOC)src/engines/layout_analyzer.rs
fn classify_heading(&self, content: &str, font_size: f32) -> BlockType {
// Title if very large font or specific keywords
if font_size >= 18.0 || content.contains("Attention Is All You Need") {
return BlockType::Title;
}
// Major section if large font
if font_size >= 14.0 {
return BlockType::Heading(1); // Changed from 2 to 1
}
// Subsection
BlockType::Heading(2) // Changed from 3 to 2
}is_section_heading function · rust · L132-L173 (42 LOC)src/engines/layout_analyzer.rs
fn is_section_heading(&self, content: &str) -> bool {
let trimmed = content.trim();
// Keywords that indicate sections (exact match at start)
let section_keywords = [
"Abstract",
"Introduction",
"Background",
"Conclusion",
"Acknowledgements",
"References",
"Appendix",
"Attention Visualizations",
];
if section_keywords
.iter()
.any(|&kw| trimmed == kw || trimmed.starts_with(&format!("{} ", kw)))
{
return true;
}
// Pattern: single digit + space + capitalized word (like "1 Introduction")
if let Some(first_char) = trimmed.chars().next() {
if first_char.is_numeric() {
let parts: Vec<&str> = trimmed.splitn(2, ' ').collect();
if parts.len() == 2 {
let number_part = parts[0];
let text_part = parts[1];
is_subsection_heading function · rust · L176-L195 (20 LOC)src/engines/layout_analyzer.rs
fn is_subsection_heading(&self, content: &str) -> bool {
let trimmed = content.trim();
// Pattern like "3.1", "3.2.1", etc.
if trimmed.len() < 150 {
let first_part = trimmed.split_whitespace().next().unwrap_or("");
let dot_count = first_part.matches('.').count();
let digit_count = first_part.chars().filter(|c| c.is_numeric()).count();
// Must have pattern like "3.1" or "3.2.1" or "3.2.2"
if dot_count >= 1 && digit_count >= 2 && first_part.len() < 10 {
// Verify there's text after the number
if trimmed.len() > first_part.len() + 1 {
return true;
}
}
}
false
}is_list_item function · rust · L198-L227 (30 LOC)src/engines/layout_analyzer.rs
fn is_list_item(&self, content: &str) -> bool {
let trimmed = content.trim();
// Bullet points
if trimmed.starts_with('•') || trimmed.starts_with('▪') || trimmed.starts_with('◦') {
return true;
}
// Dash bullets
if trimmed.starts_with("- ") || trimmed.starts_with("– ") || trimmed.starts_with("— ") {
return true;
}
// Numbered lists (1., a., i., etc.)
if let Some(first_word) = trimmed.split_whitespace().next() {
if first_word.ends_with('.') || first_word.ends_with(')') {
let without_punct = first_word.trim_end_matches(&['.', ')'][..]);
// Roman numerals, letters, or numbers
if without_punct
.chars()
.all(|c| c.is_numeric() || c.is_alphabetic())
&& without_punct.len() <= 3
{
return true;
}
}
}
is_formula function · rust · L230-L264 (35 LOC)src/engines/layout_analyzer.rs
fn is_formula(&self, content: &str) -> bool {
// Math Unicode characters
let math_chars = [
'∑', '∫', '√', '∈', '∉', '⊂', '⊃', '≤', '≥', '≠', '≈', '∞', '∂', '∇', '×', '÷', '±',
'α', 'β', 'γ', 'δ', 'θ', 'λ', 'μ', 'π', 'σ', 'ω',
];
let math_count = content.chars().filter(|c| math_chars.contains(c)).count();
let total_chars = content.chars().count();
// High density of math symbols (>10%)
if total_chars > 0 && (math_count as f32 / total_chars as f32) > 0.1 {
return true;
}
// Common LaTeX-like patterns that weren't converted
if content.contains("\\frac") || content.contains("\\sum") || content.contains("\\int") {
return true;
}
// Equations with equals and operators
if content.contains('=')
&& (content.matches('+').count()
+ content.matches('*').count()
+ content.matches('/').count())
is_image_caption function · rust · L267-L273 (7 LOC)src/engines/layout_analyzer.rs
fn is_image_caption(&self, content: &str) -> bool {
let lower = content.to_lowercase();
lower.starts_with("figure")
|| lower.starts_with("fig.")
|| lower.starts_with("image")
|| lower.starts_with("diagram")
}get_heading_level function · rust · L276-L293 (18 LOC)src/engines/layout_analyzer.rs
fn get_heading_level(&self, block_type: &BlockType, font_size: f32) -> Option<usize> {
match block_type {
BlockType::Title => Some(1),
BlockType::Heading(level) => Some(*level),
_ => {
// Derive from font size
if font_size >= 18.0 {
Some(2)
} else if font_size >= 14.0 {
Some(3)
} else if font_size >= 12.0 {
Some(4)
} else {
None
}
}
}
}Repobility — same analyzer, your code, free for public repos · /scan/
merge_multiline_elements function · rust · L296-L338 (43 LOC)src/engines/layout_analyzer.rs
fn merge_multiline_elements(&self, blocks: Vec<AnalyzedBlock>) -> Vec<AnalyzedBlock> {
if blocks.is_empty() {
return blocks;
}
let mut merged = Vec::new();
let mut current: Option<AnalyzedBlock> = None;
for block in blocks {
match (¤t, &block.block_type) {
// Merge consecutive paragraphs if Y gap is small
(Some(curr), BlockType::Paragraph)
if matches!(curr.block_type, BlockType::Paragraph) =>
{
if let Some(ref mut c) = current {
// Check if should merge (close Y positions)
let y_diff = (c.y_position - block.y_position).abs();
if y_diff < self.paragraph_y_gap * 2.0 {
c.content.push(' ');
c.content.push_str(&block.content);
} else {
merged.push(curretest_section_heading_detection function · rust · L388-L394 (7 LOC)src/engines/layout_analyzer.rs
fn test_section_heading_detection() {
let analyzer = LayoutAnalyzer::new();
assert!(analyzer.is_section_heading("1 Introduction"));
assert!(analyzer.is_section_heading("2 Background"));
assert!(analyzer.is_section_heading("Abstract"));
assert!(!analyzer.is_section_heading("This is regular text"));
}test_subsection_detection function · rust · L397-L402 (6 LOC)src/engines/layout_analyzer.rs
fn test_subsection_detection() {
let analyzer = LayoutAnalyzer::new();
assert!(analyzer.is_subsection_heading("3.1 Encoder and Decoder Stacks"));
assert!(analyzer.is_subsection_heading("3.2.1 Scaled Dot-Product"));
assert!(!analyzer.is_subsection_heading("This is not a subsection"));
}test_list_detection function · rust · L405-L411 (7 LOC)src/engines/layout_analyzer.rs
fn test_list_detection() {
let analyzer = LayoutAnalyzer::new();
assert!(analyzer.is_list_item("• First item"));
assert!(analyzer.is_list_item("- Second item"));
assert!(analyzer.is_list_item("1. Numbered item"));
assert!(!analyzer.is_list_item("Regular text"));
}test_formula_detection function · rust · L414-L419 (6 LOC)src/engines/layout_analyzer.rs
fn test_formula_detection() {
let analyzer = LayoutAnalyzer::new();
assert!(analyzer.is_formula("x = ∑ᵢ yᵢ + √z"));
assert!(analyzer.is_formula("α + β = γ"));
assert!(!analyzer.is_formula("This is regular text"));
}test_image_caption_detection function · rust · L422-L427 (6 LOC)src/engines/layout_analyzer.rs
fn test_image_caption_detection() {
let analyzer = LayoutAnalyzer::new();
assert!(analyzer.is_image_caption("Figure 1: The Transformer"));
assert!(analyzer.is_image_caption("Fig. 2: Attention mechanism"));
assert!(!analyzer.is_image_caption("Regular text"));
}new function · rust · L27-L37 (11 LOC)src/engines/layout_postprocessor.rs
fn new(elements: &[usize]) -> Self {
let mut parent = HashMap::new();
let mut rank = HashMap::new();
for &elem in elements {
parent.insert(elem, elem);
rank.insert(elem, 0);
}
Self { parent, rank }
}find function · rust · L38-L45 (8 LOC)src/engines/layout_postprocessor.rs
fn find(&mut self, x: usize) -> usize {
if self.parent[&x] != x {
let root = self.find(self.parent[&x]);
self.parent.insert(x, root); // Path compression
}
self.parent[&x]
}Repobility (the analyzer behind this table) · https://repobility.com
union function · rust · L46-L66 (21 LOC)src/engines/layout_postprocessor.rs
fn union(&mut self, x: usize, y: usize) {
let root_x = self.find(x);
let root_y = self.find(y);
if root_x == root_y {
return;
}
let rank_x = self.rank[&root_x];
let rank_y = self.rank[&root_y];
if rank_x > rank_y {
self.parent.insert(root_y, root_x);
} else if rank_x < rank_y {
self.parent.insert(root_x, root_y);
} else {
self.parent.insert(root_y, root_x);
self.rank.insert(root_x, rank_x + 1);
}
}get_groups function · rust · L67-L80 (14 LOC)src/engines/layout_postprocessor.rs
fn get_groups(&mut self) -> HashMap<usize, Vec<usize>> {
let mut groups: HashMap<usize, Vec<usize>> = HashMap::new();
// Clone keys to avoid borrowing issue
let keys: Vec<usize> = self.parent.keys().copied().collect();
for elem in keys {
let root = self.find(elem);
groups.entry(root).or_default().push(elem);
}
groups
}new function · rust · L103-L115 (13 LOC)src/engines/layout_postprocessor.rs
/// Builds the index by bulk-loading one `ClusterRect` (id and bounding box)
/// per cluster into an `RTree`.
fn new(clusters: &[Cluster]) -> Self {
    let mut rects: Vec<ClusterRect> = Vec::with_capacity(clusters.len());
    for cluster in clusters {
        rects.push(ClusterRect {
            id: cluster.id,
            bbox: cluster.bbox,
        });
    }

    Self {
        rtree: RTree::bulk_load(rects),
    }
}