Function bodies 1,000 total
JavaParser._get_package method · python · L186-L195 (10 LOC)kglite/code_tree/parsers/java.py
def _get_package(self, root, source: bytes) -> str:
"""Extract package name from package_declaration."""
for child in root.children:
if child.type == "package_declaration":
for sub in child.children:
if sub.type == "scoped_identifier":
return node_text(sub, source)
elif sub.type == "identifier":
return node_text(sub, source)
return ""JavaParser._file_to_module_path method · python · L197-L204 (8 LOC)kglite/code_tree/parsers/java.py
def _file_to_module_path(self, filepath: Path, src_root: Path,
package: str) -> str:
if package:
return package
# Fallback: use directory structure
rel = filepath.relative_to(src_root)
parts = list(rel.parent.parts) if rel.parent != Path(".") else []
return ".".join(parts) if parts else filepath.stemJavaParser._get_superclass method · python · L206-L218 (13 LOC)kglite/code_tree/parsers/java.py
def _get_superclass(self, node, source: bytes) -> str | None:
"""Extract extends clause from class."""
for child in node.children:
if child.type == "superclass":
for sub in child.children:
if sub.type == "type_identifier":
return node_text(sub, source)
elif sub.type == "generic_type":
for inner in sub.children:
if inner.type == "type_identifier":
return node_text(inner, source)
break
return NoneJavaParser._get_interfaces method · python · L220-L236 (17 LOC)kglite/code_tree/parsers/java.py
def _get_interfaces(self, node, source: bytes) -> list[str]:
"""Extract implements/extends interfaces from class/interface."""
interfaces: list[str] = []
for child in node.children:
if child.type in ("super_interfaces", "extends_interfaces"):
for sub in child.children:
if sub.type == "type_list":
for t in sub.children:
if t.type == "type_identifier":
interfaces.append(node_text(t, source))
elif t.type == "generic_type":
for inner in t.children:
if inner.type == "type_identifier":
interfaces.append(
node_text(inner, source))
break
return interfacesJavaParser._get_enum_constants method · python · L238-L248 (11 LOC)kglite/code_tree/parsers/java.py
def _get_enum_constants(self, node, source: bytes) -> list[str]:
"""Extract enum constant names."""
constants: list[str] = []
for child in node.children:
if child.type == "enum_body":
for sub in child.children:
if sub.type == "enum_constant":
name = self._get_name(sub, source)
if name:
constants.append(name)
return constantsJavaParser._parse_method method · python · L252-L296 (45 LOC)kglite/code_tree/parsers/java.py
def _parse_method(self, node, source: bytes, module_path: str,
rel_path: str, owner: str | None = None) -> FunctionInfo:
if node.type == "constructor_declaration":
name = self._get_name(node, source) or "unknown"
else:
name = self._get_name(node, source) or "unknown"
if owner:
prefix = f"{module_path}.{owner}"
else:
prefix = module_path
qualified_name = f"{prefix}.{name}"
body = None
for child in node.children:
if child.type in ("block", "constructor_body"):
body = child
break
metadata: dict = {}
if self._has_modifier(node, source, "static"):
metadata["is_static"] = True
if self._has_modifier(node, source, "abstract"):
metadata["is_abstract"] = True
if self._has_modifier(node, source, "native"):
metadata["is_ffi"] = True
metadata["ffJavaParser._parse_class method · python · L298-L352 (55 LOC)kglite/code_tree/parsers/java.py
def _parse_class(self, node, source: bytes, module_path: str,
rel_path: str, result: ParseResult,
outer_name: str | None = None):
"""Parse a class_declaration node (handles nested classes)."""
name = self._get_name(node, source) or "unknown"
if outer_name:
qualified_name = f"{module_path}.{outer_name}.{name}"
else:
qualified_name = f"{module_path}.{name}"
superclass = self._get_superclass(node, source)
interfaces = self._get_interfaces(node, source)
annotations = self._get_annotations(node, source)
docstring = self._get_doc_comment(node, source)
bases = [superclass] if superclass else []
metadata: dict = {}
if annotations:
metadata["decorators"] = annotations
if self._has_modifier(node, source, "abstract"):
metadata["is_abstract"] = True
result.classes.append(ClassInfo(
name=nProvenance: Repobility (https://repobility.com) — every score reproducible from /scan/
JavaParser._parse_class_body method · python · L354-L396 (43 LOC)kglite/code_tree/parsers/java.py
def _parse_class_body(self, node, source: bytes, module_path: str,
rel_path: str, class_name: str,
class_qname: str, result: ParseResult):
"""Parse methods, fields, and nested classes from a class body."""
method_type_rel = TypeRelationship(
source_type=class_qname,
target_type=None,
relationship="inherent",
)
for child in node.children:
if child.type == "class_body":
for item in child.children:
if item.type in ("method_declaration",
"constructor_declaration"):
fn = self._parse_method(
item, source, module_path, rel_path,
owner=class_name,
)
result.functions.append(fn)
method_type_rel.methods.append(fn)
elif iJavaParser._parse_field method · python · L398-L453 (56 LOC)kglite/code_tree/parsers/java.py
def _parse_field(self, node, source: bytes, module_path: str,
rel_path: str, class_name: str,
class_qname: str, result: ParseResult):
"""Parse a field_declaration as AttributeInfo or ConstantInfo."""
is_static = self._has_modifier(node, source, "static")
is_final = self._has_modifier(node, source, "final")
visibility = self._get_visibility(node, source)
# Extract type
type_ann = None
for child in node.children:
if child.type in ("type_identifier", "integral_type",
"boolean_type", "floating_point_type",
"void_type", "generic_type", "array_type",
"scoped_type_identifier"):
type_ann = node_text(child, source)
break
# Extract variable declarators
for child in node.children:
if child.type == "variable_declarator":
JavaParser._parse_interface method · python · L455-L500 (46 LOC)kglite/code_tree/parsers/java.py
def _parse_interface(self, node, source: bytes, module_path: str,
rel_path: str, result: ParseResult):
"""Parse an interface_declaration node."""
name = self._get_name(node, source) or "unknown"
qualified_name = f"{module_path}.{name}"
extends = self._get_interfaces(node, source)
docstring = self._get_doc_comment(node, source)
result.interfaces.append(InterfaceInfo(
name=name,
qualified_name=qualified_name,
kind="interface",
visibility=self._get_visibility(node, source),
file_path=rel_path,
line_number=node.start_point[0] + 1,
end_line=node.end_point[0] + 1,
docstring=docstring,
type_parameters=get_type_parameters(node, source),
))
# Interface extends edges
for base in extends:
result.type_relationships.append(TypeRelationship(
source_type=name,
JavaParser._parse_enum method · python · L502-L517 (16 LOC)kglite/code_tree/parsers/java.py
def _parse_enum(self, node, source: bytes, module_path: str,
                rel_path: str, result: ParseResult):
    """Record an ``enum_declaration`` node as an ``EnumInfo`` on ``result``.

    The enum's name comes from ``self._get_name`` ("unknown" when that is
    falsy) and its qualified name is ``module_path`` plus the name. Line
    numbers are the node's start/end rows converted to 1-based. The new
    entry is appended to ``result.enums``; nothing is returned.
    """
    enum_name = self._get_name(node, source) or "unknown"
    info = EnumInfo(
        name=enum_name,
        qualified_name=f"{module_path}.{enum_name}",
        visibility=self._get_visibility(node, source),
        file_path=rel_path,
        line_number=node.start_point[0] + 1,
        end_line=node.end_point[0] + 1,
        docstring=self._get_doc_comment(node, source),
        variants=self._get_enum_constants(node, source),
    )
    result.enums.append(info)

# JavaParser.parse_file method · python · L519-L563 (45 LOC)kglite/code_tree/parsers/java.py
def parse_file(self, filepath: Path, src_root: Path) -> ParseResult:
source = filepath.read_bytes()
tree = self._parser.parse(source)
root = tree.root_node
rel_path = str(filepath.relative_to(src_root))
package = self._get_package(root, source)
module_path = self._file_to_module_path(filepath, src_root, package)
loc = count_lines(source)
file_info = FileInfo(
path=rel_path,
filename=filepath.name,
loc=loc,
module_path=module_path,
language="java",
)
stem = filepath.stem
if (stem.endswith("Test") or stem.endswith("Tests")
or "/src/test/" in rel_path
or rel_path.startswith("src/test/")):
file_info.is_test = True
result = ParseResult()
result.files.append(file_info)
for child in root.children:
if child.type == "class_declaration":
self._pars_load_toml function · python · L15-L31 (17 LOC)kglite/code_tree/parsers/manifest.py
def _load_toml(path: Path) -> dict:
    """Parse the TOML file at ``path`` and return its contents as a dict.

    The parser module is resolved on first use and cached in the
    module-level ``_tomllib``: the standard-library ``tomllib`` is tried
    first, then the ``tomli`` package.

    Raises:
        ImportError: if neither ``tomllib`` nor ``tomli`` can be imported.
    """
    global _tomllib
    backend = _tomllib
    if backend is None:
        try:
            import tomllib as backend
        except ModuleNotFoundError:
            try:
                import tomli as backend  # type: ignore[no-redef]
            except ImportError:
                raise ImportError(
                    "TOML parsing requires Python 3.11+ or the 'tomli' package. "
                    "Install with: pip install tomli"
                ) from None
        # Remember the resolved module so later calls skip the import dance.
        _tomllib = backend
    with open(path, "rb") as handle:
        return backend.load(handle)

# PyProjectReader.read method · python · L62-L141 (80 LOC)kglite/code_tree/parsers/manifest.py
def read(self, manifest_path: Path, project_root: Path) -> ProjectInfo:
data = _load_toml(manifest_path)
project = data.get("project", {})
build_sys = data.get("build-system", {})
tool = data.get("tool", {})
name = project.get("name", project_root.name)
build_backend = build_sys.get("build-backend", "")
version = project.get("version")
if version is None and "version" in project.get("dynamic", []):
version = "dynamic"
info = ProjectInfo(
name=name,
version=version,
description=project.get("description"),
languages=["python"],
authors=_extract_authors(project.get("authors", [])),
license=_extract_license(project.get("license")),
repository_url=_extract_repo_url(project.get("urls", {})),
manifest_path=str(manifest_path.relative_to(project_root)),
build_system=build_backend or "unknown",
CargoTomlReader.read method · python · L154-L201 (48 LOC)kglite/code_tree/parsers/manifest.py
def read(self, manifest_path: Path, project_root: Path) -> ProjectInfo:
data = _load_toml(manifest_path)
package = data.get("package", {})
info = ProjectInfo(
name=package.get("name", project_root.name),
version=package.get("version"),
description=package.get("description"),
languages=["rust"],
authors=package.get("authors", []),
license=package.get("license"),
repository_url=package.get("repository"),
manifest_path=str(manifest_path.relative_to(project_root)),
build_system="cargo",
)
# Source root: src/ directory
src_dir = project_root / "src"
if src_dir.is_dir():
info.source_roots.append(SourceRoot(
path=src_dir, language="rust", label="rust-crate",
))
# Workspace members
workspace = data.get("workspace", {})
for member_glob in workspace.get("members", Want fix-PRs on findings? Install Repobility's GitHub App · github.com/apps/repobility-bot
detect_manifest function · python · L212-L218 (7 LOC)kglite/code_tree/parsers/manifest.py
def detect_manifest(project_root: Path) -> ManifestReader | None:
    """Return a reader for the first manifest file present in ``project_root``.

    Reader classes are tried in ``MANIFEST_READERS`` order; each is
    instantiated and its ``manifest_filename`` checked for existence as a
    regular file. Returns ``None`` when no reader matches.
    """
    candidates = (reader_cls() for reader_cls in MANIFEST_READERS)
    return next(
        (
            reader
            for reader in candidates
            if (project_root / reader.manifest_filename).is_file()
        ),
        None,
    )

# read_manifest function · python · L221-L227 (7 LOC)kglite/code_tree/parsers/manifest.py
def read_manifest(project_root: Path) -> ProjectInfo | None:
    """Detect the project's manifest and return what its reader extracts.

    Uses ``detect_manifest`` to pick a reader, then calls that reader's
    ``read`` with the manifest path and ``project_root``. Returns ``None``
    when no manifest is detected.
    """
    reader = detect_manifest(project_root)
    if reader is not None:
        return reader.read(project_root / reader.manifest_filename,
                           project_root)
    return None

# _find_python_package function · python · L233-L247 (15 LOC)kglite/code_tree/parsers/manifest.py
def _find_python_package(project_root: Path, project_name: str) -> Path | None:
"""Find the Python package directory for a project."""
pkg_name = project_name.replace("-", "_")
# Flat layout: project_root/package/
candidate = project_root / pkg_name
if candidate.is_dir() and (candidate / "__init__.py").is_file():
return candidate
# src layout: project_root/src/package/
candidate = project_root / "src" / pkg_name
if candidate.is_dir() and (candidate / "__init__.py").is_file():
return candidate
return None_extract_authors function · python · L250-L260 (11 LOC)kglite/code_tree/parsers/manifest.py
def _extract_authors(authors: list) -> list[str]:
"""Extract author names from pyproject.toml authors list."""
result = []
for a in authors:
if isinstance(a, dict):
name = a.get("name", "")
email = a.get("email", "")
result.append(f"{name} <{email}>" if email else name)
elif isinstance(a, str):
result.append(a)
return result_extract_license function · python · L263-L269 (7 LOC)kglite/code_tree/parsers/manifest.py
def _extract_license(license_val) -> str | None:
"""Extract license string from pyproject.toml license field."""
if isinstance(license_val, str):
return license_val
if isinstance(license_val, dict):
return license_val.get("text") or license_val.get("file")
return None_extract_repo_url function · python · L272-L277 (6 LOC)kglite/code_tree/parsers/manifest.py
def _extract_repo_url(urls: dict) -> str | None:
"""Extract repository URL from pyproject.toml urls."""
for key in ("Repository", "repository", "Source", "source", "Homepage"):
if key in urls:
return urls[key]
return None_parse_dep_string function · python · L280-L289 (10 LOC)kglite/code_tree/parsers/manifest.py
def _parse_dep_string(dep_str: str) -> DependencyInfo:
    """Parse a PEP 508 dependency string like 'pandas>=1.5'.

    The environment marker (everything from the first ``;``) is discarded
    before parsing, so ``tomli>=1.1; python_version < "3.11"`` yields the
    name ``tomli`` and the spec ``>=1.1``, and a marker on an unpinned
    requirement is not mistaken for a version specifier.

    The name is whatever precedes the first version-specifier character,
    so extras such as ``requests[socks]`` stay part of the name.

    Returns a ``DependencyInfo`` with ``name`` set and, when a specifier is
    present, ``version_spec`` set to the rest of the requirement.
    """
    # NOTE: direct-URL requirements ("pkg @ https://...") are not handled
    # specially; a ";" or "=" inside the URL is still treated as syntax.
    requirement = dep_str.partition(";")[0]
    # Split on first version specifier character
    for i, ch in enumerate(requirement):
        if ch in ">=<!~":
            return DependencyInfo(
                name=requirement[:i].strip(),
                version_spec=requirement[i:].strip(),
            )
    return DependencyInfo(name=requirement.strip())

# _parse_cargo_dep function · python · L292-L304 (13 LOC)kglite/code_tree/parsers/manifest.py
def _parse_cargo_dep(name: str, spec, *, is_dev: bool) -> DependencyInfo:
    """Build a ``DependencyInfo`` from one Cargo.toml dependency entry.

    ``spec`` is either a bare version string or a table, in which case its
    ``version`` entry (possibly absent) is used. Any other kind of ``spec``
    gives a ``version_spec`` of ``None``.
    """
    if isinstance(spec, dict):
        requirement = spec.get("version")
    else:
        requirement = spec if isinstance(spec, str) else None
    return DependencyInfo(name=name, version_spec=requirement, is_dev=is_dev)
ParseResult.merge method · python · L130-L140 (11 LOC)kglite/code_tree/parsers/models.py
def merge(self, other: "ParseResult") -> "ParseResult":
    """Append every collection of ``other`` onto this result and return self.

    ``self`` is modified in place; ``other`` is left untouched.
    """
    collections = (
        "files",
        "functions",
        "classes",
        "enums",
        "interfaces",
        "type_relationships",
        "attributes",
        "constants",
    )
    for field in collections:
        getattr(self, field).extend(getattr(other, field))
    return self

# PythonParser._get_visibility method · python · L60-L65 (6 LOC)kglite/code_tree/parsers/python.py
def _get_visibility(self, name: str) -> str:
if name.startswith("__") and not name.endswith("__"):
return "private"
if name.startswith("_"):
return "private"
return "public"PythonParser._get_name method · python · L67-L72 (6 LOC)kglite/code_tree/parsers/python.py
def _get_name(self, node, source: bytes) -> str:
"""Get the identifier name from a definition node."""
for child in node.children:
if child.type == "identifier":
return node_text(child, source)
return "unknown"PythonParser._get_block method · python · L74-L79 (6 LOC)kglite/code_tree/parsers/python.py
def _get_block(self, node):
"""Get the body block of a class/function definition."""
for child in node.children:
if child.type == "block":
return child
return NonePythonParser._get_docstring method · python · L81-L99 (19 LOC)kglite/code_tree/parsers/python.py
def _get_docstring(self, node, source: bytes) -> str | None:
"""Extract docstring from the first expression_statement in a block."""
block = self._get_block(node)
if block is None:
return None
for child in block.children:
if child.type == "expression_statement":
for sub in child.children:
if sub.type == "string":
raw = node_text(sub, source)
# Strip triple-quote delimiters
for delim in ('"""', "'''", '"', "'"):
if raw.startswith(delim) and raw.endswith(delim):
return raw[len(delim):-len(delim)].strip()
return raw
break # only first statement can be a docstring
elif child.type != "comment":
break
return NonePythonParser._get_bases method · python · L101-L123 (23 LOC)kglite/code_tree/parsers/python.py
def _get_bases(self, node, source: bytes) -> list[str]:
"""Extract base class names from a class_definition's argument_list."""
bases = []
for child in node.children:
if child.type == "argument_list":
for arg in child.children:
if arg.type == "identifier":
bases.append(node_text(arg, source))
elif arg.type == "attribute":
bases.append(node_text(arg, source))
elif arg.type == "subscript":
# e.g. Generic[T], Protocol[T] - take the base name
for sub in arg.children:
if sub.type == "identifier":
bases.append(node_text(sub, source))
break
elif sub.type == "attribute":
bases.append(node_text(sub, source))
PythonParser._get_decorators method · python · L125-L135 (11 LOC)kglite/code_tree/parsers/python.py
def _get_decorators(self, decorated_node, source: bytes) -> list[str]:
"""Extract decorator names from a decorated_definition."""
decorators = []
for child in decorated_node.children:
if child.type == "decorator":
# Get the text after @
text = node_text(child, source).strip()
if text.startswith("@"):
text = text[1:]
decorators.append(text)
return decoratorsPythonParser._get_decorated_inner method · python · L137-L142 (6 LOC)kglite/code_tree/parsers/python.py
def _get_decorated_inner(self, node):
"""Get the inner definition (function/class) from a decorated_definition."""
for child in node.children:
if child.type in ("function_definition", "class_definition"):
return child
return NoneMethodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/
PythonParser._get_signature method · python · L144-L153 (10 LOC)kglite/code_tree/parsers/python.py
def _get_signature(self, node, source: bytes) -> str:
"""Extract function signature (everything before the body)."""
parts = []
for child in node.children:
if child.type == "block":
break
if child.type == "comment":
continue
parts.append(node_text(child, source))
return " ".join(parts).rstrip(" :")PythonParser._get_return_type method · python · L155-L167 (13 LOC)kglite/code_tree/parsers/python.py
def _get_return_type(self, node, source: bytes) -> str | None:
"""Extract return type annotation."""
saw_arrow = False
for child in node.children:
if not child.is_named and node_text(child, source) == "->":
saw_arrow = True
elif saw_arrow:
if child.type == ":":
break
if child.type == "block":
break
return node_text(child, source)
return NonePythonParser._is_async method · python · L169-L176 (8 LOC)kglite/code_tree/parsers/python.py
def _is_async(self, node, source: bytes) -> bool:
"""Check if function is async (look for 'async' keyword before 'def')."""
for child in node.children:
if not child.is_named and node_text(child, source) == "async":
return True
if not child.is_named and node_text(child, source) == "def":
break
return FalsePythonParser._extract_calls method · python · L184-L222 (39 LOC)kglite/code_tree/parsers/python.py
def _extract_calls(self, body_node, source: bytes) -> list[tuple[str, int]]:
"""Extract function/method names called directly within a block.
Emits qualified calls where possible: "receiver.method" for
attribute access calls, bare names for plain calls and self/cls.
Returns list of (call_name, line_number) tuples.
Scope-aware: does not descend into nested functions, lambdas, or
decorated definitions — their calls belong to them, not the parent.
"""
calls: list[tuple[str, int]] = []
def walk(node):
if node.type == "call":
line = node.start_point[0] + 1
func = node.children[0] if node.children else None
if func:
if func.type == "identifier":
calls.append((node_text(func, source), line))
elif func.type == "attribute":
parts = []
for child in PythonParser._file_to_module_path method · python · L224-L241 (18 LOC)kglite/code_tree/parsers/python.py
def _file_to_module_path(self, filepath: Path, src_root: Path) -> str:
"""Convert a Python file path to a dotted module path.
Uses src_root's name as the package prefix.
e.g. src_root=chromadb/, file=chromadb/api/client.py -> chromadb.api.client
"""
rel = filepath.relative_to(src_root)
parts = list(rel.parts)
# Strip file extension from last part
parts[-1] = parts[-1].replace(".py", "")
# __init__ means the directory itself is the module
if parts[-1] == "__init__":
parts = parts[:-1]
# Prepend the src_root directory name as the package
pkg_name = src_root.name
if parts:
return pkg_name + "." + ".".join(parts)
return pkg_namePythonParser._get_enum_variants method · python · L243-L257 (15 LOC)kglite/code_tree/parsers/python.py
def _get_enum_variants(self, node, source: bytes) -> list[str]:
"""Extract enum variant names from a class body."""
variants = []
block = self._get_block(node)
if block is None:
return variants
for child in block.children:
if child.type == "expression_statement":
for sub in child.children:
if sub.type == "assignment":
for target in sub.children:
if target.type == "identifier":
variants.append(node_text(target, source))
break
return variantsPythonParser._parse_import method · python · L259-L275 (17 LOC)kglite/code_tree/parsers/python.py
def _parse_import(self, node, source: bytes) -> str | None:
"""Parse an import statement and return the module path."""
if node.type == "import_statement":
# import foo.bar
for child in node.children:
if child.type == "dotted_name":
return node_text(child, source)
return None
elif node.type == "import_from_statement":
# from foo.bar import baz
for child in node.children:
if child.type == "dotted_name":
return node_text(child, source)
elif child.type == "relative_import":
return None # skip relative imports for now
return None
return NonePythonParser._classify_decorators method · python · L277-L292 (16 LOC)kglite/code_tree/parsers/python.py
def _classify_decorators(self, decorators: list[str]) -> dict:
"""Extract semantic flags from decorator names."""
flags = {}
for dec in decorators:
base = dec.split("(")[0].split(".")[-1]
if base == "abstractmethod":
flags["is_abstract"] = True
elif base == "property":
flags["is_property"] = True
elif base == "staticmethod":
flags["is_static"] = True
elif base == "classmethod":
flags["is_classmethod"] = True
elif base == "overload":
flags["is_overload"] = True
return flagsProvenance: Repobility (https://repobility.com) — every score reproducible from /scan/
PythonParser._extract_class_attributes method · python · L294-L358 (65 LOC)kglite/code_tree/parsers/python.py
def _extract_class_attributes(self, class_node, source: bytes,
owner_qname: str, rel_path: str,
result: ParseResult):
"""Extract attributes from class body and __init__ self assignments."""
block = self._get_block(class_node)
if block is None:
return
# 1. Class-body assignments: x = value, x: type = value
for child in block.children:
if child.type == "expression_statement":
for sub in child.children:
if sub.type == "assignment":
attr_name = None
type_ann = None
default_val = None
for sc in sub.children:
if sc.type == "identifier" and attr_name is None:
attr_name = node_text(sc, source)
elif sc.type == "type":
PythonParser._walk_self_attrs method · python · L360-L394 (35 LOC)kglite/code_tree/parsers/python.py
def _walk_self_attrs(self, node, source: bytes, owner_qname: str,
rel_path: str, result: ParseResult,
seen_names: set[str]):
"""Recursively find self.x = ... assignments."""
if node.type == "assignment":
left = node.children[0] if node.children else None
if left and left.type == "attribute":
text = node_text(left, source)
if text.startswith("self."):
attr_name = text[5:]
if "." not in attr_name and attr_name not in seen_names:
seen_names.add(attr_name)
default_val = None
saw_eq = False
for sc in node.children:
if not sc.is_named and node_text(sc, source) == "=":
saw_eq = True
elif saw_eq and sc.is_named:
val = PythonParser._parse_function method · python · L398-L422 (25 LOC)kglite/code_tree/parsers/python.py
def _parse_function(self, node, source: bytes, module_path: str,
rel_path: str, is_method: bool = False,
owner: str | None = None) -> FunctionInfo:
name = self._get_name(node, source)
sep = "."
prefix = f"{module_path}{sep}{owner}" if owner else module_path
qualified_name = f"{prefix}{sep}{name}"
block = self._get_block(node)
return FunctionInfo(
name=name,
qualified_name=qualified_name,
visibility=self._get_visibility(name),
is_async=self._is_async(node, source),
is_method=is_method,
signature=self._get_signature(node, source),
file_path=rel_path,
line_number=node.start_point[0] + 1,
end_line=node.end_point[0] + 1,
docstring=self._get_docstring(node, source),
return_type=self._get_return_type(node, source),
calls=self._extract_calls(block, souPythonParser._parse_class method · python · L424-L531 (108 LOC)kglite/code_tree/parsers/python.py
def _parse_class(self, node, source: bytes, module_path: str,
rel_path: str, result: ParseResult,
decorators: list[str] | None = None):
"""Parse a class_definition node and populate the result."""
name = self._get_name(node, source)
qualified_name = f"{module_path}.{name}"
bases = self._get_bases(node, source)
docstring = self._get_docstring(node, source)
# Determine what kind of class this is
is_enum = bool(_ENUM_BASES & set(bases))
is_protocol = "Protocol" in bases
if is_enum:
result.enums.append(EnumInfo(
name=name,
qualified_name=qualified_name,
visibility=self._get_visibility(name),
file_path=rel_path,
line_number=node.start_point[0] + 1,
end_line=node.end_point[0] + 1,
docstring=docstring,
variants=self._get_enum_variants(noPythonParser.parse_file method · python · L533-L673 (141 LOC)kglite/code_tree/parsers/python.py
def parse_file(self, filepath: Path, src_root: Path) -> ParseResult:
source = filepath.read_bytes()
tree = self._parser.parse(source)
root = tree.root_node
rel_path = str(filepath.relative_to(src_root))
module_path = self._file_to_module_path(filepath, src_root)
loc = count_lines(source)
file_info = FileInfo(
path=rel_path,
filename=filepath.name,
loc=loc,
module_path=module_path,
language="python",
)
fname = filepath.name
if (fname.startswith("test_") or fname.endswith("_test.py")
or "/tests/" in rel_path or rel_path.startswith("tests/")):
file_info.is_test = True
result = ParseResult()
result.files.append(file_info)
for child in root.children:
if child.type == "function_definition":
result.functions.append(self._parse_function(
child, sourcget_parser function · python · L29-L59 (31 LOC)kglite/code_tree/parsers/registry.py
def get_parser(language: str) -> LanguageParser:
"""Get a parser instance for the given language name."""
if language == "rust":
from .rust import RustParser
return RustParser()
elif language == "python":
from .python import PythonParser
return PythonParser()
elif language == "typescript":
from .typescript import TypeScriptParser
return TypeScriptParser()
elif language == "javascript":
from .typescript import JavaScriptParser
return JavaScriptParser()
elif language == "go":
from .go import GoParser
return GoParser()
elif language == "java":
from .java import JavaParser
return JavaParser()
elif language == "csharp":
from .csharp import CSharpParser
return CSharpParser()
elif language == "c":
from .cpp import CParser
return CParser()
elif language == "cpp":
from .cpp import CppParser
return CppParser()
detect_languages function · python · L62-L68 (7 LOC)kglite/code_tree/parsers/registry.py
def detect_languages(src_root: Path) -> set[str]:
    """Return the languages of all files found anywhere under ``src_root``.

    Every regular file whose suffix is a key of ``EXTENSION_MAP``
    contributes the language that suffix maps to.
    """
    return {
        EXTENSION_MAP[entry.suffix]
        for entry in src_root.rglob("*")
        if entry.is_file() and entry.suffix in EXTENSION_MAP
    }

# get_parsers_for_directory function · python · L71-L80 (10 LOC)kglite/code_tree/parsers/registry.py
def get_parsers_for_directory(src_root: Path) -> list[LanguageParser]:
    """Build one parser per language detected under ``src_root``.

    Languages are handled in sorted order. When ``get_parser`` raises
    ``ImportError`` for a language, a warning is printed and that language
    is left out of the result.
    """
    available = []
    for language in sorted(detect_languages(src_root)):
        try:
            parser = get_parser(language)
        except ImportError as err:
            print(f" Warning: Skipping {language} (grammar not installed): {err}")
        else:
            available.append(parser)
    return available
RustParser._get_visibility method · python · L59-L66 (8 LOC)kglite/code_tree/parsers/rust.py
def _get_visibility(self, node) -> str:
for child in node.children:
if child.type == "visibility_modifier":
text = child.text.decode("utf8")
if "crate" in text:
return "pub(crate)"
return "pub"
return "private"RustParser._get_doc_comment method · python · L68-L104 (37 LOC)kglite/code_tree/parsers/rust.py
def _get_doc_comment(self, node, source: bytes) -> str | None:
"""Walk backward through siblings to collect /// or /** */ doc comments."""
doc_lines = []
sibling = node.prev_named_sibling
while sibling is not None:
if sibling.type == "line_comment":
text = node_text(sibling, source).strip()
if text.startswith("///"):
content = text[3:]
if content.startswith(" "):
content = content[1:]
doc_lines.insert(0, content)
sibling = sibling.prev_named_sibling
continue
elif sibling.type == "block_comment":
text = node_text(sibling, source).strip()
if text.startswith("/**"):
text = text[3:]
if text.endswith("*/"):
text = text[:-2]
lines = []
for line iRustParser._get_attributes method · python · L106-L119 (14 LOC)kglite/code_tree/parsers/rust.py
def _get_attributes(self, node, source: bytes) -> list[str]:
"""Walk backward through siblings to collect #[...] attributes."""
attrs = []
sibling = node.prev_named_sibling
while sibling is not None:
if sibling.type == "attribute_item":
attrs.insert(0, node_text(sibling, source))
sibling = sibling.prev_named_sibling
continue
elif sibling.type == "line_comment":
sibling = sibling.prev_named_sibling
continue
break
return attrs