Function bodies 845 total
build_test_index function · rust · L6-L117 (112 LOC)crates/fff-core/benches/bigram_bench.rs
/// Builds a [`BigramFilter`] over `file_count` synthetic files for the benchmarks.
///
/// Two builders are filled together through `add_file_content`; the second one is
/// compressed with `Some(12)` and attached to the first as its skip index.
fn build_test_index(file_count: usize) -> BigramFilter {
    let consecutive = BigramIndexBuilder::new(file_count);
    let skipping = BigramIndexBuilder::new(file_count);

    // Generate varied content so we get a mix of sparse and dense columns:
    // the skeleton is shared by every file, only the embedded index differs.
    (0..file_count).for_each(|i| {
        let text = format!(
            "struct File{i} {{ fn process() {{ let controller = read(path); }} }} // module {i}"
        );
        consecutive.add_file_content(&skipping, i, text.as_bytes());
    });

    let mut filter = consecutive.compress(None);
    filter.set_skip_index(skipping.compress(Some(12)));
    filter
}
fn bench_bigram_query(c: &mut Criterion) {
let file_counts = [10_000, 100_000, 500_000];
for &file_count in &file_counts {
let index = build_test_index(file_count);
eprintln!(
"Index ({} files): {} columns",
file_count,
index.columns_used(),
);
let mut group = c.benchmark_gbench_bigram_query function · rust · L23-L56 (34 LOC)crates/fff-core/benches/bigram_bench.rs
fn bench_bigram_query(c: &mut Criterion) {
let file_counts = [10_000, 100_000, 500_000];
for &file_count in &file_counts {
let index = build_test_index(file_count);
eprintln!(
"Index ({} files): {} columns",
file_count,
index.columns_used(),
);
let mut group = c.benchmark_group(format!("bigram_query_{file_count}"));
group.sample_size(500);
let queries: &[(&str, &[u8])] = &[
("short_2char", b"st"),
("medium_6char", b"struct"),
("long_14char", b"let controller"),
("multi_word", b"fn process"),
];
for (name, query) in queries {
group.bench_with_input(BenchmarkId::from_parameter(name), query, |b, q| {
b.iter(|| {
let result = index.query(black_box(q));
black_box(&result);
});
});
}
group.finish();
}
}bench_bigram_is_candidate function · rust · L57-L84 (28 LOC)crates/fff-core/benches/bigram_bench.rs
fn bench_bigram_is_candidate(c: &mut Criterion) {
let index = build_test_index(500_000);
let candidates = match index.query(b"struct") {
Some(c) => c,
None => {
// All bigrams ubiquitous at this size — skip candidate benches
eprintln!("Skipping is_candidate bench: query returned None (all bigrams ubiquitous)");
return;
}
};
c.bench_function("is_candidate_500k", |b| {
b.iter(|| {
let mut count = 0u32;
for i in 0..500_000 {
if BigramFilter::is_candidate(black_box(&candidates), i) {
count += 1;
}
}
black_box(count)
});
});
c.bench_function("count_candidates_500k", |b| {
b.iter(|| BigramFilter::count_candidates(black_box(&candidates)));
});
}load_real_files function · rust · L7-L41 (35 LOC)crates/fff-core/benches/memmem_bench.rs
fn load_real_files() -> Vec<(&'static str, Vec<u8>)> {
let manifest_dir = env!("CARGO_MANIFEST_DIR"); // crates/fff-core
let repo_root = Path::new(manifest_dir).parent().unwrap().parent().unwrap();
let files: &[(&str, &str)] = &[
("grep.rs/80KB", "crates/fff-core/src/grep.rs"),
("file_picker.rs/53KB", "crates/fff-core/src/file_picker.rs"),
("picker_ui.lua/96KB", "lua/fff/picker_ui.lua"),
];
let mut result = Vec::new();
for &(label, rel_path) in files {
let full_path = repo_root.join(rel_path);
if let Ok(data) = std::fs::read(&full_path) {
result.push((label, data));
}
}
// Also create a large synthetic file by concatenating all three
if result.len() == 3 {
let mut combined = Vec::new();
for (_, data) in &result {
combined.extend_from_slice(data);
}
// Repeat to get ~1MB
let base = combined.clone();
while combined.len() < 1024 * 1bench_memmem function · rust · L42-L98 (57 LOC)crates/fff-core/benches/memmem_bench.rs
fn bench_memmem(c: &mut Criterion) {
let mut group = c.benchmark_group("case_insensitive_memmem");
let files = load_real_files();
assert!(!files.is_empty(), "No source files found for benchmarking");
// Needles chosen to exercise different false-positive rates:
//
// "hit" needles: strings that actually appear in these source files.
// "miss" needles: strings with common first-bytes (lots of false positives
// for memchr2) but that don't exist in any of the files.
let needles: &[(&str, &[u8])] = &[
// Hits — real identifiers from the codebase
("short/hit/fn", b"fn"),
("short/hit/self", b"self"),
("medium/hit", b"search_file"),
("long/hit", b"content_cache_budget"),
// Misses — common first-bytes, guaranteed not in source
("short/miss", b"zqxjv"),
("medium/miss", b"fluxcapacitor"),
("long/miss", b"quantum_entanglement_resolver"),
];
for (file_label, haystack) in &filbench_parse_simple function · rust · L3-L18 (16 LOC)crates/fff-core/benches/parse_bench.rs
fn bench_parse_simple(c: &mut Criterion) {
let parser = QueryParser::default();
c.bench_function("parse_simple_text", |b| {
b.iter(|| parser.parse(black_box("hello world")));
});
c.bench_function("parse_extension", |b| {
b.iter(|| parser.parse(black_box("*.rs")));
});
c.bench_function("parse_text_with_extension", |b| {
b.iter(|| parser.parse(black_box("name *.rs")));
});
}bench_parse_complex function · rust · L19-L149 (131 LOC)crates/fff-core/benches/parse_bench.rs
/// Benchmarks `QueryParser::parse` with a default parser on three longer queries
/// (a mixed query, a glob, and several constraints at once).
fn bench_parse_complex(c: &mut Criterion) {
    let parser = QueryParser::default();

    // (benchmark name, query text)
    let cases = [
        ("parse_complex_mixed", "src name *.rs !test /lib/ status:modified"),
        ("parse_glob", "**/*.rs"),
        ("parse_multiple_constraints", "*.rs *.toml *.md !test !node_modules /src/"),
    ];
    for (bench_name, input) in cases {
        c.bench_function(bench_name, |b| {
            b.iter(|| parser.parse(black_box(input)));
        });
    }
}
fn bench_parse_realistic_queries(c: &mut Criterion) {
let parser = QueryParser::default();
let queries = vec![
"file",
"test",
"mod.rs",
"src/*.rs",
"lib test",
"*.rs !test",
"src/lib/*.rs",
"/src/ name",
"status:modified *.rs",
"type:rust test !node_modules",
];
let mut group = c.benchmark_group("realistic_queries");
for query in queries.iter() {
group.throughput(Throughput::Bytes(query.lRepobility · open methodology · https://repobility.com/research/
bench_parse_realistic_queries function · rust · L35-L130 (96 LOC)crates/fff-core/benches/parse_bench.rs
/// Benchmarks `QueryParser::parse` on a fixed list of short queries.
///
/// All cases run in the `realistic_queries` group; each benchmark is named after
/// its query and reports throughput in bytes of query text.
fn bench_parse_realistic_queries(c: &mut Criterion) {
    let parser = QueryParser::default();
    // A fixed-size array is enough here: the list is only iterated, never grown,
    // so there is no reason to heap-allocate it with `vec!`.
    let queries = [
        "file",
        "test",
        "mod.rs",
        "src/*.rs",
        "lib test",
        "*.rs !test",
        "src/lib/*.rs",
        "/src/ name",
        "status:modified *.rs",
        "type:rust test !node_modules",
    ];
    let mut group = c.benchmark_group("realistic_queries");
    for query in queries.iter() {
        // Throughput is set per query so results are comparable across lengths.
        group.throughput(Throughput::Bytes(query.len() as u64));
        group.bench_with_input(BenchmarkId::from_parameter(query), query, |b, q| {
            b.iter(|| parser.parse(black_box(q)));
        });
    }
    group.finish();
}
fn bench_parse_various_lengths(c: &mut Criterion) {
let parser = QueryParser::default();
let short = "*.rs";
let medium = "src name *.rs !test";
let long = "src lib test name *.rs *.toml !node_modules !test /src/ /lib/ status:modified";
let very_long =
"a b c d e f g h i j k l bench_parse_various_lengths function · rust · L61-L98 (38 LOC)crates/fff-core/benches/parse_bench.rs
fn bench_parse_various_lengths(c: &mut Criterion) {
let parser = QueryParser::default();
let short = "*.rs";
let medium = "src name *.rs !test";
let long = "src lib test name *.rs *.toml !node_modules !test /src/ /lib/ status:modified";
let very_long =
"a b c d e f g h i j k l m n o p q r s t u v w x y z *.rs *.toml *.md *.txt *.js";
let mut group = c.benchmark_group("query_lengths");
group.throughput(Throughput::Bytes(short.len() as u64));
group.bench_with_input(BenchmarkId::new("short", short.len()), &short, |b, q| {
b.iter(|| parser.parse(black_box(q)));
});
group.throughput(Throughput::Bytes(medium.len() as u64));
group.bench_with_input(BenchmarkId::new("medium", medium.len()), &medium, |b, q| {
b.iter(|| parser.parse(black_box(q)));
});
group.throughput(Throughput::Bytes(long.len() as u64));
group.bench_with_input(BenchmarkId::new("long", long.len()), &long, |b, q| {
b.iter(|| parser.parsbench_config_comparison function · rust · L99-L117 (19 LOC)crates/fff-core/benches/parse_bench.rs
fn bench_config_comparison(c: &mut Criterion) {
let file_picker = QueryParser::new(FileSearchConfig);
let grep = QueryParser::new(GrepConfig);
let query = "src name *.rs !test";
let mut group = c.benchmark_group("config_comparison");
group.bench_function("file_picker_config", |b| {
b.iter(|| file_picker.parse(black_box(query)));
});
group.bench_function("grep_config", |b| {
b.iter(|| grep.parse(black_box(query)));
});
group.finish();
}bench_worst_case function · rust · L150-L167 (18 LOC)crates/fff-core/benches/parse_bench.rs
fn bench_worst_case(c: &mut Criterion) {
let parser = QueryParser::default();
// Worst case: many constraints that all need to be checked
let worst_case = "a b c d e f g h i j k l m n o p q r s t u v w x y z";
c.bench_function("worst_case_many_text_tokens", |b| {
b.iter(|| parser.parse(black_box(worst_case)));
});
// Many constraints
let many_constraints = "*.rs *.toml *.md *.txt *.js *.ts *.jsx *.tsx *.vue *.svelte";
c.bench_function("worst_case_many_constraints", |b| {
b.iter(|| parser.parse(black_box(many_constraints)));
});
}main function · rust · L1-L35 (35 LOC)crates/fff-core/build.rs
fn main() {
// When the `zlob` feature is enabled (Zig-compiled C library):
// On Windows MSVC, explicitly link the C runtime libraries.
// Zig-compiled static libraries don't emit /DEFAULTLIB directives for the
// MSVC CRT, so symbols like strcmp, memcpy etc. would be unresolved.
if std::env::var("CARGO_FEATURE_ZLOB").is_ok() {
let target = std::env::var("TARGET").unwrap_or_default();
if target.contains("windows") && target.contains("msvc") {
println!("cargo:rustc-link-lib=msvcrt");
println!("cargo:rustc-link-lib=ucrt");
println!("cargo:rustc-link-lib=vcruntime");
}
} else if std::env::var("CI").is_ok() {
// CI must always build with zlob for production-quality binaries.
if !zig_available() {
panic!(
"CI detected but Zig is not installed. \
Please install Zig and build with `--features zlob`."
);
}
panic!(
zig_available function · rust · L38-L46 (9 LOC)crates/fff-core/build.rs
/// Reports whether running `zig version` succeeds.
///
/// The child's stdout and stderr are discarded. Returns `false` both when the
/// process cannot be spawned and when it exits with a non-success status.
fn zig_available() -> bool {
    use std::process::{Command, Stdio};

    let outcome = Command::new("zig")
        .arg("version")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    matches!(outcome, Ok(exit) if exit.success())
}
// new function · rust · L45-L90 (46 LOC)crates/fff-core/src/background_watcher.rs
pub fn new(
base_path: PathBuf,
git_workdir: Option<PathBuf>,
shared_picker: SharedPicker,
shared_frecency: SharedFrecency,
mode: FFFMode,
) -> Result<Self, Error> {
info!(
"Initializing background watcher for path: {}, mode: {:?}",
base_path.display(),
mode,
);
let debouncer =
Self::create_debouncer(base_path, git_workdir, shared_picker, shared_frecency, mode)?;
info!("Background file watcher initialized successfully");
let stop_signal = Arc::new(AtomicBool::new(false));
let stop_clone = Arc::clone(&stop_signal);
// The owner thread keeps the debouncer alive and ensures proper
// cleanup: `Debouncer::stop()` joins its internal thread, then the
// watcher `Drop` signals its I/O thread to exit.
let owner_thread = std::thread::Builder::new()
.name("fff-watcher-owner".into())
.spawn(move |create_debouncer function · rust · L91-L172 (82 LOC)crates/fff-core/src/background_watcher.rs
fn create_debouncer(
base_path: PathBuf,
git_workdir: Option<PathBuf>,
shared_picker: SharedPicker,
shared_frecency: SharedFrecency,
mode: FFFMode,
) -> Result<Debouncer, Error> {
// Do not follow symlinks: otherwise the notifier spawns a bunch of events for symlinked
// files that could be git-ignored, and we would have to properly differentiate those and
// whether the file was edited through a symlink (NOTE: the original comment is cut off here).
let config = Config::default().with_follow_symlinks(false);
let git_workdir_for_handler = git_workdir.clone();
let mut debouncer = new_debouncer_opt(
DEBOUNCE_TIMEOUT,
Some(DEBOUNCE_TIMEOUT / 2), // tick rate for the event span
{
move |result: DebounceEventResult| match result {
Ok(events) => {
handle_debounced_events(
events,
&git_workdir_for_handler,
Repobility · code-quality intelligence platform · https://repobility.com
stop function · rust · L173-L185 (13 LOC)crates/fff-core/src/background_watcher.rs
pub fn stop(&mut self) {
self.stop_signal.store(true, Ordering::Release);
if let Some(handle) = self.owner_thread.take() {
handle.thread().unpark();
if let Err(e) = handle.join() {
error!("Watcher owner thread panicked: {:?}", e);
}
}
info!("Background file watcher stopped successfully");
}drop function · rust · L189-L191 (3 LOC)crates/fff-core/src/background_watcher.rs
fn drop(&mut self) {
self.stop();
}handle_debounced_events function · rust · L195-L452 (258 LOC)crates/fff-core/src/background_watcher.rs
fn handle_debounced_events(
events: Vec<DebouncedEvent>,
git_workdir: &Option<PathBuf>,
shared_picker: &SharedPicker,
shared_frecency: &SharedFrecency,
mode: FFFMode,
) {
// this will be called very often, so we have to minimize the lock time for the file picker
let repo = git_workdir.as_ref().and_then(|p| Repository::open(p).ok());
let mut need_full_rescan = false;
let mut need_full_git_rescan = false;
let mut paths_to_remove = Vec::new();
let mut paths_to_add_or_modify = Vec::new();
let mut affected_paths_count = 0usize;
for debounced_event in &events {
// It is very important to not react to the access errors because we inevitably
// gonna trigger the sync by our own preview or other unnecessary noise
if matches!(
debounced_event.event.kind,
EventKind::Access(
AccessKind::Read
| AccessKind::Open(_)
| AccessKind::Close(AccessMode::Read |trigger_full_rescan function · rust · L453-L474 (22 LOC)crates/fff-core/src/background_watcher.rs
fn trigger_full_rescan(shared_picker: &SharedPicker, shared_frecency: &SharedFrecency) {
info!("Triggering full filesystem rescan");
// Note: no need to clear mmaps — they are backed by the kernel page cache
// and automatically reflect file changes. Old FileItems (and their mmaps)
// are dropped when the picker rebuilds its file list.
let Ok(mut guard) = shared_picker.write() else {
error!("Failed to acquire file picker write lock for full rescan");
return;
};
let Some(ref mut picker) = *guard else {
error!("File picker not initialized, cannot trigger rescan");
return;
};
if let Err(e) = picker.trigger_rescan(shared_frecency) {
error!("Failed to trigger full rescan: {:?}", e);
} else {
info!("Full filesystem rescan completed successfully");
}
}should_include_file function · rust · L475-L491 (17 LOC)crates/fff-core/src/background_watcher.rs
fn should_include_file(path: &Path, repo: &Option<Repository>) -> bool {
// Directories are not indexed — only regular files (and symlinks to files).
if path.is_dir() {
return false;
}
match repo.as_ref() {
Some(repo) => repo.is_path_ignored(path) != Ok(true),
None => {
// No git repo — apply basic sanity filters.
// Hidden directories are skipped by the watcher setup (hidden(true)),
// but events can still arrive for files in known non-code directories.
!is_non_code_directory(path)
}
}
}is_non_code_directory function · rust · L492-L495 (4 LOC)crates/fff-core/src/background_watcher.rs
fn is_non_code_directory(path: &Path) -> bool {
crate::ignore::is_non_code_directory(path)
}is_git_file function · rust · L498-L501 (4 LOC)crates/fff-core/src/background_watcher.rs
/// Returns `true` when any component of `path` is exactly `.git`.
///
/// Names that merely start with `.git` (such as `.gitignore`) do not match.
fn is_git_file(path: &Path) -> bool {
    for part in path.iter() {
        if part == ".git" {
            return true;
        }
    }
    false
}
// is_dotgit_change_affecting_status function · rust · L502-L535 (34 LOC)crates/fff-core/src/background_watcher.rs
pub fn is_dotgit_change_affecting_status(changed: &Path, repo: &Option<Repository>) -> bool {
let Some(repo) = repo.as_ref() else {
return false;
};
let git_dir = repo.path();
if let Ok(rel) = changed.strip_prefix(git_dir) {
if rel.starts_with("objects") || rel.starts_with("logs") || rel.starts_with("hooks") {
return false;
}
if rel == Path::new("index") || rel == Path::new("index.lock") {
return true;
}
if rel == Path::new("HEAD") {
return true;
}
if rel.starts_with("refs") || rel == Path::new("packed-refs") {
return true;
}
if rel == Path::new("info/exclude") || rel == Path::new("info/sparse-checkout") {
return true;
}
if let Some(fname) = rel.file_name().and_then(|f| f.to_str())
&& matches!(fname, "MERGE_HEAD" | "CHERRY_PICK_HEAD" | "REVERT_HEAD")
{
return true;
}
}
Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
is_ignore_definition_path function · rust · L536-L542 (7 LOC)crates/fff-core/src/background_watcher.rs
/// Returns `true` when the final component of `path` is `.ignore` or `.gitignore`.
///
/// Paths without a final file name, or whose name is not valid UTF-8, yield `false`.
fn is_ignore_definition_path(path: &Path) -> bool {
    let Some(name) = path.file_name().and_then(|raw| raw.to_str()) else {
        return false;
    };
    name == ".ignore" || name == ".gitignore"
}
// watch_git_status_paths function · rust · L543-L576 (34 LOC)crates/fff-core/src/background_watcher.rs
fn watch_git_status_paths(debouncer: &mut Debouncer, git_workdir: Option<&PathBuf>) {
let Some(workdir) = git_workdir else {
return;
};
let git_dir = workdir.join(".git");
if !git_dir.is_dir() {
return;
}
// Watch .git/ non-recursively to catch top-level files:
// index, index.lock, HEAD, packed-refs, MERGE_HEAD, CHERRY_PICK_HEAD, REVERT_HEAD
if let Err(e) = debouncer.watch(&git_dir, RecursiveMode::NonRecursive) {
warn!("Failed to watch .git directory: {}", e);
return;
}
// Watch refs/ recursively to catch branch/tag changes
let refs_dir = git_dir.join("refs");
if refs_dir.is_dir()
&& let Err(e) = debouncer.watch(&refs_dir, RecursiveMode::Recursive)
{
warn!("Failed to watch .git/refs: {}", e);
}
// Watch info/ non-recursively for exclude and sparse-checkout
let info_dir = git_dir.join("info");
if info_dir.is_dir()
&& let Err(e) = debouncer.watch(&info_dir, Recollect_non_ignored_dirs function · rust · L582-L618 (37 LOC)crates/fff-core/src/background_watcher.rs
fn collect_non_ignored_dirs(base_path: &Path, has_git_repo: bool) -> Vec<PathBuf> {
use crate::ignore::non_git_repo_overrides;
use ignore::WalkBuilder;
let mut walk_builder = WalkBuilder::new(base_path);
walk_builder
.hidden(!has_git_repo)
.git_ignore(true)
.git_exclude(true)
.git_global(true)
.ignore(true)
.follow_links(false)
.max_depth(Some(1));
if !has_git_repo && let Some(overrides) = non_git_repo_overrides(base_path) {
walk_builder.overrides(overrides);
}
let walker = walk_builder.build();
let mut dirs = Vec::new();
for entry in walker {
let Ok(entry) = entry else { continue };
let path = entry.path();
// Skip the root directory itself
if path == base_path {
continue;
}
if path.is_dir() && !is_git_file(path) {
dirs.push(path.to_path_buf());
}
}
dirs
}new function · rust · L29-L43 (15 LOC)crates/fff-core/src/bigram_filter.rs
pub fn new(file_count: usize) -> Self {
let words = file_count.div_ceil(64);
let mut lookup = Vec::with_capacity(65536);
lookup.resize_with(65536, || AtomicU16::new(NO_COLUMN));
let mut col_data = Vec::with_capacity(MAX_BIGRAM_COLUMNS * words);
col_data.resize_with(MAX_BIGRAM_COLUMNS * words, || AtomicU64::new(0));
Self {
lookup,
col_data,
next_column: AtomicU16::new(0),
words,
file_count,
populated: AtomicUsize::new(0),
}
}get_or_alloc_column function · rust · L46-L65 (20 LOC)crates/fff-core/src/bigram_filter.rs
fn get_or_alloc_column(&self, key: u16) -> u16 {
let current = self.lookup[key as usize].load(Ordering::Relaxed);
if current != NO_COLUMN {
return current;
}
let new_col = self.next_column.fetch_add(1, Ordering::Relaxed);
if new_col >= MAX_BIGRAM_COLUMNS as u16 {
return NO_COLUMN;
}
match self.lookup[key as usize].compare_exchange(
NO_COLUMN,
new_col,
Ordering::Relaxed,
Ordering::Relaxed,
) {
Ok(_) => new_col,
Err(existing) => existing,
}
}column_bitset function · rust · L68-L71 (4 LOC)crates/fff-core/src/bigram_filter.rs
fn column_bitset(&self, col: u16) -> &[AtomicU64] {
let start = col as usize * self.words;
&self.col_data[start..start + self.words]
}add_file_content function · rust · L72-L142 (71 LOC)crates/fff-core/src/bigram_filter.rs
pub(crate) fn add_file_content(&self, skip_builder: &Self, file_idx: usize, content: &[u8]) {
if content.len() < 2 {
return;
}
debug_assert!(file_idx < self.file_count);
let word_idx = file_idx / 64;
let bit_mask = 1u64 << (file_idx % 64);
// Stack-local dedup bitsets: 1024 × u64 = 8 KB each, covers all 65536 bigrams with margin
// have to fit in L1 cache
let mut seen_consec = [0u64; 1024];
let mut seen_skip = [0u64; 1024];
let bytes = content;
let len = bytes.len();
// First consecutive pair (no skip bigram possible yet).
let (a, b) = (bytes[0], bytes[1]);
if (32..=126).contains(&a) && (32..=126).contains(&b) {
let key = (a.to_ascii_lowercase() as u16) << 8 | b.to_ascii_lowercase() as u16;
let w = key as usize >> 6;
let bit = 1u64 << (key as usize & 63);
seen_consec[w] |= bit;
let col = self.getis_ready function · rust · L143-L146 (4 LOC)crates/fff-core/src/bigram_filter.rs
pub fn is_ready(&self) -> bool {
self.populated.load(Ordering::Relaxed) > 0
}Want this analysis on your repo? https://repobility.com/scan/
columns_used function · rust · L147-L152 (6 LOC)crates/fff-core/src/bigram_filter.rs
pub fn columns_used(&self) -> u16 {
self.next_column
.load(Ordering::Relaxed)
.min(MAX_BIGRAM_COLUMNS as u16)
}compress function · rust · L160-L229 (70 LOC)crates/fff-core/src/bigram_filter.rs
pub fn compress(self, min_density_pct: Option<u32>) -> BigramFilter {
let cols = self.columns_used() as usize;
let words = self.words;
let file_count = self.file_count;
let populated = self.populated.load(Ordering::Relaxed);
let dense_bytes = words * 8; // cost of one dense column
let old_lookup = self.lookup;
let col_data = self.col_data;
let mut lookup: Vec<u16> = vec![NO_COLUMN; 65536];
let mut dense_data: Vec<u64> = Vec::with_capacity(cols * words);
let mut dense_count: usize = 0;
for key in 0..65536usize {
let old_col = old_lookup[key].load(Ordering::Relaxed);
if old_col == NO_COLUMN || old_col as usize >= cols {
continue;
}
let col_start = old_col as usize * words;
let bitset = &col_data[col_start..col_start + words];
// count set bits to decide if this column is worth keeping.
let mut bitset_and function · rust · L257-L262 (6 LOC)crates/fff-core/src/bigram_filter.rs
/// ANDs `bitset` into `result` word by word, in place.
///
/// Only the common prefix is touched: if the slices differ in length, the
/// extra words of the longer one are left as they are.
fn bitset_and(result: &mut [u64], bitset: &[u64]) {
    for (acc, &mask) in result.iter_mut().zip(bitset) {
        *acc &= mask;
    }
}
// query function · rust · L267-L307 (41 LOC)crates/fff-core/src/bigram_filter.rs
pub fn query(&self, pattern: &[u8]) -> Option<Vec<u64>> {
if pattern.len() < 2 {
return None;
}
let mut result = vec![u64::MAX; self.words];
if !self.file_count.is_multiple_of(64) {
let last = self.words - 1;
result[last] = (1u64 << (self.file_count % 64)) - 1;
}
let words = self.words;
let mut has_filter = false;
let mut prev = pattern[0];
for &b in &pattern[1..] {
if (32..=126).contains(&prev) && (32..=126).contains(&b) {
let key = (prev.to_ascii_lowercase() as u16) << 8 | b.to_ascii_lowercase() as u16;
let col = self.lookup[key as usize];
if col != NO_COLUMN {
let offset = col as usize * words;
// SAFETY: compress() guarantees offset + words <= dense_data.len()
let slice = unsafe { self.dense_data.get_unchecked(offset..offset + words) };
query_skip function · rust · L311-L337 (27 LOC)crates/fff-core/src/bigram_filter.rs
fn query_skip(&self, pattern: &[u8]) -> Option<Vec<u64>> {
let mut result = vec![u64::MAX; self.words];
if !self.file_count.is_multiple_of(64) {
let last = self.words - 1;
result[last] = (1u64 << (self.file_count % 64)) - 1;
}
let words = self.words;
let mut has_filter = false;
for i in 0..pattern.len().saturating_sub(2) {
let a = pattern[i];
let b = pattern[i + 2];
if (32..=126).contains(&a) && (32..=126).contains(&b) {
let key = (a.to_ascii_lowercase() as u16) << 8 | b.to_ascii_lowercase() as u16;
let col = self.lookup[key as usize];
if col != NO_COLUMN {
let offset = col as usize * words;
let slice = unsafe { self.dense_data.get_unchecked(offset..offset + words) };
bitset_and(&mut result, slice);
has_filter = true;
}
set_skip_index function · rust · L340-L342 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn set_skip_index(&mut self, skip: BigramFilter) {
self.skip_index = Some(Box::new(skip));
}is_candidate function · rust · L345-L349 (5 LOC)crates/fff-core/src/bigram_filter.rs
/// Tests whether the bit for `file_idx` is set in the `candidates` bitset.
///
/// Bit `file_idx % 64` of word `file_idx / 64` is inspected. An index that
/// falls beyond the end of `candidates` is reported as not a candidate.
pub fn is_candidate(candidates: &[u64], file_idx: usize) -> bool {
    let mask = 1u64 << (file_idx % 64);
    candidates
        .get(file_idx / 64)
        .is_some_and(|&word| word & mask != 0)
}
// count_candidates function · rust · L350-L353 (4 LOC)crates/fff-core/src/bigram_filter.rs
/// Counts the set bits across all words of the `candidates` bitset.
pub fn count_candidates(candidates: &[u64]) -> usize {
    candidates
        .iter()
        .fold(0usize, |total, word| total + word.count_ones() as usize)
}
is_ready function · rust · L354-L357 (4 LOC)crates/fff-core/src/bigram_filter.rs
pub fn is_ready(&self) -> bool {
self.populated > 0
}file_count function · rust · L358-L361 (4 LOC)crates/fff-core/src/bigram_filter.rs
pub fn file_count(&self) -> usize {
self.file_count
}columns_used function · rust · L362-L365 (4 LOC)crates/fff-core/src/bigram_filter.rs
pub fn columns_used(&self) -> usize {
self.dense_count
}heap_bytes function · rust · L368-L373 (6 LOC)crates/fff-core/src/bigram_filter.rs
pub fn heap_bytes(&self) -> usize {
let lookup_bytes = self.lookup.len() * std::mem::size_of::<u16>();
let dense_bytes = self.dense_data.len() * std::mem::size_of::<u64>();
let skip_bytes = self.skip_index.as_ref().map_or(0, |s| s.heap_bytes());
lookup_bytes + dense_bytes + skip_bytes
}has_key function · rust · L376-L378 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn has_key(&self, key: u16) -> bool {
self.lookup[key as usize] != NO_COLUMN
}lookup function · rust · L381-L383 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn lookup(&self) -> &[u16] {
&self.lookup
}dense_data function · rust · L386-L388 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn dense_data(&self) -> &[u64] {
&self.dense_data
}words function · rust · L391-L393 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn words(&self) -> usize {
self.words
}Repobility · code-quality intelligence platform · https://repobility.com
dense_count function · rust · L396-L398 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn dense_count(&self) -> usize {
self.dense_count
}populated function · rust · L401-L403 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn populated(&self) -> usize {
self.populated
}skip_index function · rust · L406-L408 (3 LOC)crates/fff-core/src/bigram_filter.rs
pub fn skip_index(&self) -> Option<&BigramFilter> {
self.skip_index.as_deref()
}page 1 / 17next ›