← back to dmtrKovalenko__fff.nvim

Function bodies 845 total

All specs Real LLM only Function bodies
build_test_index function · rust · L6-L117 (112 LOC)
crates/fff-core/benches/bigram_bench.rs
/// Builds a synthetic [`BigramFilter`] covering `file_count` generated files.
///
/// Two builders are populated in one pass: the primary one, and a second one
/// that is passed to `add_file_content` as the skip builder. The second is
/// compressed with `Some(12)` and attached to the primary filter via
/// `set_skip_index`.
fn build_test_index(file_count: usize) -> BigramFilter {
    let primary_builder = BigramIndexBuilder::new(file_count);
    let skip_builder = BigramIndexBuilder::new(file_count);

    // Every file shares the same skeleton but embeds its own index, so the
    // resulting columns are a mix of sparse and dense ones.
    (0..file_count).for_each(|i| {
        let content = format!(
            "struct File{i} {{ fn process() {{ let controller = read(path); }} }} // module {i}"
        );
        primary_builder.add_file_content(&skip_builder, i, content.as_bytes());
    });

    let mut filter = primary_builder.compress(None);
    let skip_filter = skip_builder.compress(Some(12));
    filter.set_skip_index(skip_filter);
    filter
}

fn bench_bigram_query(c: &mut Criterion) {
    let file_counts = [10_000, 100_000, 500_000];

    for &file_count in &file_counts {
        let index = build_test_index(file_count);
        eprintln!(
            "Index ({} files): {} columns",
            file_count,
            index.columns_used(),
        );

        let mut group = c.benchmark_g
bench_bigram_query function · rust · L23-L56 (34 LOC)
crates/fff-core/benches/bigram_bench.rs
/// Benchmarks `BigramFilter::query` for a fixed set of patterns against
/// synthetic indexes of 10k, 100k and 500k files.
///
/// One Criterion group named `bigram_query_<file_count>` is created per index size.
fn bench_bigram_query(c: &mut Criterion) {
    // (benchmark label, search pattern)
    let queries: [(&str, &[u8]); 4] = [
        ("short_2char", b"st"),
        ("medium_6char", b"struct"),
        ("long_14char", b"let controller"),
        ("multi_word", b"fn process"),
    ];

    for file_count in [10_000, 100_000, 500_000] {
        let index = build_test_index(file_count);
        eprintln!(
            "Index ({} files): {} columns",
            file_count,
            index.columns_used(),
        );

        let mut group = c.benchmark_group(format!("bigram_query_{file_count}"));
        group.sample_size(500);

        for (name, query) in &queries {
            group.bench_with_input(BenchmarkId::from_parameter(name), query, |b, q| {
                b.iter(|| {
                    // Keep both the input and the result opaque to the optimizer.
                    let result = index.query(black_box(q));
                    black_box(&result);
                });
            });
        }

        group.finish();
    }
}
bench_bigram_is_candidate function · rust · L57-L84 (28 LOC)
crates/fff-core/benches/bigram_bench.rs
/// Benchmarks `BigramFilter::is_candidate` (probed for every index in
/// `0..500_000`) and `BigramFilter::count_candidates` on the candidate bitset
/// returned by querying a 500k-file synthetic index for `b"struct"`.
///
/// Registers nothing if that query returns `None`.
fn bench_bigram_is_candidate(c: &mut Criterion) {
    let index = build_test_index(500_000);

    let Some(candidates) = index.query(b"struct") else {
        // All bigrams ubiquitous at this size — skip candidate benches
        eprintln!("Skipping is_candidate bench: query returned None (all bigrams ubiquitous)");
        return;
    };

    c.bench_function("is_candidate_500k", |b| {
        b.iter(|| {
            let mut hits = 0u32;
            for file_idx in 0..500_000 {
                if BigramFilter::is_candidate(black_box(&candidates), file_idx) {
                    hits += 1;
                }
            }
            black_box(hits)
        });
    });

    c.bench_function("count_candidates_500k", |b| {
        b.iter(|| BigramFilter::count_candidates(black_box(&candidates)));
    });
}
load_real_files function · rust · L7-L41 (35 LOC)
crates/fff-core/benches/memmem_bench.rs
fn load_real_files() -> Vec<(&'static str, Vec<u8>)> {
    let manifest_dir = env!("CARGO_MANIFEST_DIR"); // crates/fff-core
    let repo_root = Path::new(manifest_dir).parent().unwrap().parent().unwrap();

    let files: &[(&str, &str)] = &[
        ("grep.rs/80KB", "crates/fff-core/src/grep.rs"),
        ("file_picker.rs/53KB", "crates/fff-core/src/file_picker.rs"),
        ("picker_ui.lua/96KB", "lua/fff/picker_ui.lua"),
    ];

    let mut result = Vec::new();
    for &(label, rel_path) in files {
        let full_path = repo_root.join(rel_path);
        if let Ok(data) = std::fs::read(&full_path) {
            result.push((label, data));
        }
    }

    // Also create a large synthetic file by concatenating all three
    if result.len() == 3 {
        let mut combined = Vec::new();
        for (_, data) in &result {
            combined.extend_from_slice(data);
        }
        // Repeat to get ~1MB
        let base = combined.clone();
        while combined.len() < 1024 * 1
bench_memmem function · rust · L42-L98 (57 LOC)
crates/fff-core/benches/memmem_bench.rs
fn bench_memmem(c: &mut Criterion) {
    let mut group = c.benchmark_group("case_insensitive_memmem");

    let files = load_real_files();
    assert!(!files.is_empty(), "No source files found for benchmarking");

    // Needles chosen to exercise different false-positive rates:
    //
    // "hit" needles: strings that actually appear in these source files.
    // "miss" needles: strings with common first-bytes (lots of false positives
    //   for memchr2) but that don't exist in any of the files.
    let needles: &[(&str, &[u8])] = &[
        // Hits — real identifiers from the codebase
        ("short/hit/fn", b"fn"),
        ("short/hit/self", b"self"),
        ("medium/hit", b"search_file"),
        ("long/hit", b"content_cache_budget"),
        // Misses — common first-bytes, guaranteed not in source
        ("short/miss", b"zqxjv"),
        ("medium/miss", b"fluxcapacitor"),
        ("long/miss", b"quantum_entanglement_resolver"),
    ];

    for (file_label, haystack) in &fil
bench_parse_simple function · rust · L3-L18 (16 LOC)
crates/fff-core/benches/parse_bench.rs
/// Benchmarks `QueryParser::parse` on three short inputs: plain text, a lone
/// extension pattern, and text combined with an extension pattern.
fn bench_parse_simple(c: &mut Criterion) {
    let parser = QueryParser::default();

    // (benchmark name, query string)
    let cases = [
        ("parse_simple_text", "hello world"),
        ("parse_extension", "*.rs"),
        ("parse_text_with_extension", "name *.rs"),
    ];

    for (bench_name, input) in cases {
        c.bench_function(bench_name, |b| {
            b.iter(|| parser.parse(black_box(input)));
        });
    }
}
bench_parse_complex function · rust · L19-L149 (131 LOC)
crates/fff-core/benches/parse_bench.rs
/// Benchmarks `QueryParser::parse` on richer inputs: a mixed query, a
/// recursive glob, and a query made of several constraints.
fn bench_parse_complex(c: &mut Criterion) {
    let parser = QueryParser::default();

    // (benchmark name, query string)
    let cases = [
        (
            "parse_complex_mixed",
            "src name *.rs !test /lib/ status:modified",
        ),
        ("parse_glob", "**/*.rs"),
        (
            "parse_multiple_constraints",
            "*.rs *.toml *.md !test !node_modules /src/",
        ),
    ];

    for (bench_name, input) in cases {
        c.bench_function(bench_name, |b| {
            b.iter(|| parser.parse(black_box(input)));
        });
    }
}

fn bench_parse_realistic_queries(c: &mut Criterion) {
    let parser = QueryParser::default();

    let queries = vec![
        "file",
        "test",
        "mod.rs",
        "src/*.rs",
        "lib test",
        "*.rs !test",
        "src/lib/*.rs",
        "/src/ name",
        "status:modified *.rs",
        "type:rust test !node_modules",
    ];

    let mut group = c.benchmark_group("realistic_queries");
    for query in queries.iter() {
        group.throughput(Throughput::Bytes(query.l
Repobility · open methodology · https://repobility.com/research/
bench_parse_realistic_queries function · rust · L35-L130 (96 LOC)
crates/fff-core/benches/parse_bench.rs
/// Benchmarks `QueryParser::parse` over a list of sample queries inside the
/// `realistic_queries` group, reporting throughput in bytes of query text.
fn bench_parse_realistic_queries(c: &mut Criterion) {
    let parser = QueryParser::default();

    let queries = [
        "file",
        "test",
        "mod.rs",
        "src/*.rs",
        "lib test",
        "*.rs !test",
        "src/lib/*.rs",
        "/src/ name",
        "status:modified *.rs",
        "type:rust test !node_modules",
    ];

    let mut group = c.benchmark_group("realistic_queries");
    for query in &queries {
        // Throughput is set per query, since each one has a different length.
        let query_bytes = query.len() as u64;
        group.throughput(Throughput::Bytes(query_bytes));

        let id = BenchmarkId::from_parameter(query);
        group.bench_with_input(id, query, |b, q| {
            b.iter(|| parser.parse(black_box(q)));
        });
    }
    group.finish();
}

fn bench_parse_various_lengths(c: &mut Criterion) {
    let parser = QueryParser::default();

    let short = "*.rs";
    let medium = "src name *.rs !test";
    let long = "src lib test name *.rs *.toml !node_modules !test /src/ /lib/ status:modified";
    let very_long =
        "a b c d e f g h i j k l 
bench_parse_various_lengths function · rust · L61-L98 (38 LOC)
crates/fff-core/benches/parse_bench.rs
fn bench_parse_various_lengths(c: &mut Criterion) {
    let parser = QueryParser::default();

    let short = "*.rs";
    let medium = "src name *.rs !test";
    let long = "src lib test name *.rs *.toml !node_modules !test /src/ /lib/ status:modified";
    let very_long =
        "a b c d e f g h i j k l m n o p q r s t u v w x y z *.rs *.toml *.md *.txt *.js";

    let mut group = c.benchmark_group("query_lengths");

    group.throughput(Throughput::Bytes(short.len() as u64));
    group.bench_with_input(BenchmarkId::new("short", short.len()), &short, |b, q| {
        b.iter(|| parser.parse(black_box(q)));
    });

    group.throughput(Throughput::Bytes(medium.len() as u64));
    group.bench_with_input(BenchmarkId::new("medium", medium.len()), &medium, |b, q| {
        b.iter(|| parser.parse(black_box(q)));
    });

    group.throughput(Throughput::Bytes(long.len() as u64));
    group.bench_with_input(BenchmarkId::new("long", long.len()), &long, |b, q| {
        b.iter(|| parser.pars
bench_config_comparison function · rust · L99-L117 (19 LOC)
crates/fff-core/benches/parse_bench.rs
/// Benchmarks parsing the same query with a parser built from
/// `FileSearchConfig` and one built from `GrepConfig`, side by side in the
/// `config_comparison` group.
fn bench_config_comparison(c: &mut Criterion) {
    let query = "src name *.rs !test";

    let picker_parser = QueryParser::new(FileSearchConfig);
    let grep_parser = QueryParser::new(GrepConfig);

    let mut group = c.benchmark_group("config_comparison");
    group
        .bench_function("file_picker_config", |b| {
            b.iter(|| picker_parser.parse(black_box(query)));
        })
        .bench_function("grep_config", |b| {
            b.iter(|| grep_parser.parse(black_box(query)));
        });
    group.finish();
}
bench_worst_case function · rust · L150-L167 (18 LOC)
crates/fff-core/benches/parse_bench.rs
/// Benchmarks `QueryParser::parse` on two stress inputs: a long run of
/// single-letter text tokens and a long run of extension constraints.
fn bench_worst_case(c: &mut Criterion) {
    let parser = QueryParser::default();

    // (benchmark name, query string)
    let cases = [
        // Worst case: many constraints that all need to be checked
        (
            "worst_case_many_text_tokens",
            "a b c d e f g h i j k l m n o p q r s t u v w x y z",
        ),
        // Many constraints
        (
            "worst_case_many_constraints",
            "*.rs *.toml *.md *.txt *.js *.ts *.jsx *.tsx *.vue *.svelte",
        ),
    ];

    for (bench_name, input) in cases {
        c.bench_function(bench_name, |b| {
            b.iter(|| parser.parse(black_box(input)));
        });
    }
}
main function · rust · L1-L35 (35 LOC)
crates/fff-core/build.rs
fn main() {
    // When the `zlob` feature is enabled (Zig-compiled C library):
    // On Windows MSVC, explicitly link the C runtime libraries.
    // Zig-compiled static libraries don't emit /DEFAULTLIB directives for the
    // MSVC CRT, so symbols like strcmp, memcpy etc. would be unresolved.
    if std::env::var("CARGO_FEATURE_ZLOB").is_ok() {
        let target = std::env::var("TARGET").unwrap_or_default();
        if target.contains("windows") && target.contains("msvc") {
            println!("cargo:rustc-link-lib=msvcrt");
            println!("cargo:rustc-link-lib=ucrt");
            println!("cargo:rustc-link-lib=vcruntime");
        }
    } else if std::env::var("CI").is_ok() {
        // CI must always build with zlob for production-quality binaries.
        if !zig_available() {
            panic!(
                "CI detected but Zig is not installed. \
                 Please install Zig and build with `--features zlob`."
            );
        }
        panic!(
        
zig_available function · rust · L38-L46 (9 LOC)
crates/fff-core/build.rs
/// Probes for a usable Zig toolchain by running `zig version`.
///
/// Returns `true` only if the process could be started and exited
/// successfully; any failure to launch it is treated as "not available".
fn zig_available() -> bool {
    use std::process::{Command, Stdio};

    // Only the exit status matters, so discard whatever the probe prints.
    let probe = Command::new("zig")
        .arg("version")
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    matches!(probe, Ok(exit) if exit.success())
}
new function · rust · L45-L90 (46 LOC)
crates/fff-core/src/background_watcher.rs
    pub fn new(
        base_path: PathBuf,
        git_workdir: Option<PathBuf>,
        shared_picker: SharedPicker,
        shared_frecency: SharedFrecency,
        mode: FFFMode,
    ) -> Result<Self, Error> {
        info!(
            "Initializing background watcher for path: {}, mode: {:?}",
            base_path.display(),
            mode,
        );

        let debouncer =
            Self::create_debouncer(base_path, git_workdir, shared_picker, shared_frecency, mode)?;
        info!("Background file watcher initialized successfully");

        let stop_signal = Arc::new(AtomicBool::new(false));
        let stop_clone = Arc::clone(&stop_signal);

        // The owner thread keeps the debouncer alive and ensures proper
        // cleanup: `Debouncer::stop()` joins its internal thread, then the
        // watcher `Drop` signals its I/O thread to exit.
        let owner_thread = std::thread::Builder::new()
            .name("fff-watcher-owner".into())
            .spawn(move |
create_debouncer function · rust · L91-L172 (82 LOC)
crates/fff-core/src/background_watcher.rs
    fn create_debouncer(
        base_path: PathBuf,
        git_workdir: Option<PathBuf>,
        shared_picker: SharedPicker,
        shared_frecency: SharedFrecency,
        mode: FFFMode,
    ) -> Result<Debouncer, Error> {
        // Do not follow symlinks: otherwise the notifier spawns a bunch of events for symlinked
        // files that could be git-ignored; we have to properly differentiate those and if
        // the file was edited through a
        let config = Config::default().with_follow_symlinks(false);

        let git_workdir_for_handler = git_workdir.clone();
        let mut debouncer = new_debouncer_opt(
            DEBOUNCE_TIMEOUT,
            Some(DEBOUNCE_TIMEOUT / 2), // tick rate for the event span
            {
                move |result: DebounceEventResult| match result {
                    Ok(events) => {
                        handle_debounced_events(
                            events,
                            &git_workdir_for_handler,
                
Repobility · code-quality intelligence platform · https://repobility.com
stop function · rust · L173-L185 (13 LOC)
crates/fff-core/src/background_watcher.rs
    pub fn stop(&mut self) {
        self.stop_signal.store(true, Ordering::Release);
        if let Some(handle) = self.owner_thread.take() {
            handle.thread().unpark();

            if let Err(e) = handle.join() {
                error!("Watcher owner thread panicked: {:?}", e);
            }
        }

        info!("Background file watcher stopped successfully");
    }
drop function · rust · L189-L191 (3 LOC)
crates/fff-core/src/background_watcher.rs
    fn drop(&mut self) {
        self.stop();
    }
handle_debounced_events function · rust · L195-L452 (258 LOC)
crates/fff-core/src/background_watcher.rs
fn handle_debounced_events(
    events: Vec<DebouncedEvent>,
    git_workdir: &Option<PathBuf>,
    shared_picker: &SharedPicker,
    shared_frecency: &SharedFrecency,
    mode: FFFMode,
) {
    // This will be called very often, so we have to minimize the lock time for the file picker
    let repo = git_workdir.as_ref().and_then(|p| Repository::open(p).ok());
    let mut need_full_rescan = false;
    let mut need_full_git_rescan = false;
    let mut paths_to_remove = Vec::new();
    let mut paths_to_add_or_modify = Vec::new();
    let mut affected_paths_count = 0usize;

    for debounced_event in &events {
        // It is very important to not react to the access errors because we inevitably
        // gonna trigger the sync by our own preview or other unnecessary noise
        if matches!(
            debounced_event.event.kind,
            EventKind::Access(
                AccessKind::Read
                    | AccessKind::Open(_)
                    | AccessKind::Close(AccessMode::Read |
trigger_full_rescan function · rust · L453-L474 (22 LOC)
crates/fff-core/src/background_watcher.rs
fn trigger_full_rescan(shared_picker: &SharedPicker, shared_frecency: &SharedFrecency) {
    info!("Triggering full filesystem rescan");

    // Note: no need to clear mmaps — they are backed by the kernel page cache
    // and automatically reflect file changes. Old FileItems (and their mmaps)
    // are dropped when the picker rebuilds its file list.

    let Ok(mut guard) = shared_picker.write() else {
        error!("Failed to acquire file picker write lock for full rescan");
        return;
    };
    let Some(ref mut picker) = *guard else {
        error!("File picker not initialized, cannot trigger rescan");
        return;
    };
    if let Err(e) = picker.trigger_rescan(shared_frecency) {
        error!("Failed to trigger full rescan: {:?}", e);
    } else {
        info!("Full filesystem rescan completed successfully");
    }
}
should_include_file function · rust · L475-L491 (17 LOC)
crates/fff-core/src/background_watcher.rs
/// Decides whether a changed `path` should be included.
///
/// Directories are never included. With a repository, the path is included
/// unless git positively reports it as ignored, so a failed ignore lookup
/// still includes the path. Without one, the path is included unless
/// `is_non_code_directory` flags it.
fn should_include_file(path: &Path, repo: &Option<Repository>) -> bool {
    // Directories are not indexed — only regular files (and symlinks to files).
    if path.is_dir() {
        return false;
    }

    if let Some(git_repo) = repo {
        return !matches!(git_repo.is_path_ignored(path), Ok(true));
    }

    // No git repo — apply basic sanity filters.
    // Hidden directories are skipped by the watcher setup (hidden(true)),
    // but events can still arrive for files in known non-code directories.
    !is_non_code_directory(path)
}
is_non_code_directory function · rust · L492-L495 (4 LOC)
crates/fff-core/src/background_watcher.rs
fn is_non_code_directory(path: &Path) -> bool {
    crate::ignore::is_non_code_directory(path)
}
is_git_file function · rust · L498-L501 (4 LOC)
crates/fff-core/src/background_watcher.rs
/// Returns `true` when any component of `path` is exactly `.git`.
///
/// Names that merely contain `.git` (such as `.gitignore` or `foo.git`) do
/// not match.
fn is_git_file(path: &Path) -> bool {
    for part in path.components() {
        if part.as_os_str() == ".git" {
            return true;
        }
    }
    false
}
is_dotgit_change_affecting_status function · rust · L502-L535 (34 LOC)
crates/fff-core/src/background_watcher.rs
pub fn is_dotgit_change_affecting_status(changed: &Path, repo: &Option<Repository>) -> bool {
    let Some(repo) = repo.as_ref() else {
        return false;
    };

    let git_dir = repo.path();

    if let Ok(rel) = changed.strip_prefix(git_dir) {
        if rel.starts_with("objects") || rel.starts_with("logs") || rel.starts_with("hooks") {
            return false;
        }
        if rel == Path::new("index") || rel == Path::new("index.lock") {
            return true;
        }
        if rel == Path::new("HEAD") {
            return true;
        }
        if rel.starts_with("refs") || rel == Path::new("packed-refs") {
            return true;
        }
        if rel == Path::new("info/exclude") || rel == Path::new("info/sparse-checkout") {
            return true;
        }

        if let Some(fname) = rel.file_name().and_then(|f| f.to_str())
            && matches!(fname, "MERGE_HEAD" | "CHERRY_PICK_HEAD" | "REVERT_HEAD")
        {
            return true;
        }
    }
Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
is_ignore_definition_path function · rust · L536-L542 (7 LOC)
crates/fff-core/src/background_watcher.rs
/// Returns `true` when the final component of `path` is `.ignore` or `.gitignore`.
///
/// Paths without a final file name, or whose name is not valid UTF-8, yield `false`.
fn is_ignore_definition_path(path: &Path) -> bool {
    path.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| name == ".ignore" || name == ".gitignore")
}
watch_git_status_paths function · rust · L543-L576 (34 LOC)
crates/fff-core/src/background_watcher.rs
fn watch_git_status_paths(debouncer: &mut Debouncer, git_workdir: Option<&PathBuf>) {
    let Some(workdir) = git_workdir else {
        return;
    };

    let git_dir = workdir.join(".git");
    if !git_dir.is_dir() {
        return;
    }

    // Watch .git/ non-recursively to catch top-level files:
    // index, index.lock, HEAD, packed-refs, MERGE_HEAD, CHERRY_PICK_HEAD, REVERT_HEAD
    if let Err(e) = debouncer.watch(&git_dir, RecursiveMode::NonRecursive) {
        warn!("Failed to watch .git directory: {}", e);
        return;
    }

    // Watch refs/ recursively to catch branch/tag changes
    let refs_dir = git_dir.join("refs");
    if refs_dir.is_dir()
        && let Err(e) = debouncer.watch(&refs_dir, RecursiveMode::Recursive)
    {
        warn!("Failed to watch .git/refs: {}", e);
    }

    // Watch info/ non-recursively for exclude and sparse-checkout
    let info_dir = git_dir.join("info");
    if info_dir.is_dir()
        && let Err(e) = debouncer.watch(&info_dir, Re
collect_non_ignored_dirs function · rust · L582-L618 (37 LOC)
crates/fff-core/src/background_watcher.rs
/// Lists the directories directly under `base_path` (walk depth 1) that
/// survive the ignore rules configured on the walker.
///
/// Hidden entries are filtered by the walker only when there is no git repo;
/// in that case `non_git_repo_overrides`, if it yields any, are applied too.
/// The root itself and any path with a `.git` component are left out.
fn collect_non_ignored_dirs(base_path: &Path, has_git_repo: bool) -> Vec<PathBuf> {
    use crate::ignore::non_git_repo_overrides;
    use ignore::WalkBuilder;

    let mut walk_builder = WalkBuilder::new(base_path);
    walk_builder
        .hidden(!has_git_repo)
        .git_ignore(true)
        .git_exclude(true)
        .git_global(true)
        .ignore(true)
        .follow_links(false)
        .max_depth(Some(1));

    // Extra override rules apply only when no git repository is present.
    let extra_rules = if has_git_repo {
        None
    } else {
        non_git_repo_overrides(base_path)
    };
    if let Some(rules) = extra_rules {
        walk_builder.overrides(rules);
    }

    walk_builder
        .build()
        // Entries the walker failed to read are silently skipped.
        .filter_map(Result::ok)
        .filter(|entry| {
            let candidate = entry.path();
            // Skip the root directory itself
            candidate != base_path && candidate.is_dir() && !is_git_file(candidate)
        })
        .map(|entry| entry.path().to_path_buf())
        .collect()
}
new function · rust · L29-L43 (15 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Creates an empty builder sized for `file_count` files.
    ///
    /// Allocates a 65536-entry lookup table (one slot per `u16` bigram key)
    /// with every slot set to `NO_COLUMN`, and a zeroed block of
    /// `MAX_BIGRAM_COLUMNS` column bitsets of `ceil(file_count / 64)` words each.
    pub fn new(file_count: usize) -> Self {
        // One bit per file, packed into 64-bit words.
        let words = file_count.div_ceil(64);
        let total_cells = MAX_BIGRAM_COLUMNS * words;

        let lookup: Vec<AtomicU16> = std::iter::repeat_with(|| AtomicU16::new(NO_COLUMN))
            .take(65536)
            .collect();
        let col_data: Vec<AtomicU64> = std::iter::repeat_with(|| AtomicU64::new(0))
            .take(total_cells)
            .collect();

        Self {
            lookup,
            col_data,
            next_column: AtomicU16::new(0),
            words,
            file_count,
            populated: AtomicUsize::new(0),
        }
    }
get_or_alloc_column function · rust · L46-L65 (20 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns the column assigned to bigram `key`, claiming the next free
    /// column the first time the key is seen.
    ///
    /// Returns `NO_COLUMN` when the key has no column and all
    /// `MAX_BIGRAM_COLUMNS` columns have already been handed out.
    fn get_or_alloc_column(&self, key: u16) -> u16 {
        let slot = &self.lookup[key as usize];

        let current = slot.load(Ordering::Relaxed);
        if current != NO_COLUMN {
            return current;
        }

        // Advance the counter only while it is below the cap. An unconditional
        // `fetch_add` keeps incrementing after exhaustion (every later call for
        // an unassigned key bumps it again), so the `u16` could eventually wrap
        // back to 0 and hand an in-use column to a different key.
        let claimed = self.next_column.fetch_update(
            Ordering::Relaxed,
            Ordering::Relaxed,
            |next| (next < MAX_BIGRAM_COLUMNS as u16).then(|| next + 1),
        );
        let Ok(new_col) = claimed else {
            return NO_COLUMN;
        };

        // Publish the claim for this key. If another thread registered a column
        // for the same key first, use theirs; the column claimed here then
        // stays unused.
        match slot.compare_exchange(
            NO_COLUMN,
            new_col,
            Ordering::Relaxed,
            Ordering::Relaxed,
        ) {
            Ok(_) => new_col,
            Err(existing) => existing,
        }
    }
column_bitset function · rust · L68-L71 (4 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Borrows the `words`-long run of `col_data` that belongs to column `col`.
    ///
    /// Panics if that range lies outside `col_data`.
    fn column_bitset(&self, col: u16) -> &[AtomicU64] {
        let begin = usize::from(col) * self.words;
        let end = begin + self.words;
        &self.col_data[begin..end]
    }
add_file_content function · rust · L72-L142 (71 LOC)
crates/fff-core/src/bigram_filter.rs
    pub(crate) fn add_file_content(&self, skip_builder: &Self, file_idx: usize, content: &[u8]) {
        if content.len() < 2 {
            return;
        }

        debug_assert!(file_idx < self.file_count);
        let word_idx = file_idx / 64;
        let bit_mask = 1u64 << (file_idx % 64);

        // Stack-local dedup bitsets: 1024 × u64 = 8 KB each, covers all 65536 bigrams with margin
        // have to fit in L1 cache
        let mut seen_consec = [0u64; 1024];
        let mut seen_skip = [0u64; 1024];

        let bytes = content;
        let len = bytes.len();

        // First consecutive pair (no skip bigram possible yet).
        let (a, b) = (bytes[0], bytes[1]);
        if (32..=126).contains(&a) && (32..=126).contains(&b) {
            let key = (a.to_ascii_lowercase() as u16) << 8 | b.to_ascii_lowercase() as u16;
            let w = key as usize >> 6;
            let bit = 1u64 << (key as usize & 63);
            seen_consec[w] |= bit;
            let col = self.get
is_ready function · rust · L143-L146 (4 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Reports whether the `populated` counter is non-zero.
    pub fn is_ready(&self) -> bool {
        let populated = self.populated.load(Ordering::Relaxed);
        populated != 0
    }
Want this analysis on your repo? https://repobility.com/scan/
columns_used function · rust · L147-L152 (6 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns the current value of the column counter, clamped to
    /// `MAX_BIGRAM_COLUMNS`.
    pub fn columns_used(&self) -> u16 {
        let cap = MAX_BIGRAM_COLUMNS as u16;
        let allocated = self.next_column.load(Ordering::Relaxed);
        if allocated > cap { cap } else { allocated }
    }
compress function · rust · L160-L229 (70 LOC)
crates/fff-core/src/bigram_filter.rs
    pub fn compress(self, min_density_pct: Option<u32>) -> BigramFilter {
        let cols = self.columns_used() as usize;
        let words = self.words;
        let file_count = self.file_count;
        let populated = self.populated.load(Ordering::Relaxed);
        let dense_bytes = words * 8; // cost of one dense column

        let old_lookup = self.lookup;
        let col_data = self.col_data;

        let mut lookup: Vec<u16> = vec![NO_COLUMN; 65536];
        let mut dense_data: Vec<u64> = Vec::with_capacity(cols * words);
        let mut dense_count: usize = 0;

        for key in 0..65536usize {
            let old_col = old_lookup[key].load(Ordering::Relaxed);
            if old_col == NO_COLUMN || old_col as usize >= cols {
                continue;
            }

            let col_start = old_col as usize * words;
            let bitset = &col_data[col_start..col_start + words];

            // count set bits to decide if this column is worth keeping.
            let mut 
bitset_and function · rust · L257-L262 (6 LOC)
crates/fff-core/src/bigram_filter.rs
/// ANDs `bitset` into `result` in place, word by word.
///
/// Only the overlapping prefix is touched: if the slices differ in length,
/// the extra words of the longer one are left as they are.
fn bitset_and(result: &mut [u64], bitset: &[u64]) {
    for (acc, mask) in result.iter_mut().zip(bitset) {
        *acc &= *mask;
    }
}
query function · rust · L267-L307 (41 LOC)
crates/fff-core/src/bigram_filter.rs
    pub fn query(&self, pattern: &[u8]) -> Option<Vec<u64>> {
        if pattern.len() < 2 {
            return None;
        }

        let mut result = vec![u64::MAX; self.words];
        if !self.file_count.is_multiple_of(64) {
            let last = self.words - 1;
            result[last] = (1u64 << (self.file_count % 64)) - 1;
        }

        let words = self.words;
        let mut has_filter = false;

        let mut prev = pattern[0];
        for &b in &pattern[1..] {
            if (32..=126).contains(&prev) && (32..=126).contains(&b) {
                let key = (prev.to_ascii_lowercase() as u16) << 8 | b.to_ascii_lowercase() as u16;
                let col = self.lookup[key as usize];
                if col != NO_COLUMN {
                    let offset = col as usize * words;
                    // SAFETY: compress() guarantees offset + words <= dense_data.len()
                    let slice = unsafe { self.dense_data.get_unchecked(offset..offset + words) };
              
query_skip function · rust · L311-L337 (27 LOC)
crates/fff-core/src/bigram_filter.rs
    fn query_skip(&self, pattern: &[u8]) -> Option<Vec<u64>> {
        let mut result = vec![u64::MAX; self.words];
        if !self.file_count.is_multiple_of(64) {
            let last = self.words - 1;
            result[last] = (1u64 << (self.file_count % 64)) - 1;
        }

        let words = self.words;
        let mut has_filter = false;

        for i in 0..pattern.len().saturating_sub(2) {
            let a = pattern[i];
            let b = pattern[i + 2];
            if (32..=126).contains(&a) && (32..=126).contains(&b) {
                let key = (a.to_ascii_lowercase() as u16) << 8 | b.to_ascii_lowercase() as u16;
                let col = self.lookup[key as usize];
                if col != NO_COLUMN {
                    let offset = col as usize * words;
                    let slice = unsafe { self.dense_data.get_unchecked(offset..offset + words) };
                    bitset_and(&mut result, slice);
                    has_filter = true;
                }
            
set_skip_index function · rust · L340-L342 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Attaches `skip` as this filter's skip index, replacing any previous one.
    pub fn set_skip_index(&mut self, skip: BigramFilter) {
        let boxed = Box::new(skip);
        self.skip_index = Some(boxed);
    }
is_candidate function · rust · L345-L349 (5 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Tests whether bit `file_idx` is set in the `candidates` bitset.
    ///
    /// Bits are packed 64 per word, least-significant bit first. An index
    /// past the end of the slice is reported as not a candidate.
    pub fn is_candidate(candidates: &[u64], file_idx: usize) -> bool {
        let (word_idx, bit_idx) = (file_idx / 64, file_idx % 64);
        match candidates.get(word_idx) {
            Some(word) => (word >> bit_idx) & 1 == 1,
            None => false,
        }
    }
count_candidates function · rust · L350-L353 (4 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Counts the set bits across all words of the `candidates` bitset.
    pub fn count_candidates(candidates: &[u64]) -> usize {
        candidates
            .iter()
            .fold(0usize, |total, word| total + word.count_ones() as usize)
    }
Repobility · open methodology · https://repobility.com/research/
is_ready function · rust · L354-L357 (4 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Reports whether the stored `populated` count is non-zero.
    pub fn is_ready(&self) -> bool {
        self.populated != 0
    }
file_count function · rust · L358-L361 (4 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns the `file_count` this filter stores.
    ///
    /// `query` uses the same value to clear the unused high bits of the last
    /// result word when it is not a multiple of 64.
    pub fn file_count(&self) -> usize {
        self.file_count
    }
columns_used function · rust · L362-L365 (4 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns the stored `dense_count` (the same value `dense_count()` returns).
    // NOTE(review): presumably the number of columns kept by `compress()` — confirm.
    pub fn columns_used(&self) -> usize {
        self.dense_count
    }
heap_bytes function · rust · L368-L373 (6 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Estimates the heap memory held by this filter, in bytes.
    ///
    /// Counts the elements of the lookup table and of the dense column data,
    /// plus the same estimate for the attached skip index, if there is one.
    pub fn heap_bytes(&self) -> usize {
        let own_bytes = self.lookup.len() * std::mem::size_of::<u16>()
            + self.dense_data.len() * std::mem::size_of::<u64>();

        match self.skip_index.as_ref() {
            Some(skip) => own_bytes + skip.heap_bytes(),
            None => own_bytes,
        }
    }
has_key function · rust · L376-L378 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Reports whether bigram `key` maps to a column, i.e. its lookup entry
    /// is not `NO_COLUMN`.
    pub fn has_key(&self, key: u16) -> bool {
        let column = self.lookup[usize::from(key)];
        column != NO_COLUMN
    }
lookup function · rust · L381-L383 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Borrows the lookup table as a slice.
    ///
    /// The table is indexed by bigram key; an entry equal to `NO_COLUMN`
    /// means the key has no column (see `has_key`).
    pub fn lookup(&self) -> &[u16] {
        &self.lookup
    }
dense_data function · rust · L386-L388 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Borrows the backing words of all columns as one flat slice.
    ///
    /// `query` reads column `col` from the range
    /// `col * words .. col * words + words` of this data.
    pub fn dense_data(&self) -> &[u64] {
        &self.dense_data
    }
words function · rust · L391-L393 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns `words`: the length, in `u64` words, of each column bitset and
    /// of the result vector that `query` builds.
    pub fn words(&self) -> usize {
        self.words
    }
Repobility · code-quality intelligence platform · https://repobility.com
dense_count function · rust · L396-L398 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns the stored `dense_count`.
    // NOTE(review): presumably the number of columns present in `dense_data` — confirm against `compress()`.
    pub fn dense_count(&self) -> usize {
        self.dense_count
    }
populated function · rust · L401-L403 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Returns the stored `populated` count; `is_ready` reports `true` when
    /// it is greater than zero.
    pub fn populated(&self) -> usize {
        self.populated
    }
skip_index function · rust · L406-L408 (3 LOC)
crates/fff-core/src/bigram_filter.rs
    /// Borrows the attached skip index, or `None` if none is set
    /// (`set_skip_index` is what stores one).
    pub fn skip_index(&self) -> Option<&BigramFilter> {
        self.skip_index.as_deref()
    }
page 1 / 17next ›