← back to dcho108803__vep-annotator

Function bodies 431 total

All specs Real LLM only Function bodies
AnnotationSource class · cpp · L30-L111 (82 LOC)
include/annotation_source.hpp
class AnnotationSource {
public:
    virtual ~AnnotationSource() = default;

    /**
     * Get the source name (e.g., "dbnsfp", "spliceai", "phylop")
     */
    virtual std::string name() const = 0;

    /**
     * Get the type of source for CLI help
     * (e.g., "pathogenicity", "conservation", "splice", "regulatory")
     */
    virtual std::string type() const = 0;

    /**
     * Get a description of this source
     */
    virtual std::string description() const = 0;

    /**
     * Check if the source is initialized and ready
     */
    virtual bool is_ready() const = 0;

    /**
     * Initialize the source (lazy loading)
     * Called automatically on first use if not manually initialized
     */
    virtual void initialize() = 0;

    /**
     * Annotate a variant, adding results to the annotations map
     * @param chrom Chromosome
     * @param pos Position (1-based)
     * @param ref Reference allele
     * @param alt Alternate allele
     * @param transcript Optional t
ensure_initialized method · cpp · L105-L110 (6 LOC)
include/annotation_source.hpp
    /**
     * Make sure the source has been initialized before use.
     * Holds mutex_ for the whole check, and calls initialize() only when
     * is_ready() reports false.
     */
    void ensure_initialized() {
        const std::lock_guard<std::recursive_mutex> guard(mutex_);
        if (is_ready()) {
            return;  // Already initialized; nothing to do
        }
        initialize();
    }
ScoreAnnotationSource class · cpp · L116-L148 (33 LOC)
include/annotation_source.hpp
/**
 * Interface for annotation sources that expose a numeric score per genomic
 * position. Adds point, range, and aggregated score lookups on top of
 * AnnotationSource.
 */
class ScoreAnnotationSource : public AnnotationSource {
public:
    /**
     * Get score at a specific position
     * @param chrom Chromosome name
     * @param pos Position on the chromosome
     *            (NOTE(review): coordinate base is not stated here — confirm)
     * @return Score value or nullopt if not available
     */
    virtual std::optional<double> get_score(
        const std::string& chrom,
        int pos
    ) const = 0;

    /**
     * Get scores for a range (for indels)
     * @param chrom Chromosome name
     * @param start First position of the range
     * @param end Last position of the range
     *            (NOTE(review): inclusive vs. exclusive end is not stated — confirm)
     * @return Vector of scores, may contain NaN for missing values
     */
    virtual std::vector<double> get_scores(
        const std::string& chrom,
        int start,
        int end
    ) const = 0;

    /**
     * Strategies accepted by get_aggregated_score() for reducing the scores
     * of a range to a single value.
     */
    enum class Aggregation { MEAN, MAX, MIN, FIRST, LAST };

    /**
     * Get aggregated score for a range.
     * Unlike the lookups above this is not pure virtual; it is only declared
     * in this class body and its definition lives elsewhere.
     * @param chrom Chromosome name
     * @param start First position of the range
     * @param end Last position of the range
     * @param method Aggregation strategy; defaults to Aggregation::MEAN
     * @return Optional aggregated score
     *         (presumably nullopt when no score is available — confirm
     *         against the definition)
     */
    virtual std::optional<double> get_aggregated_score(
        const std::string& chrom,
        int start,
        int end,
        Aggregation method = Aggregation::MEAN
    ) const;
};
IntervalAnnotationSource class · cpp · L153-L186 (34 LOC)
include/annotation_source.hpp
/**
 * Interface for annotation sources backed by genomic intervals (features).
 * Adds overlap queries on top of AnnotationSource.
 */
class IntervalAnnotationSource : public AnnotationSource {
public:
    /**
     * Feature structure for interval-based annotations.
     * Every scalar member has a default so a default-constructed Feature
     * never carries indeterminate values.
     */
    struct Feature {
        std::string chrom;
        int start = 0;              // Interval start
        int end = 0;                // Interval end
        std::string type;           // Feature type (e.g., "promoter", "enhancer")
        std::string id;             // Feature ID
        char strand = '.';          // '.' until a strand is assigned
        std::unordered_map<std::string, std::string> attributes;
    };

    /**
     * Query overlapping features
     * @param chrom Chromosome name
     * @param start Start of the query range
     * @param end End of the query range
     * @return Features overlapping the range
     */
    virtual std::vector<Feature> query(
        const std::string& chrom,
        int start,
        int end
    ) const = 0;

    /**
     * Query overlapping features of a specific type
     * @param chrom Chromosome name
     * @param start Start of the query range
     * @param end End of the query range
     * @param feature_type Feature type to select
     * @return Features of the requested type overlapping the range
     */
    virtual std::vector<Feature> query_by_type(
        const std::string& chrom,
        int start,
        int end,
        const std::string& feature_type
    ) const = 0;
};
VariantAnnotationSource class · cpp · L192-L208 (17 LOC)
include/annotation_source.hpp
/**
 * Interface for annotation sources that are queried per variant
 * (chromosome, position, REF and ALT alleles).
 */
class VariantAnnotationSource : public AnnotationSource {
public:
    /**
     * Whether to require exact allele match (REF/ALT)
     * @return true in this default implementation; subclasses may override
     */
    virtual bool requires_allele_match() const { return true; }

    /**
     * Query annotations for a specific variant
     * @param chrom Chromosome name
     * @param pos Variant position
     * @param ref Reference allele
     * @param alt Alternate allele
     * @return Map of string keys to string values for the variant
     *         (presumably annotation name -> value; confirm with implementations)
     */
    virtual std::unordered_map<std::string, std::string> query(
        const std::string& chrom,
        int pos,
        const std::string& ref,
        const std::string& alt
    ) const = 0;
};
AnnotationSourceManager class · cpp · L213-L276 (64 LOC)
include/annotation_source.hpp
class AnnotationSourceManager {
public:
    /**
     * Register an annotation source
     */
    void add_source(std::shared_ptr<AnnotationSource> source);

    /**
     * Get all registered sources
     */
    std::vector<std::shared_ptr<AnnotationSource>> get_sources() const;

    /**
     * Get source by name
     */
    std::shared_ptr<AnnotationSource> get_source(const std::string& name) const;

    /**
     * Enable/disable a source by name
     */
    void set_enabled(const std::string& name, bool enabled);

    /**
     * Check if a source is enabled
     */
    bool is_enabled(const std::string& name) const;

    /**
     * Initialize all sources
     */
    void initialize_all();

    /**
     * Annotate a variant with all enabled sources
     */
    void annotate_all(
        const std::string& chrom,
        int pos,
        const std::string& ref,
        const std::string& alt,
        const Transcript* transcript,
        std::unordered_map<std::string, std::string>& ann
get_all_dbnsfp_fields function · cpp · L189-L197 (9 LOC)
include/dbnsfp_fields.hpp
inline std::vector<DbNSFPField> get_all_dbnsfp_fields() {
    std::vector<DbNSFPField> all_fields;
    all_fields.insert(all_fields.end(), DBNSFP_PATHOGENICITY_FIELDS.begin(), DBNSFP_PATHOGENICITY_FIELDS.end());
    all_fields.insert(all_fields.end(), DBNSFP_CONSERVATION_FIELDS.begin(), DBNSFP_CONSERVATION_FIELDS.end());
    all_fields.insert(all_fields.end(), DBNSFP_SPLICE_FIELDS.begin(), DBNSFP_SPLICE_FIELDS.end());
    all_fields.insert(all_fields.end(), DBNSFP_FREQUENCY_FIELDS.begin(), DBNSFP_FREQUENCY_FIELDS.end());
    all_fields.insert(all_fields.end(), DBNSFP_CLINICAL_FIELDS.begin(), DBNSFP_CLINICAL_FIELDS.end());
    return all_fields;
}
All rows scored by the Repobility analyzer (https://repobility.com)
get_dbnsfp_field_names function · cpp · L202-L208 (7 LOC)
include/dbnsfp_fields.hpp
inline std::set<std::string> get_dbnsfp_field_names(const std::vector<DbNSFPField>& fields) {
    std::set<std::string> names;
    for (const auto& f : fields) {
        names.insert(f.name);
    }
    return names;
}
get_dbnsfp_preset function · cpp · L213-L235 (23 LOC)
include/dbnsfp_fields.hpp
inline std::vector<DbNSFPField> get_dbnsfp_preset(const std::string& preset) {
    if (preset == "essential") {
        // Most commonly used scores
        return {
            DBNSFP_PATHOGENICITY_FIELDS[0],  // SIFT_score
            DBNSFP_PATHOGENICITY_FIELDS[4],  // Polyphen2_HDIV_score
            DBNSFP_PATHOGENICITY_FIELDS[9],  // CADD_phred
            DBNSFP_PATHOGENICITY_FIELDS[10], // REVEL_score
            DBNSFP_PATHOGENICITY_FIELDS[11], // AlphaMissense_score
        };
    } else if (preset == "pathogenicity") {
        return DBNSFP_PATHOGENICITY_FIELDS;
    } else if (preset == "conservation") {
        return DBNSFP_CONSERVATION_FIELDS;
    } else if (preset == "frequency") {
        return DBNSFP_FREQUENCY_FIELDS;
    } else if (preset == "clinical") {
        return DBNSFP_CLINICAL_FIELDS;
    } else if (preset == "splicing") {
        return DBNSFP_SPLICE_FIELDS;
    }
    return get_all_dbnsfp_fields();
}
parse_dbnsfp_fields function · cpp · L241-L279 (39 LOC)
include/dbnsfp_fields.hpp
inline std::vector<DbNSFPField> parse_dbnsfp_fields(const std::string& field_spec) {
    std::vector<DbNSFPField> result;

    if (field_spec.empty() || field_spec == "all") {
        return get_all_dbnsfp_fields();
    }

    // Check for preset names
    if (field_spec == "essential" || field_spec == "pathogenicity" ||
        field_spec == "conservation" || field_spec == "frequency" ||
        field_spec == "clinical" || field_spec == "splicing") {
        return get_dbnsfp_preset(field_spec);
    }

    // Build lookup map
    std::map<std::string, DbNSFPField> lookup;
    for (const auto& f : get_all_dbnsfp_fields()) {
        lookup[f.name] = f;
    }

    // Parse comma-separated list
    std::istringstream iss(field_spec);
    std::string field;
    while (std::getline(iss, field, ',')) {
        // Trim whitespace
        size_t start = field.find_first_not_of(" \t");
        size_t end = field.find_last_not_of(" \t");
        if (start != std::string::npos) {
            fiel
to_string method · cpp · L40-L51 (12 LOC)
include/exon_intron_numbers.hpp
    /**
     * Render as "<number>/<total>", where the total is total_exons for an
     * exon and total_introns otherwise. Empty string when nothing was found.
     */
    std::string to_string() const {
        if (!found) {
            return "";
        }

        const std::string total = is_exon ? std::to_string(total_exons)
                                          : std::to_string(total_introns);
        return std::to_string(number) + "/" + total;
    }
feature_type method · cpp · L52-L56 (5 LOC)
include/exon_intron_numbers.hpp
    /**
     * Name of the located feature: "exon" or "intron".
     * Empty string when nothing was found.
     */
    std::string feature_type() const {
        if (found) {
            return is_exon ? "exon" : "intron";
        }
        return "";
    }
get_exon_intron_number function · cpp · L68-L134 (67 LOC)
include/exon_intron_numbers.hpp
inline ExonIntronInfo get_exon_intron_number(
    int position,
    const std::vector<int>& exon_starts,
    const std::vector<int>& exon_ends,
    char strand) {

    ExonIntronInfo info;

    if (exon_starts.empty() || exon_starts.size() != exon_ends.size()) {
        return info;
    }

    int num_exons = static_cast<int>(exon_starts.size());
    info.total_exons = num_exons;
    info.total_introns = num_exons > 1 ? num_exons - 1 : 0;

    // Exons are already sorted by start position from GTF loading;
    // use the input arrays directly without copying or sorting.

    // Check each exon
    for (int i = 0; i < num_exons; ++i) {
        int start = exon_starts[i];
        int end = exon_ends[i];

        if (position >= start && position <= end) {
            // Position is in this exon
            info.found = true;
            info.is_exon = true;

            // Exon number depends on strand
            if (strand == '-') {
                info.number = num_exons - i;
        
format_exon_number function · cpp · L139-L144 (6 LOC)
include/exon_intron_numbers.hpp
/**
 * Format the exon number as "<number>/<total_exons>".
 * @return Empty string unless info is a found exon
 */
inline std::string format_exon_number(const ExonIntronInfo& info) {
    const bool located_in_exon = info.found && info.is_exon;
    if (located_in_exon) {
        return std::to_string(info.number) + "/" + std::to_string(info.total_exons);
    }
    return "";
}
format_intron_number function · cpp · L149-L154 (6 LOC)
include/exon_intron_numbers.hpp
/**
 * Format the intron number as "<number>/<total_introns>".
 * @return Empty string unless info is a found intron
 */
inline std::string format_intron_number(const ExonIntronInfo& info) {
    const bool located_in_intron = info.found && !info.is_exon;
    if (located_in_intron) {
        return std::to_string(info.number) + "/" + std::to_string(info.total_introns);
    }
    return "";
}
Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
get_cds_exon_number function · cpp · L166-L250 (85 LOC)
include/exon_intron_numbers.hpp
inline ExonIntronInfo get_cds_exon_number(
    int position,
    const std::vector<int>& exon_starts,
    const std::vector<int>& exon_ends,
    int cds_start,
    int cds_end,
    char strand) {

    ExonIntronInfo info;

    if (exon_starts.empty() || exon_starts.size() != exon_ends.size()) {
        return info;
    }

    // Count coding exons and find position
    std::vector<std::pair<int, int> > coding_exons;
    int num_exons = static_cast<int>(exon_starts.size());

    for (int i = 0; i < num_exons; ++i) {
        int ex_start = exon_starts[i];
        int ex_end = exon_ends[i];

        // Check if exon overlaps CDS
        if (ex_end >= cds_start && ex_start <= cds_end) {
            // Calculate coding portion of exon
            int coding_start = std::max(ex_start, cds_start);
            int coding_end = std::min(ex_end, cds_end);
            coding_exons.push_back(std::make_pair(coding_start, coding_end));
        }
    }

    if (coding_exons.empty()) {
        return 
calculate_cds_length function · cpp · L255-L277 (23 LOC)
include/exon_intron_numbers.hpp
/**
 * Sum the lengths of the exon portions that overlap the CDS interval.
 * Coordinates are treated as inclusive on both ends (each overlap contributes
 * end - start + 1).
 *
 * @param exon_starts Exon start coordinates
 * @param exon_ends Exon end coordinates, parallel to exon_starts
 * @param cds_start First coordinate of the CDS
 * @param cds_end Last coordinate of the CDS
 * @return Total coding length; 0 when no exon overlaps the CDS or when the
 *         two exon arrays differ in size
 */
inline int calculate_cds_length(
    const std::vector<int>& exon_starts,
    const std::vector<int>& exon_ends,
    int cds_start,
    int cds_end) {

    // The arrays are indexed in parallel; a size mismatch is malformed input
    // and would otherwise read past the end of exon_ends.
    if (exon_starts.size() != exon_ends.size()) {
        return 0;
    }

    int total_length = 0;

    for (std::vector<int>::size_type i = 0; i < exon_starts.size(); ++i) {
        const int ex_start = exon_starts[i];
        const int ex_end = exon_ends[i];

        // Only exons overlapping the CDS contribute
        if (ex_end >= cds_start && ex_start <= cds_end) {
            const int coding_start = std::max(ex_start, cds_start);
            const int coding_end = std::min(ex_end, cds_end);
            total_length += coding_end - coding_start + 1;
        }
    }

    return total_length;
}
calculate_cds_position function · cpp · L282-L336 (55 LOC)
include/exon_intron_numbers.hpp
inline int calculate_cds_position(
    int genomic_position,
    const std::vector<int>& exon_starts,
    const std::vector<int>& exon_ends,
    int cds_start,
    int cds_end,
    char strand) {

    // Create list of coding segments
    std::vector<std::pair<int, int> > coding_segments;
    int num_exons = static_cast<int>(exon_starts.size());

    for (int i = 0; i < num_exons; ++i) {
        int ex_start = exon_starts[i];
        int ex_end = exon_ends[i];

        if (ex_end >= cds_start && ex_start <= cds_end) {
            int coding_start = std::max(ex_start, cds_start);
            int coding_end = std::min(ex_end, cds_end);
            coding_segments.push_back(std::make_pair(coding_start, coding_end));
        }
    }

    if (coding_segments.empty()) {
        return -1;
    }

    // coding_segments are already in sorted order because exon_starts/exon_ends
    // are sorted by start position from GTF loading.

    if (strand == '-') {
        // Reverse for minus strand
  
format_splice_distance function · cpp · L343-L361 (19 LOC)
include/exon_intron_numbers.hpp
/**
 * Format the offset of a position relative to the nearer splice site.
 *
 * @param position Position to describe
 * @param exon_end End of the upstream exon (donor side)
 * @param next_exon_start Start of the downstream exon (acceptor side)
 * @return "+N" when the position lies N bases past exon_end and the donor is
 *         at least as close as the acceptor; otherwise "-N" when it lies N
 *         bases before next_exon_start; otherwise an empty string
 */
inline std::string format_splice_distance(
    int position,
    int exon_end,
    int next_exon_start) {

    const int from_donor = position - exon_end;          // > 0 once past the exon end
    const int to_acceptor = next_exon_start - position;  // > 0 before the next exon

    // Ties go to the donor side
    const bool donor_is_nearest = from_donor > 0 && from_donor <= to_acceptor;
    if (donor_is_nearest) {
        return "+" + std::to_string(from_donor);
    }

    if (to_acceptor <= 0) {
        return "";
    }
    return "-" + std::to_string(to_acceptor);
}
normalize_chrom function · cpp · L28-L33 (6 LOC)
include/file_parsers.hpp
/**
 * Strip a leading lowercase "chr" prefix from a chromosome name.
 * Names without the prefix, and the bare string "chr", are returned unchanged.
 */
inline std::string normalize_chrom(const std::string& chrom) {
    const bool has_prefix = chrom.size() > 3 && chrom.compare(0, 3, "chr") == 0;
    return has_prefix ? chrom.substr(3) : chrom;
}
TabixTSVReader class · cpp · L43-L102 (60 LOC)
include/file_parsers.hpp
class TabixTSVReader {
public:
    /**
     * Open a tabix-indexed TSV file
     * @param path Path to .tsv.gz or .txt.gz file (must have .tbi index)
     * @param chrom_col 0-based column index for chromosome
     * @param pos_col 0-based column index for position
     * @param columns Column names to extract (empty = use header)
     */
    TabixTSVReader(
        const std::string& path,
        int chrom_col = 0,
        int pos_col = 1,
        const std::vector<std::string>& columns = {}
    );

    ~TabixTSVReader();

    // Prevent copying
    TabixTSVReader(const TabixTSVReader&) = delete;
    TabixTSVReader& operator=(const TabixTSVReader&) = delete;

    /**
     * Query records at a specific position
     * @return Vector of row maps (column_name -> value)
     */
    std::vector<std::map<std::string, std::string>> query(
        const std::string& chrom,
        int pos
    );

    /**
     * Query records in a range
     */
    std::vector<std::map<std::string, std::strin
BigWigReader class · cpp · L112-L184 (73 LOC)
include/file_parsers.hpp
class BigWigReader {
public:
    /**
     * Open a bigWig file
     * @param path Path to .bw or .bigWig file
     */
    explicit BigWigReader(const std::string& path);

    ~BigWigReader();

    // Prevent copying
    BigWigReader(const BigWigReader&) = delete;
    BigWigReader& operator=(const BigWigReader&) = delete;

    /**
     * Get value at a specific position
     * @return Score value or nullopt if not available
     */
    std::optional<double> get_value(const std::string& chrom, int pos) const;

    /**
     * Get values for a range
     * @return Vector of values, NaN for missing positions
     */
    std::vector<double> get_values(
        const std::string& chrom,
        int start,
        int end
    ) const;

    /**
     * Get mean value for a range
     */
    std::optional<double> get_mean(
        const std::string& chrom,
        int start,
        int end
    ) const;

    /**
     * Get max value for a range
     */
    std::optional<double> get_max(
        c
GFF3Database class · cpp · L216-L276 (61 LOC)
include/file_parsers.hpp
class GFF3Database {
public:
    /**
     * Load GFF3 file
     * @param path Path to .gff3 or .gff3.gz file
     * @param feature_types Feature types to load (empty = all)
     */
    explicit GFF3Database(
        const std::string& path,
        const std::set<std::string>& feature_types = {}
    );

    ~GFF3Database();

    /**
     * Query features overlapping a position
     */
    std::vector<const GFF3Feature*> query(
        const std::string& chrom,
        int pos
    ) const;

    /**
     * Query features overlapping a range
     */
    std::vector<const GFF3Feature*> query(
        const std::string& chrom,
        int start,
        int end
    ) const;

    /**
     * Query features by type
     */
    std::vector<const GFF3Feature*> query_by_type(
        const std::string& chrom,
        int start,
        int end,
        const std::string& type
    ) const;

    /**
     * Get all feature types loaded
     */
    std::set<std::string> get_feature_types() const;

  
Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
IntervalTree class · cpp · L286-L352 (67 LOC)
include/file_parsers.hpp
template<typename T>
class IntervalTree {
public:
    IntervalTree() = default;

    /**
     * Insert an interval with associated data
     * @param start Interval start (inclusive)
     * @param end Interval end (inclusive)
     * @param data Data to associate with interval
     */
    void insert(int start, int end, T data);

    /**
     * Build the tree (call after all insertions)
     * Must be called before querying
     */
    void build();

    /**
     * Query intervals overlapping a point
     */
    std::vector<T> query(int point) const;

    /**
     * Query intervals overlapping a range
     */
    std::vector<T> query(int start, int end) const;

    /**
     * Check if tree is built
     */
    bool is_built() const { return built_; }

    /**
     * Get number of intervals
     */
    size_t size() const { return intervals_.size(); }

    /**
     * Clear all intervals
     */
    void clear();

private:
    struct Interval {
        int start;
        int end;
        
parse_filter_operator function · cpp · L48-L66 (19 LOC)
include/filter_vep.hpp
inline FilterOperator parse_filter_operator(const std::string& op_str) {
    std::string lower = op_str;
    for (size_t i = 0; i < lower.size(); ++i) {
        lower[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(lower[i])));
    }

    if (lower == "eq" || lower == "=" || lower == "is") return FilterOperator::EQUALS;
    if (lower == "ne" || lower == "!=") return FilterOperator::NOT_EQUALS;
    if (lower == "gt" || lower == ">") return FilterOperator::GREATER;
    if (lower == "ge" || lower == ">=") return FilterOperator::GREATER_EQ;
    if (lower == "lt" || lower == "<") return FilterOperator::LESS;
    if (lower == "le" || lower == "<=") return FilterOperator::LESS_EQ;
    if (lower == "contains" || lower == "match") return FilterOperator::CONTAINS;
    if (lower == "in") return FilterOperator::IN;
    if (lower == "exists" || lower == "defined") return FilterOperator::EXISTS;
    if (lower == "regex" || lower == "re") return FilterOperator::REGEX;

    return Filte
has_any_filter method · cpp · L110-L122 (13 LOC)
include/filter_vep.hpp
    /**
     * Report whether this configuration would filter anything at all.
     * @return true if any condition, set filter, threshold or flag is active
     */
    bool has_any_filter() const {
        // Explicit conditions and the collection-based filters
        if (!conditions.empty() ||
            !consequence_filter.empty() ||
            !impact_filter.empty() ||
            !gene_filter.empty() ||
            !biotype_filter.empty()) {
            return true;
        }

        // Numeric thresholds: a value >= 0 counts as an active threshold
        if (min_af >= 0 || max_af >= 0 ||
            min_cadd >= 0 || min_revel >= 0) {
            return true;
        }

        // Boolean switches
        return coding_only || exclude_intergenic ||
               exclude_intronic || canonical_only ||
               mane_only || pick_one;
    }
get method · cpp · L131-L138 (8 LOC)
include/filter_vep.hpp
    /**
     * Look up a field value by name.
     * @param field Field name
     * @return The stored value, or an empty string when the field is absent
     */
    std::string get(const std::string& field) const {
        const auto entry = fields.find(field);
        if (entry == fields.end()) {
            return "";
        }
        return entry->second;
    }
has method · cpp · L139-L142 (4 LOC)
include/filter_vep.hpp
    /**
     * Check whether a field is present with a non-empty value.
     * @param field Field name
     * @return true only if the field exists and its value is not empty
     */
    bool has(const std::string& field) const {
        const auto entry = fields.find(field);
        return entry != fields.end() && !entry->second.empty();
    }
get_numeric method · cpp · L143-L154 (12 LOC)
include/filter_vep.hpp
    /**
     * Read a field as a double.
     * @param field Field name
     * @return Parsed value, or quiet NaN when the value is empty, ".", "NA",
     *         "NaN", or cannot be parsed by std::stod
     */
    double get_numeric(const std::string& field) const {
        const double not_a_number = std::numeric_limits<double>::quiet_NaN();

        const std::string text = get(field);
        const bool is_missing =
            text.empty() || text == "." || text == "NA" || text == "NaN";
        if (is_missing) {
            return not_a_number;
        }

        try {
            return std::stod(text);
        } catch (...) {
            // Any conversion failure is reported as "no value"
            return not_a_number;
        }
    }
apply_condition function · cpp · L160-L243 (84 LOC)
include/filter_vep.hpp
inline bool apply_condition(const FilterableRecord& record, const FilterCondition& cond) {
    std::string value = record.get(cond.field);
    bool result = false;

    // Handle EXISTS operator specially
    if (cond.op == FilterOperator::EXISTS) {
        result = record.has(cond.field);
        return cond.negated ? !result : result;
    }

    if (cond.op == FilterOperator::NOT_EXISTS) {
        result = !record.has(cond.field);
        return cond.negated ? !result : result;
    }

    // Try numeric comparison first
    bool is_numeric = true;
    double num_value = 0, num_target = 0;

    if (value.empty() || value == "." || value == "NA") {
        is_numeric = false;
    } else {
        try {
            num_value = std::stod(value);
            num_target = std::stod(cond.value);
        } catch (...) {
            is_numeric = false;
        }
    }

    if (cond.op == FilterOperator::EQUALS) {
        if (is_numeric) {
            result = (std::abs(num_value - num_target)
apply_filter function · cpp · L248-L394 (147 LOC)
include/filter_vep.hpp
inline bool apply_filter(const FilterableRecord& record, const FilterConfig& config) {
    // Quick filters first
    if (!config.consequence_filter.empty()) {
        std::string consequence = record.get("CONSEQUENCE");
        if (consequence.empty()) consequence = record.get("Consequence");

        bool found = false;
        for (auto it = config.consequence_filter.begin(); it != config.consequence_filter.end(); ++it) {
            if (consequence.find(*it) != std::string::npos) {
                found = true;
                break;
            }
        }
        if (!found) return false;
    }

    if (!config.impact_filter.empty()) {
        std::string impact = record.get("IMPACT");
        if (impact.empty()) impact = record.get("Impact");

        if (config.impact_filter.count(impact) == 0) {
            return false;
        }
    }

    if (!config.gene_filter.empty()) {
        std::string gene = record.get("GENE");
        if (gene.empty()) gene = record.get("Gene");
  
Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)
parse_filter_expression function · cpp · L404-L504 (101 LOC)
include/filter_vep.hpp
inline FilterCondition parse_filter_expression(const std::string& expr) {
    FilterCondition cond;

    // Find operator
    std::vector<std::string> operators;
    operators.push_back(" is ");
    operators.push_back(" eq ");
    operators.push_back(" ne ");
    operators.push_back(" gt ");
    operators.push_back(" ge ");
    operators.push_back(" lt ");
    operators.push_back(" le ");
    operators.push_back(" contains ");
    operators.push_back(" in ");
    operators.push_back(" match ");
    operators.push_back(" exists");
    operators.push_back(">=");
    operators.push_back("<=");
    operators.push_back("!=");
    operators.push_back(">");
    operators.push_back("<");
    operators.push_back("=");

    size_t op_pos = std::string::npos;
    std::string found_op;

    for (size_t i = 0; i < operators.size(); ++i) {
        size_t pos = expr.find(operators[i]);
        if (pos != std::string::npos && (op_pos == std::string::npos || pos < op_pos)) {
            op_pos = pos;
load_gene_list function · cpp · L509-L541 (33 LOC)
include/filter_vep.hpp
/**
 * Load a gene list from a text file, one gene per line.
 *
 * For each line, surrounding whitespace is ignored, blank lines and lines
 * whose first non-blank character is '#' are skipped, and for tab-separated
 * lines only the first column (with trailing whitespace removed) is kept.
 *
 * @param filepath Path to the gene list file
 * @return Set of gene names; empty when the file cannot be opened
 */
inline std::unordered_set<std::string> load_gene_list(const std::string& filepath) {
    std::unordered_set<std::string> genes;
    std::ifstream file(filepath);

    if (!file.is_open()) {
        return genes;
    }

    // Explicit whitespace set: avoids calling std::isspace on a plain char,
    // which is undefined for negative values (non-ASCII bytes).
    const char* const whitespace = " \t\n\v\f\r";

    std::string line;
    while (std::getline(file, line)) {
        // Locate the first meaningful character; skip blank lines and comments
        // (including comments that are indented).
        const std::string::size_type first = line.find_first_not_of(whitespace);
        if (first == std::string::npos || line[first] == '#') {
            continue;
        }

        // Handle TSV format (take first column). line[first] is not a tab, so
        // the column is at least one character long.
        std::string::size_type column_end = line.find('\t', first);
        if (column_end == std::string::npos) {
            column_end = line.size();
        }

        // Drop whitespace (including a CR from CRLF files) at the column's end
        const std::string::size_type last =
            line.find_last_not_of(whitespace, column_end - 1);

        genes.insert(line.substr(first, last - first + 1));
    }

    return genes;
}
parse_tsv_header function · cpp · L546-L557 (12 LOC)
include/filter_vep.hpp
/**
 * Map column names of a tab-separated header line to their 0-based index.
 *
 * A single trailing carriage return (from a CRLF-terminated file) is ignored
 * so it does not become part of the last column name. If a name occurs more
 * than once, the last occurrence wins.
 *
 * @param header_line Header line without its newline
 * @return Column name -> column index
 */
inline std::map<std::string, int> parse_tsv_header(const std::string& header_line) {
    std::map<std::string, int> col_map;

    // Ignore the CR left behind by std::getline on CRLF input
    std::string::size_type length = header_line.size();
    if (length > 0 && header_line[length - 1] == '\r') {
        --length;
    }

    std::istringstream iss(header_line.substr(0, length));
    std::string col;
    int idx = 0;

    while (std::getline(iss, col, '\t')) {
        col_map[col] = idx++;
    }

    return col_map;
}
parse_tsv_record function · cpp · L562-L581 (20 LOC)
include/filter_vep.hpp
/**
 * Build a FilterableRecord from one tab-separated data line.
 *
 * The line is stored verbatim in original_line. For field extraction a
 * single trailing carriage return (CRLF input) is ignored, and columns whose
 * index is negative or beyond the number of fields on the line are skipped.
 *
 * @param line Data line without its newline
 * @param col_map Column name -> 0-based column index
 * @return Record holding the original line and the named field values
 */
inline FilterableRecord parse_tsv_record(const std::string& line,
                                          const std::map<std::string, int>& col_map) {
    FilterableRecord record;
    record.original_line = line;

    // Ignore the CR left behind by std::getline on CRLF input so it does not
    // end up inside the last field's value.
    std::string::size_type length = line.size();
    if (length > 0 && line[length - 1] == '\r') {
        --length;
    }

    std::vector<std::string> fields;
    std::istringstream iss(line.substr(0, length));
    std::string field;
    while (std::getline(iss, field, '\t')) {
        fields.push_back(field);
    }

    const int field_count = static_cast<int>(fields.size());
    for (auto it = col_map.begin(); it != col_map.end(); ++it) {
        const int index = it->second;
        // Guard both ends: a negative index would otherwise read out of bounds
        if (index >= 0 && index < field_count) {
            record.fields[it->first] = fields[index];
        }
    }

    return record;
}
filter_tsv_file function · cpp · L586-L630 (45 LOC)
include/filter_vep.hpp
inline int filter_tsv_file(const std::string& input_path,
                           const std::string& output_path,
                           const FilterConfig& config) {
    std::ifstream input(input_path);
    if (!input.is_open()) {
        return -1;
    }

    std::ofstream output(output_path);
    if (!output.is_open()) {
        return -1;
    }

    std::string line;
    std::map<std::string, int> col_map;
    int lines_passed = 0;
    int lines_total = 0;

    while (std::getline(input, line)) {
        // Handle header
        if (line.empty()) continue;
        if (line[0] == '#') {
            output << line << "\n";
            continue;
        }

        // First non-comment line is header
        if (col_map.empty()) {
            col_map = parse_tsv_header(line);
            output << line << "\n";
            continue;
        }

        lines_total++;

        FilterableRecord record = parse_tsv_record(line, col_map);

        if (apply_filter(record, config)) {
 
has_data method · cpp · L68-L71 (4 LOC)
include/gene_constraint.hpp
    bool has_data() const {
        return pLI >= 0 || oe_lof_upper >= 0;
    }
is_constrained method · cpp · L72-L78 (7 LOC)
include/gene_constraint.hpp
    /**
     * Report whether the gene meets the constraint thresholds used here:
     * pLI >= 0.9, or oe_lof_upper (LOEUF) in the range [0, 0.35).
     */
    bool is_constrained() const {
        const bool high_pli = pLI >= 0.9;
        const bool low_loeuf = oe_lof_upper >= 0 && oe_lof_upper < 0.35;
        return high_pli || low_loeuf;
    }
get_constraint_level method · cpp · L79-L91 (13 LOC)
include/gene_constraint.hpp
    std::string get_constraint_level() const {
        if (pLI >= 0.9 || (oe_lof_upper >= 0 && oe_lof_upper < 0.35)) {
            return "highly_constrained";
        }
        if (pLI >= 0.5 || (oe_lof_upper >= 0 && oe_lof_upper < 0.6)) {
            return "moderately_constrained";
        }
        if (pLI >= 0 || oe_lof_upper >= 0) {
            return "tolerant";
        }
        return "unknown";
    }
All rows scored by the Repobility analyzer (https://repobility.com)
GeneConstraintDB class · cpp · L99-L387 (289 LOC)
include/gene_constraint.hpp
class GeneConstraintDB {
public:
    GeneConstraintDB() : loaded_(false) {}

    /**
     * Load gnomAD constraint file
     * Expected format (TSV with header):
     * gene    transcript    pLI    oe_lof_upper    ...
     */
    bool load_gnomad_constraint(const std::string& filepath) {
        std::ifstream file(filepath);
        if (!file.is_open()) {
            return false;
        }

        std::string line;
        std::map<std::string, int> col_map;

        // Read header
        if (std::getline(file, line)) {
            std::istringstream iss(line);
            std::string col;
            int idx = 0;
            while (std::getline(iss, col, '\t')) {
                col_map[col] = idx++;
            }
        }

        // Validate required columns
        bool has_gene = col_map.count("gene") > 0 || col_map.count("gene_symbol") > 0;
        if (!has_gene) {
            return false;
        }

        // Read data rows
        while (std::getline(file, line)) {
      
load_gnomad_constraint method · cpp · L108-L249 (142 LOC)
include/gene_constraint.hpp
    bool load_gnomad_constraint(const std::string& filepath) {
        std::ifstream file(filepath);
        if (!file.is_open()) {
            return false;
        }

        std::string line;
        std::map<std::string, int> col_map;

        // Read header
        if (std::getline(file, line)) {
            std::istringstream iss(line);
            std::string col;
            int idx = 0;
            while (std::getline(iss, col, '\t')) {
                col_map[col] = idx++;
            }
        }

        // Validate required columns
        bool has_gene = col_map.count("gene") > 0 || col_map.count("gene_symbol") > 0;
        if (!has_gene) {
            return false;
        }

        // Read data rows
        while (std::getline(file, line)) {
            if (line.empty()) continue;

            std::vector<std::string> fields;
            std::istringstream iss(line);
            std::string field;
            while (std::getline(iss, field, '\t')) {
                fiel
load_pli_scores method · cpp · L255-L289 (35 LOC)
include/gene_constraint.hpp
    /**
     * Load per-gene pLI scores from a text file.
     *
     * Empty lines and lines starting with '#' are ignored. The first
     * remaining line that contains "gene" or "pLI" is treated as the header
     * and skipped. Every other line is read as whitespace-separated
     * `<gene> <pLI>`; lines that do not parse that way are skipped.
     *
     * If the gene already has an entry (for example from an earlier LOEUF
     * load), only its pLI is updated so the other metrics are preserved.
     * This matches how load_loeuf_scores merges into existing entries.
     *
     * @param filepath Path to the pLI score file
     * @return false if the file cannot be opened, true otherwise
     */
    bool load_pli_scores(const std::string& filepath) {
        std::ifstream file(filepath);
        if (!file.is_open()) {
            return false;
        }

        std::string line;
        bool has_header = false;

        while (std::getline(file, line)) {
            if (line.empty() || line[0] == '#') continue;

            // Skip header (only the first line that looks like one)
            if (!has_header && (line.find("gene") != std::string::npos ||
                                line.find("pLI") != std::string::npos)) {
                has_header = true;
                continue;
            }

            std::istringstream iss(line);
            std::string gene;
            double pli;

            if (!(iss >> gene >> pli)) continue;

            auto existing = gene_data_.find(gene);
            if (existing != gene_data_.end()) {
                // Merge: keep previously loaded metrics for this gene
                existing->second.pLI = pli;
            } else {
                GeneConstraint constraint;
                constraint.gene_symbol = gene;
                constraint.pLI = pli;
                gene_data_[gene] = constraint;
            }
        }

        loaded_ = true;
        return true;
    }
load_loeuf_scores method · cpp · L295-L333 (39 LOC)
include/gene_constraint.hpp
    bool load_loeuf_scores(const std::string& filepath) {
        std::ifstream file(filepath);
        if (!file.is_open()) {
            return false;
        }

        std::string line;
        bool has_header = false;

        while (std::getline(file, line)) {
            if (line.empty() || line[0] == '#') continue;

            if (!has_header && (line.find("gene") != std::string::npos ||
                                line.find("LOEUF") != std::string::npos ||
                                line.find("oe_lof") != std::string::npos)) {
                has_header = true;
                continue;
            }

            std::istringstream iss(line);
            std::string gene;
            double loeuf;

            if (!(iss >> gene >> loeuf)) continue;

            // Check if we already have data for this gene
            if (gene_data_.count(gene) > 0) {
                gene_data_[gene].oe_lof_upper = loeuf;
            } else {
                GeneConstraint constrain
get_by_symbol method · cpp · L338-L344 (7 LOC)
include/gene_constraint.hpp
    /**
     * Look up constraint data by gene symbol.
     * @param gene_symbol Key to search for in gene_data_
     * @return A copy of the stored record, or a default-constructed
     *         GeneConstraint when the symbol is not present
     */
    GeneConstraint get_by_symbol(const std::string& gene_symbol) const {
        const auto match = gene_data_.find(gene_symbol);
        if (match == gene_data_.end()) {
            return GeneConstraint();
        }
        return match->second;
    }
get_by_gene_id method · cpp · L349-L355 (7 LOC)
include/gene_constraint.hpp
    /**
     * Look up constraint data by gene identifier.
     * @param gene_id Key to search for in gene_id_data_
     * @return A copy of the stored record, or a default-constructed
     *         GeneConstraint when the identifier is not present
     */
    GeneConstraint get_by_gene_id(const std::string& gene_id) const {
        const auto match = gene_id_data_.find(gene_id);
        if (match == gene_id_data_.end()) {
            return GeneConstraint();
        }
        return match->second;
    }
get_constrained_genes method · cpp · L370-L381 (12 LOC)
include/gene_constraint.hpp
    std::vector<std::string> get_constrained_genes(double pli_threshold = 0.9,
                                                    double loeuf_threshold = 0.35) const {
        std::vector<std::string> result;
        for (auto it = gene_data_.begin(); it != gene_data_.end(); ++it) {
            const GeneConstraint& c = it->second;
            if (c.pLI >= pli_threshold ||
                (c.oe_lof_upper >= 0 && c.oe_lof_upper < loeuf_threshold)) {
                result.push_back(it->first);
            }
        }
        return result;
    }
get_gene_constraint_db function · cpp · L392-L395 (4 LOC)
include/gene_constraint.hpp
/**
 * Access the shared GeneConstraintDB.
 * The database is a function-local static, so every call returns a
 * reference to the same object.
 */
inline GeneConstraintDB& get_gene_constraint_db() {
    static GeneConstraintDB instance;
    return instance;
}
Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
format_constraint_score function · cpp · L400-L424 (25 LOC)
include/gene_constraint.hpp
/**
 * Format a constraint score for output.
 *
 * Negative and NaN values are rendered as "." (missing). Other values are
 * printed in fixed notation with `precision` decimals, then trailing zeros
 * in the fractional part are removed, along with the decimal point if
 * nothing is left after it (e.g. 0.5000 -> "0.5", 1.0000 -> "1").
 *
 * @param value Score to format
 * @param precision Maximum number of decimal places
 * @return Formatted score, or "." when the value is negative or NaN
 */
inline std::string format_constraint_score(double value, int precision = 4) {
    if (value < 0 || std::isnan(value)) {
        return ".";
    }

    std::ostringstream oss;
    oss.precision(precision);
    oss << std::fixed << value;

    std::string result = oss.str();

    // Only trim when there is a fractional part; integer digits such as the
    // zero in "10" must never be stripped.
    const size_t dot_pos = result.find('.');
    if (dot_pos != std::string::npos) {
        // The '.' is itself a non-'0' character, so the search stops at the
        // decimal point at the latest and cannot reach the integer digits.
        const size_t last_nonzero = result.find_last_not_of('0');
        if (last_nonzero != std::string::npos && last_nonzero >= dot_pos) {
            result.erase(last_nonzero + 1);
        }
        // Whole numbers are left as "1." after trimming; drop the dot too.
        if (!result.empty() && result.back() == '.') {
            result.pop_back();
        }
    }

    return result;
}
aa_three_to_one function · cpp · L92-L132 (41 LOC)
include/hgvs_parser.hpp
inline std::string aa_three_to_one(const std::string& three_letter) {
    static const std::unordered_map<std::string, std::string> aa_map = [] {
        std::unordered_map<std::string, std::string> m;
        m["Ala"] = "A"; m["ALA"] = "A";
        m["Arg"] = "R"; m["ARG"] = "R";
        m["Asn"] = "N"; m["ASN"] = "N";
        m["Asp"] = "D"; m["ASP"] = "D";
        m["Cys"] = "C"; m["CYS"] = "C";
        m["Gln"] = "Q"; m["GLN"] = "Q";
        m["Glu"] = "E"; m["GLU"] = "E";
        m["Gly"] = "G"; m["GLY"] = "G";
        m["His"] = "H"; m["HIS"] = "H";
        m["Ile"] = "I"; m["ILE"] = "I";
        m["Leu"] = "L"; m["LEU"] = "L";
        m["Lys"] = "K"; m["LYS"] = "K";
        m["Met"] = "M"; m["MET"] = "M";
        m["Phe"] = "F"; m["PHE"] = "F";
        m["Pro"] = "P"; m["PRO"] = "P";
        m["Ser"] = "S"; m["SER"] = "S";
        m["Thr"] = "T"; m["THR"] = "T";
        m["Trp"] = "W"; m["TRP"] = "W";
        m["Tyr"] = "Y"; m["TYR"] = "Y";
        m["Val"] = "V"; m["VAL"] = "V"
aa_one_to_three function · cpp · L137-L156 (20 LOC)
include/hgvs_parser.hpp
/**
 * Convert a one-letter amino acid code to its three-letter code.
 * The lookup is case-sensitive and covers the 20 standard residues plus
 * '*' (Ter), 'U' (Sec), 'O' (Pyl) and 'X' (Xaa).
 * @param one_letter One-letter code
 * @return Three-letter code, or "Xaa" for any unrecognised character
 */
inline std::string aa_one_to_three(char one_letter) {
    static const std::unordered_map<char, std::string> codes = {
        {'A', "Ala"}, {'R', "Arg"}, {'N', "Asn"}, {'D', "Asp"},
        {'C', "Cys"}, {'Q', "Gln"}, {'E', "Glu"}, {'G', "Gly"},
        {'H', "His"}, {'I', "Ile"}, {'L', "Leu"}, {'K', "Lys"},
        {'M', "Met"}, {'F', "Phe"}, {'P', "Pro"}, {'S', "Ser"},
        {'T', "Thr"}, {'W', "Trp"}, {'Y', "Tyr"}, {'V', "Val"},
        {'*', "Ter"}, {'U', "Sec"}, {'O', "Pyl"}, {'X', "Xaa"},
    };

    const auto entry = codes.find(one_letter);
    if (entry == codes.end()) {
        return "Xaa";
    }
    return entry->second;
}
page 1 / 9 · next ›