Function bodies 431 total
AnnotationSource class · cpp · L30-L111 (82 LOC)include/annotation_source.hpp
class AnnotationSource {
public:
virtual ~AnnotationSource() = default;
/**
* Get the source name (e.g., "dbnsfp", "spliceai", "phylop")
*/
virtual std::string name() const = 0;
/**
* Get the type of source for CLI help
* (e.g., "pathogenicity", "conservation", "splice", "regulatory")
*/
virtual std::string type() const = 0;
/**
* Get a description of this source
*/
virtual std::string description() const = 0;
/**
* Check if the source is initialized and ready
*/
virtual bool is_ready() const = 0;
/**
* Initialize the source (lazy loading)
* Called automatically on first use if not manually initialized
*/
virtual void initialize() = 0;
/**
* Annotate a variant, adding results to the annotations map
* @param chrom Chromosome
* @param pos Position (1-based)
* @param ref Reference allele
* @param alt Alternate allele
* @param transcript Optional tensure_initialized method · cpp · L105-L110 (6 LOC)include/annotation_source.hpp
/**
 * Initialize the source on demand.
 * mutex_ is held across both the readiness check and the initialize() call.
 */
void ensure_initialized() {
    const std::lock_guard<std::recursive_mutex> guard(mutex_);
    if (is_ready()) {
        return;
    }
    initialize();
}
// ScoreAnnotationSource class · cpp · L116-L148 (33 LOC)include/annotation_source.hpp
/**
* Annotation source interface for numeric, position-based scores.
* Adds point, range and aggregated score lookups on top of AnnotationSource.
*/
class ScoreAnnotationSource : public AnnotationSource {
public:
/**
* Get score at a specific position
* @param chrom Chromosome
* @param pos Position on the chromosome
* @return Score value or nullopt if not available
*/
virtual std::optional<double> get_score(
const std::string& chrom,
int pos
) const = 0;
/**
* Get scores for a range (for indels)
* @param chrom Chromosome
* @param start Range start
* @param end Range end
* @return Vector of scores, may contain NaN for missing values
* NOTE(review): whether start/end are inclusive, and their coordinate base,
* is not stated in this declaration - confirm against the implementations.
*/
virtual std::vector<double> get_scores(
const std::string& chrom,
int start,
int end
) const = 0;
/**
* Ways a range of scores can be reduced to a single value.
* NOTE(review): FIRST/LAST presumably select the score at the start/end of
* the range - confirm against the get_aggregated_score definition.
*/
enum class Aggregation { MEAN, MAX, MIN, FIRST, LAST };
/**
* Get aggregated score for a range
* @param chrom Chromosome
* @param start Range start
* @param end Range end
* @param method Aggregation to apply; defaults to Aggregation::MEAN
* @return Aggregated score as an optional (presumably nullopt when nothing
* is available - confirm against the definition)
* Unlike the two lookups above this member is not pure virtual; its
* definition lives outside this declaration.
*/
virtual std::optional<double> get_aggregated_score(
const std::string& chrom,
int start,
int end,
Aggregation method = Aggregation::MEAN
) const;
};IntervalAnnotationSource class · cpp · L153-L186 (34 LOC)include/annotation_source.hpp
/**
 * Annotation source interface for interval-based features.
 * Adds overlap queries on top of AnnotationSource.
 */
class IntervalAnnotationSource : public AnnotationSource {
public:
    /**
     * Feature structure for interval-based annotations
     */
    struct Feature {
        std::string chrom;
        // start/end get explicit defaults (like strand below) so that a
        // default-constructed Feature never exposes indeterminate values.
        int start = 0;
        int end = 0;
        std::string type;  // Feature type (e.g., "promoter", "enhancer")
        std::string id;    // Feature ID
        char strand = '.';
        std::unordered_map<std::string, std::string> attributes;
    };

    /**
     * Query overlapping features
     * @param chrom Chromosome
     * @param start Range start
     * @param end Range end
     * @return Features overlapping the requested range
     */
    virtual std::vector<Feature> query(
        const std::string& chrom,
        int start,
        int end
    ) const = 0;

    /**
     * Query overlapping features of a specific type
     * @param chrom Chromosome
     * @param start Range start
     * @param end Range end
     * @param feature_type Feature type to restrict the result to
     * @return Overlapping features of the requested type
     */
    virtual std::vector<Feature> query_by_type(
        const std::string& chrom,
        int start,
        int end,
        const std::string& feature_type
    ) const = 0;
};
// VariantAnnotationSource class · cpp · L192-L208 (17 LOC)include/annotation_source.hpp
/**
* Annotation source interface for variant-level lookups keyed by
* chromosome, position and alleles.
*/
class VariantAnnotationSource : public AnnotationSource {
public:
/**
* Whether to require exact allele match (REF/ALT)
* @return true in this base implementation; derived sources may override
*/
virtual bool requires_allele_match() const { return true; }
/**
* Query annotations for a specific variant
* @param chrom Chromosome
* @param pos Position on the chromosome
* @param ref Reference allele
* @param alt Alternate allele
* @return String-to-string map of annotations for the variant
* (presumably annotation name -> value - confirm against implementations)
*/
virtual std::unordered_map<std::string, std::string> query(
const std::string& chrom,
int pos,
const std::string& ref,
const std::string& alt
) const = 0;
};AnnotationSourceManager class · cpp · L213-L276 (64 LOC)include/annotation_source.hpp
class AnnotationSourceManager {
public:
/**
* Register an annotation source
*/
void add_source(std::shared_ptr<AnnotationSource> source);
/**
* Get all registered sources
*/
std::vector<std::shared_ptr<AnnotationSource>> get_sources() const;
/**
* Get source by name
*/
std::shared_ptr<AnnotationSource> get_source(const std::string& name) const;
/**
* Enable/disable a source by name
*/
void set_enabled(const std::string& name, bool enabled);
/**
* Check if a source is enabled
*/
bool is_enabled(const std::string& name) const;
/**
* Initialize all sources
*/
void initialize_all();
/**
* Annotate a variant with all enabled sources
*/
void annotate_all(
const std::string& chrom,
int pos,
const std::string& ref,
const std::string& alt,
const Transcript* transcript,
std::unordered_map<std::string, std::string>& annget_all_dbnsfp_fields function · cpp · L189-L197 (9 LOC)include/dbnsfp_fields.hpp
inline std::vector<DbNSFPField> get_all_dbnsfp_fields() {
std::vector<DbNSFPField> all_fields;
all_fields.insert(all_fields.end(), DBNSFP_PATHOGENICITY_FIELDS.begin(), DBNSFP_PATHOGENICITY_FIELDS.end());
all_fields.insert(all_fields.end(), DBNSFP_CONSERVATION_FIELDS.begin(), DBNSFP_CONSERVATION_FIELDS.end());
all_fields.insert(all_fields.end(), DBNSFP_SPLICE_FIELDS.begin(), DBNSFP_SPLICE_FIELDS.end());
all_fields.insert(all_fields.end(), DBNSFP_FREQUENCY_FIELDS.begin(), DBNSFP_FREQUENCY_FIELDS.end());
all_fields.insert(all_fields.end(), DBNSFP_CLINICAL_FIELDS.begin(), DBNSFP_CLINICAL_FIELDS.end());
return all_fields;
}All rows scored by the Repobility analyzer (https://repobility.com)
get_dbnsfp_field_names function · cpp · L202-L208 (7 LOC)include/dbnsfp_fields.hpp
inline std::set<std::string> get_dbnsfp_field_names(const std::vector<DbNSFPField>& fields) {
std::set<std::string> names;
for (const auto& f : fields) {
names.insert(f.name);
}
return names;
}get_dbnsfp_preset function · cpp · L213-L235 (23 LOC)include/dbnsfp_fields.hpp
/**
 * Resolve a preset name to its list of dbNSFP fields.
 * @param preset One of "essential", "pathogenicity", "conservation",
 *               "frequency", "clinical" or "splicing"
 * @return Fields of the preset; any other name yields every field
 */
inline std::vector<DbNSFPField> get_dbnsfp_preset(const std::string& preset) {
    // Whole-group presets map straight onto one field table.
    if (preset == "pathogenicity") {
        return DBNSFP_PATHOGENICITY_FIELDS;
    }
    if (preset == "conservation") {
        return DBNSFP_CONSERVATION_FIELDS;
    }
    if (preset == "frequency") {
        return DBNSFP_FREQUENCY_FIELDS;
    }
    if (preset == "clinical") {
        return DBNSFP_CLINICAL_FIELDS;
    }
    if (preset == "splicing") {
        return DBNSFP_SPLICE_FIELDS;
    }
    // Unrecognized names fall back to the complete field list.
    if (preset != "essential") {
        return get_all_dbnsfp_fields();
    }
    // Most commonly used scores, picked by index from the pathogenicity table.
    return {
        DBNSFP_PATHOGENICITY_FIELDS[0],   // SIFT_score
        DBNSFP_PATHOGENICITY_FIELDS[4],   // Polyphen2_HDIV_score
        DBNSFP_PATHOGENICITY_FIELDS[9],   // CADD_phred
        DBNSFP_PATHOGENICITY_FIELDS[10],  // REVEL_score
        DBNSFP_PATHOGENICITY_FIELDS[11],  // AlphaMissense_score
    };
}
// parse_dbnsfp_fields function · cpp · L241-L279 (39 LOC)include/dbnsfp_fields.hpp
inline std::vector<DbNSFPField> parse_dbnsfp_fields(const std::string& field_spec) {
std::vector<DbNSFPField> result;
if (field_spec.empty() || field_spec == "all") {
return get_all_dbnsfp_fields();
}
// Check for preset names
if (field_spec == "essential" || field_spec == "pathogenicity" ||
field_spec == "conservation" || field_spec == "frequency" ||
field_spec == "clinical" || field_spec == "splicing") {
return get_dbnsfp_preset(field_spec);
}
// Build lookup map
std::map<std::string, DbNSFPField> lookup;
for (const auto& f : get_all_dbnsfp_fields()) {
lookup[f.name] = f;
}
// Parse comma-separated list
std::istringstream iss(field_spec);
std::string field;
while (std::getline(iss, field, ',')) {
// Trim whitespace
size_t start = field.find_first_not_of(" \t");
size_t end = field.find_last_not_of(" \t");
if (start != std::string::npos) {
fielto_string method · cpp · L40-L51 (12 LOC)include/exon_intron_numbers.hpp
/**
 * Render the feature as "<number>/<total>".
 * The total is total_exons for an exon and total_introns otherwise.
 * @return Formatted string, or "" when nothing was found
 */
std::string to_string() const {
    if (!found) {
        return "";
    }
    const std::string prefix = std::to_string(number) + "/";
    if (is_exon) {
        return prefix + std::to_string(total_exons);
    }
    return prefix + std::to_string(total_introns);
}
// feature_type method · cpp · L52-L56 (5 LOC)include/exon_intron_numbers.hpp
/**
 * Name the kind of feature that was located.
 * @return "exon" or "intron", or "" when nothing was found
 */
std::string feature_type() const {
    if (!found) {
        return "";
    }
    if (is_exon) {
        return "exon";
    }
    return "intron";
}
// get_exon_intron_number function · cpp · L68-L134 (67 LOC)include/exon_intron_numbers.hpp
inline ExonIntronInfo get_exon_intron_number(
int position,
const std::vector<int>& exon_starts,
const std::vector<int>& exon_ends,
char strand) {
ExonIntronInfo info;
if (exon_starts.empty() || exon_starts.size() != exon_ends.size()) {
return info;
}
int num_exons = static_cast<int>(exon_starts.size());
info.total_exons = num_exons;
info.total_introns = num_exons > 1 ? num_exons - 1 : 0;
// Exons are already sorted by start position from GTF loading;
// use the input arrays directly without copying or sorting.
// Check each exon
for (int i = 0; i < num_exons; ++i) {
int start = exon_starts[i];
int end = exon_ends[i];
if (position >= start && position <= end) {
// Position is in this exon
info.found = true;
info.is_exon = true;
// Exon number depends on strand
if (strand == '-') {
info.number = num_exons - i;
format_exon_number function · cpp · L139-L144 (6 LOC)include/exon_intron_numbers.hpp
/**
 * Format an exon hit as "<number>/<total_exons>".
 * @param info Lookup result to format
 * @return Formatted string, or "" when info is not a found exon
 */
inline std::string format_exon_number(const ExonIntronInfo& info) {
    const bool is_exon_hit = info.found && info.is_exon;
    if (is_exon_hit) {
        return std::to_string(info.number) + "/" + std::to_string(info.total_exons);
    }
    return "";
}
// format_intron_number function · cpp · L149-L154 (6 LOC)include/exon_intron_numbers.hpp
/**
 * Format an intron hit as "<number>/<total_introns>".
 * @param info Lookup result to format
 * @return Formatted string, or "" when info is not a found intron
 */
inline std::string format_intron_number(const ExonIntronInfo& info) {
    const bool is_intron_hit = info.found && !info.is_exon;
    if (is_intron_hit) {
        return std::to_string(info.number) + "/" + std::to_string(info.total_introns);
    }
    return "";
}
// Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
get_cds_exon_number function · cpp · L166-L250 (85 LOC)include/exon_intron_numbers.hpp
inline ExonIntronInfo get_cds_exon_number(
int position,
const std::vector<int>& exon_starts,
const std::vector<int>& exon_ends,
int cds_start,
int cds_end,
char strand) {
ExonIntronInfo info;
if (exon_starts.empty() || exon_starts.size() != exon_ends.size()) {
return info;
}
// Count coding exons and find position
std::vector<std::pair<int, int> > coding_exons;
int num_exons = static_cast<int>(exon_starts.size());
for (int i = 0; i < num_exons; ++i) {
int ex_start = exon_starts[i];
int ex_end = exon_ends[i];
// Check if exon overlaps CDS
if (ex_end >= cds_start && ex_start <= cds_end) {
// Calculate coding portion of exon
int coding_start = std::max(ex_start, cds_start);
int coding_end = std::min(ex_end, cds_end);
coding_exons.push_back(std::make_pair(coding_start, coding_end));
}
}
if (coding_exons.empty()) {
return calculate_cds_length function · cpp · L255-L277 (23 LOC)include/exon_intron_numbers.hpp
/**
 * Sum the number of coding bases across all exons.
 * Each exon [exon_starts[i], exon_ends[i]] is clipped to the CDS range
 * [cds_start, cds_end] (both inclusive) and the clipped lengths are added up.
 * @param exon_starts Exon start coordinates
 * @param exon_ends Exon end coordinates, parallel to exon_starts
 * @param cds_start First coding coordinate
 * @param cds_end Last coding coordinate
 * @return Total coding length; 0 when nothing overlaps or when the two exon
 *         arrays differ in size
 */
inline int calculate_cds_length(
    const std::vector<int>& exon_starts,
    const std::vector<int>& exon_ends,
    int cds_start,
    int cds_end) {
    // Mismatched arrays cannot describe exons; bail out instead of reading
    // past the end of the shorter one.
    if (exon_starts.size() != exon_ends.size()) {
        return 0;
    }
    int total_length = 0;
    for (std::size_t i = 0; i < exon_starts.size(); ++i) {
        const int coding_start = std::max(exon_starts[i], cds_start);
        const int coding_end = std::min(exon_ends[i], cds_end);
        // An empty intersection (no overlap, or an inverted exon/CDS range)
        // contributes nothing rather than a negative length.
        if (coding_end >= coding_start) {
            total_length += coding_end - coding_start + 1;
        }
    }
    return total_length;
}
// calculate_cds_position function · cpp · L282-L336 (55 LOC)include/exon_intron_numbers.hpp
inline int calculate_cds_position(
int genomic_position,
const std::vector<int>& exon_starts,
const std::vector<int>& exon_ends,
int cds_start,
int cds_end,
char strand) {
// Create list of coding segments
std::vector<std::pair<int, int> > coding_segments;
int num_exons = static_cast<int>(exon_starts.size());
for (int i = 0; i < num_exons; ++i) {
int ex_start = exon_starts[i];
int ex_end = exon_ends[i];
if (ex_end >= cds_start && ex_start <= cds_end) {
int coding_start = std::max(ex_start, cds_start);
int coding_end = std::min(ex_end, cds_end);
coding_segments.push_back(std::make_pair(coding_start, coding_end));
}
}
if (coding_segments.empty()) {
return -1;
}
// coding_segments are already in sorted order because exon_starts/exon_ends
// are sorted by start position from GTF loading.
if (strand == '-') {
// Reverse for minus strand
format_splice_distance function · cpp · L343-L361 (19 LOC)include/exon_intron_numbers.hpp
/**
 * Format the offset of a position relative to the flanking splice sites.
 * @param position Position to describe
 * @param exon_end End of the preceding exon (donor side)
 * @param next_exon_start Start of the following exon (acceptor side)
 * @return "+N" when the position lies past exon_end and is at least as close
 *         to it as to next_exon_start, otherwise "-N" while the position is
 *         before next_exon_start, otherwise ""
 */
inline std::string format_splice_distance(
    int position,
    int exon_end,
    int next_exon_start) {
    const int from_donor = position - exon_end;
    const int to_acceptor = next_exon_start - position;

    // Ties go to the donor side.
    const bool nearer_donor = from_donor > 0 && from_donor <= to_acceptor;
    if (nearer_donor) {
        return "+" + std::to_string(from_donor);
    }
    if (to_acceptor <= 0) {
        return "";
    }
    return "-" + std::to_string(to_acceptor);
}
// normalize_chrom function · cpp · L28-L33 (6 LOC)include/file_parsers.hpp
/**
 * Strip a leading "chr" from a chromosome name.
 * Only names longer than the prefix itself are changed, so "chr" alone is
 * returned as is. The match is case-sensitive.
 * @param chrom Chromosome name
 * @return Name without the "chr" prefix, or the input unchanged
 */
inline std::string normalize_chrom(const std::string& chrom) {
    const bool has_prefix = chrom.length() > 3 && chrom.compare(0, 3, "chr") == 0;
    if (!has_prefix) {
        return chrom;
    }
    return chrom.substr(3);
}
// TabixTSVReader class · cpp · L43-L102 (60 LOC)include/file_parsers.hpp
class TabixTSVReader {
public:
/**
* Open a tabix-indexed TSV file
* @param path Path to .tsv.gz or .txt.gz file (must have .tbi index)
* @param chrom_col 0-based column index for chromosome
* @param pos_col 0-based column index for position
* @param columns Column names to extract (empty = use header)
*/
TabixTSVReader(
const std::string& path,
int chrom_col = 0,
int pos_col = 1,
const std::vector<std::string>& columns = {}
);
~TabixTSVReader();
// Prevent copying
TabixTSVReader(const TabixTSVReader&) = delete;
TabixTSVReader& operator=(const TabixTSVReader&) = delete;
/**
* Query records at a specific position
* @return Vector of row maps (column_name -> value)
*/
std::vector<std::map<std::string, std::string>> query(
const std::string& chrom,
int pos
);
/**
* Query records in a range
*/
std::vector<std::map<std::string, std::strinBigWigReader class · cpp · L112-L184 (73 LOC)include/file_parsers.hpp
class BigWigReader {
public:
/**
* Open a bigWig file
* @param path Path to .bw or .bigWig file
*/
explicit BigWigReader(const std::string& path);
~BigWigReader();
// Prevent copying
BigWigReader(const BigWigReader&) = delete;
BigWigReader& operator=(const BigWigReader&) = delete;
/**
* Get value at a specific position
* @return Score value or nullopt if not available
*/
std::optional<double> get_value(const std::string& chrom, int pos) const;
/**
* Get values for a range
* @return Vector of values, NaN for missing positions
*/
std::vector<double> get_values(
const std::string& chrom,
int start,
int end
) const;
/**
* Get mean value for a range
*/
std::optional<double> get_mean(
const std::string& chrom,
int start,
int end
) const;
/**
* Get max value for a range
*/
std::optional<double> get_max(
cGFF3Database class · cpp · L216-L276 (61 LOC)include/file_parsers.hpp
class GFF3Database {
public:
/**
* Load GFF3 file
* @param path Path to .gff3 or .gff3.gz file
* @param feature_types Feature types to load (empty = all)
*/
explicit GFF3Database(
const std::string& path,
const std::set<std::string>& feature_types = {}
);
~GFF3Database();
/**
* Query features overlapping a position
*/
std::vector<const GFF3Feature*> query(
const std::string& chrom,
int pos
) const;
/**
* Query features overlapping a range
*/
std::vector<const GFF3Feature*> query(
const std::string& chrom,
int start,
int end
) const;
/**
* Query features by type
*/
std::vector<const GFF3Feature*> query_by_type(
const std::string& chrom,
int start,
int end,
const std::string& type
) const;
/**
* Get all feature types loaded
*/
std::set<std::string> get_feature_types() const;
Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
IntervalTree class · cpp · L286-L352 (67 LOC)include/file_parsers.hpp
template<typename T>
class IntervalTree {
public:
IntervalTree() = default;
/**
* Insert an interval with associated data
* @param start Interval start (inclusive)
* @param end Interval end (inclusive)
* @param data Data to associate with interval
*/
void insert(int start, int end, T data);
/**
* Build the tree (call after all insertions)
* Must be called before querying
*/
void build();
/**
* Query intervals overlapping a point
*/
std::vector<T> query(int point) const;
/**
* Query intervals overlapping a range
*/
std::vector<T> query(int start, int end) const;
/**
* Check if tree is built
*/
bool is_built() const { return built_; }
/**
* Get number of intervals
*/
size_t size() const { return intervals_.size(); }
/**
* Clear all intervals
*/
void clear();
private:
struct Interval {
int start;
int end;
parse_filter_operator function · cpp · L48-L66 (19 LOC)include/filter_vep.hpp
inline FilterOperator parse_filter_operator(const std::string& op_str) {
std::string lower = op_str;
for (size_t i = 0; i < lower.size(); ++i) {
lower[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(lower[i])));
}
if (lower == "eq" || lower == "=" || lower == "is") return FilterOperator::EQUALS;
if (lower == "ne" || lower == "!=") return FilterOperator::NOT_EQUALS;
if (lower == "gt" || lower == ">") return FilterOperator::GREATER;
if (lower == "ge" || lower == ">=") return FilterOperator::GREATER_EQ;
if (lower == "lt" || lower == "<") return FilterOperator::LESS;
if (lower == "le" || lower == "<=") return FilterOperator::LESS_EQ;
if (lower == "contains" || lower == "match") return FilterOperator::CONTAINS;
if (lower == "in") return FilterOperator::IN;
if (lower == "exists" || lower == "defined") return FilterOperator::EXISTS;
if (lower == "regex" || lower == "re") return FilterOperator::REGEX;
return Filtehas_any_filter method · cpp · L110-L122 (13 LOC)include/filter_vep.hpp
/**
 * Report whether any filtering criterion is configured.
 * Collections count when non-empty, numeric thresholds when >= 0, and
 * switches when set.
 * @return true if at least one criterion is active
 */
bool has_any_filter() const {
    const bool has_lists = !conditions.empty() ||
                           !consequence_filter.empty() ||
                           !impact_filter.empty() ||
                           !gene_filter.empty() ||
                           !biotype_filter.empty();
    if (has_lists) {
        return true;
    }
    const bool has_thresholds = min_af >= 0 || max_af >= 0 ||
                                min_cadd >= 0 || min_revel >= 0;
    if (has_thresholds) {
        return true;
    }
    return coding_only || exclude_intergenic ||
           exclude_intronic || canonical_only ||
           mane_only || pick_one;
}
// get method · cpp · L131-L138 (8 LOC)include/filter_vep.hpp
/**
 * Look up a field value by name.
 * @param field Field name
 * @return Stored value, or "" when the field is not present
 */
std::string get(const std::string& field) const {
    const auto entry = fields.find(field);
    if (entry == fields.end()) {
        return "";
    }
    return entry->second;
}
// has method · cpp · L139-L142 (4 LOC)include/filter_vep.hpp
/**
 * Check whether a field is present with a non-empty value.
 * @param field Field name
 * @return true only if the field exists and its value is not empty
 */
bool has(const std::string& field) const {
    const auto entry = fields.find(field);
    if (entry == fields.end()) {
        return false;
    }
    return !entry->second.empty();
}
// get_numeric method · cpp · L143-L154 (12 LOC)include/filter_vep.hpp
/**
 * Read a field as a floating-point number.
 * @param field Field name
 * @return Parsed value, or quiet NaN when the value is empty, is one of the
 *         placeholders ".", "NA", "NaN", or cannot be parsed by std::stod
 */
double get_numeric(const std::string& field) const {
    const double missing = std::numeric_limits<double>::quiet_NaN();
    const std::string raw = get(field);

    const bool is_placeholder = raw.empty() || raw == "." || raw == "NA" || raw == "NaN";
    if (is_placeholder) {
        return missing;
    }
    try {
        return std::stod(raw);
    } catch (...) {
        // Any conversion failure is reported as "no value".
        return missing;
    }
}
// apply_condition function · cpp · L160-L243 (84 LOC)include/filter_vep.hpp
inline bool apply_condition(const FilterableRecord& record, const FilterCondition& cond) {
std::string value = record.get(cond.field);
bool result = false;
// Handle EXISTS operator specially
if (cond.op == FilterOperator::EXISTS) {
result = record.has(cond.field);
return cond.negated ? !result : result;
}
if (cond.op == FilterOperator::NOT_EXISTS) {
result = !record.has(cond.field);
return cond.negated ? !result : result;
}
// Try numeric comparison first
bool is_numeric = true;
double num_value = 0, num_target = 0;
if (value.empty() || value == "." || value == "NA") {
is_numeric = false;
} else {
try {
num_value = std::stod(value);
num_target = std::stod(cond.value);
} catch (...) {
is_numeric = false;
}
}
if (cond.op == FilterOperator::EQUALS) {
if (is_numeric) {
result = (std::abs(num_value - num_target)apply_filter function · cpp · L248-L394 (147 LOC)include/filter_vep.hpp
inline bool apply_filter(const FilterableRecord& record, const FilterConfig& config) {
// Quick filters first
if (!config.consequence_filter.empty()) {
std::string consequence = record.get("CONSEQUENCE");
if (consequence.empty()) consequence = record.get("Consequence");
bool found = false;
for (auto it = config.consequence_filter.begin(); it != config.consequence_filter.end(); ++it) {
if (consequence.find(*it) != std::string::npos) {
found = true;
break;
}
}
if (!found) return false;
}
if (!config.impact_filter.empty()) {
std::string impact = record.get("IMPACT");
if (impact.empty()) impact = record.get("Impact");
if (config.impact_filter.count(impact) == 0) {
return false;
}
}
if (!config.gene_filter.empty()) {
std::string gene = record.get("GENE");
if (gene.empty()) gene = record.get("Gene");
Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)
parse_filter_expression function · cpp · L404-L504 (101 LOC)include/filter_vep.hpp
inline FilterCondition parse_filter_expression(const std::string& expr) {
FilterCondition cond;
// Find operator
std::vector<std::string> operators;
operators.push_back(" is ");
operators.push_back(" eq ");
operators.push_back(" ne ");
operators.push_back(" gt ");
operators.push_back(" ge ");
operators.push_back(" lt ");
operators.push_back(" le ");
operators.push_back(" contains ");
operators.push_back(" in ");
operators.push_back(" match ");
operators.push_back(" exists");
operators.push_back(">=");
operators.push_back("<=");
operators.push_back("!=");
operators.push_back(">");
operators.push_back("<");
operators.push_back("=");
size_t op_pos = std::string::npos;
std::string found_op;
for (size_t i = 0; i < operators.size(); ++i) {
size_t pos = expr.find(operators[i]);
if (pos != std::string::npos && (op_pos == std::string::npos || pos < op_pos)) {
op_pos = pos;
load_gene_list function · cpp · L509-L541 (33 LOC)include/filter_vep.hpp
/**
 * Load a set of gene names from a text file, one entry per line.
 * Surrounding whitespace is trimmed, blank lines and lines whose first
 * non-blank character is '#' are skipped, and for tab-separated lines only
 * the first column (trimmed again) is kept.
 * @param filepath Path of the file to read
 * @return Unique gene names; empty when the file cannot be opened
 */
inline std::unordered_set<std::string> load_gene_list(const std::string& filepath) {
    std::unordered_set<std::string> genes;
    std::ifstream file(filepath);
    if (!file.is_open()) {
        return genes;
    }

    // std::isspace requires a value representable as unsigned char; passing
    // a plain (possibly negative) char is undefined behavior.
    const auto trim = [](std::string& text) {
        std::size_t first = 0;
        while (first < text.size() &&
               std::isspace(static_cast<unsigned char>(text[first]))) {
            ++first;
        }
        std::size_t last = text.size();
        while (last > first &&
               std::isspace(static_cast<unsigned char>(text[last - 1]))) {
            --last;
        }
        text = text.substr(first, last - first);
    };

    std::string line;
    while (std::getline(file, line)) {
        // Trim first so indented comments and whitespace-only lines are
        // recognized as such.
        trim(line);
        if (line.empty() || line[0] == '#') {
            continue;
        }
        // Handle TSV format (take first column)
        const std::size_t tab_pos = line.find('\t');
        if (tab_pos != std::string::npos) {
            line.erase(tab_pos);
            trim(line);  // drop padding left between the name and the tab
        }
        if (!line.empty()) {
            genes.insert(line);
        }
    }
    return genes;
}
// parse_tsv_header function · cpp · L546-L557 (12 LOC)include/filter_vep.hpp
/**
 * Map tab-separated column names to their 0-based index.
 * Trailing line terminators ('\r', '\n') are removed first so that a header
 * read from a CRLF file does not leave "\r" on the last column name.
 * When a name occurs more than once the last index wins.
 * @param header_line Header line of a TSV file
 * @return Column name -> column index
 */
inline std::map<std::string, int> parse_tsv_header(const std::string& header_line) {
    std::map<std::string, int> col_map;

    std::string::size_type content_len = header_line.size();
    while (content_len > 0 &&
           (header_line[content_len - 1] == '\r' || header_line[content_len - 1] == '\n')) {
        --content_len;
    }

    std::istringstream iss(header_line.substr(0, content_len));
    std::string col;
    int idx = 0;
    while (std::getline(iss, col, '\t')) {
        col_map[col] = idx++;
    }
    return col_map;
}
// parse_tsv_record function · cpp · L562-L581 (20 LOC)include/filter_vep.hpp
/**
 * Split one TSV data line into a record keyed by column name.
 * original_line keeps the line exactly as passed in. Trailing line
 * terminators ('\r', '\n') are ignored when splitting, so a row read from a
 * CRLF file does not carry "\r" in its last value.
 * @param line Data line to parse
 * @param col_map Column name -> 0-based column index
 * @return Record holding a value for every mapped column that exists in the
 *         line; columns with a negative or out-of-range index are left out
 */
inline FilterableRecord parse_tsv_record(const std::string& line,
                                         const std::map<std::string, int>& col_map) {
    FilterableRecord record;
    record.original_line = line;

    std::string::size_type content_len = line.size();
    while (content_len > 0 &&
           (line[content_len - 1] == '\r' || line[content_len - 1] == '\n')) {
        --content_len;
    }

    std::vector<std::string> fields;
    std::istringstream iss(line.substr(0, content_len));
    std::string field;
    while (std::getline(iss, field, '\t')) {
        fields.push_back(field);
    }

    const int field_count = static_cast<int>(fields.size());
    for (const auto& column : col_map) {
        const int index = column.second;
        // The map is caller-supplied, so reject negative indices as well as
        // columns missing from a short row.
        if (index >= 0 && index < field_count) {
            record.fields[column.first] = fields[index];
        }
    }
    return record;
}
// filter_tsv_file function · cpp · L586-L630 (45 LOC)include/filter_vep.hpp
inline int filter_tsv_file(const std::string& input_path,
const std::string& output_path,
const FilterConfig& config) {
std::ifstream input(input_path);
if (!input.is_open()) {
return -1;
}
std::ofstream output(output_path);
if (!output.is_open()) {
return -1;
}
std::string line;
std::map<std::string, int> col_map;
int lines_passed = 0;
int lines_total = 0;
while (std::getline(input, line)) {
// Handle header
if (line.empty()) continue;
if (line[0] == '#') {
output << line << "\n";
continue;
}
// First non-comment line is header
if (col_map.empty()) {
col_map = parse_tsv_header(line);
output << line << "\n";
continue;
}
lines_total++;
FilterableRecord record = parse_tsv_record(line, col_map);
if (apply_filter(record, config)) {
has_data method · cpp · L68-L71 (4 LOC)include/gene_constraint.hpp
/**
 * Report whether any constraint score is set.
 * A score counts as set when it is >= 0.
 * @return true if pLI or oe_lof_upper is set
 */
bool has_data() const {
    if (pLI >= 0) {
        return true;
    }
    return oe_lof_upper >= 0;
}
// is_constrained method · cpp · L72-L78 (7 LOC)include/gene_constraint.hpp
/**
 * Apply the common constraint thresholds: pLI >= 0.9 or LOEUF < 0.35.
 * oe_lof_upper is only considered when it is >= 0.
 * @return true if either threshold is met
 */
bool is_constrained() const {
    const bool high_pli = pLI >= 0.9;
    const bool low_loeuf = oe_lof_upper >= 0 && oe_lof_upper < 0.35;
    return high_pli || low_loeuf;
}
// get_constraint_level method · cpp · L79-L91 (13 LOC)include/gene_constraint.hpp
/**
 * Classify the gene by its constraint scores.
 * Tiers are tested from strictest to loosest; oe_lof_upper is only
 * considered when it is >= 0.
 * @return "highly_constrained", "moderately_constrained", "tolerant", or
 *         "unknown" when neither score is set
 */
std::string get_constraint_level() const {
    const bool loeuf_known = oe_lof_upper >= 0;

    const bool is_high = pLI >= 0.9 || (loeuf_known && oe_lof_upper < 0.35);
    if (is_high) {
        return "highly_constrained";
    }
    const bool is_moderate = pLI >= 0.5 || (loeuf_known && oe_lof_upper < 0.6);
    if (is_moderate) {
        return "moderately_constrained";
    }
    const bool any_score = pLI >= 0 || loeuf_known;
    if (any_score) {
        return "tolerant";
    }
    return "unknown";
}
// All rows scored by the Repobility analyzer (https://repobility.com)
GeneConstraintDB class · cpp · L99-L387 (289 LOC)include/gene_constraint.hpp
class GeneConstraintDB {
public:
GeneConstraintDB() : loaded_(false) {}
/**
* Load gnomAD constraint file
* Expected format (TSV with header):
* gene transcript pLI oe_lof_upper ...
*/
bool load_gnomad_constraint(const std::string& filepath) {
std::ifstream file(filepath);
if (!file.is_open()) {
return false;
}
std::string line;
std::map<std::string, int> col_map;
// Read header
if (std::getline(file, line)) {
std::istringstream iss(line);
std::string col;
int idx = 0;
while (std::getline(iss, col, '\t')) {
col_map[col] = idx++;
}
}
// Validate required columns
bool has_gene = col_map.count("gene") > 0 || col_map.count("gene_symbol") > 0;
if (!has_gene) {
return false;
}
// Read data rows
while (std::getline(file, line)) {
load_gnomad_constraint method · cpp · L108-L249 (142 LOC)include/gene_constraint.hpp
bool load_gnomad_constraint(const std::string& filepath) {
std::ifstream file(filepath);
if (!file.is_open()) {
return false;
}
std::string line;
std::map<std::string, int> col_map;
// Read header
if (std::getline(file, line)) {
std::istringstream iss(line);
std::string col;
int idx = 0;
while (std::getline(iss, col, '\t')) {
col_map[col] = idx++;
}
}
// Validate required columns
bool has_gene = col_map.count("gene") > 0 || col_map.count("gene_symbol") > 0;
if (!has_gene) {
return false;
}
// Read data rows
while (std::getline(file, line)) {
if (line.empty()) continue;
std::vector<std::string> fields;
std::istringstream iss(line);
std::string field;
while (std::getline(iss, field, '\t')) {
fielload_pli_scores method · cpp · L255-L289 (35 LOC)include/gene_constraint.hpp
/**
 * Load pLI scores from a whitespace-separated "<gene> <pLI>" file.
 * Empty lines and lines starting with '#' are skipped, as is the first
 * remaining line that contains "gene" or "pLI" (treated as the header).
 * Lines that do not parse as a name followed by a number are ignored.
 * A gene that already has an entry keeps its other fields; only its symbol
 * and pLI are updated, matching how load_loeuf_scores merges its values.
 * @param filepath Path of the score file
 * @return false if the file cannot be opened, true otherwise
 */
bool load_pli_scores(const std::string& filepath) {
    std::ifstream file(filepath);
    if (!file.is_open()) {
        return false;
    }
    std::string line;
    bool has_header = false;
    while (std::getline(file, line)) {
        if (line.empty() || line[0] == '#') continue;
        // Skip header
        if (!has_header && (line.find("gene") != std::string::npos ||
                            line.find("pLI") != std::string::npos)) {
            has_header = true;
            continue;
        }
        std::istringstream iss(line);
        std::string gene;
        double pli;
        if (!(iss >> gene >> pli)) continue;
        // operator[] creates a default entry for new genes and returns the
        // existing one otherwise, so previously loaded fields survive.
        GeneConstraint& constraint = gene_data_[gene];
        constraint.gene_symbol = gene;
        constraint.pLI = pli;
    }
    loaded_ = true;
    return true;
}
// load_loeuf_scores method · cpp · L295-L333 (39 LOC)include/gene_constraint.hpp
bool load_loeuf_scores(const std::string& filepath) {
std::ifstream file(filepath);
if (!file.is_open()) {
return false;
}
std::string line;
bool has_header = false;
while (std::getline(file, line)) {
if (line.empty() || line[0] == '#') continue;
if (!has_header && (line.find("gene") != std::string::npos ||
line.find("LOEUF") != std::string::npos ||
line.find("oe_lof") != std::string::npos)) {
has_header = true;
continue;
}
std::istringstream iss(line);
std::string gene;
double loeuf;
if (!(iss >> gene >> loeuf)) continue;
// Check if we already have data for this gene
if (gene_data_.count(gene) > 0) {
gene_data_[gene].oe_lof_upper = loeuf;
} else {
GeneConstraint constrainget_by_symbol method · cpp · L338-L344 (7 LOC)include/gene_constraint.hpp
/**
 * Look up constraint data by gene symbol.
 * @param gene_symbol Gene symbol used as key
 * @return Copy of the stored entry, or a default-constructed GeneConstraint
 *         when the symbol is not present
 */
GeneConstraint get_by_symbol(const std::string& gene_symbol) const {
    const auto entry = gene_data_.find(gene_symbol);
    if (entry == gene_data_.end()) {
        return GeneConstraint();
    }
    return entry->second;
}
// get_by_gene_id method · cpp · L349-L355 (7 LOC)include/gene_constraint.hpp
/**
 * Look up constraint data by gene ID.
 * @param gene_id Gene identifier used as key
 * @return Copy of the stored entry, or a default-constructed GeneConstraint
 *         when the ID is not present
 */
GeneConstraint get_by_gene_id(const std::string& gene_id) const {
    const auto entry = gene_id_data_.find(gene_id);
    if (entry == gene_id_data_.end()) {
        return GeneConstraint();
    }
    return entry->second;
}
// get_constrained_genes method · cpp · L370-L381 (12 LOC)include/gene_constraint.hpp
std::vector<std::string> get_constrained_genes(double pli_threshold = 0.9,
double loeuf_threshold = 0.35) const {
std::vector<std::string> result;
for (auto it = gene_data_.begin(); it != gene_data_.end(); ++it) {
const GeneConstraint& c = it->second;
if (c.pLI >= pli_threshold ||
(c.oe_lof_upper >= 0 && c.oe_lof_upper < loeuf_threshold)) {
result.push_back(it->first);
}
}
return result;
}get_gene_constraint_db function · cpp · L392-L395 (4 LOC)include/gene_constraint.hpp
/**
 * Access the shared GeneConstraintDB instance.
 * The instance is a function-local static, created on the first call.
 * @return Reference to the same object on every call
 */
inline GeneConstraintDB& get_gene_constraint_db() {
    static GeneConstraintDB shared_instance;
    return shared_instance;
}
// Repobility's GitHub App fixes findings like these · https://github.com/apps/repobility-bot
format_constraint_score function · cpp · L400-L424 (25 LOC)include/gene_constraint.hpp
/**
 * Format a constraint score for output.
 * The value is printed in fixed notation with `precision` decimals, then
 * trailing zeros and a dangling decimal point are removed (0.5000 -> "0.5",
 * 1.0000 -> "1").
 * @param value Score to format
 * @param precision Number of decimals printed before trimming
 * @return Formatted number, or "." when value is negative or NaN
 */
inline std::string format_constraint_score(double value, int precision = 4) {
    if (value < 0 || std::isnan(value)) {
        return ".";
    }
    std::ostringstream oss;
    oss.precision(precision);
    oss << std::fixed << value;
    std::string result = oss.str();

    // Remove trailing zeros. The search stops at the decimal point at the
    // latest, so digits of the integer part are never touched; whole numbers
    // end up as "1." here and lose the point in the next step.
    if (result.find('.') != std::string::npos) {
        const std::size_t last_kept = result.find_last_not_of('0');
        result.erase(last_kept + 1);
        if (!result.empty() && result[result.size() - 1] == '.') {
            result.erase(result.size() - 1);
        }
    }
    return result;
}
// aa_three_to_one function · cpp · L92-L132 (41 LOC)include/hgvs_parser.hpp
inline std::string aa_three_to_one(const std::string& three_letter) {
static const std::unordered_map<std::string, std::string> aa_map = [] {
std::unordered_map<std::string, std::string> m;
m["Ala"] = "A"; m["ALA"] = "A";
m["Arg"] = "R"; m["ARG"] = "R";
m["Asn"] = "N"; m["ASN"] = "N";
m["Asp"] = "D"; m["ASP"] = "D";
m["Cys"] = "C"; m["CYS"] = "C";
m["Gln"] = "Q"; m["GLN"] = "Q";
m["Glu"] = "E"; m["GLU"] = "E";
m["Gly"] = "G"; m["GLY"] = "G";
m["His"] = "H"; m["HIS"] = "H";
m["Ile"] = "I"; m["ILE"] = "I";
m["Leu"] = "L"; m["LEU"] = "L";
m["Lys"] = "K"; m["LYS"] = "K";
m["Met"] = "M"; m["MET"] = "M";
m["Phe"] = "F"; m["PHE"] = "F";
m["Pro"] = "P"; m["PRO"] = "P";
m["Ser"] = "S"; m["SER"] = "S";
m["Thr"] = "T"; m["THR"] = "T";
m["Trp"] = "W"; m["TRP"] = "W";
m["Tyr"] = "Y"; m["TYR"] = "Y";
m["Val"] = "V"; m["VAL"] = "V"aa_one_to_three function · cpp · L137-L156 (20 LOC)include/hgvs_parser.hpp
/**
 * Convert a one-letter amino acid code to its three-letter code.
 * Only the upper-case letters listed below and '*' are recognized.
 * @param one_letter One-letter code
 * @return Three-letter code; "Xaa" for anything not listed
 */
inline std::string aa_one_to_three(char one_letter) {
    switch (one_letter) {
        case 'A': return "Ala";
        case 'R': return "Arg";
        case 'N': return "Asn";
        case 'D': return "Asp";
        case 'C': return "Cys";
        case 'Q': return "Gln";
        case 'E': return "Glu";
        case 'G': return "Gly";
        case 'H': return "His";
        case 'I': return "Ile";
        case 'L': return "Leu";
        case 'K': return "Lys";
        case 'M': return "Met";
        case 'F': return "Phe";
        case 'P': return "Pro";
        case 'S': return "Ser";
        case 'T': return "Thr";
        case 'W': return "Trp";
        case 'Y': return "Tyr";
        case 'V': return "Val";
        case '*': return "Ter";
        case 'U': return "Sec";
        case 'O': return "Pyl";
        case 'X': return "Xaa";
        default:  return "Xaa";
    }
}
// page 1 / 9next ›