Function bodies 431 total
parse_hgvs_type function · cpp · L161-L169 (9 LOC)include/hgvs_parser.hpp
// Translate a one-letter HGVS coordinate prefix ("g", "c", "n", "p", "m", "r")
// into the corresponding HGVSType.
//
// type_str: the prefix text without the trailing dot.
// Returns HGVSType::UNKNOWN for anything else, including empty or
// multi-character input.
inline HGVSType parse_hgvs_type(const std::string& type_str) {
    // Every recognised prefix is exactly one character long.
    if (type_str.size() != 1) {
        return HGVSType::UNKNOWN;
    }
    switch (type_str[0]) {
        case 'g': return HGVSType::GENOMIC;
        case 'c': return HGVSType::CODING;
        case 'n': return HGVSType::NONCODING;
        case 'p': return HGVSType::PROTEIN;
        case 'm': return HGVSType::MITOCHONDRIAL;
        case 'r': return HGVSType::RNA;
        default:  return HGVSType::UNKNOWN;
    }
}
// hgvs_type_to_string function · cpp · L174-L182 (9 LOC)include/hgvs_parser.hpp
// Inverse of parse_hgvs_type: produce the one-letter prefix for an HGVSType.
//
// Returns "?" for any value that has no prefix (e.g. HGVSType::UNKNOWN).
inline std::string hgvs_type_to_string(HGVSType type) {
    switch (type) {
        case HGVSType::GENOMIC:       return "g";
        case HGVSType::CODING:        return "c";
        case HGVSType::NONCODING:     return "n";
        case HGVSType::PROTEIN:       return "p";
        case HGVSType::MITOCHONDRIAL: return "m";
        case HGVSType::RNA:           return "r";
        default:                      return "?";
    }
}
// parse_genomic_hgvs function · cpp · L191-L279 (89 LOC)include/hgvs_parser.hpp
inline HGVSParseResult parse_genomic_hgvs(const std::string& reference, const std::string& change) {
HGVSParseResult result;
result.reference_id = reference;
result.hgvs_type = HGVSType::GENOMIC;
// Parse substitution: 140753336A>T
static const std::regex sub_regex("(\\d+)([ACGTacgt])>([ACGTacgt])");
std::smatch match;
if (std::regex_match(change, match, sub_regex)) {
result.variant_type = HGVSVariantType::SUBSTITUTION;
result.start_pos = std::stoi(match[1].str());
result.end_pos = result.start_pos;
result.ref_allele = match[2].str();
result.alt_allele = match[3].str();
// Convert to uppercase
for (size_t i = 0; i < result.ref_allele.size(); ++i) {
result.ref_allele[i] = static_cast<char>(std::toupper(static_cast<unsigned char>(result.ref_allele[i])));
}
for (size_t i = 0; i < result.alt_allele.size(); ++i) {
result.alt_allele[i] = static_cast<char>(std::touppeparse_coding_hgvs function · cpp · L289-L426 (138 LOC)include/hgvs_parser.hpp
inline HGVSParseResult parse_coding_hgvs(const std::string& reference, const std::string& change) {
HGVSParseResult result;
result.reference_id = reference;
result.hgvs_type = HGVSType::CODING;
// Parse substitution with optional intronic offset: 803C>T or 123+5G>A
static const std::regex sub_regex("(-?\\*?\\d+)([+-]\\d+)?([ACGTacgt])>([ACGTacgt])");
std::smatch match;
if (std::regex_match(change, match, sub_regex)) {
result.variant_type = HGVSVariantType::SUBSTITUTION;
std::string pos_str = match[1].str();
// Handle UTR positions (*123 for 3'UTR, -123 for 5'UTR)
if (!pos_str.empty() && pos_str[0] == '*') {
result.start_pos = std::stoi(pos_str.substr(1));
// Mark as 3'UTR position (positive value with flag)
} else {
result.start_pos = std::stoi(pos_str);
}
result.end_pos = result.start_pos;
if (match[2].matched) {
result.intron_offset = std::stparse_protein_hgvs function · cpp · L437-L540 (104 LOC)include/hgvs_parser.hpp
inline HGVSParseResult parse_protein_hgvs(const std::string& reference, const std::string& change) {
HGVSParseResult result;
result.reference_id = reference;
result.hgvs_type = HGVSType::PROTEIN;
// Parse missense: Val600Glu or V600E
// Three-letter: ([A-Z][a-z]{2})(\d+)([A-Z][a-z]{2})
static const std::regex missense_3letter("([A-Z][a-z]{2})(\\d+)([A-Z][a-z]{2}|\\?)");
std::smatch match;
if (std::regex_match(change, match, missense_3letter)) {
result.variant_type = HGVSVariantType::SUBSTITUTION;
result.ref_aa = match[1].str();
result.protein_pos = std::stoi(match[2].str());
result.alt_aa = match[3].str();
result.valid = true;
return result;
}
// One-letter: ([A-Z*])(\d+)([A-Z*?])
static const std::regex missense_1letter("([A-Z*])(\\d+)([A-Z*?])");
if (std::regex_match(change, match, missense_1letter)) {
result.variant_type = HGVSVariantType::SUBSTITUTION;
result.ref_aa = parse_hgvs function · cpp · L550-L605 (56 LOC)include/hgvs_parser.hpp
inline HGVSParseResult parse_hgvs(const std::string& hgvs) {
HGVSParseResult result;
// Find the colon separator
size_t colon_pos = hgvs.find(':');
if (colon_pos == std::string::npos) {
result.error_message = "Invalid HGVS format: missing colon separator";
return result;
}
std::string reference = hgvs.substr(0, colon_pos);
std::string notation = hgvs.substr(colon_pos + 1);
if (notation.size() < 3) {
result.error_message = "Invalid HGVS format: notation too short";
return result;
}
// Parse type (c., g., p., n., m., r.)
char type_char = notation[0];
if (notation[1] != '.') {
result.error_message = "Invalid HGVS format: expected '.' after type";
return result;
}
std::string change = notation.substr(2);
HGVSType hgvs_type = parse_hgvs_type(std::string(1, type_char));
try {
if (hgvs_type == HGVSType::GENOMIC || hgvs_type == HGVSType::MITOCHONDRIAL) {
resis_hgvs_notation function · cpp · L610-L629 (20 LOC)include/hgvs_parser.hpp
// Cheap syntactic test for "<reference>:<type>.<change>" HGVS strings.
//
// Returns true only when the input has a non-empty reference before the first
// colon, and that colon is immediately followed by one of the type letters
// c/g/p/n/m/r and then a dot. The change description itself is not inspected.
inline bool is_hgvs_notation(const std::string& input) {
    const std::string::size_type sep = input.find(':');

    // No colon at all, or nothing in front of it.
    if (sep == std::string::npos || sep == 0) {
        return false;
    }
    // Need at least the type letter and the dot after the colon.
    if (input.size() <= sep + 2) {
        return false;
    }
    if (input[sep + 2] != '.') {
        return false;
    }

    switch (input[sep + 1]) {
        case 'c':
        case 'g':
        case 'p':
        case 'n':
        case 'm':
        case 'r':
            return true;
        default:
            return false;
    }
}
// Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
refseq_to_chromosome function · cpp · L634-L718 (85 LOC)include/hgvs_parser.hpp
inline std::string refseq_to_chromosome(const std::string& refseq) {
static const std::unordered_map<std::string, std::string> refseq_map = [] {
std::unordered_map<std::string, std::string> m;
// GRCh38 chromosome accessions
m["NC_000001.11"] = "1";
m["NC_000002.12"] = "2";
m["NC_000003.12"] = "3";
m["NC_000004.12"] = "4";
m["NC_000005.10"] = "5";
m["NC_000006.12"] = "6";
m["NC_000007.14"] = "7";
m["NC_000008.11"] = "8";
m["NC_000009.12"] = "9";
m["NC_000010.11"] = "10";
m["NC_000011.10"] = "11";
m["NC_000012.12"] = "12";
m["NC_000013.11"] = "13";
m["NC_000014.9"] = "14";
m["NC_000015.10"] = "15";
m["NC_000016.10"] = "16";
m["NC_000017.11"] = "17";
m["NC_000018.10"] = "18";
m["NC_000019.10"] = "19";
m["NC_000020.11"] = "20";
m["NC_000021.9"] = "21";
m["NC_000022.11"] = "22";
m["NC_0000get_chrom_to_refseq function · cpp · L723-L755 (33 LOC)include/hgvs_parser.hpp
inline const std::unordered_map<std::string, std::string>& get_chrom_to_refseq() {
static const std::unordered_map<std::string, std::string> chrom_to_refseq = [] {
std::unordered_map<std::string, std::string> m;
m["1"] = "NC_000001.11";
m["2"] = "NC_000002.12";
m["3"] = "NC_000003.12";
m["4"] = "NC_000004.12";
m["5"] = "NC_000005.10";
m["6"] = "NC_000006.12";
m["7"] = "NC_000007.14";
m["8"] = "NC_000008.11";
m["9"] = "NC_000009.12";
m["10"] = "NC_000010.11";
m["11"] = "NC_000011.10";
m["12"] = "NC_000012.12";
m["13"] = "NC_000013.11";
m["14"] = "NC_000014.9";
m["15"] = "NC_000015.10";
m["16"] = "NC_000016.10";
m["17"] = "NC_000017.11";
m["18"] = "NC_000018.10";
m["19"] = "NC_000019.10";
m["20"] = "NC_000020.11";
m["21"] = "NC_000021.9";
m["22"] = "NC_000022.11";
m["X"] = "NC_000023.11";
chrom_to_refseq_lookup function · cpp · L760-L772 (13 LOC)include/hgvs_parser.hpp
inline std::string chrom_to_refseq_lookup(const std::string& chrom) {
std::string norm_chrom = chrom;
if (norm_chrom.size() > 3 && norm_chrom.substr(0, 3) == "chr") {
norm_chrom = norm_chrom.substr(3);
}
const auto& chrom_to_refseq = get_chrom_to_refseq();
auto it = chrom_to_refseq.find(norm_chrom);
if (it != chrom_to_refseq.end()) {
return it->second;
}
return chrom; // Use as-is
}generate_hgvsg function · cpp · L777-L845 (69 LOC)include/hgvs_parser.hpp
inline std::string generate_hgvsg(const std::string& chrom, int pos,
const std::string& ref, const std::string& alt) {
std::string result;
// Get RefSeq accession via shared helper
std::string refseq = chrom_to_refseq_lookup(chrom);
result = refseq + ":g.";
if (ref.size() == 1 && alt.size() == 1) {
// Substitution
result += std::to_string(pos) + ref + ">" + alt;
} else if (alt.empty() || (ref.size() > alt.size() && alt.size() <= 1)) {
// Deletion
int del_start = pos;
int del_end = pos + static_cast<int>(ref.size()) - 1;
// Strip VCF anchor base if present
if (!alt.empty() && alt.size() == 1 && ref.size() > 1 && ref[0] == alt[0]) {
del_start = pos + 1;
}
if (del_start == del_end) {
result += std::to_string(del_start) + "del";
} else {
result += std::to_string(del_start) + "_" + std::to_string(del_end) + "del";
generate_spdi function · cpp · L851-L860 (10 LOC)include/hgvs_parser.hpp
// Build an SPDI string "<sequence>:<position>:<deletion>:<insertion>".
//
// chrom is mapped through chrom_to_refseq_lookup(); pos is shifted down by one
// because SPDI uses 0-based coordinates; ref and alt are emitted verbatim.
inline std::string generate_spdi(const std::string& chrom, int pos,
                                 const std::string& ref, const std::string& alt) {
    std::string spdi = chrom_to_refseq_lookup(chrom);
    spdi += ":";
    spdi += std::to_string(pos - 1);
    spdi += ":";
    spdi += ref;
    spdi += ":";
    spdi += alt;
    return spdi;
}
// parse_spdi function · cpp · L878-L932 (55 LOC)include/hgvs_parser.hpp
inline SPDIParseResult parse_spdi(const std::string& spdi) {
SPDIParseResult result;
// Split on colons - expect exactly 4 parts
std::vector<std::string> parts;
size_t start = 0;
size_t pos = spdi.find(':');
while (pos != std::string::npos) {
parts.push_back(spdi.substr(start, pos - start));
start = pos + 1;
pos = spdi.find(':', start);
}
parts.push_back(spdi.substr(start));
if (parts.size() != 4) {
result.error_message = "Invalid SPDI format: expected 4 colon-separated fields";
return result;
}
// Map RefSeq accession to chromosome name
std::string chrom = refseq_to_chromosome(parts[0]);
if (chrom.empty()) {
chrom = parts[0];
}
// Normalize chr prefix
if (chrom.size() > 3 && chrom.substr(0, 3) == "chr") {
chrom = chrom.substr(3);
}
result.chromosome = chrom;
// Convert 0-based SPDI position to 1-based
try {
result.position = std::stoi(pis_spdi_notation function · cpp · L938-L969 (32 LOC)include/hgvs_parser.hpp
inline bool is_spdi_notation(const std::string& input) {
int colon_count = 0;
size_t first_colon = std::string::npos;
size_t second_colon = std::string::npos;
for (size_t i = 0; i < input.size(); ++i) {
if (input[i] == ':') {
colon_count++;
if (colon_count == 1) first_colon = i;
if (colon_count == 2) second_colon = i;
}
}
if (colon_count != 3) return false;
// Check if the second field is numeric (the position)
if (first_colon == std::string::npos || second_colon == std::string::npos) return false;
std::string pos_str = input.substr(first_colon + 1, second_colon - first_colon - 1);
if (pos_str.empty()) return false;
for (size_t i = 0; i < pos_str.size(); ++i) {
if (!std::isdigit(static_cast<unsigned char>(pos_str[i]))) return false;
}
// Distinguish from CHR:POS:REF:ALT by checking if first field looks like a RefSeq accession
// or if the position field is very long (SPparse_ensembl_format function · cpp · L987-L1050 (64 LOC)include/hgvs_parser.hpp
inline EnsemblFormatResult parse_ensembl_format(const std::string& line) {
EnsemblFormatResult result;
std::istringstream iss(line);
std::string chrom, allele, strand;
int start_pos, end_pos;
if (!(iss >> chrom >> start_pos >> end_pos >> allele)) {
result.error_message = "Invalid Ensembl format: need at least 4 fields";
return result;
}
// Strand is optional
if (!(iss >> strand)) {
strand = "+";
}
// Parse allele: REF/ALT
size_t slash_pos = allele.find('/');
if (slash_pos == std::string::npos) {
result.error_message = "Invalid allele format: expected REF/ALT";
return result;
}
result.chromosome = chrom;
result.position = start_pos;
result.ref_allele = allele.substr(0, slash_pos);
result.alt_allele = allele.substr(slash_pos + 1);
// Handle - for deletions/insertions
if (result.ref_allele == "-") result.ref_allele = "";
if (result.alt_allele == "-") result.alt_alCitation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
is_ensembl_format function · cpp · L1057-L1074 (18 LOC)include/hgvs_parser.hpp
// Heuristic check for the whitespace-separated Ensembl default input format:
//   CHROM START END ALLELES [STRAND]
//
// Returns true when the line has at least four fields, the second and third
// consist solely of decimal digits, and the fourth contains a '/'.
inline bool is_ensembl_format(const std::string& line) {
    std::istringstream fields(line);
    std::string chrom_tok, start_tok, end_tok, allele_tok;
    fields >> chrom_tok >> start_tok >> end_tok >> allele_tok;
    if (fields.fail()) {
        return false;
    }

    const auto digits_only = [](const std::string& tok) {
        for (const char ch : tok) {
            if (!std::isdigit(static_cast<unsigned char>(ch))) {
                return false;
            }
        }
        return true;
    };

    return digits_only(start_tok) &&
           digits_only(end_tok) &&
           allele_tok.find('/') != std::string::npos;
}
// is_rest_region_format function · cpp · L1090-L1106 (17 LOC)include/hgvs_parser.hpp
// Heuristic check for the REST-style region pattern CHR:START-END:STRAND/ALLELE.
//
// Only the delimiter skeleton is verified: a ':' followed (in order, each
// strictly after the previous one) by '-', ':' and '/'. Field contents are not
// validated here.
inline bool is_rest_region_format(const std::string& input) {
    const char following_delims[] = {'-', ':', '/'};

    std::string::size_type cursor = input.find(':');
    for (const char delim : following_delims) {
        if (cursor == std::string::npos) {
            return false;
        }
        cursor = input.find(delim, cursor + 1);
    }
    return cursor != std::string::npos;
}
// parse_rest_region function · cpp · L1107-L1143 (37 LOC)include/hgvs_parser.hpp
inline RESTRegionResult parse_rest_region(const std::string& input) {
RESTRegionResult result;
size_t first_colon = input.find(':');
size_t dash = input.find('-', first_colon + 1);
size_t second_colon = input.find(':', dash + 1);
size_t slash = input.find('/', second_colon + 1);
if (first_colon == std::string::npos || dash == std::string::npos ||
second_colon == std::string::npos || slash == std::string::npos) {
result.error_message = "Invalid REST region format";
return result;
}
try {
result.chromosome = input.substr(0, first_colon);
result.position = std::stoi(input.substr(first_colon + 1, dash - first_colon - 1));
result.end_position = std::stoi(input.substr(dash + 1, second_colon - dash - 1));
result.strand = std::stoi(input.substr(second_colon + 1, slash - second_colon - 1));
result.alt_allele = input.substr(slash + 1);
// For single-base substitutions, ref is inferred froparse_output_format function · cpp · L37-L46 (10 LOC)include/output_writer.hpp
// Map a case-insensitive format name to an OutputFormat.
//
// "json" and "vcf" select their respective formats; every other value
// (including the empty string) falls back to OutputFormat::TSV.
inline OutputFormat parse_output_format(const std::string& format) {
    std::string folded;
    folded.reserve(format.size());
    for (const char ch : format) {
        folded.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
    }

    if (folded == "vcf") {
        return OutputFormat::VCF;
    }
    if (folded == "json") {
        return OutputFormat::JSON;
    }
    return OutputFormat::TSV;
}
// add method · cpp · L57-L71 (15 LOC)include/output_writer.hpp
// Fold one annotation into the running statistics.
//
// Always bumps the total; counts the record as annotated when it carries a
// gene symbol; tallies each consequence and the impact by their string names;
// tallies the biotype when one is present.
void add(const VariantAnnotation& ann) {
    ++total_variants;
    if (!ann.gene_symbol.empty()) {
        ++annotated_variants;
    }

    for (const auto& consequence : ann.consequences) {
        ++consequence_counts[consequence_to_string(consequence)];
    }

    ++impact_counts[impact_to_string(ann.impact)];

    if (!ann.biotype.empty()) {
        ++biotype_counts[ann.biotype];
    }
}
// to_string method · cpp · L72-L87 (16 LOC)include/output_writer.hpp
// Render the statistics as a plain-text report: a banner, the two totals,
// then one "name: count" line per consequence and per impact.
// Biotype counts are not included in this report.
std::string to_string() const {
    std::ostringstream report;
    report << "=== Annotation Statistics ===\n"
           << "Total variants: " << total_variants << "\n"
           << "Annotated variants: " << annotated_variants << "\n";

    report << "\nConsequence counts:\n";
    for (auto it = consequence_counts.begin(); it != consequence_counts.end(); ++it) {
        report << " " << it->first << ": " << it->second << "\n";
    }

    report << "\nImpact counts:\n";
    for (auto it = impact_counts.begin(); it != impact_counts.end(); ++it) {
        report << " " << it->first << ": " << it->second << "\n";
    }

    return report.str();
}
// to_json method · cpp · L88-L112 (25 LOC)include/output_writer.hpp
// Render the statistics as a JSON object with the keys "total_variants",
// "annotated_variants", "consequence_counts" and "impact_counts".
// Count-map keys are written verbatim (no escaping is applied here).
std::string to_json() const {
    std::ostringstream doc;
    doc << "{\n"
        << " \"total_variants\": " << total_variants << ",\n"
        << " \"annotated_variants\": " << annotated_variants << ",\n";

    // A separator that is empty for the first entry and "," afterwards
    // produces a comma-separated list without a trailing comma.
    doc << " \"consequence_counts\": {";
    const char* separator = "";
    for (const auto& entry : consequence_counts) {
        doc << separator << "\n \"" << entry.first << "\": " << entry.second;
        separator = ",";
    }
    doc << "\n },\n";

    doc << " \"impact_counts\": {";
    separator = "";
    for (const auto& entry : impact_counts) {
        doc << separator << "\n \"" << entry.first << "\": " << entry.second;
        separator = ",";
    }
    doc << "\n }\n";

    doc << "}";
    return doc.str();
}
// OutputWriter class · cpp · L118-L161 (44 LOC)include/output_writer.hpp
class OutputWriter {
public:
virtual ~OutputWriter() = default;
virtual void write_header(const std::vector<std::string>& custom_columns) = 0;
virtual void write_annotation(const VariantAnnotation& ann,
const std::vector<std::string>& custom_columns) = 0;
virtual void write_annotations(const std::vector<VariantAnnotation>& anns,
const std::vector<std::string>& custom_columns) = 0;
virtual void write_footer() = 0;
virtual void close() = 0;
// Convenience overloads without custom columns
void write_header() { write_header(std::vector<std::string>()); }
void write_annotation(const VariantAnnotation& ann) {
write_annotation(ann, std::vector<std::string>());
}
void write_annotations(const std::vector<VariantAnnotation>& anns) {
write_annotations(anns, std::vector<std::string>());
}
const AnnotationStats& get_stats() const { return stats_; }
void seOpen data scored by Repobility · https://repobility.com
write_annotation method · cpp · L132-L134 (3 LOC)include/output_writer.hpp
// Convenience overload: write a single annotation with no custom columns by
// forwarding to the two-argument overload with an empty column list.
void write_annotation(const VariantAnnotation& ann) {
    const std::vector<std::string> no_custom_columns;
    write_annotation(ann, no_custom_columns);
}
// write_annotations method · cpp · L135-L137 (3 LOC)include/output_writer.hpp
// Convenience overload: write a batch of annotations with no custom columns by
// forwarding to the two-argument overload with an empty column list.
void write_annotations(const std::vector<VariantAnnotation>& anns) {
    const std::vector<std::string> no_custom_columns;
    write_annotations(anns, no_custom_columns);
}
// format_consequence method · cpp · L150-L160 (11 LOC)include/output_writer.hpp
// Produce the consequence column text for one annotation.
//
// With term_style_ == "display", each consequence is converted through
// consequence_to_display_term() and the results are joined with '&'.
// For any other style the annotation's own get_consequence_string() is used.
std::string format_consequence(const VariantAnnotation& ann) const {
    if (term_style_ != "display") {
        return ann.get_consequence_string();
    }

    std::string joined;
    bool need_separator = false;
    for (const auto& consequence : ann.consequences) {
        if (need_separator) {
            joined += "&";
        }
        joined += consequence_to_display_term(consequence);
        need_separator = true;
    }
    return joined;
}
// ends_with_gz function · cpp · L164-L166 (3 LOC)include/output_writer.hpp
// True when path ends in ".gz" and has at least one character before that
// suffix (so the bare name ".gz" does not qualify). Comparison is
// case-sensitive.
inline bool ends_with_gz(const std::string& path) {
    const std::string::size_type len = path.size();
    if (len <= 3) {
        return false;
    }
    return path[len - 3] == '.' && path[len - 2] == 'g' && path[len - 1] == 'z';
}
// format_position_with_total function · cpp · L170-L179 (10 LOC)include/output_writer.hpp
// Format a position as "START", "START-END" or either form followed by
// "/TOTAL".
//
// start/end:  the range is only shown when end is positive and differs from
//             start.
// total_key:  key looked up in ann.custom_annotations; a non-empty value is
//             appended after a '/'.
// empty_val:  returned as-is when start is not positive.
inline std::string format_position_with_total(
    int start, int end, const std::string& total_key,
    const VariantAnnotation& ann, const std::string& empty_val) {
    if (start <= 0) {
        return empty_val;
    }

    std::string text = std::to_string(start);

    const bool is_range = end > 0 && end != start;
    if (is_range) {
        text += "-" + std::to_string(end);
    }

    const auto total = ann.custom_annotations.find(total_key);
    const bool has_total = total != ann.custom_annotations.end() && !total->second.empty();
    if (has_total) {
        text += "/" + total->second;
    }
    return text;
}
// format_protein_position function · cpp · L182-L195 (14 LOC)include/output_writer.hpp
// Format the protein position as "POS" or "POS-END", optionally followed by
// "/N" where N is the "CDS_LENGTH" custom annotation divided by three.
//
// Returns empty_val when ann.protein_position is not positive. A missing,
// empty, non-positive or unparsable CDS_LENGTH simply omits the "/N" part.
inline std::string format_protein_position(const VariantAnnotation& ann, const std::string& empty_val) {
    if (ann.protein_position <= 0) {
        return empty_val;
    }

    std::string text = std::to_string(ann.protein_position);
    const bool is_range = ann.protein_end > 0 && ann.protein_end != ann.protein_position;
    if (is_range) {
        text += "-" + std::to_string(ann.protein_end);
    }

    const auto cds = ann.custom_annotations.find("CDS_LENGTH");
    if (cds == ann.custom_annotations.end() || cds->second.empty()) {
        return text;
    }

    try {
        const int cds_len = std::stoi(cds->second);
        if (cds_len > 0) {
            text += "/" + std::to_string(cds_len / 3);
        }
    } catch (...) {
        // Best effort: any failure while adding the total just leaves it off.
    }
    return text;
}
// TSVWriter class · cpp · L200-L437 (238 LOC)include/output_writer.hpp
class TSVWriter : public OutputWriter {
public:
explicit TSVWriter(const std::string& output_path, bool compress = false)
: output_path_(output_path), compress_(compress), gz_file_(nullptr),
use_stdout_(output_path.empty() || output_path == "-" || output_path == "STDOUT") {
if (use_stdout_) {
compress_ = false; // Cannot compress stdout
} else if (compress_ || ends_with_gz(output_path_)) {
compress_ = true;
gz_file_ = gzopen(output_path_.c_str(), "wb");
if (!gz_file_) {
throw std::runtime_error("Cannot open output file: " + output_path_);
}
} else {
output_.open(output_path_);
if (!output_.is_open()) {
throw std::runtime_error("Cannot open output file: " + output_path_);
}
}
}
~TSVWriter() override {
close();
}
void write_header(const std::vector<std::string>& custom_columns) ove~TSVWriter method · cpp · L221-L224 (4 LOC)include/output_writer.hpp
// Release the output sink on destruction by delegating to close().
~TSVWriter() override {
    this->close();
}
// Powered by Repobility — scan your code at https://repobility.com
write_header method · cpp · L225-L241 (17 LOC)include/output_writer.hpp
// Emit the two-line TSV header (banner comment plus column names) and remember
// the custom column list.
//
// When skip_header_ is set nothing is written and custom_columns_ is left
// untouched.
void write_header(const std::vector<std::string>& custom_columns) override {
    if (!skip_header_) {
        // Perl VEP header comment line
        std::string banner = "## ENSEMBL VARIANT EFFECT PREDICTOR\n";
        // Perl VEP column names
        banner += "#Uploaded_variation\tLocation\tAllele\tGene\tFeature\t";
        banner += "Feature_type\tConsequence\tcDNA_position\tCDS_position\t";
        banner += "Protein_position\tAmino_acids\tCodons\tExisting_variation\tExtra";
        banner += "\n";

        write_string(banner);
        custom_columns_ = custom_columns;
    }
}
// write_annotation method · cpp · L242-L397 (156 LOC)include/output_writer.hpp
void write_annotation(const VariantAnnotation& ann,
const std::vector<std::string>& custom_columns) override {
stats_.add(ann);
std::ostringstream line;
// #Uploaded_variation: Use VCF ID (rs#) if available, else CHR_POS_ALLELES
if (!ann.vcf_id.empty() && ann.vcf_id != ".") {
line << ann.vcf_id << "\t";
} else {
// Ensembl format: CHR_POS_REF/ALT using display alleles
std::string d_ref = ann.display_ref.empty() ? ann.ref_allele : ann.display_ref;
std::string d_alt = ann.display_alt.empty() ? ann.alt_allele : ann.display_alt;
int d_start = ann.display_start > 0 ? ann.display_start : ann.position;
line << ann.chromosome << "_" << d_start << "_" << d_ref << "/" << d_alt << "\t";
}
// Location: CHROM:POS or CHROM:POS-END using display coords
{
int d_start = ann.display_start > 0 ? ann.display_start : ann.positiwrite_annotations method · cpp · L398-L404 (7 LOC)include/output_writer.hpp
void write_annotations(const std::vector<VariantAnnotation>& anns,
const std::vector<std::string>& custom_columns) override {
for (const auto& ann : anns) {
write_annotation(ann, custom_columns);
}
}write_footer method · cpp · L405-L408 (4 LOC)include/output_writer.hpp
// Intentionally a no-op: this writer emits nothing after the last row.
void write_footer() override {
    // TSV has no footer
}close method · cpp · L409-L418 (10 LOC)include/output_writer.hpp
// Close whichever sink is open. The gzip handle is cleared after closing and
// the stream is only closed while open, so repeated calls are harmless.
void close() override {
    if (gz_file_ != nullptr) {
        gzclose(gz_file_);
        gz_file_ = nullptr;
    }
    if (output_.is_open()) output_.close();
}
// write_string method · cpp · L427-L436 (10 LOC)include/output_writer.hpp
// Send s to the active sink: std::cout when writing to stdout, the gzip
// handle when compression is on and the handle is open, otherwise the plain
// output stream.
void write_string(const std::string& s) {
    if (use_stdout_) {
        std::cout << s;
        return;
    }
    if (compress_ && gz_file_) {
        gzwrite(gz_file_, s.c_str(), static_cast<unsigned int>(s.size()));
        return;
    }
    output_ << s;
}
// ~JSONWriter method · cpp · L466-L469 (4 LOC)include/output_writer.hpp
// Release the output sink on destruction by delegating to close().
~JSONWriter() override {
    this->close();
}
// write_header method · cpp · L472-L475 (4 LOC)include/output_writer.hpp
// Open the top-level JSON array unless headers are suppressed. The custom
// column list is not used by this writer.
void write_header(const std::vector<std::string>& /*custom_columns*/) override {
    if (skip_header_) {
        return;
    }
    write_string("[\n");
}
// Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
write_annotation method · cpp · L476-L492 (17 LOC)include/output_writer.hpp
// Record one annotation and buffer it for grouped output.
//
// Annotations are grouped by the key "chrom:pos:ref:alt". When the key differs
// from the group currently being collected, the buffered group is flushed
// first and a new group is started. The custom column list is unused.
void write_annotation(const VariantAnnotation& ann,
                      const std::vector<std::string>& /*custom_columns*/) override {
    stats_.add(ann);

    // Build variant key for grouping: chrom:pos:ref:alt
    std::string group_key = ann.chromosome;
    group_key += ":";
    group_key += std::to_string(ann.position);
    group_key += ":";
    group_key += ann.ref_allele;
    group_key += ":";
    group_key += ann.alt_allele;

    const bool starts_new_group = (group_key != current_variant_key_);
    if (starts_new_group) {
        // Emit the previous group before collecting the next one.
        flush_current_variant();
        current_variant_key_ = group_key;
    }

    buffered_annotations_.push_back(ann);
}
// write_annotations method · cpp · L493-L499 (7 LOC)include/output_writer.hpp
void write_annotations(const std::vector<VariantAnnotation>& anns,
const std::vector<std::string>& custom_columns) override {
for (const auto& ann : anns) {
write_annotation(ann, custom_columns);
}
}write_footer method · cpp · L500-L504 (5 LOC)include/output_writer.hpp
// Flush the variant group still being buffered, then close the top-level JSON
// array unless headers are suppressed.
void write_footer() override {
    flush_current_variant();
    if (skip_header_) {
        return;
    }
    write_string("\n]\n");
}
// close method · cpp · L505-L514 (10 LOC)include/output_writer.hpp
// Close whichever sink is open. The gzip handle is cleared after closing and
// the stream is only closed while open, so repeated calls are harmless.
void close() override {
    if (gz_file_ != nullptr) {
        gzclose(gz_file_);
        gz_file_ = nullptr;
    }
    if (output_.is_open()) output_.close();
}
// flush_current_variant method · cpp · L529-L741 (213 LOC)include/output_writer.hpp
void flush_current_variant() {
if (buffered_annotations_.empty()) return;
if (!first_variant_) {
write_string(",\n");
}
first_variant_ = false;
const auto& first = buffered_annotations_[0];
// Determine most_severe_consequence across all transcript annotations
int best_rank = 999;
ConsequenceType most_severe_csq = ConsequenceType::UNKNOWN;
for (const auto& ann : buffered_annotations_) {
for (const auto& csq : ann.consequences) {
int rank = get_consequence_rank(csq);
if (rank < best_rank) {
best_rank = rank;
most_severe_csq = csq;
}
}
}
// Compute variant-level end position
int end_pos = first.position + static_cast<int>(first.ref_allele.size()) - 1;
if (end_pos < first.position) end_pos = first.position;
std::ostringstream json;
json <<write_transcript_consequence method · cpp · L742-L995 (254 LOC)include/output_writer.hpp
void write_transcript_consequence(std::ostringstream& json, const VariantAnnotation& ann) {
json << " {\n";
json << " \"gene_id\": \"" << escape_json(ann.gene_id) << "\",\n";
json << " \"gene_symbol\": \"" << escape_json(ann.gene_symbol) << "\",\n";
{
auto ss_it = ann.custom_annotations.find("SYMBOL_SOURCE");
if (ss_it != ann.custom_annotations.end() && !ss_it->second.empty()) {
json << " \"gene_symbol_source\": \"" << escape_json(ss_it->second) << "\",\n";
}
auto hgnc_it = ann.custom_annotations.find("HGNC_ID");
if (hgnc_it != ann.custom_annotations.end() && !hgnc_it->second.empty()) {
json << " \"hgnc_id\": \"" << escape_json(hgnc_it->second) << "\",\n";
}
}
json << " \"transcript_id\": \"" << escape_json(ann.transcript_id) << "\",\n";
if (!ann.source.empty()) {
json << write_string method · cpp · L996-L1005 (10 LOC)include/output_writer.hpp
// Send s to the active sink: std::cout when writing to stdout, the gzip
// handle when compression is on and the handle is open, otherwise the plain
// output stream.
void write_string(const std::string& s) {
    if (use_stdout_) {
        std::cout << s;
        return;
    }
    if (compress_ && gz_file_) {
        gzwrite(gz_file_, s.c_str(), static_cast<unsigned int>(s.size()));
        return;
    }
    output_ << s;
}
// is_valid_json_number method · cpp · L1010-L1028 (19 LOC)include/output_writer.hpp
// Check whether s is, in its entirety, a number literal per the JSON grammar:
//   [-] ( 0 | [1-9][0-9]* ) [ . [0-9]+ ] [ (e|E) [+|-] [0-9]+ ]
//
// Returns false for empty input, a leading '+', a bare '-', a missing digit
// after '.' or the exponent marker, trailing characters, and integer parts
// with leading zeros ("01", "-007"), which JSON does not permit.
static bool is_valid_json_number(const std::string& s) {
    const std::string::size_type len = s.size();
    const auto digit_at = [&s, len](std::string::size_type idx) {
        return idx < len && std::isdigit(static_cast<unsigned char>(s[idx])) != 0;
    };

    std::string::size_type i = 0;

    // Optional sign: only '-' is allowed.
    if (i < len && s[i] == '-') ++i;

    // Integer part: at least one digit is mandatory.
    if (!digit_at(i)) return false;
    if (s[i] == '0') {
        ++i;
        // A leading zero must stand alone; "01" is not a JSON number.
        if (digit_at(i)) return false;
    } else {
        while (digit_at(i)) ++i;
    }

    // Optional fraction: '.' followed by one or more digits.
    if (i < len && s[i] == '.') {
        ++i;
        if (!digit_at(i)) return false;
        while (digit_at(i)) ++i;
    }

    // Optional exponent: e/E, optional sign, one or more digits.
    if (i < len && (s[i] == 'e' || s[i] == 'E')) {
        ++i;
        if (i < len && (s[i] == '+' || s[i] == '-')) ++i;
        if (!digit_at(i)) return false;
        while (digit_at(i)) ++i;
    }

    // Anything left over makes the whole string invalid.
    return i == len;
}
// Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
escape_json method · cpp · L1029-L1046 (18 LOC)include/output_writer.hpp
// Escape s for use inside a JSON string literal (without the surrounding
// quotes).
//
// '"' and '\\' are backslash-escaped; \b \f \n \r \t use their short escapes;
// every other control character below 0x20 is written as \u00XX, since JSON
// forbids raw control characters inside strings. All remaining bytes
// (including bytes >= 0x80, e.g. UTF-8 sequences) are copied unchanged.
static std::string escape_json(const std::string& s) {
    static const char kHexDigits[] = "0123456789abcdef";

    std::string result;
    result.reserve(s.size());
    for (char c : s) {
        switch (c) {
            case '"':  result += "\\\""; break;
            case '\\': result += "\\\\"; break;
            case '\b': result += "\\b"; break;
            case '\f': result += "\\f"; break;
            case '\n': result += "\\n"; break;
            case '\r': result += "\\r"; break;
            case '\t': result += "\\t"; break;
            default: {
                // Compare as unsigned so bytes >= 0x80 are never mistaken for
                // control characters on platforms where char is signed.
                const unsigned char byte = static_cast<unsigned char>(c);
                if (byte < 0x20) {
                    result += "\\u00";
                    result += kHexDigits[byte >> 4];
                    result += kHexDigits[byte & 0x0F];
                } else {
                    result += c;
                }
                break;
            }
        }
    }
    return result;
}
// VCFWriter class · cpp · L1052-L1375 (324 LOC)include/output_writer.hpp
class VCFWriter : public OutputWriter {
public:
explicit VCFWriter(const std::string& output_path, bool compress = false)
: output_path_(output_path), compress_(compress), gz_file_(nullptr),
info_field_name_("CSQ"),
use_stdout_(output_path.empty() || output_path == "-" || output_path == "STDOUT") {
if (use_stdout_) {
compress_ = false;
} else if (compress_ || ends_with_gz(output_path_)) {
compress_ = true;
gz_file_ = gzopen(output_path_.c_str(), "wb");
if (!gz_file_) {
throw std::runtime_error("Cannot open output file: " + output_path_);
}
} else {
output_.open(output_path_);
if (!output_.is_open()) {
throw std::runtime_error("Cannot open output file: " + output_path_);
}
}
}
~VCFWriter() override {
close();
}
void set_info_field_name(const std::string& name) { info_fiel~VCFWriter method · cpp · L1074-L1077 (4 LOC)include/output_writer.hpp
// Release the output sink on destruction by delegating to close().
~VCFWriter() override {
    this->close();
}