← back to dcho108803__vep-annotator

Function bodies 431 total

All specs Real LLM only Function bodies
parse_hgvs_type function · cpp · L161-L169 (9 LOC)
include/hgvs_parser.hpp
// Translates the one-letter HGVS coordinate-system prefix into an HGVSType.
// Only the exact, lower-case, single-character strings "g", "c", "n", "p",
// "m" and "r" are recognised; every other input yields HGVSType::UNKNOWN.
inline HGVSType parse_hgvs_type(const std::string& type_str) {
    // A known prefix is always exactly one character long.
    if (type_str.size() != 1) {
        return HGVSType::UNKNOWN;
    }

    switch (type_str[0]) {
        case 'g': return HGVSType::GENOMIC;
        case 'c': return HGVSType::CODING;
        case 'n': return HGVSType::NONCODING;
        case 'p': return HGVSType::PROTEIN;
        case 'm': return HGVSType::MITOCHONDRIAL;
        case 'r': return HGVSType::RNA;
        default:  return HGVSType::UNKNOWN;
    }
}
hgvs_type_to_string function · cpp · L174-L182 (9 LOC)
include/hgvs_parser.hpp
// Inverse of parse_hgvs_type: returns the one-letter prefix for a type.
// Any value without a dedicated letter (including UNKNOWN) maps to "?".
inline std::string hgvs_type_to_string(HGVSType type) {
    switch (type) {
        case HGVSType::GENOMIC:       return "g";
        case HGVSType::CODING:        return "c";
        case HGVSType::NONCODING:     return "n";
        case HGVSType::PROTEIN:       return "p";
        case HGVSType::MITOCHONDRIAL: return "m";
        case HGVSType::RNA:           return "r";
        default:                      return "?";
    }
}
parse_genomic_hgvs function · cpp · L191-L279 (89 LOC)
include/hgvs_parser.hpp
inline HGVSParseResult parse_genomic_hgvs(const std::string& reference, const std::string& change) {
    HGVSParseResult result;
    result.reference_id = reference;
    result.hgvs_type = HGVSType::GENOMIC;

    // Parse substitution: 140753336A>T
    static const std::regex sub_regex("(\\d+)([ACGTacgt])>([ACGTacgt])");
    std::smatch match;

    if (std::regex_match(change, match, sub_regex)) {
        result.variant_type = HGVSVariantType::SUBSTITUTION;
        result.start_pos = std::stoi(match[1].str());
        result.end_pos = result.start_pos;
        result.ref_allele = match[2].str();
        result.alt_allele = match[3].str();

        // Convert to uppercase
        for (size_t i = 0; i < result.ref_allele.size(); ++i) {
            result.ref_allele[i] = static_cast<char>(std::toupper(static_cast<unsigned char>(result.ref_allele[i])));
        }
        for (size_t i = 0; i < result.alt_allele.size(); ++i) {
            result.alt_allele[i] = static_cast<char>(std::touppe
parse_coding_hgvs function · cpp · L289-L426 (138 LOC)
include/hgvs_parser.hpp
inline HGVSParseResult parse_coding_hgvs(const std::string& reference, const std::string& change) {
    HGVSParseResult result;
    result.reference_id = reference;
    result.hgvs_type = HGVSType::CODING;

    // Parse substitution with optional intronic offset: 803C>T or 123+5G>A
    static const std::regex sub_regex("(-?\\*?\\d+)([+-]\\d+)?([ACGTacgt])>([ACGTacgt])");
    std::smatch match;

    if (std::regex_match(change, match, sub_regex)) {
        result.variant_type = HGVSVariantType::SUBSTITUTION;

        std::string pos_str = match[1].str();
        // Handle UTR positions (*123 for 3'UTR, -123 for 5'UTR)
        if (!pos_str.empty() && pos_str[0] == '*') {
            result.start_pos = std::stoi(pos_str.substr(1));
            // Mark as 3'UTR position (positive value with flag)
        } else {
            result.start_pos = std::stoi(pos_str);
        }
        result.end_pos = result.start_pos;

        if (match[2].matched) {
            result.intron_offset = std::st
parse_protein_hgvs function · cpp · L437-L540 (104 LOC)
include/hgvs_parser.hpp
inline HGVSParseResult parse_protein_hgvs(const std::string& reference, const std::string& change) {
    HGVSParseResult result;
    result.reference_id = reference;
    result.hgvs_type = HGVSType::PROTEIN;

    // Parse missense: Val600Glu or V600E
    // Three-letter: ([A-Z][a-z]{2})(\d+)([A-Z][a-z]{2})
    static const std::regex missense_3letter("([A-Z][a-z]{2})(\\d+)([A-Z][a-z]{2}|\\?)");
    std::smatch match;

    if (std::regex_match(change, match, missense_3letter)) {
        result.variant_type = HGVSVariantType::SUBSTITUTION;
        result.ref_aa = match[1].str();
        result.protein_pos = std::stoi(match[2].str());
        result.alt_aa = match[3].str();
        result.valid = true;
        return result;
    }

    // One-letter: ([A-Z*])(\d+)([A-Z*?])
    static const std::regex missense_1letter("([A-Z*])(\\d+)([A-Z*?])");
    if (std::regex_match(change, match, missense_1letter)) {
        result.variant_type = HGVSVariantType::SUBSTITUTION;
        result.ref_aa = 
parse_hgvs function · cpp · L550-L605 (56 LOC)
include/hgvs_parser.hpp
inline HGVSParseResult parse_hgvs(const std::string& hgvs) {
    HGVSParseResult result;

    // Find the colon separator
    size_t colon_pos = hgvs.find(':');
    if (colon_pos == std::string::npos) {
        result.error_message = "Invalid HGVS format: missing colon separator";
        return result;
    }

    std::string reference = hgvs.substr(0, colon_pos);
    std::string notation = hgvs.substr(colon_pos + 1);

    if (notation.size() < 3) {
        result.error_message = "Invalid HGVS format: notation too short";
        return result;
    }

    // Parse type (c., g., p., n., m., r.)
    char type_char = notation[0];
    if (notation[1] != '.') {
        result.error_message = "Invalid HGVS format: expected '.' after type";
        return result;
    }

    std::string change = notation.substr(2);
    HGVSType hgvs_type = parse_hgvs_type(std::string(1, type_char));

    try {
        if (hgvs_type == HGVSType::GENOMIC || hgvs_type == HGVSType::MITOCHONDRIAL) {
            res
is_hgvs_notation function · cpp · L610-L629 (20 LOC)
include/hgvs_parser.hpp
// Cheap syntactic check for "<reference>:<type>." where <type> is one of
// c, g, p, n, m, r. Only the first colon is considered, and the reference
// in front of it must be non-empty. Nothing after the '.' is inspected.
inline bool is_hgvs_notation(const std::string& input) {
    const size_t sep = input.find(':');
    if (sep == std::string::npos || sep == 0) {
        return false;
    }

    // Both the type letter and the following '.' must exist.
    if (sep + 2 >= input.size()) {
        return false;
    }
    if (input[sep + 2] != '.') {
        return false;
    }

    switch (input[sep + 1]) {
        case 'c':
        case 'g':
        case 'p':
        case 'n':
        case 'm':
        case 'r':
            return true;
        default:
            return false;
    }
}
Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
refseq_to_chromosome function · cpp · L634-L718 (85 LOC)
include/hgvs_parser.hpp
inline std::string refseq_to_chromosome(const std::string& refseq) {
    static const std::unordered_map<std::string, std::string> refseq_map = [] {
        std::unordered_map<std::string, std::string> m;
        // GRCh38 chromosome accessions
        m["NC_000001.11"] = "1";
        m["NC_000002.12"] = "2";
        m["NC_000003.12"] = "3";
        m["NC_000004.12"] = "4";
        m["NC_000005.10"] = "5";
        m["NC_000006.12"] = "6";
        m["NC_000007.14"] = "7";
        m["NC_000008.11"] = "8";
        m["NC_000009.12"] = "9";
        m["NC_000010.11"] = "10";
        m["NC_000011.10"] = "11";
        m["NC_000012.12"] = "12";
        m["NC_000013.11"] = "13";
        m["NC_000014.9"] = "14";
        m["NC_000015.10"] = "15";
        m["NC_000016.10"] = "16";
        m["NC_000017.11"] = "17";
        m["NC_000018.10"] = "18";
        m["NC_000019.10"] = "19";
        m["NC_000020.11"] = "20";
        m["NC_000021.9"] = "21";
        m["NC_000022.11"] = "22";
        m["NC_0000
get_chrom_to_refseq function · cpp · L723-L755 (33 LOC)
include/hgvs_parser.hpp
inline const std::unordered_map<std::string, std::string>& get_chrom_to_refseq() {
    static const std::unordered_map<std::string, std::string> chrom_to_refseq = [] {
        std::unordered_map<std::string, std::string> m;
        m["1"] = "NC_000001.11";
        m["2"] = "NC_000002.12";
        m["3"] = "NC_000003.12";
        m["4"] = "NC_000004.12";
        m["5"] = "NC_000005.10";
        m["6"] = "NC_000006.12";
        m["7"] = "NC_000007.14";
        m["8"] = "NC_000008.11";
        m["9"] = "NC_000009.12";
        m["10"] = "NC_000010.11";
        m["11"] = "NC_000011.10";
        m["12"] = "NC_000012.12";
        m["13"] = "NC_000013.11";
        m["14"] = "NC_000014.9";
        m["15"] = "NC_000015.10";
        m["16"] = "NC_000016.10";
        m["17"] = "NC_000017.11";
        m["18"] = "NC_000018.10";
        m["19"] = "NC_000019.10";
        m["20"] = "NC_000020.11";
        m["21"] = "NC_000021.9";
        m["22"] = "NC_000022.11";
        m["X"] = "NC_000023.11";
      
chrom_to_refseq_lookup function · cpp · L760-L772 (13 LOC)
include/hgvs_parser.hpp
inline std::string chrom_to_refseq_lookup(const std::string& chrom) {
    std::string norm_chrom = chrom;
    if (norm_chrom.size() > 3 && norm_chrom.substr(0, 3) == "chr") {
        norm_chrom = norm_chrom.substr(3);
    }

    const auto& chrom_to_refseq = get_chrom_to_refseq();
    auto it = chrom_to_refseq.find(norm_chrom);
    if (it != chrom_to_refseq.end()) {
        return it->second;
    }
    return chrom;  // Use as-is
}
generate_hgvsg function · cpp · L777-L845 (69 LOC)
include/hgvs_parser.hpp
inline std::string generate_hgvsg(const std::string& chrom, int pos,
                                   const std::string& ref, const std::string& alt) {
    std::string result;

    // Get RefSeq accession via shared helper
    std::string refseq = chrom_to_refseq_lookup(chrom);

    result = refseq + ":g.";

    if (ref.size() == 1 && alt.size() == 1) {
        // Substitution
        result += std::to_string(pos) + ref + ">" + alt;
    } else if (alt.empty() || (ref.size() > alt.size() && alt.size() <= 1)) {
        // Deletion
        int del_start = pos;
        int del_end = pos + static_cast<int>(ref.size()) - 1;
        // Strip VCF anchor base if present
        if (!alt.empty() && alt.size() == 1 && ref.size() > 1 && ref[0] == alt[0]) {
            del_start = pos + 1;
        }
        if (del_start == del_end) {
            result += std::to_string(del_start) + "del";
        } else {
            result += std::to_string(del_start) + "_" + std::to_string(del_end) + "del";
 
generate_spdi function · cpp · L851-L860 (10 LOC)
include/hgvs_parser.hpp
// Builds "<sequence>:<position>:<ref>:<alt>" for a variant.
// The sequence id comes from chrom_to_refseq_lookup(chrom); the incoming
// 1-based position is shifted down by one because SPDI is 0-based.
// ref and alt are emitted exactly as given.
inline std::string generate_spdi(const std::string& chrom, int pos,
                                  const std::string& ref, const std::string& alt) {
    std::string spdi = chrom_to_refseq_lookup(chrom);
    spdi += ":";
    spdi += std::to_string(pos - 1);  // 1-based -> 0-based
    spdi += ":";
    spdi += ref;
    spdi += ":";
    spdi += alt;
    return spdi;
}
parse_spdi function · cpp · L878-L932 (55 LOC)
include/hgvs_parser.hpp
inline SPDIParseResult parse_spdi(const std::string& spdi) {
    SPDIParseResult result;

    // Split on colons - expect exactly 4 parts
    std::vector<std::string> parts;
    size_t start = 0;
    size_t pos = spdi.find(':');
    while (pos != std::string::npos) {
        parts.push_back(spdi.substr(start, pos - start));
        start = pos + 1;
        pos = spdi.find(':', start);
    }
    parts.push_back(spdi.substr(start));

    if (parts.size() != 4) {
        result.error_message = "Invalid SPDI format: expected 4 colon-separated fields";
        return result;
    }

    // Map RefSeq accession to chromosome name
    std::string chrom = refseq_to_chromosome(parts[0]);
    if (chrom.empty()) {
        chrom = parts[0];
    }

    // Normalize chr prefix
    if (chrom.size() > 3 && chrom.substr(0, 3) == "chr") {
        chrom = chrom.substr(3);
    }

    result.chromosome = chrom;

    // Convert 0-based SPDI position to 1-based
    try {
        result.position = std::stoi(p
is_spdi_notation function · cpp · L938-L969 (32 LOC)
include/hgvs_parser.hpp
inline bool is_spdi_notation(const std::string& input) {
    int colon_count = 0;
    size_t first_colon = std::string::npos;
    size_t second_colon = std::string::npos;
    for (size_t i = 0; i < input.size(); ++i) {
        if (input[i] == ':') {
            colon_count++;
            if (colon_count == 1) first_colon = i;
            if (colon_count == 2) second_colon = i;
        }
    }
    if (colon_count != 3) return false;

    // Check if the second field is numeric (the position)
    if (first_colon == std::string::npos || second_colon == std::string::npos) return false;
    std::string pos_str = input.substr(first_colon + 1, second_colon - first_colon - 1);
    if (pos_str.empty()) return false;
    for (size_t i = 0; i < pos_str.size(); ++i) {
        if (!std::isdigit(static_cast<unsigned char>(pos_str[i]))) return false;
    }

    // Distinguish from CHR:POS:REF:ALT by checking if first field looks like a RefSeq accession
    // or if the position field is very long (SP
parse_ensembl_format function · cpp · L987-L1050 (64 LOC)
include/hgvs_parser.hpp
inline EnsemblFormatResult parse_ensembl_format(const std::string& line) {
    EnsemblFormatResult result;

    std::istringstream iss(line);
    std::string chrom, allele, strand;
    int start_pos, end_pos;

    if (!(iss >> chrom >> start_pos >> end_pos >> allele)) {
        result.error_message = "Invalid Ensembl format: need at least 4 fields";
        return result;
    }

    // Strand is optional
    if (!(iss >> strand)) {
        strand = "+";
    }

    // Parse allele: REF/ALT
    size_t slash_pos = allele.find('/');
    if (slash_pos == std::string::npos) {
        result.error_message = "Invalid allele format: expected REF/ALT";
        return result;
    }

    result.chromosome = chrom;
    result.position = start_pos;
    result.ref_allele = allele.substr(0, slash_pos);
    result.alt_allele = allele.substr(slash_pos + 1);

    // Handle - for deletions/insertions
    if (result.ref_allele == "-") result.ref_allele = "";
    if (result.alt_allele == "-") result.alt_al
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
is_ensembl_format function · cpp · L1057-L1074 (18 LOC)
include/hgvs_parser.hpp
// Heuristic detector for whitespace-separated Ensembl default input lines.
// Accepts a line when it has at least four fields, fields two and three
// consist solely of decimal digits, and field four contains a '/'.
// Any further fields are not examined.
inline bool is_ensembl_format(const std::string& line) {
    std::istringstream fields(line);
    std::string chrom_tok, start_tok, end_tok, allele_tok;
    fields >> chrom_tok >> start_tok >> end_tok >> allele_tok;
    if (fields.fail()) {
        return false;  // fewer than four fields
    }

    // True when every character of the token is a decimal digit.
    const auto digits_only = [](const std::string& token) -> bool {
        for (const char ch : token) {
            if (!std::isdigit(static_cast<unsigned char>(ch))) return false;
        }
        return true;
    };

    return digits_only(start_tok) &&
           digits_only(end_tok) &&
           allele_tok.find('/') != std::string::npos;
}
is_rest_region_format function · cpp · L1090-L1106 (17 LOC)
include/hgvs_parser.hpp
// Structural check for the pattern CHR:START-END:STRAND/ALLELE.
// Verifies only that ':', '-', ':' and '/' occur in that order, each one
// searched for after the previous match; field contents are not validated.
inline bool is_rest_region_format(const std::string& input) {
    const char delimiters[] = {':', '-', ':', '/'};

    size_t cursor = 0;
    for (const char delimiter : delimiters) {
        const size_t hit = input.find(delimiter, cursor);
        if (hit == std::string::npos) {
            return false;
        }
        cursor = hit + 1;  // next delimiter must come after this one
    }
    return true;
}
parse_rest_region function · cpp · L1107-L1143 (37 LOC)
include/hgvs_parser.hpp
inline RESTRegionResult parse_rest_region(const std::string& input) {
    RESTRegionResult result;

    size_t first_colon = input.find(':');
    size_t dash = input.find('-', first_colon + 1);
    size_t second_colon = input.find(':', dash + 1);
    size_t slash = input.find('/', second_colon + 1);

    if (first_colon == std::string::npos || dash == std::string::npos ||
        second_colon == std::string::npos || slash == std::string::npos) {
        result.error_message = "Invalid REST region format";
        return result;
    }

    try {
        result.chromosome = input.substr(0, first_colon);
        result.position = std::stoi(input.substr(first_colon + 1, dash - first_colon - 1));
        result.end_position = std::stoi(input.substr(dash + 1, second_colon - dash - 1));
        result.strand = std::stoi(input.substr(second_colon + 1, slash - second_colon - 1));
        result.alt_allele = input.substr(slash + 1);

        // For single-base substitutions, ref is inferred fro
parse_output_format function · cpp · L37-L46 (10 LOC)
include/output_writer.hpp
// Case-insensitive mapping of a format name to OutputFormat.
// "json" and "vcf" select their formats; every other string, including the
// empty string, selects OutputFormat::TSV.
inline OutputFormat parse_output_format(const std::string& format) {
    std::string folded;
    folded.reserve(format.size());
    for (const char ch : format) {
        folded.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
    }

    if (folded == "json") {
        return OutputFormat::JSON;
    }
    if (folded == "vcf") {
        return OutputFormat::VCF;
    }
    return OutputFormat::TSV;
}
add method · cpp · L57-L71 (15 LOC)
include/output_writer.hpp
    // Folds one annotation into the running tallies:
    //  - total_variants always grows by one;
    //  - annotated_variants grows only when a gene symbol is present;
    //  - every consequence and the impact are counted by their string form;
    //  - the biotype is counted only when it is non-empty.
    void add(const VariantAnnotation& ann) {
        ++total_variants;

        const bool has_gene = !ann.gene_symbol.empty();
        if (has_gene) {
            ++annotated_variants;
        }

        for (const auto& consequence : ann.consequences) {
            ++consequence_counts[consequence_to_string(consequence)];
        }

        ++impact_counts[impact_to_string(ann.impact)];

        const bool has_biotype = !ann.biotype.empty();
        if (has_biotype) {
            ++biotype_counts[ann.biotype];
        }
    }
to_string method · cpp · L72-L87 (16 LOC)
include/output_writer.hpp
    // Renders the totals plus the consequence and impact tallies as a
    // plain-text report. Biotype tallies are not part of this report.
    std::string to_string() const {
        std::ostringstream report;

        report << "=== Annotation Statistics ===\n"
               << "Total variants: " << total_variants << "\n"
               << "Annotated variants: " << annotated_variants << "\n";

        report << "\nConsequence counts:\n";
        for (const auto& entry : consequence_counts) {
            report << "  " << entry.first << ": " << entry.second << "\n";
        }

        report << "\nImpact counts:\n";
        for (const auto& entry : impact_counts) {
            report << "  " << entry.first << ": " << entry.second << "\n";
        }

        return report.str();
    }
to_json method · cpp · L88-L112 (25 LOC)
include/output_writer.hpp
    // Serialises the totals plus the consequence and impact tallies as a
    // JSON object. Keys are written verbatim (no escaping is applied).
    std::string to_json() const {
        std::ostringstream out;

        out << "{\n"
            << "  \"total_variants\": " << total_variants << ",\n"
            << "  \"annotated_variants\": " << annotated_variants << ",\n";

        // The separator is empty before the first member and "," afterwards.
        out << "  \"consequence_counts\": {";
        const char* separator = "";
        for (const auto& entry : consequence_counts) {
            out << separator << "\n    \"" << entry.first << "\": " << entry.second;
            separator = ",";
        }
        out << "\n  },\n";

        out << "  \"impact_counts\": {";
        separator = "";
        for (const auto& entry : impact_counts) {
            out << separator << "\n    \"" << entry.first << "\": " << entry.second;
            separator = ",";
        }
        out << "\n  }\n";

        out << "}";
        return out.str();
    }
OutputWriter class · cpp · L118-L161 (44 LOC)
include/output_writer.hpp
class OutputWriter {
public:
    virtual ~OutputWriter() = default;

    virtual void write_header(const std::vector<std::string>& custom_columns) = 0;
    virtual void write_annotation(const VariantAnnotation& ann,
                                  const std::vector<std::string>& custom_columns) = 0;
    virtual void write_annotations(const std::vector<VariantAnnotation>& anns,
                                   const std::vector<std::string>& custom_columns) = 0;
    virtual void write_footer() = 0;
    virtual void close() = 0;

    // Convenience overloads without custom columns
    void write_header() { write_header(std::vector<std::string>()); }
    void write_annotation(const VariantAnnotation& ann) {
        write_annotation(ann, std::vector<std::string>());
    }
    void write_annotations(const std::vector<VariantAnnotation>& anns) {
        write_annotations(anns, std::vector<std::string>());
    }

    const AnnotationStats& get_stats() const { return stats_; }

    void se
Open data scored by Repobility · https://repobility.com
write_annotation method · cpp · L132-L134 (3 LOC)
include/output_writer.hpp
    // Convenience overload: writes one annotation with no custom columns by
    // forwarding to the two-argument write_annotation.
    void write_annotation(const VariantAnnotation& ann) {
        const std::vector<std::string> no_custom_columns;
        write_annotation(ann, no_custom_columns);
    }
write_annotations method · cpp · L135-L137 (3 LOC)
include/output_writer.hpp
    // Convenience overload: writes a batch of annotations with no custom
    // columns by forwarding to the two-argument write_annotations.
    void write_annotations(const std::vector<VariantAnnotation>& anns) {
        const std::vector<std::string> no_custom_columns;
        write_annotations(anns, no_custom_columns);
    }
format_consequence method · cpp · L150-L160 (11 LOC)
include/output_writer.hpp
    // Produces the consequence text for an annotation.
    // With term_style_ equal to "display", the display term of every
    // consequence is joined with '&'; for any other style the annotation's
    // own get_consequence_string() is returned.
    std::string format_consequence(const VariantAnnotation& ann) const {
        if (term_style_ != "display") {
            return ann.get_consequence_string();
        }

        std::string joined;
        bool need_separator = false;
        for (const auto& consequence : ann.consequences) {
            if (need_separator) {
                joined += "&";
            }
            joined += consequence_to_display_term(consequence);
            need_separator = true;
        }
        return joined;
    }
ends_with_gz function · cpp · L164-L166 (3 LOC)
include/output_writer.hpp
// Reports whether the path carries a ".gz" suffix (case-sensitive).
// The path must be longer than the suffix itself, so a bare ".gz" is rejected.
inline bool ends_with_gz(const std::string& path) {
    const size_t suffix_len = 3;  // length of ".gz"
    if (path.size() <= suffix_len) {
        return false;
    }
    // The last occurrence of ".gz" must start exactly suffix_len from the end.
    return path.rfind(".gz") == path.size() - suffix_len;
}
format_position_with_total function · cpp · L170-L179 (10 LOC)
include/output_writer.hpp
// Formats a position as "START", "START-END" or either form followed by
// "/TOTAL".
//  - start <= 0 yields empty_val.
//  - "-END" is appended only when end is positive and differs from start.
//  - "/TOTAL" is appended when ann.custom_annotations holds a non-empty
//    value under total_key; that value is copied verbatim.
inline std::string format_position_with_total(
    int start, int end, const std::string& total_key,
    const VariantAnnotation& ann, const std::string& empty_val) {
    if (start <= 0) {
        return empty_val;
    }

    std::string text = std::to_string(start);

    const bool is_range = end > 0 && end != start;
    if (is_range) {
        text += "-" + std::to_string(end);
    }

    const auto total = ann.custom_annotations.find(total_key);
    const bool has_total = total != ann.custom_annotations.end() && !total->second.empty();
    if (has_total) {
        text += "/" + total->second;
    }

    return text;
}
format_protein_position function · cpp · L182-L195 (14 LOC)
include/output_writer.hpp
// Formats the protein position as "POS" or "POS-END", optionally followed
// by "/N" where N is the integer CDS_LENGTH custom annotation divided by 3.
//  - protein_position <= 0 yields empty_val.
//  - The "/N" suffix is best-effort: it is omitted when CDS_LENGTH is
//    absent, empty, not parseable as an int, or not positive.
inline std::string format_protein_position(const VariantAnnotation& ann, const std::string& empty_val) {
    if (ann.protein_position <= 0) {
        return empty_val;
    }

    std::string text = std::to_string(ann.protein_position);

    const bool is_range = ann.protein_end > 0 && ann.protein_end != ann.protein_position;
    if (is_range) {
        text += "-" + std::to_string(ann.protein_end);
    }

    const auto cds = ann.custom_annotations.find("CDS_LENGTH");
    if (cds == ann.custom_annotations.end() || cds->second.empty()) {
        return text;
    }

    try {
        const int cds_len = std::stoi(cds->second);
        if (cds_len > 0) {
            text += "/" + std::to_string(cds_len / 3);
        }
    } catch (...) {
        // Deliberately ignored: an unusable CDS_LENGTH just drops the suffix.
    }
    return text;
}
TSVWriter class · cpp · L200-L437 (238 LOC)
include/output_writer.hpp
class TSVWriter : public OutputWriter {
public:
    explicit TSVWriter(const std::string& output_path, bool compress = false)
        : output_path_(output_path), compress_(compress), gz_file_(nullptr),
          use_stdout_(output_path.empty() || output_path == "-" || output_path == "STDOUT") {

        if (use_stdout_) {
            compress_ = false;  // Cannot compress stdout
        } else if (compress_ || ends_with_gz(output_path_)) {
            compress_ = true;
            gz_file_ = gzopen(output_path_.c_str(), "wb");
            if (!gz_file_) {
                throw std::runtime_error("Cannot open output file: " + output_path_);
            }
        } else {
            output_.open(output_path_);
            if (!output_.is_open()) {
                throw std::runtime_error("Cannot open output file: " + output_path_);
            }
        }
    }

    ~TSVWriter() override {
        close();
    }

    void write_header(const std::vector<std::string>& custom_columns) ove
~TSVWriter method · cpp · L221-L224 (4 LOC)
include/output_writer.hpp
    // Releases the output handles on destruction. The call is qualified to
    // make explicit that this class's own close() runs, which is also what
    // the unqualified call resolves to inside a destructor.
    ~TSVWriter() override {
        TSVWriter::close();
    }
Powered by Repobility — scan your code at https://repobility.com
write_header method · cpp · L225-L241 (17 LOC)
include/output_writer.hpp
    // Emits the two-line TSV header (banner comment plus column names) and
    // remembers custom_columns in custom_columns_.
    // When skip_header_ is set nothing is written, and custom_columns_ is
    // left untouched as well because the function returns early.
    void write_header(const std::vector<std::string>& custom_columns) override {
        if (skip_header_) {
            return;
        }

        // Perl VEP header comment line followed by the Perl VEP column names.
        const std::string header =
            "## ENSEMBL VARIANT EFFECT PREDICTOR\n"
            "#Uploaded_variation\tLocation\tAllele\tGene\tFeature\t"
            "Feature_type\tConsequence\tcDNA_position\tCDS_position\t"
            "Protein_position\tAmino_acids\tCodons\tExisting_variation\tExtra"
            "\n";

        write_string(header);
        custom_columns_ = custom_columns;
    }
write_annotation method · cpp · L242-L397 (156 LOC)
include/output_writer.hpp
    void write_annotation(const VariantAnnotation& ann,
                          const std::vector<std::string>& custom_columns) override {
        stats_.add(ann);

        std::ostringstream line;

        // #Uploaded_variation: Use VCF ID (rs#) if available, else CHR_POS_ALLELES
        if (!ann.vcf_id.empty() && ann.vcf_id != ".") {
            line << ann.vcf_id << "\t";
        } else {
            // Ensembl format: CHR_POS_REF/ALT using display alleles
            std::string d_ref = ann.display_ref.empty() ? ann.ref_allele : ann.display_ref;
            std::string d_alt = ann.display_alt.empty() ? ann.alt_allele : ann.display_alt;
            int d_start = ann.display_start > 0 ? ann.display_start : ann.position;
            line << ann.chromosome << "_" << d_start << "_" << d_ref << "/" << d_alt << "\t";
        }

        // Location: CHROM:POS or CHROM:POS-END using display coords
        {
            int d_start = ann.display_start > 0 ? ann.display_start : ann.positi
write_annotations method · cpp · L398-L404 (7 LOC)
include/output_writer.hpp
    void write_annotations(const std::vector<VariantAnnotation>& anns,
                           const std::vector<std::string>& custom_columns) override {
        for (const auto& ann : anns) {
            write_annotation(ann, custom_columns);
        }
    }
write_footer method · cpp · L405-L408 (4 LOC)
include/output_writer.hpp
    // Overrides the OutputWriter footer hook. Intentionally a no-op: the
    // TSV output has no trailing section, so nothing is written here.
    void write_footer() override {
        // TSV has no footer
    }
close method · cpp · L409-L418 (10 LOC)
include/output_writer.hpp
    // Closes whichever output handle is open. The gzip handle is reset to
    // nullptr after closing and the stream is only closed while open, so a
    // repeated call does nothing.
    // NOTE(review): the gzclose() result is not inspected here.
    void close() override {
        if (gz_file_ != nullptr) {
            gzclose(gz_file_);
            gz_file_ = nullptr;
        }

        if (!output_.is_open()) {
            return;
        }
        output_.close();
    }
write_string method · cpp · L427-L436 (10 LOC)
include/output_writer.hpp
    // Sends text to the active sink, chosen in this order: stdout, then the
    // gzip handle (when compression is on and the handle is set), otherwise
    // the plain output stream.
    // NOTE(review): the gzwrite() result is not inspected, and the length is
    // narrowed to unsigned int.
    void write_string(const std::string& s) {
        if (use_stdout_) {
            std::cout << s;
            return;
        }

        const bool to_gzip = compress_ && gz_file_;
        if (to_gzip) {
            gzwrite(gz_file_, s.c_str(), static_cast<unsigned int>(s.size()));
            return;
        }

        output_ << s;
    }
~JSONWriter method · cpp · L466-L469 (4 LOC)
include/output_writer.hpp
    // Releases the output handles on destruction. The call is qualified to
    // make explicit that this class's own close() runs, which is also what
    // the unqualified call resolves to inside a destructor.
    ~JSONWriter() override {
        JSONWriter::close();
    }
write_header method · cpp · L472-L475 (4 LOC)
include/output_writer.hpp
    // Opens the top-level JSON array unless skip_header_ is set.
    // The custom column list is not used by this writer.
    void write_header(const std::vector<std::string>& /*custom_columns*/) override {
        if (skip_header_) {
            return;
        }
        write_string("[\n");
    }
Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/
write_annotation method · cpp · L476-L492 (17 LOC)
include/output_writer.hpp
    // Records the annotation in the statistics and buffers it for output.
    // Annotations are grouped by the key "chrom:pos:ref:alt": when the key
    // differs from the previous call's, the buffered group is flushed first.
    // The custom column list is not used by this writer.
    void write_annotation(const VariantAnnotation& ann,
                          const std::vector<std::string>& /*custom_columns*/) override {
        stats_.add(ann);

        // Grouping key: chrom:pos:ref:alt
        std::string key = ann.chromosome;
        key += ":";
        key += std::to_string(ann.position);
        key += ":";
        key += ann.ref_allele;
        key += ":";
        key += ann.alt_allele;

        const bool starts_new_group = key != current_variant_key_;
        if (starts_new_group) {
            flush_current_variant();  // emit the previous variant's group
            current_variant_key_ = key;
        }

        buffered_annotations_.push_back(ann);
    }
write_annotations method · cpp · L493-L499 (7 LOC)
include/output_writer.hpp
    void write_annotations(const std::vector<VariantAnnotation>& anns,
                           const std::vector<std::string>& custom_columns) override {
        for (const auto& ann : anns) {
            write_annotation(ann, custom_columns);
        }
    }
write_footer method · cpp · L500-L504 (5 LOC)
include/output_writer.hpp
    // Flushes the last buffered variant group, then closes the top-level
    // JSON array unless skip_header_ is set.
    void write_footer() override {
        flush_current_variant();
        if (skip_header_) {
            return;
        }
        write_string("\n]\n");
    }
close method · cpp · L505-L514 (10 LOC)
include/output_writer.hpp
    // Closes whichever output handle is open. The gzip handle is reset to
    // nullptr after closing and the stream is only closed while open, so a
    // repeated call does nothing. Buffered annotations are not flushed here.
    // NOTE(review): the gzclose() result is not inspected.
    void close() override {
        if (gz_file_ != nullptr) {
            gzclose(gz_file_);
            gz_file_ = nullptr;
        }

        if (!output_.is_open()) {
            return;
        }
        output_.close();
    }
flush_current_variant method · cpp · L529-L741 (213 LOC)
include/output_writer.hpp
    void flush_current_variant() {
        if (buffered_annotations_.empty()) return;

        if (!first_variant_) {
            write_string(",\n");
        }
        first_variant_ = false;

        const auto& first = buffered_annotations_[0];

        // Determine most_severe_consequence across all transcript annotations
        int best_rank = 999;
        ConsequenceType most_severe_csq = ConsequenceType::UNKNOWN;
        for (const auto& ann : buffered_annotations_) {
            for (const auto& csq : ann.consequences) {
                int rank = get_consequence_rank(csq);
                if (rank < best_rank) {
                    best_rank = rank;
                    most_severe_csq = csq;
                }
            }
        }

        // Compute variant-level end position
        int end_pos = first.position + static_cast<int>(first.ref_allele.size()) - 1;
        if (end_pos < first.position) end_pos = first.position;

        std::ostringstream json;
        json <<
write_transcript_consequence method · cpp · L742-L995 (254 LOC)
include/output_writer.hpp
    void write_transcript_consequence(std::ostringstream& json, const VariantAnnotation& ann) {
        json << "      {\n";
        json << "        \"gene_id\": \"" << escape_json(ann.gene_id) << "\",\n";
        json << "        \"gene_symbol\": \"" << escape_json(ann.gene_symbol) << "\",\n";
        {
            auto ss_it = ann.custom_annotations.find("SYMBOL_SOURCE");
            if (ss_it != ann.custom_annotations.end() && !ss_it->second.empty()) {
                json << "        \"gene_symbol_source\": \"" << escape_json(ss_it->second) << "\",\n";
            }
            auto hgnc_it = ann.custom_annotations.find("HGNC_ID");
            if (hgnc_it != ann.custom_annotations.end() && !hgnc_it->second.empty()) {
                json << "        \"hgnc_id\": \"" << escape_json(hgnc_it->second) << "\",\n";
            }
        }
        json << "        \"transcript_id\": \"" << escape_json(ann.transcript_id) << "\",\n";
        if (!ann.source.empty()) {
            json << 
write_string method · cpp · L996-L1005 (10 LOC)
include/output_writer.hpp
    // Sends text to the active sink, chosen in this order: stdout, then the
    // gzip handle (when compression is on and the handle is set), otherwise
    // the plain output stream.
    // NOTE(review): the gzwrite() result is not inspected, and the length is
    // narrowed to unsigned int.
    void write_string(const std::string& s) {
        if (use_stdout_) {
            std::cout << s;
            return;
        }

        const bool to_gzip = compress_ && gz_file_;
        if (to_gzip) {
            gzwrite(gz_file_, s.c_str(), static_cast<unsigned int>(s.size()));
            return;
        }

        output_ << s;
    }
is_valid_json_number method · cpp · L1010-L1028 (19 LOC)
include/output_writer.hpp
    // Returns true when s is, in its entirety, a number as defined by the
    // JSON grammar:
    //     [ "-" ] ( "0" | digit1-9 *digit ) [ "." 1*digit ] [ ("e"|"E") ["+"|"-"] 1*digit ]
    // Leading zeros ("01", "-007") are rejected because JSON forbids them;
    // emitting such text unquoted would make the document invalid.
    // Also rejected: empty input, a lone "-", a leading "+", "1.", ".5",
    // "1e", surrounding whitespace, and non-numeric words such as "NaN".
    static bool is_valid_json_number(const std::string& s) {
        const size_t len = s.size();
        size_t i = 0;

        // True when position k exists and holds a decimal digit.
        const auto digit_at = [&s, len](size_t k) -> bool {
            return k < len && std::isdigit(static_cast<unsigned char>(s[k])) != 0;
        };

        // Optional sign.
        if (i < len && s[i] == '-') ++i;

        // Integer part: at least one digit is required.
        if (!digit_at(i)) return false;
        if (s[i] == '0') {
            ++i;
            // A leading zero must stand alone ("0", "0.5", "0e1"), never "01".
            if (digit_at(i)) return false;
        } else {
            while (digit_at(i)) ++i;
        }

        // Optional fraction: '.' followed by one or more digits.
        if (i < len && s[i] == '.') {
            ++i;
            if (!digit_at(i)) return false;
            while (digit_at(i)) ++i;
        }

        // Optional exponent: e/E, optional sign, one or more digits.
        if (i < len && (s[i] == 'e' || s[i] == 'E')) {
            ++i;
            if (i < len && (s[i] == '+' || s[i] == '-')) ++i;
            if (!digit_at(i)) return false;
            while (digit_at(i)) ++i;
        }

        // Valid only if the whole string was consumed.
        return i == len;
    }
Citation: Repobility (2026). State of AI-Generated Code. https://repobility.com/research/
escape_json method · cpp · L1029-L1046 (18 LOC)
include/output_writer.hpp
    // Escapes s for use inside a JSON string literal (surrounding quotes are
    // not added).
    //  - '"' and '\\' are backslash-escaped.
    //  - \b, \f, \n, \r and \t use their short escapes.
    //  - Every other control character below 0x20 is written as \u00XX,
    //    because JSON does not allow raw control characters in strings.
    //  - All remaining bytes (including bytes >= 0x80) are copied unchanged.
    static std::string escape_json(const std::string& s) {
        static const char kHexDigits[] = "0123456789abcdef";

        std::string result;
        result.reserve(s.size());
        for (const char c : s) {
            switch (c) {
                case '"': result += "\\\""; break;
                case '\\': result += "\\\\"; break;
                case '\b': result += "\\b"; break;
                case '\f': result += "\\f"; break;
                case '\n': result += "\\n"; break;
                case '\r': result += "\\r"; break;
                case '\t': result += "\\t"; break;
                default: {
                    // Go through unsigned char so high bytes are not seen as negative.
                    const unsigned char byte = static_cast<unsigned char>(c);
                    if (byte < 0x20) {
                        result += "\\u00";
                        result += kHexDigits[byte >> 4];
                        result += kHexDigits[byte & 0x0F];
                    } else {
                        result += c;
                    }
                    break;
                }
            }
        }
        return result;
    }
VCFWriter class · cpp · L1052-L1375 (324 LOC)
include/output_writer.hpp
class VCFWriter : public OutputWriter {
public:
    explicit VCFWriter(const std::string& output_path, bool compress = false)
        : output_path_(output_path), compress_(compress), gz_file_(nullptr),
          info_field_name_("CSQ"),
          use_stdout_(output_path.empty() || output_path == "-" || output_path == "STDOUT") {

        if (use_stdout_) {
            compress_ = false;
        } else if (compress_ || ends_with_gz(output_path_)) {
            compress_ = true;
            gz_file_ = gzopen(output_path_.c_str(), "wb");
            if (!gz_file_) {
                throw std::runtime_error("Cannot open output file: " + output_path_);
            }
        } else {
            output_.open(output_path_);
            if (!output_.is_open()) {
                throw std::runtime_error("Cannot open output file: " + output_path_);
            }
        }
    }

    ~VCFWriter() override {
        close();
    }

    void set_info_field_name(const std::string& name) { info_fiel
~VCFWriter method · cpp · L1074-L1077 (4 LOC)
include/output_writer.hpp
    // Releases the output handles on destruction. The call is qualified to
    // make explicit that this class's own close() runs, which is also what
    // the unqualified call resolves to inside a destructor.
    ~VCFWriter() override {
        VCFWriter::close();
    }
‹ prevpage 2 / 9next ›