← back to domovinatv__fetch.domovina.tv

Function bodies 195 total

All specs Real LLM only Function bodies
getArg function · javascript · L329-L332 (4 LOC)
screenshot_youtube.js
    /**
     * Returns the command-line token that immediately follows a flag.
     *
     * Reads the enclosing scope's `args` array.
     *
     * @param {string} name - Flag to look for (first occurrence is used).
     * @returns {string|null} The following token, or null when the flag is
     *   absent or is the last token.
     */
    function getArg(name) {
        const flagPos = args.indexOf(name);
        if (flagPos === -1) {
            return null;
        }
        const valuePos = flagPos + 1;
        if (valuePos >= args.length) {
            return null;
        }
        return args[valuePos];
    }
main function · javascript · L369-L454 (86 LOC)
screenshot_youtube.js
async function main() {
    const opts = parseArgs();

    console.log("");
    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   📸 YOUTUBE SCREENSHOT EXTRACTOR                ║");
    console.log("╚══════════════════════════════════════════════════╝");
    console.log(`   🔧 yt-dlp + ffmpeg (best available quality)`);
    console.log(`   🍪 Cookies: ${BROWSER_NAME}`);

    // ── Single file mode ──
    if (opts.mode === "single") {
        console.log(`   📂 Datoteka: ${opts.file}`);
        console.log("");

        const result = await processArticle(opts.file);
        console.log("");
        console.log(`   📊 Ukupno: ${result.total} | Novo: ${result.captured} | Preskočeno: ${result.skipped} | Neuspjelo: ${result.failed}`);
        console.log("");
        return;
    }

    // ── Batch mode ──
    const { inputDir, channel, limit, dryRun } = opts;
    console.log(`   📂 Input:   ${inputDir}`);
    if (channel) console.log(`   🎯 Kanal:   ${chann
sleep function · javascript · L117-L119 (3 LOC)
summarize_gemini.js
/**
 * Creates a promise that resolves (with no value) after a timer delay.
 *
 * @param {number} ms - Delay passed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Promise resolved once the timer fires.
 */
function sleep(ms) {
    return new Promise((done) => {
        setTimeout(done, ms);
    });
}
formatDuration function · javascript · L124-L129 (6 LOC)
summarize_gemini.js
/**
 * Formats a number of seconds as "<hours>h <minutes>m <seconds>s".
 *
 * Each component is floored independently, so fractional seconds are dropped.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Text such as "1h 1m 1s".
 */
function formatDuration(seconds) {
    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    const secs = Math.floor(seconds % 60);
    return [`${hours}h`, `${minutes}m`, `${secs}s`].join(" ");
}
extractVideoIdFromFilename function · javascript · L136-L139 (4 LOC)
summarize_gemini.js
/**
 * Pulls the 11-character id that follows "_yt_" out of a filename.
 *
 * @param {string} filename - File name (or base name) to inspect.
 * @returns {string|null} The captured id, or null when the pattern is absent.
 */
function extractVideoIdFromFilename(filename) {
    const [, videoId] = filename.match(/_yt_([a-zA-Z0-9_-]{11})/) ?? [];
    return videoId ?? null;
}
extractDateFromFilename function · javascript · L145-L148 (4 LOC)
summarize_gemini.js
/**
 * Reads a leading "YYYYMMDD_" prefix from a filename and reformats it.
 *
 * Only the digit layout is checked; the values are not validated as a
 * calendar date.
 *
 * @param {string} filename - File name expected to start with eight digits and "_".
 * @returns {string|null} "YYYY-MM-DD", or null when the prefix is missing.
 */
function extractDateFromFilename(filename) {
    const found = filename.match(/^(\d{4})(\d{2})(\d{2})_/);
    if (!found) {
        return null;
    }
    const [, year, month, day] = found;
    return [year, month, day].join("-");
}
extractChannelFromPath function · javascript · L154-L158 (5 LOC)
summarize_gemini.js
/**
 * Derives a channel name from where a file sits below the input directory.
 *
 * @param {string} filePath - Path of the file.
 * @param {string} inputDir - Directory the path is made relative to.
 * @returns {string} First segment of the relative path when it has at least
 *   two segments; otherwise "unknown".
 */
function extractChannelFromPath(filePath, inputDir) {
    const [firstSegment, ...remaining] = path.relative(inputDir, filePath).split(path.sep);
    return remaining.length >= 1 ? firstSegment : "unknown";
}
Methodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/
loadInfoJson function · javascript · L166-L186 (21 LOC)
summarize_gemini.js
/**
 * Loads the info JSON that sits next to a diarized SRT file.
 *
 * The companion path is built by stripping ".wav.canary.diarized.srt" from the
 * SRT base name and appending INFO_JSON_SUFFIX, in the same directory.
 *
 * @param {string} srtFilePath - Path to the diarized SRT file.
 * @returns {*} Parsed JSON content, or null when the companion file does not
 *   exist or cannot be read/parsed (a warning is logged in the latter case).
 */
function loadInfoJson(srtFilePath) {
    // e.g. "20241212_bk_podcast_1_..._yt_XXX.wav.canary.diarized.srt"
    //   -> "20241212_bk_podcast_1_..._yt_XXX"
    const stem = path.basename(srtFilePath).replace(/\.wav\.canary\.diarized\.srt$/, "");

    // The info file carries its suffix directly on that stem.
    const infoPath = path.join(path.dirname(srtFilePath), stem + INFO_JSON_SUFFIX);

    if (fs.existsSync(infoPath)) {
        try {
            const raw = fs.readFileSync(infoPath, "utf-8");
            return JSON.parse(raw);
        } catch (e) {
            console.error(`   ⚠️  Neispravan .info.json: ${infoPath}`);
        }
    }
    return null;
}
srtToText function · javascript · L201-L216 (16 LOC)
summarize_gemini.js
/**
 * Reduces SRT subtitle content to its text lines.
 *
 * Every line is trimmed; blank lines, lines made only of digits (cue indexes)
 * and lines starting with an "HH:MM:SS," / "HH:MM:SS." timestamp are dropped.
 * Note that a text line consisting solely of digits is dropped as well.
 *
 * @param {string} srtContent - Raw SRT content.
 * @returns {string} Remaining lines joined with "\n".
 */
function srtToText(srtContent) {
    const isCueIndex = (text) => /^\d+$/.test(text);
    const isTimestamp = (text) => /^\d{2}:\d{2}:\d{2}[,.]/.test(text);

    return srtContent
        .split("\n")
        .map((rawLine) => rawLine.trim())
        .filter((text) => text !== "" && !isCueIndex(text) && !isTimestamp(text))
        .join("\n");
}
getAccessToken function · javascript · L220-L227 (8 LOC)
summarize_gemini.js
/**
 * Obtains an access token by running `gcloud auth print-access-token`.
 *
 * On any failure an error message is printed and the process exits with
 * status 1.
 *
 * @returns {string} The command's output with surrounding whitespace removed.
 */
function getAccessToken() {
    const command = "gcloud auth print-access-token";
    try {
        const stdout = execSync(command, { encoding: "utf-8" });
        return stdout.trim();
    } catch (err) {
        console.error("❌ Ne mogu dohvatiti access token. Pokreni: gcloud auth login");
        process.exit(1);
    }
}
getOrRefreshAccessToken function · javascript · L233-L241 (9 LOC)
summarize_gemini.js
/**
 * Returns the cached access token, fetching a new one when needed.
 *
 * The cache is stored in the outer `cachedAccessToken` / `tokenExpiry`
 * variables. A fresh token is requested via getAccessToken() when nothing is
 * cached or the expiry time has passed; the new expiry is set 50 minutes
 * after the moment of this call.
 *
 * @returns {string} The access token to use.
 */
function getOrRefreshAccessToken() {
    const now = Date.now();
    const cacheIsFresh = Boolean(cachedAccessToken) && now < tokenExpiry;
    if (!cacheIsFresh) {
        cachedAccessToken = getAccessToken();
        tokenExpiry = now + 50 * 60 * 1000;
    }
    return cachedAccessToken;
}
callGemini function · javascript · L253-L372 (120 LOC)
summarize_gemini.js
async function callGemini(transcript, metadata) {
    // Konstruiraj korisnički prompt s metapodacima + transkriptom
    let userMessage = "";

    if (metadata) {
        userMessage += "=== METAPODACI ===\n";
        if (metadata.title) userMessage += `Naslov: ${metadata.title}\n`;
        if (metadata.description) userMessage += `Opis: ${metadata.description}\n`;
        if (metadata.tags && metadata.tags.length > 0) {
            userMessage += `Tagovi: ${metadata.tags.slice(0, 20).join(", ")}\n`;
        }
        if (metadata.duration) {
            const mins = Math.floor(metadata.duration / 60);
            userMessage += `Trajanje: ${mins} minuta\n`;
        }
        if (metadata.channel) userMessage += `Kanal: ${metadata.channel}\n`;
        userMessage += "\n";
    }

    userMessage += "=== DIARIZIRANI TRANSKRIPT ===\n";
    userMessage += transcript;

    // Vertex AI payload — systemInstruction odvojen od contents
    const payload = {
        contents: [
            {
 
buildSummaryJson function · javascript · L386-L421 (36 LOC)
summarize_gemini.js
function buildSummaryJson(geminiResult, srtFilename, channel, metadata) {
    const base = path.basename(srtFilename).replace(/\.wav\.canary\.diarized\.srt$/, "");
    const youtubeId = extractVideoIdFromFilename(base);
    const uploadDate = extractDateFromFilename(base);

    return {
        version: SCHEMA_VERSION,
        generated_at: new Date().toISOString(),
        model: GEMINI_MODEL,

        // Izvorni podaci o datoteci
        source: {
            filename: base,
            channel: channel,
            youtube_id: youtubeId,
            title: metadata?.title || geminiResult.title_hr || base,
            upload_date: uploadDate,
            duration_seconds: metadata?.duration || null
        },

        // Gemini generirani sažetak
        summary: {
            title_hr: geminiResult.title_hr || "",
            abstract_hr: geminiResult.abstract_hr || "",
            key_topics: geminiResult.key_topics || [],
            speakers: geminiResult.speakers || [],
        
buildSummaryMarkdown function · javascript · L430-L505 (76 LOC)
summarize_gemini.js
function buildSummaryMarkdown(summaryJson) {
    const s = summaryJson.summary;
    const src = summaryJson.source;

    let md = "";

    // Naslov
    md += `# ${s.title_hr || src.title}\n\n`;

    // Metapodaci
    md += `**Kanal:** ${src.channel}  \n`;
    if (src.upload_date) md += `**Datum:** ${src.upload_date}  \n`;
    if (src.duration_seconds) {
        const mins = Math.floor(src.duration_seconds / 60);
        md += `**Trajanje:** ${mins} min  \n`;
    }
    if (src.youtube_id) md += `**YouTube:** https://youtu.be/${src.youtube_id}  \n`;
    md += `**Model:** ${summaryJson.model}  \n`;
    md += "\n";

    // Sažetak
    if (s.abstract_hr) {
        md += `## Sažetak\n\n${s.abstract_hr}\n\n`;
    }

    // Ključne teme
    if (s.key_topics && s.key_topics.length > 0) {
        md += `## Ključne teme\n\n`;
        s.key_topics.forEach((t) => { md += `- ${t}\n`; });
        md += "\n";
    }

    // Govornici
    if (s.speakers && s.speakers.length > 0) {
        md += `## Gov
parseArgs function · javascript · L509-L542 (34 LOC)
summarize_gemini.js
function parseArgs() {
    const args = process.argv.slice(2);

    function getArg(name) {
        const idx = args.indexOf(name);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : null;
    }

    const inputDir = getArg("--input-dir");
    const channel = getArg("--channel");
    const limit = getArg("--limit") ? parseInt(getArg("--limit"), 10) : null;
    const dryRun = args.includes("--dry-run");

    // --model flag za override modela
    const model = getArg("--model");
    if (model) {
        GEMINI_MODEL = model;
    }

    if (!inputDir) {
        console.error("❌ Obavezan argument: --input-dir <putanja>");
        console.error("");
        console.error("Primjeri:");
        console.error("  node summarize_gemini.js --input-dir /Volumes/DOMOVINA1TB/fetch_domovina_tv_output");
        console.error("  node summarize_gemini.js --input-dir ... --channel bozanstvena_komedija --limit 5");
        console.error("  node summarize_gemini.js --input-dir ... --mod
Repobility analyzer · published findings · https://repobility.com
getArg function · javascript · L512-L515 (4 LOC)
summarize_gemini.js
    /**
     * Fetches the token following the first occurrence of a flag in `args`
     * (taken from the enclosing scope).
     *
     * @param {string} name - Flag name, e.g. "--input-dir".
     * @returns {string|null} The next token, or null if the flag is missing
     *   or has nothing after it.
     */
    function getArg(name) {
        const position = args.indexOf(name);
        const hasValue = position !== -1 && position + 1 < args.length;
        return hasValue ? args[position + 1] : null;
    }
discoverFiles function · javascript · L555-L600 (46 LOC)
summarize_gemini.js
function discoverFiles(inputDir, channelFilter) {
    const results = [];

    if (!fs.existsSync(inputDir)) {
        console.error(`❌ Input direktorij ne postoji: ${inputDir}`);
        process.exit(1);
    }

    // Skeniramo podirektorije (svaki je kanal)
    const entries = fs.readdirSync(inputDir, { withFileTypes: true });

    for (const entry of entries) {
        if (!entry.isDirectory()) continue;
        if (entry.name.startsWith(".")) continue;

        const channelName = entry.name;

        // Filtriraj po kanalu ako je navedeno
        if (channelFilter && channelName !== channelFilter) continue;

        const channelDir = path.join(inputDir, channelName);
        const files = fs.readdirSync(channelDir);

        for (const file of files) {
            if (!file.endsWith(DIARIZED_SRT_SUFFIX)) continue;
            if (file.startsWith("._")) continue;  // macOS resource forks

            const srtPath = path.join(channelDir, file);
            const base = file.replac
main function · javascript · L604-L762 (159 LOC)
summarize_gemini.js
async function main() {
    const { inputDir, channel, limit, dryRun } = parseArgs();

    console.log("");
    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   📝 GEMINI SUMARIZACIJA TRANSKRIPATA           ║");
    console.log("╚══════════════════════════════════════════════════╝");
    console.log(`   📂 Input:   ${inputDir}`);
    console.log(`   🤖 Model:   ${GEMINI_MODEL}`);
    console.log(`   🌐 Endpoint: Vertex AI (OAuth Bearer)`);
    console.log(`   📋 Projekt:  ${VERTEX_PROJECT} (${VERTEX_REGION})`);
    if (channel) console.log(`   🎯 Kanal:   ${channel}`);
    if (limit) console.log(`   🔢 Limit:   ${limit}`);
    if (dryRun) console.log("   ⚠️  DRY RUN — samo prikaz, bez API poziva");
    console.log("");

    // ── Pronađi datoteke ──
    const allFiles = discoverFiles(inputDir, channel);
    const blocked = allFiles.filter((f) => f.isBlocked && !f.hasSummary);
    const toProcess = allFiles.filter((f) => !f.hasSummary && !f.isBlocked);
sanitizeDescription function · javascript · L38-L49 (12 LOC)
transcribe_diarized.js
/**
 * Turns a title into a lowercase slug of [a-z0-9_].
 *
 * Steps: lowercase, transliterate č/ć/ž/š/đ to ASCII, replace every other
 * non-alphanumeric character with "_", collapse runs of "_" and strip one
 * leading/trailing "_".
 *
 * @param {string} str - Title to sanitize.
 * @returns {string} The slug, or "nepoznat_naslov" when the input is falsy or
 *   the slug ends up empty.
 */
function sanitizeDescription(str) {
    const FALLBACK = "nepoznat_naslov";
    if (!str) return FALLBACK;

    const translit = {
        'č': 'c', 'ć': 'c', 'ž': 'z', 'š': 's', 'đ': 'd',
        'Č': 'c', 'Ć': 'c', 'Ž': 'z', 'Š': 's', 'Đ': 'd'
    };

    const slug = str
        .toLowerCase()
        .replace(/[čćžšđČĆŽŠĐ]/g, (letter) => translit[letter] || letter)
        .replace(/[^a-z0-9]/g, '_')
        .replace(/_+/g, '_')
        .replace(/^_|_$/g, '');

    return slug || FALLBACK;
}
extractVideoId function · javascript · L51-L56 (6 LOC)
transcribe_diarized.js
/**
 * Extracts the 11-character video id from a YouTube URL.
 *
 * Recognises the id after "youtu.be/" or after "v=".
 *
 * @param {string} url - URL text; surrounding whitespace is ignored.
 * @returns {string|null} The id, or null when the input is not a string, is
 *   blank, or contains no recognisable id.
 */
function extractVideoId(url) {
    // Guard before trimming: calling .trim() on null/undefined would throw.
    if (typeof url !== "string") return null;
    const trimmed = url.trim();
    if (!trimmed) return null;
    const m = trimmed.match(/(?:youtu\.be\/|v=)([a-zA-Z0-9_-]{11})/);
    return m ? m[1] : null;
}
extractDataFromLine function · javascript · L58-L75 (18 LOC)
transcribe_diarized.js
/**
 * Parses one line of a list file into { url, title, date }.
 *
 * Accepted shapes (fields separated by "|"):
 *   - "url"                     -> default title and date
 *   - "title | url"             -> default date
 *   - "date | title ... | url"  -> middle fields are joined with a space
 *
 * @param {string} line - Raw line.
 * @returns {{url: string, title: string, date: string}|null} Parsed entry, or
 *   null for blank lines and lines starting with "#".
 */
function extractDataFromLine(line) {
    const entry = line.trim();
    if (entry === "" || entry.startsWith("#")) return null;

    const fields = entry.split("|");
    if (fields.length === 1) {
        return { url: entry, title: "nepoznat_naslov", date: "NA" };
    }

    const url = fields[fields.length - 1].trim();
    if (fields.length === 2) {
        return { url, title: fields[0].trim(), date: "NA" };
    }

    return {
        url,
        title: fields.slice(1, -1).join(" ").trim(),
        date: fields[0].trim(),
    };
}
loadState function · javascript · L77-L86 (10 LOC)
transcribe_diarized.js
/**
 * Reads the JSON state file.
 *
 * @param {string} stateFile - Path to the state file.
 * @returns {*} The parsed file content; or a fresh
 *   `{ completed: [], failed: [] }` when the file is missing or cannot be
 *   read/parsed (an error is logged in the latter case).
 */
function loadState(stateFile) {
    if (!fs.existsSync(stateFile)) {
        return { completed: [], failed: [] };
    }
    try {
        const raw = fs.readFileSync(stateFile, "utf-8");
        return JSON.parse(raw);
    } catch (e) {
        console.error(`[GREŠKA] Neispravan JSON stanja: ${stateFile}`);
        return { completed: [], failed: [] };
    }
}
findFile function · javascript · L88-L95 (8 LOC)
transcribe_diarized.js
/**
 * Finds the first file in a directory that belongs to a given video.
 *
 * A file qualifies when its name contains `_yt_<videoId>`, ends with `suffix`
 * and does not start with "._".
 *
 * @param {string} outputDir - Directory to scan (non-recursive).
 * @param {string} videoId - Video id to look for.
 * @param {string} suffix - Required file-name ending.
 * @returns {string|null} Full path of the first match in directory-listing
 *   order, or null when the directory is missing or nothing matches.
 */
function findFile(outputDir, videoId, suffix) {
    if (!fs.existsSync(outputDir)) return null;

    const marker = `_yt_${videoId}`;
    for (const name of fs.readdirSync(outputDir)) {
        if (name.startsWith("._")) continue;
        if (name.includes(marker) && name.endsWith(suffix)) {
            return path.join(outputDir, name);
        }
    }
    return null;
}
Want this analysis on your repo? https://repobility.com/scan/
formatDuration function · javascript · L97-L102 (6 LOC)
transcribe_diarized.js
/**
 * Renders a duration given in seconds as "<h>h <m>m <s>s".
 *
 * Hours, minutes and seconds are each floored separately.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Formatted duration.
 */
function formatDuration(seconds) {
    const remainderAfterHours = seconds % 3600;
    const parts = {
        h: Math.floor(seconds / 3600),
        m: Math.floor(remainderAfterHours / 60),
        s: Math.floor(seconds % 60),
    };
    return `${parts.h}h ${parts.m}m ${parts.s}s`;
}
runDiarization function · javascript · L113-L140 (28 LOC)
transcribe_diarized.js
/**
 * Runs the diarization script under python3 and waits for it to finish.
 *
 * The child inherits this process's stdio. The script path comes from
 * DIARIZE_SCRIPT and the four arguments are passed as --wav, --srt, --output
 * and --hf-token.
 *
 * @param {string} wavFile - Value for --wav.
 * @param {string} srtFile - Value for --srt.
 * @param {string} outputFile - Value for --output.
 * @param {string} hfToken - Value for --hf-token.
 * @returns {Promise<{elapsed: number}>} Resolves with the run time in seconds
 *   on exit code 0; rejects with an Error on any other exit code or when the
 *   process cannot be started.
 */
function runDiarization(wavFile, srtFile, outputFile, hfToken) {
    const cliArgs = [
        DIARIZE_SCRIPT,
        "--wav", wavFile,
        "--srt", srtFile,
        "--output", outputFile,
        "--hf-token", hfToken,
    ];

    const startedAt = Date.now();
    const secondsSinceStart = () => (Date.now() - startedAt) / 1000;

    return new Promise((resolve, reject) => {
        const child = spawn("python3", cliArgs, { stdio: "inherit" });

        child.on("error", (err) => {
            reject(new Error(`Nije moguće pokrenuti python3: ${err.message}. Je li Python 3 instaliran?`));
        });

        child.on("close", (code) => {
            const elapsed = secondsSinceStart();
            if (code !== 0) {
                reject(new Error(`diarize.py exit code: ${code} (trajalo: ${formatDuration(elapsed)})`));
                return;
            }
            resolve({ elapsed });
        });
    });
}
main function · javascript · L144-L299 (156 LOC)
transcribe_diarized.js
async function main() {
    const args = process.argv.slice(2);
    const outputDirIdx = args.indexOf("--output-dir");
    const baseOutputDir = outputDirIdx !== -1 ? args[outputDirIdx + 1] : DEFAULT_OUTPUT_DIR;
    const dryRun = args.includes("--dry-run");
    const channelIdx = args.indexOf("--channel");
    const channelFilter = channelIdx !== -1 ? args[channelIdx + 1] : null;

    // HF token
    const tokenIdx = args.indexOf("--hf-token");
    const hfToken = tokenIdx !== -1 ? args[tokenIdx + 1] : null;

    if (!hfToken && !dryRun) {
        console.error("❌ HuggingFace token je OBAVEZAN za diarizaciju.");
        console.error("   Dodaj: --hf-token TVOJ_TOKEN");
        process.exit(1);
    }

    // Provjera preduvjeta
    if (!fs.existsSync(DIARIZE_SCRIPT)) {
        console.error(`❌ Python skripta ne postoji: ${DIARIZE_SCRIPT}`);
        process.exit(1);
    }
    if (!fs.existsSync(LISTS_DIR)) {
        console.error(`❌ Nema direktorija s listama: ${LISTS_DIR}`);
        pr
sanitizeDescription function · javascript · L41-L52 (12 LOC)
transcribe.js
/**
 * Builds a file-name-safe slug from a title.
 *
 * The text is lowercased, č/ć/ž/š/đ are mapped to c/c/z/s/d, anything outside
 * [a-z0-9] becomes "_", repeated "_" are collapsed and a single leading or
 * trailing "_" is removed.
 *
 * @param {string} str - Title text.
 * @returns {string} The slug; "nepoznat_naslov" for falsy input or an empty
 *   result.
 */
function sanitizeDescription(str) {
    if (!str) {
        return "nepoznat_naslov";
    }
    const replacements = {
        'č': 'c', 'ć': 'c', 'ž': 'z', 'š': 's', 'đ': 'd',
        'Č': 'c', 'Ć': 'c', 'Ž': 'z', 'Š': 's', 'Đ': 'd'
    };
    const lowered = str.toLowerCase();
    const ascii = lowered.replace(/[čćžšđČĆŽŠĐ]/g, (ch) => replacements[ch] || ch);
    const underscored = ascii.replace(/[^a-z0-9]/g, '_');
    const collapsed = underscored.replace(/_+/g, '_');
    const trimmed = collapsed.replace(/^_|_$/g, '');
    return trimmed || "nepoznat_naslov";
}
extractVideoId function · javascript · L54-L59 (6 LOC)
transcribe.js
/**
 * Extracts the 11-character video id from a YouTube URL.
 *
 * The id is taken from the text following "youtu.be/" or "v=".
 *
 * @param {string} url - URL text; leading/trailing whitespace is ignored.
 * @returns {string|null} The id, or null for non-string input, blank input,
 *   or a URL without a recognisable id.
 */
function extractVideoId(url) {
    // Non-string input (null, undefined, ...) cannot be trimmed; treat as "no id".
    if (typeof url !== "string") return null;
    const candidate = url.trim();
    if (candidate === "") return null;
    const found = candidate.match(/(?:youtu\.be\/|v=)([a-zA-Z0-9_-]{11})/);
    return found ? found[1] : null;
}
extractDataFromLine function · javascript · L61-L78 (18 LOC)
transcribe.js
/**
 * Splits a list-file line into its url, title and date.
 *
 * The last "|"-separated field is always the URL. With exactly one field
 * before it, that field is the title; with two or more, the first is the date
 * and the rest are joined with a space to form the title.
 *
 * @param {string} line - Raw line.
 * @returns {{url: string, title: string, date: string}|null} Entry with
 *   defaults "nepoznat_naslov" / "NA" for missing parts, or null for blank
 *   lines and "#" comments.
 */
function extractDataFromLine(line) {
    const text = line.trim();
    if (text.length === 0 || text.startsWith("#")) {
        return null;
    }

    const leading = text.split("|");
    // The URL is the final field; whatever remains describes it.
    const url = leading.pop().trim();

    let title = "nepoznat_naslov";
    let date = "NA";
    if (leading.length >= 2) {
        const [rawDate, ...titleParts] = leading;
        date = rawDate.trim();
        title = titleParts.join(" ").trim();
    } else if (leading.length === 1) {
        title = leading[0].trim();
    }

    return { url, title, date };
}
loadState function · javascript · L80-L89 (10 LOC)
transcribe.js
/**
 * Loads the persisted state from a JSON file.
 *
 * @param {string} stateFile - Path to the state file.
 * @returns {*} Whatever the file parses to; a new
 *   `{ completed: [], failed: [] }` when the file is absent or reading/parsing
 *   fails (the failure is logged).
 */
function loadState(stateFile) {
    let parsed;
    let loaded = false;

    if (fs.existsSync(stateFile)) {
        try {
            parsed = JSON.parse(fs.readFileSync(stateFile, "utf-8"));
            loaded = true;
        } catch (err) {
            console.error(`[GREŠKA] Neispravan JSON stanja: ${stateFile}`);
        }
    }

    return loaded ? parsed : { completed: [], failed: [] };
}
findFile function · javascript · L93-L100 (8 LOC)
transcribe.js
/**
 * Looks in a directory for a file tied to a video id.
 *
 * Names starting with "._" are ignored; a match must contain `_yt_<videoId>`
 * and end with `suffix`.
 *
 * @param {string} outputDir - Directory to list.
 * @param {string} videoId - Video id embedded in the file name.
 * @param {string} suffix - Required ending of the file name.
 * @returns {string|null} Path of the first matching entry, or null if the
 *   directory does not exist or has no match.
 */
function findFile(outputDir, videoId, suffix) {
    if (!fs.existsSync(outputDir)) {
        return null;
    }

    const tag = `_yt_${videoId}`;
    const isWanted = (name) => {
        if (name.startsWith("._")) return false;
        return name.includes(tag) && name.endsWith(suffix);
    };

    const hit = fs.readdirSync(outputDir).find(isWanted);
    return hit ? path.join(outputDir, hit) : null;
}
Repobility · MCP-ready · https://repobility.com
formatDuration function · javascript · L107-L112 (6 LOC)
transcribe.js
/**
 * Converts seconds into a "<h>h <m>m <s>s" string.
 *
 * Each unit is computed from the raw value and floored on its own.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Formatted duration, e.g. "0h 2m 5s".
 */
function formatDuration(seconds) {
    const units = [
        ["h", seconds / 3600],
        ["m", (seconds % 3600) / 60],
        ["s", seconds % 60],
    ];
    return units.map(([suffix, amount]) => `${Math.floor(amount)}${suffix}`).join(" ");
}
runWhisper function · javascript · L118-L153 (36 LOC)
transcribe.js
function runWhisper(wavFile, promptFile, threads) {
    const args = [
        "-m", WHISPER_MODEL,
        "-f", wavFile,
        "-l", WHISPER_LANGUAGE,
        "-osrt",                    // Output u SRT formatu
        "-t", String(threads),      // Broj threadova
    ];

    // Dodaj prompt ako postoji
    if (promptFile && fs.existsSync(promptFile)) {
        const promptContent = fs.readFileSync(promptFile, "utf-8").trim();
        if (promptContent.length > 0) {
            args.push("--prompt", promptContent);
        }
    }

    const startTime = Date.now();

    return new Promise((resolve, reject) => {
        const proc = spawn(WHISPER_CLI, args, { stdio: "inherit" });

        proc.on("close", (code) => {
            const elapsed = (Date.now() - startTime) / 1000;
            if (code === 0) {
                resolve({ elapsed });
            } else {
                reject(new Error(`whisper-cli exit code: ${code} (trajalo: ${formatDuration(elapsed)})`));
            }
main function · javascript · L157-L322 (166 LOC)
transcribe.js
async function main() {
    const args = process.argv.slice(2);
    const outputDirIdx = args.indexOf("--output-dir");
    const baseOutputDir = outputDirIdx !== -1 ? args[outputDirIdx + 1] : DEFAULT_OUTPUT_DIR;
    const dryRun = args.includes("--dry-run");
    const channelIdx = args.indexOf("--channel");
    const channelFilter = channelIdx !== -1 ? args[channelIdx + 1] : null;
    const threadsIdx = args.indexOf("--threads");
    const threads = threadsIdx !== -1 ? parseInt(args[threadsIdx + 1], 10) : DEFAULT_THREADS;

    // Provjeri preduvjete
    if (!fs.existsSync(WHISPER_CLI)) {
        console.error(`❌ whisper-cli binary ne postoji: ${WHISPER_CLI}`);
        process.exit(1);
    }
    if (!fs.existsSync(WHISPER_MODEL)) {
        console.error(`❌ Whisper model ne postoji: ${WHISPER_MODEL}`);
        process.exit(1);
    }
    if (!fs.existsSync(LISTS_DIR)) {
        console.error(`❌ Nema direktorija s listama: ${LISTS_DIR}`);
        process.exit(1);
    }
    if (!fs.existsSy
sanitizeDescription function · javascript · L56-L67 (12 LOC)
transcribe_nvidia_canary.mjs
/**
 * Normalises a title to a slug containing only a-z, 0-9 and "_".
 *
 * Applies, in order: lowercasing, č/ć/ž/š/đ transliteration, replacement of
 * all remaining non-alphanumerics by "_", collapsing of "_" runs, and removal
 * of one "_" at either end.
 *
 * @param {string} str - Title to normalise.
 * @returns {string} Slug, or "nepoznat_naslov" if the input is falsy or
 *   nothing remains.
 */
function sanitizeDescription(str) {
    if (!str) return "nepoznat_naslov";

    const table = {
        'č': 'c', 'ć': 'c', 'ž': 'z', 'š': 's', 'đ': 'd',
        'Č': 'c', 'Ć': 'c', 'Ž': 'z', 'Š': 's', 'Đ': 'd'
    };
    const steps = [
        (s) => s.toLowerCase(),
        (s) => s.replace(/[čćžšđČĆŽŠĐ]/g, (c) => table[c] || c),
        (s) => s.replace(/[^a-z0-9]/g, '_'),
        (s) => s.replace(/_+/g, '_'),
        (s) => s.replace(/^_|_$/g, ''),
    ];

    const result = steps.reduce((current, step) => step(current), str);
    return result || "nepoznat_naslov";
}
extractVideoId function · javascript · L69-L74 (6 LOC)
transcribe_nvidia_canary.mjs
/**
 * Extracts the 11-character video id from a YouTube URL.
 *
 * Looks for the id directly after "youtu.be/" or "v=".
 *
 * @param {string} url - URL text; whitespace around it is ignored.
 * @returns {string|null} The id, or null when the input is not a string, is
 *   blank, or has no recognisable id.
 */
function extractVideoId(url) {
    // Reject non-strings up front so .trim() never runs on null/undefined.
    if (typeof url !== "string") return null;
    const cleaned = url.trim();
    if (!cleaned) return null;
    const m = cleaned.match(/(?:youtu\.be\/|v=)([a-zA-Z0-9_-]{11})/);
    return m ? m[1] : null;
}
extractDataFromLine function · javascript · L76-L93 (18 LOC)
transcribe_nvidia_canary.mjs
/**
 * Interprets a list-file line as a video entry.
 *
 * Lines without "|" are treated as a bare URL. Otherwise the last field is
 * the URL; two fields mean "title | url", three or more mean
 * "date | title... | url" with the title fields joined by a space.
 *
 * @param {string} line - Raw line.
 * @returns {{url: string, title: string, date: string}|null} Entry (title
 *   defaults to "nepoznat_naslov", date to "NA"), or null for empty lines and
 *   lines beginning with "#".
 */
function extractDataFromLine(line) {
    const content = line.trim();
    const isIgnorable = content === "" || content.startsWith("#");
    if (isIgnorable) return null;

    const DEFAULT_TITLE = "nepoznat_naslov";
    const DEFAULT_DATE = "NA";

    if (!content.includes("|")) {
        return { url: content, title: DEFAULT_TITLE, date: DEFAULT_DATE };
    }

    const cells = content.split("|");
    const lastIndex = cells.length - 1;
    const hasDate = cells.length >= 3;

    return {
        url: cells[lastIndex].trim(),
        title: hasDate ? cells.slice(1, lastIndex).join(" ").trim() : cells[0].trim(),
        date: hasDate ? cells[0].trim() : DEFAULT_DATE,
    };
}
loadState function · javascript · L95-L104 (10 LOC)
transcribe_nvidia_canary.mjs
/**
 * Reads and parses the JSON state file.
 *
 * @param {string} stateFile - Path to the state file.
 * @returns {*} The parsed content, or a blank `{ completed: [], failed: [] }`
 *   when the file is missing or cannot be read/parsed (logged as an error).
 */
function loadState(stateFile) {
    const blankState = () => ({ completed: [], failed: [] });

    if (!fs.existsSync(stateFile)) {
        return blankState();
    }

    try {
        const text = fs.readFileSync(stateFile, "utf-8");
        return JSON.parse(text);
    } catch (e) {
        console.error(`[GREŠKA] Neispravan JSON stanja: ${stateFile}`);
    }
    return blankState();
}
findFile function · javascript · L106-L113 (8 LOC)
transcribe_nvidia_canary.mjs
/**
 * Locates a file for a video id inside a directory.
 *
 * Candidates must contain `_yt_<videoId>`, end with `suffix`, and not start
 * with "._".
 *
 * @param {string} outputDir - Directory to search (top level only).
 * @param {string} videoId - Video id to match.
 * @param {string} suffix - File-name ending to match.
 * @returns {string|null} Path to the first candidate in listing order, or
 *   null when the directory is missing or there is no candidate.
 */
function findFile(outputDir, videoId, suffix) {
    if (!fs.existsSync(outputDir)) return null;

    const needle = `_yt_${videoId}`;
    const [firstHit] = fs
        .readdirSync(outputDir)
        .filter((name) => !name.startsWith("._"))
        .filter((name) => name.includes(needle) && name.endsWith(suffix));

    return firstHit ? path.join(outputDir, firstHit) : null;
}
Methodology: Repobility · https://repobility.com/research/state-of-ai-code-2026/
formatDuration function · javascript · L115-L120 (6 LOC)
transcribe_nvidia_canary.mjs
/**
 * Produces a "<h>h <m>m <s>s" label for a duration in seconds.
 *
 * Hours, minutes and seconds are floored individually.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} Label such as "1h 1m 1s".
 */
function formatDuration(seconds) {
    const wholeOf = (value) => Math.floor(value);
    return `${wholeOf(seconds / 3600)}h ${wholeOf((seconds % 3600) / 60)}m ${wholeOf(seconds % 60)}s`;
}
parseArgs function · javascript · L125-L191 (67 LOC)
transcribe_nvidia_canary.mjs
function parseArgs() {
    const args = process.argv.slice(2);
    const config = {
        outputDir: DEFAULT_OUTPUT_DIR,
        sourceLang: DEFAULT_SOURCE_LANG,
        targetLang: DEFAULT_TARGET_LANG,
        dryRun: false,
        singleFile: null,
        channelFilter: null,
        hfToken: null,
    };

    for (let i = 0; i < args.length; i++) {
        switch (args[i]) {
            case "--output-dir":
                config.outputDir = args[++i];
                break;
            case "--channel":
                config.channelFilter = args[++i];
                break;
            case "--source-lang":
                config.sourceLang = args[++i];
                break;
            case "--target-lang":
                config.targetLang = args[++i];
                break;
            case "--dry-run":
                config.dryRun = true;
                break;
            case "--file":
                config.singleFile = args[++i];
                break;
            ca
downloadFile function · javascript · L197-L212 (16 LOC)
transcribe_nvidia_canary.mjs
/**
 * Downloads a URL to a local file without ever overwriting an existing file.
 *
 * The whole response body is buffered in memory before it is written.
 *
 * @param {string} url - Address to fetch.
 * @param {string} outputPath - Destination path.
 * @returns {Promise<boolean>} true when the file was written; false when the
 *   destination already exists (before the download, or by the time of the
 *   write) and was therefore left untouched.
 * @throws {Error} When the response status is not OK, or on fetch/write
 *   failures other than "file already exists".
 */
async function downloadFile(url, outputPath) {
    const reportSkip = () => {
        console.log(`      ⚠️  Datoteka već postoji, preskačem: ${path.basename(outputPath)}`);
    };

    // Safety check — never overwrite an existing file (also avoids a needless download).
    if (fs.existsSync(outputPath)) {
        reportSkip();
        return false;
    }

    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`HTTP ${response.status} pri downloadu: ${url}`);
    }

    const buffer = Buffer.from(await response.arrayBuffer());

    try {
        // "wx" fails instead of overwriting if the file appeared while the
        // download was in flight, closing the check-then-write race.
        fs.writeFileSync(outputPath, buffer, { flag: "wx" });
    } catch (err) {
        if (err.code === "EEXIST") {
            reportSkip();
            return false;
        }
        throw err;
    }
    return true;
}
transcribeFile function · javascript · L217-L268 (52 LOC)
transcribe_nvidia_canary.mjs
async function transcribeFile(client, wavFile, sourceLang, targetLang) {
    const startTime = Date.now();

    const fileSize = fs.statSync(wavFile).size;
    const fileSizeMB = (fileSize / (1024 * 1024)).toFixed(1);
    console.log(`      ⏳ Uploadiram WAV (${fileSizeMB} MB)...`);

    // Pročitaj WAV datoteku i pošalji kao Blob
    const wavBuffer = fs.readFileSync(wavFile);
    const wavBlob = new Blob([wavBuffer], { type: "audio/wav" });

    let result;
    try {
        result = await client.predict(API_ENDPOINT, {
            audio_path: handle_file(wavBlob),
            source_lang: sourceLang,
            target_lang: targetLang,
        });
    } catch (err) {
        // Verbose error logging — prikaži puni server response
        console.error(`      🔴 GREŠKA od servera:`);
        console.error(`         Poruka:  ${err.message}`);
        if (err.response) {
            console.error(`         Status:  ${err.response.status} ${err.response.statusText}`);
            try {
 
processEntry function · javascript · L274-L322 (49 LOC)
transcribe_nvidia_canary.mjs
async function processEntry(client, wavFile, config) {
    const srtOutput = wavFile + CANARY_SRT_SUFFIX;
    const csvOutput = wavFile + CANARY_CSV_SUFFIX;

    // Već postoji canary transkript?
    if (fs.existsSync(srtOutput)) {
        return { status: "skipped" };
    }

    if (config.dryRun) {
        const size = (fs.statSync(wavFile).size / (1024 * 1024)).toFixed(1);
        console.log(`   🔄 [TRANSKRIBIRAO BI] ${path.basename(wavFile, ".wav")}`);
        console.log(`      📄 WAV: ${path.basename(wavFile)} (${size} MB)`);
        console.log(`         → ${path.basename(srtOutput)}`);
        console.log(`         → ${path.basename(csvOutput)}`);
        return { status: "dry-run" };
    }

    const baseName = path.basename(wavFile, ".wav");
    console.log(`\n   🎙️  [TRANSKRIBIRAM] ${baseName}`);

    const result = await transcribeFile(client, wavFile, config.sourceLang, config.targetLang);

    console.log(`      ⏱️  Transkripcija trajala: ${formatDuration(result.elapsed)}`
main function · javascript · L326-L522 (197 LOC)
transcribe_nvidia_canary.mjs
async function main() {
    const config = parseArgs();

    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   🐤 NVIDIA CANARY 1B v2 — REMOTE TRANSKRIPCIJA ║");
    console.log("╚══════════════════════════════════════════════════╝");
    console.log(`   🌐 HF Space: ${HF_SPACE}`);
    console.log(`   � HF Token: ${config.hfToken ? '✅ (proslijeđen)' : '❌ (anonimna kvota — dodaj --hf-token za više GPU vremena)'}`);
    console.log(`   �🗣️  Izvorni jezik: ${config.sourceLang}`);
    console.log(`   💬 Ciljni jezik: ${config.targetLang}`);

    // --- NAČIN 1: Pojedinačna datoteka (--file) ---
    if (config.singleFile) {
        if (!fs.existsSync(config.singleFile)) {
            console.error(`❌ Datoteka ne postoji: ${config.singleFile}`);
            process.exit(1);
        }
        if (!config.singleFile.endsWith(".wav")) {
            console.error(`❌ Datoteka nije WAV: ${config.singleFile}`);
            process.exit(1);
        }

        con
‹ prevpage 4 / 4