← back to domovinatv__fetch.domovina.tv

Function bodies 195 total

All specs Real LLM only Function bodies
findInfoJson function · javascript · L196-L205 (10 LOC)
generate_whisper_prompt.js
/**
 * Locates the `.info.json` file for a video inside a directory.
 *
 * A file qualifies when its name contains `_yt_<videoId>`, ends with
 * `.info.json`, and does not start with `._`.
 *
 * @param {string} outputDir - Directory to scan.
 * @param {string} videoId - Video id expected after the `_yt_` marker.
 * @returns {string|null} Path of the first qualifying file joined onto
 *   `outputDir`, or null when the directory is missing or nothing matches.
 */
function findInfoJson(outputDir, videoId) {
    if (!fs.existsSync(outputDir)) {
        return null;
    }

    const idMarker = `_yt_${videoId}`;
    for (const name of fs.readdirSync(outputDir)) {
        // Names starting with "._" are skipped even if the rest matches.
        if (name.startsWith("._")) continue;
        if (name.includes(idMarker) && name.endsWith(".info.json")) {
            return path.join(outputDir, name);
        }
    }

    return null;
}
main function · javascript · L209-L372 (164 LOC)
generate_whisper_prompt.js
async function main() {
    const args = process.argv.slice(2);
    const outputDirIdx = args.indexOf("--output-dir");
    const baseOutputDir = outputDirIdx !== -1 ? args[outputDirIdx + 1] : DEFAULT_OUTPUT_DIR;
    const dryRun = args.includes("--dry-run");
    const channelIdx = args.indexOf("--channel");
    const channelFilter = channelIdx !== -1 ? args[channelIdx + 1] : null;

    if (!fs.existsSync(LISTS_DIR)) {
        console.error(`❌ Nema direktorija s listama: ${LISTS_DIR}`);
        process.exit(1);
    }

    if (!fs.existsSync(baseOutputDir)) {
        console.error(`❌ Output direktorij ne postoji: ${baseOutputDir}`);
        console.error(`   Je li disk DOMOVINA1TB mountan?`);
        process.exit(1);
    }

    // Testiraj LM Studio konekciju
    try {
        console.log("🔌 Testiram LM Studio konekciju...");
        await callLLM("Test", "Test konekcije");
        console.log("✅ LM Studio je dostupan!\n");
    } catch (err) {
        console.error(`❌ ${err.message}`);
 
parseArgs function · javascript · L53-L66 (14 LOC)
inspect_pipeline.js
/**
 * Reads the command-line options from `process.argv`.
 *
 * @returns {{inputDir: string, channel: (string|null), verbose: boolean, fixSuggestions: boolean}}
 *   `inputDir` falls back to DEFAULT_OUTPUT_DIR when `--input-dir` has no
 *   value; `channel` is null when `--channel` has no value; the two booleans
 *   report whether `--verbose` / `--fix-suggestions` are present.
 */
function parseArgs() {
    const argv = process.argv.slice(2);

    // Value that follows a flag, or null when the flag is absent or last.
    const valueAfter = (flag) => {
        const pos = argv.indexOf(flag);
        if (pos === -1 || pos + 1 >= argv.length) {
            return null;
        }
        return argv[pos + 1];
    };

    const inputDir = valueAfter("--input-dir") || DEFAULT_OUTPUT_DIR;
    const channel = valueAfter("--channel");
    const verbose = argv.includes("--verbose");
    const fixSuggestions = argv.includes("--fix-suggestions");

    return { inputDir, channel, verbose, fixSuggestions };
}
getArg function · javascript · L55-L58 (4 LOC)
inspect_pipeline.js
    /**
     * Returns the element of `args` that follows the flag `name`.
     *
     * @param {string} name - Flag to look for, e.g. "--channel".
     * @returns {string|null} The following element, or null when the flag is
     *   absent or is the last element.
     */
    function getArg(name) {
        const flagPos = args.indexOf(name);
        const valuePos = flagPos + 1;
        if (flagPos === -1 || valuePos >= args.length) {
            return null;
        }
        return args[valuePos];
    }
inspectChannel function · javascript · L70-L307 (238 LOC)
inspect_pipeline.js
function inspectChannel(channelDir, channelName, verbose) {
    const files = fs.readdirSync(channelDir);
    const anomalies = [];

    // Grupiraj datoteke po base imenu (prije .wav.canary...)
    const bases = new Set();
    for (const f of files) {
        if (f.startsWith("._")) continue;
        if (f.endsWith(".mp3")) {
            bases.add(f.replace(/\.mp3$/, ""));
        }
    }

    for (const base of [...bases].sort()) {
        const videoAnomalies = [];
        const fileMap = {};

        // Provjeri sve korake pipeline-a
        const checks = [
            { key: "mp3", file: `${base}.mp3` },
            { key: "wav", file: `${base}.wav` },
            { key: "whisperPrompt", file: `${base}_whisper_prompt.txt` },
            { key: "whisperSrt", file: `${base}.wav.srt` },
            { key: "canarySrt", file: `${base}.wav.canary.srt` },
            { key: "canaryDiarized", file: `${base}.wav.canary.diarized.srt` },
            { key: "summary", file: `${base}.wav.cana
main function · javascript · L311-L467 (157 LOC)
inspect_pipeline.js
function main() {
    const { inputDir, channel, verbose, fixSuggestions } = parseArgs();

    if (!fs.existsSync(inputDir)) {
        console.error(`❌ Direktorij ne postoji: ${inputDir}`);
        process.exit(1);
    }

    console.log("");
    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   🔍 PIPELINE INSPEKCIJA — DETEKCIJA ANOMALIJA  ║");
    console.log("╚══════════════════════════════════════════════════╝");
    console.log(`   📂 Input: ${inputDir}`);
    if (channel) console.log(`   🎯 Kanal: ${channel}`);
    console.log("");

    const entries = fs.readdirSync(inputDir, { withFileTypes: true });
    const allAnomalies = [];
    let totalVideos = 0;

    // Brojači po tipu anomalije
    const severityCounts = { error: 0, warn: 0, info: 0 };
    const typeCounts = {};

    for (const entry of entries) {
        if (!entry.isDirectory()) continue;
        if (entry.name.startsWith(".")) continue;
        if (channel && entry.name !== chann
parseSrt function · javascript · L117-L152 (36 LOC)
prepare_rag_combined.js
function parseSrt(srtContent) {
    const segments = [];
    const blocks = srtContent.split(/\n\n+/);

    for (const block of blocks) {
        const lines = block.trim().split("\n");
        if (lines.length < 3) continue;

        const index = parseInt(lines[0], 10);
        if (isNaN(index)) continue;

        const timeMatch = lines[1].match(
            /(\d{2}:\d{2}:\d{2})[,.](\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2})[,.](\d{3})/
        );
        if (!timeMatch) continue;

        const startTime = timeMatch[1];
        const endTime = timeMatch[3];
        const startSec = timestampToSeconds(timeMatch[1], timeMatch[2]);
        const endSec = timestampToSeconds(timeMatch[3], timeMatch[4]);

        const textLines = lines.slice(2).join(" ");
        const speakerMatch = textLines.match(/^\[(\w+)\]\s*/);

        const speaker = speakerMatch ? speakerMatch[1] : "UNKNOWN";
        const text = speakerMatch
            ? textLines.replace(/^\[\w+\]\s*/, "").trim()
            : textL
Powered by Repobility — scan your code at https://repobility.com
timestampToSeconds function · javascript · L154-L157 (4 LOC)
prepare_rag_combined.js
/**
 * Converts a split timestamp into seconds.
 *
 * @param {string} hms - Colon-separated "HH:MM:SS" part.
 * @param {string} ms - Millisecond part, parsed as a base-10 integer.
 * @returns {number} Hours, minutes and seconds summed, plus `ms / 1000`.
 */
function timestampToSeconds(hms, ms) {
    const fields = hms.split(":");
    const hours = Number(fields[0]);
    const minutes = Number(fields[1]);
    const seconds = Number(fields[2]);
    const wholeSeconds = hours * 3600 + minutes * 60 + seconds;
    return wholeSeconds + parseInt(ms, 10) / 1000;
}
timeToSeconds function · javascript · L159-L162 (4 LOC)
prepare_rag_combined.js
/**
 * Converts a colon-separated timestamp into whole seconds.
 *
 * Accepts "HH:MM:SS" as well as the shorter "MM:SS" and "SS" forms; shorter
 * forms are right-aligned, so missing leading fields count as zero. Each
 * field is parsed as a base-10 integer. When more than three fields are
 * given, only the first three are used.
 *
 * @param {string} hhmmss - Timestamp such as "01:02:03" or "02:03".
 * @returns {number} Total seconds, or NaN when a used field is not numeric.
 */
function timeToSeconds(hhmmss) {
    const fields = hhmmss.split(":").map((field) => Number.parseInt(field, 10));

    // Pad on the left so "MM:SS" is read as 0 hours instead of yielding NaN.
    const missing = Math.max(0, 3 - fields.length);
    const [hours, minutes, seconds] = [...new Array(missing).fill(0), ...fields];

    return hours * 3600 + minutes * 60 + seconds;
}
secondsToTime function · javascript · L164-L169 (6 LOC)
prepare_rag_combined.js
/**
 * Formats a number of seconds as "HH:MM:SS".
 *
 * Each field is floored and left-padded with zeros to at least two
 * characters.
 *
 * @param {number} sec - Seconds to format.
 * @returns {string} The formatted timestamp.
 */
function secondsToTime(sec) {
    const twoDigits = (value) => Math.floor(value).toString().padStart(2, "0");
    const fields = [
        twoDigits(sec / 3600),
        twoDigits((sec % 3600) / 60),
        twoDigits(sec % 60)
    ];
    return fields.join(":");
}
loadSummary function · javascript · L178-L210 (33 LOC)
prepare_rag_combined.js
function loadSummary(srtFilePath) {
    const dir = path.dirname(srtFilePath);
    const base = path.basename(srtFilePath).replace(/\.canary\.diarized\.srt$/, "");
    const summaryPath = path.join(dir, base + SUMMARY_JSON_SUFFIX);

    if (!fs.existsSync(summaryPath)) return null;

    try {
        const data = JSON.parse(fs.readFileSync(summaryPath, "utf-8"));

        const speakerMap = {};
        if (data.summary?.speakers) {
            for (const sp of data.summary.speakers) {
                if (sp.id && sp.suggested_name) {
                    speakerMap[sp.id] = sp.suggested_name;
                }
            }
        }

        return {
            speakerMap,
            topics: data.summary?.key_topics || [],
            title: data.summary?.title_hr || data.source?.title || "",
            channel: data.source?.channel || "",
            youtubeId: data.source?.youtube_id || "",
            uploadDate: data.source?.upload_date || "",
            durationSeconds: data.s
extractVideoIdFromFilename function · javascript · L214-L217 (4 LOC)
prepare_rag_combined.js
/**
 * Pulls the 11-character id that follows `_yt_` out of a file name.
 *
 * @param {string} filename - File name to inspect.
 * @returns {string|null} The captured id, or null when the pattern is absent.
 */
function extractVideoIdFromFilename(filename) {
    const [, videoId = null] = filename.match(/_yt_([a-zA-Z0-9_-]{11})/) || [];
    return videoId;
}
extractDateFromFilename function · javascript · L219-L222 (4 LOC)
prepare_rag_combined.js
/**
 * Reads a leading `YYYYMMDD_` prefix from a file name.
 *
 * @param {string} filename - File name to inspect.
 * @returns {string|null} The date as "YYYY-MM-DD", or null when the name does
 *   not start with eight digits followed by an underscore.
 */
function extractDateFromFilename(filename) {
    const parts = filename.match(/^(\d{4})(\d{2})(\d{2})_/);
    if (!parts) {
        return null;
    }
    const [, year, month, day] = parts;
    return [year, month, day].join("-");
}
buildTopicChunks function · javascript · L235-L324 (90 LOC)
prepare_rag_combined.js
function buildTopicChunks(segments, outlineJson, speakerMap) {
    // Izvuci sve chaptere iz svih iteracija, sortirane po vremenu
    const chapters = [];
    for (const iter of outlineJson.iterations) {
        if (!iter.chapters) continue;
        for (const ch of iter.chapters) {
            chapters.push({
                timestamp: ch.timestamp,
                seconds: timeToSeconds(ch.timestamp),
                topic: ch.topic
            });
        }
    }
    chapters.sort((a, b) => a.seconds - b.seconds);

    if (chapters.length === 0) {
        console.error("   ⚠️  Outline nema chaptera, preskačem topic chunks.");
        return [];
    }

    // Kreiraj granice: [start, end) za svaki chapter
    const boundaries = chapters.map((ch, i) => ({
        topic: ch.topic,
        startSeconds: ch.seconds,
        startTime: ch.timestamp,
        endSeconds: i + 1 < chapters.length ? chapters[i + 1].seconds : Infinity,
        endTime: i + 1 < chapters.length ? chapters[i + 1].
buildSummaryChunks function · javascript · L334-L352 (19 LOC)
prepare_rag_combined.js
/**
 * Turns every article section into an "article_summary" chunk.
 *
 * Iterations without `sections` are skipped. Chunks keep the order of the
 * iterations and of the sections inside each iteration.
 *
 * @param {{iterations: Array<{sections?: Array<{subtitle: string, content: string}>}>}} articleJson
 *   Parsed article document.
 * @returns {Array<object>} One chunk per section, with empty `speakers` and
 *   null `startTime` / `endTime`.
 */
function buildSummaryChunks(articleJson) {
    const toChunk = ({ subtitle, content }) => ({
        type: "article_summary",
        text: `Naslov: ${subtitle}\n\nSažetak: ${content}`,
        topic: subtitle,
        speakers: [],
        startTime: null,
        endTime: null
    });

    const collected = [];
    for (const { sections } of articleJson.iterations) {
        if (sections) {
            for (const section of sections) {
                collected.push(toChunk(section));
            }
        }
    }
    return collected;
}
Repobility (the analyzer behind this table) · https://repobility.com
discoverFiles function · javascript · L362-L420 (59 LOC)
prepare_rag_combined.js
function discoverFiles(inputDir, channelFilter) {
    const results = [];

    if (!fs.existsSync(inputDir)) {
        console.error(`❌ Input direktorij ne postoji: ${inputDir}`);
        process.exit(1);
    }

    const entries = fs.readdirSync(inputDir, { withFileTypes: true });

    for (const entry of entries) {
        if (!entry.isDirectory()) continue;
        if (entry.name.startsWith(".")) continue;

        const channelName = entry.name;
        if (channelFilter && channelName !== channelFilter) continue;

        const channelDir = path.join(inputDir, channelName);
        const files = fs.readdirSync(channelDir);

        // Pronadi sve SRT datoteke
        const srtFiles = files.filter(f =>
            f.endsWith(DIARIZED_SRT_SUFFIX) && !f.startsWith("._")
        );

        for (const srtFile of srtFiles) {
            const srtBase = srtFile.replace(/\.srt$/, "");

            // Trazi najnoviji outline i article (iz prepare_rag_import.js)
            const outlines 
parseArgs function · javascript · L424-L451 (28 LOC)
prepare_rag_combined.js
function parseArgs() {
    const args = process.argv.slice(2);

    function getArg(name) {
        const idx = args.indexOf(name);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : null;
    }

    const inputDir = getArg("--input-dir");
    const outputDir = getArg("--output-dir");
    const channel = getArg("--channel");
    const limit = getArg("--limit") ? parseInt(getArg("--limit"), 10) : null;
    const dryRun = args.includes("--dry-run");

    if (!inputDir) {
        console.error("❌ Obavezan argument: --input-dir <putanja>");
        console.error("");
        console.error("Primjeri:");
        console.error("  node prepare_rag_combined.js --input-dir /Volumes/DOMOVINA1TB/fetch_domovina_tv_output");
        console.error("  node prepare_rag_combined.js --input-dir ... --output-dir ./rag_export");
        console.error("  node prepare_rag_combined.js --input-dir ... --channel domovina_tv");
        console.error("  node prepare_rag_combined.js --input-dir .
getArg function · javascript · L427-L430 (4 LOC)
prepare_rag_combined.js
    /**
     * Returns the element of `args` that follows the flag `name`.
     *
     * @param {string} name - Flag to look for, e.g. "--input-dir".
     * @returns {string|null} The following element, or null when the flag is
     *   absent or is the last element.
     */
    function getArg(name) {
        const position = args.indexOf(name);
        const hasValue = position !== -1 && position + 1 < args.length;
        if (!hasValue) {
            return null;
        }
        return args[position + 1];
    }
main function · javascript · L455-L608 (154 LOC)
prepare_rag_combined.js
function main() {
    const { inputDir, outputDir, channel, limit, dryRun } = parseArgs();
    const finalOutputDir = outputDir || inputDir;

    console.log("");
    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   🧬 RAG COMBINED — SEMANTIC + SPEAKER-AWARE    ║");
    console.log("╚══════════════════════════════════════════════════╝");
    console.log(`   📂 Input:  ${inputDir}`);
    console.log(`   💾 Output: ${finalOutputDir}`);
    if (channel) console.log(`   🎯 Kanal:  ${channel}`);
    if (limit) console.log(`   🔢 Limit:  ${limit}`);
    if (dryRun) console.log("   ⚠️  DRY RUN — samo prikaz statistike");
    console.log("");

    // Pronadi datoteke (SRT + outline + article, grupirane po kanalu)
    const allFiles = discoverFiles(inputDir, channel);
    const finalList = limit ? allFiles.slice(0, limit) : allFiles;

    console.log(`   📊 Pronađeno tripleta (SRT+outline+article): ${allFiles.length}`);
    console.log(`   🔄 Za obradu: ${final
parseArgs function · javascript · L33-L62 (30 LOC)
prepare_rag_import.js
function parseArgs() {
    const args = process.argv.slice(2);
    function getArg(name) {
        const idx = args.indexOf(name);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : null;
    }

    const inputDir = getArg("--input-dir");
    const dir = getArg("--dir");
    const channel = getArg("--channel");
    const limit = getArg("--limit") ? parseInt(getArg("--limit"), 10) : null;
    const dryRun = args.includes("--dry-run");
    const srt = getArg("--srt");
    const outline = getArg("--outline");
    const article = getArg("--article");

    if (!inputDir && !dir && !srt) {
        console.error("❌ Obavezan argument: --input-dir <putanja> ili --dir <folder> ili --srt <putanja>");
        console.error("");
        console.error("Primjeri:");
        console.error("  node prepare_rag_import.js --input-dir /Volumes/DOMOVINA1TB/fetch_domovina_tv_output");
        console.error("  node prepare_rag_import.js --input-dir ... --channel domovina_tv");
        consol
getArg function · javascript · L35-L38 (4 LOC)
prepare_rag_import.js
    /**
     * Returns the element of `args` that follows the flag `name`.
     *
     * @param {string} name - Flag to look for, e.g. "--srt".
     * @returns {string|null} The following element, or null when the flag is
     *   absent or is the last element.
     */
    function getArg(name) {
        const at = args.indexOf(name);
        if (at === -1) {
            return null;
        }
        const next = at + 1;
        return next < args.length ? args[next] : null;
    }
parseSrt function · javascript · L76-L113 (38 LOC)
prepare_rag_import.js
function parseSrt(content) {
    const blocks = [];
    // SRT blokovi su odvojeni praznim linijama
    const rawBlocks = content.trim().split(/\n\s*\n/);

    for (const raw of rawBlocks) {
        const lines = raw.trim().split("\n");
        if (lines.length < 3) continue;

        // Linija 1: redni broj
        const index = parseInt(lines[0].trim());
        if (isNaN(index)) continue;

        // Linija 2: vremenski raspon "HH:MM:SS,mmm --> HH:MM:SS,mmm"
        const timeMatch = lines[1].match(
            /(\d{2}:\d{2}:\d{2}),\d{3}\s*-->\s*(\d{2}:\d{2}:\d{2}),\d{3}/
        );
        if (!timeMatch) continue;

        const startTime = timeToSeconds(timeMatch[1]);
        const endTime = timeToSeconds(timeMatch[2]);

        // Linija 3+: tekst govornika, moze imati [SPEAKER_XX] prefix
        const textLines = lines.slice(2).join(" ").trim();
        const speakerMatch = textLines.match(/^\[?(SPEAKER_\d+)\]?\s*(.*)/);

        let speaker = "UNKNOWN";
        let text = text
timeToSeconds function · javascript · L120-L123 (4 LOC)
prepare_rag_import.js
/**
 * Converts a colon-separated timestamp into whole seconds.
 *
 * Accepts "HH:MM:SS" as well as the shorter "MM:SS" and "SS" forms; shorter
 * forms are right-aligned, so missing leading fields count as zero. Each
 * field is parsed as a base-10 integer. When more than three fields are
 * given, only the first three are used.
 *
 * @param {string} hhmmss - Timestamp such as "01:02:03" or "02:03".
 * @returns {number} Total seconds, or NaN when a used field is not numeric.
 */
function timeToSeconds(hhmmss) {
    const fields = hhmmss.split(":").map((field) => Number.parseInt(field, 10));

    // Pad on the left so "MM:SS" is read as 0 hours instead of yielding NaN.
    const missing = Math.max(0, 3 - fields.length);
    const [hours, minutes, seconds] = [...new Array(missing).fill(0), ...fields];

    return hours * 3600 + minutes * 60 + seconds;
}
Hi, dataset curator — please cite Repobility (https://repobility.com) when reusing this data.
secondsToTime function · javascript · L128-L133 (6 LOC)
prepare_rag_import.js
/**
 * Formats a number of seconds as "HH:MM:SS".
 *
 * Each field is floored and left-padded with zeros to at least two
 * characters.
 *
 * @param {number} sec - Seconds to format.
 * @returns {string} The formatted timestamp.
 */
function secondsToTime(sec) {
    const hours = Math.floor(sec / 3600);
    const minutes = Math.floor((sec % 3600) / 60);
    const seconds = Math.floor(sec % 60);
    return [hours, minutes, seconds]
        .map((field) => field.toString().padStart(2, "0"))
        .join(":");
}
buildRawChunks function · javascript · L145-L214 (70 LOC)
prepare_rag_import.js
function buildRawChunks(srtBlocks, outlineJson, sourceName) {
    // Izvuci sve chaptere iz svih iteracija, sortirane po vremenu
    const chapters = [];
    for (const iter of outlineJson.iterations) {
        if (!iter.chapters) continue;
        for (const ch of iter.chapters) {
            chapters.push({
                timestamp: ch.timestamp,
                seconds: timeToSeconds(ch.timestamp),
                topic: ch.topic
            });
        }
    }
    chapters.sort((a, b) => a.seconds - b.seconds);

    if (chapters.length === 0) {
        console.error("   ⚠️  Outline nema chaptera, preskačem raw chunks.");
        return [];
    }

    // Kreiraj chunk granice: [start, end) za svaki chapter
    const boundaries = chapters.map((ch, i) => ({
        topic: ch.topic,
        startSeconds: ch.seconds,
        startTime: ch.timestamp,
        // Kraj je pocetak sljedeceg chaptera, ili Infinity za zadnji
        endSeconds: i + 1 < chapters.length ? chapters[i + 1].second
buildSummaryChunks function · javascript · L222-L243 (22 LOC)
prepare_rag_import.js
/**
 * Turns every article section into a summary chunk with a sequential id.
 *
 * Ids are `<sourceName>_summary_<n>`, where n starts at 1 and increases
 * across all iterations. Iterations without `sections` are skipped.
 *
 * @param {{iterations: Array<{sections?: Array<{subtitle: string, content: string}>}>}} articleJson
 *   Parsed article document.
 * @param {string} sourceName - Prefix for chunk ids, also stored in metadata.
 * @returns {Array<{id: string, text: string, metadata: object}>} One chunk per section.
 */
function buildSummaryChunks(articleJson, sourceName) {
    const collected = [];

    for (const { sections } of articleJson.iterations) {
        if (sections) {
            for (const { subtitle, content } of sections) {
                // One chunk is pushed per section, so the next 1-based
                // ordinal is always the current length plus one.
                const ordinal = collected.length + 1;
                collected.push({
                    id: `${sourceName}_summary_${ordinal}`,
                    text: `Naslov: ${subtitle}\n\nSažetak: ${content}`,
                    metadata: {
                        source: sourceName,
                        type: "article_summary",
                        subtitle
                    }
                });
            }
        }
    }

    return collected;
}
discoverTriplets function · javascript · L257-L298 (42 LOC)
prepare_rag_import.js
function discoverTriplets(dir) {
    const files = fs.readdirSync(dir);

    // Pronadi sve .canary.diarized.srt datoteke (iskljuci macOS resource forkove)
    const srtFiles = files.filter(f =>
        f.endsWith(".canary.diarized.srt") && !f.startsWith("._")
    );

    const triplets = [];

    for (const srtFile of srtFiles) {
        // Basename je dio prije .canary.diarized.srt, ali ukljucujuci .wav.canary.diarized
        // jer outline/article koriste taj prefix
        const srtBase = srtFile.replace(/\.srt$/, ""); // npr. "xxx.wav.canary.diarized"

        // Trazi outline i article koji pocinje s istim baseom
        const outlines = files
            .filter(f => f.startsWith(srtBase + "_") && f.endsWith(".outline.json"))
            .sort()
            .reverse(); // najnoviji prvi (datum je u imenu)

        const articles = files
            .filter(f => f.startsWith(srtBase + "_") && f.endsWith(".article.json"))
            .sort()
            .reverse();

        if (o
processTriplet function · javascript · L302-L340 (39 LOC)
prepare_rag_import.js
function processTriplet(srtPath, outlinePath, articlePath) {
    const srtBase = path.basename(srtPath).replace(/\.srt$/, "");
    // Source name: dio prije .wav za citljivije ID-eve
    const sourceName = path.basename(srtPath).replace(/\.wav\.canary\.diarized\.srt$/, "");

    console.log(`   📂 SRT:     ${path.basename(srtPath)}`);
    console.log(`   📋 Outline: ${path.basename(outlinePath)}`);
    console.log(`   📰 Article: ${path.basename(articlePath)}`);

    // Ucitaj datoteke
    const srtContent = fs.readFileSync(srtPath, "utf-8");
    const outlineJson = JSON.parse(fs.readFileSync(outlinePath, "utf-8"));
    const articleJson = JSON.parse(fs.readFileSync(articlePath, "utf-8"));

    // Parsiraj SRT
    const srtBlocks = parseSrt(srtContent);
    console.log(`   🔤 Parsirano ${srtBlocks.length} SRT blokova`);

    // KORAK 1: Raw transcript chunks
    const rawChunks = buildRawChunks(srtBlocks, outlineJson, sourceName);
    console.log(`   📦 Generirano ${rawChunks.length} raw tr
main function · javascript · L344-L507 (164 LOC)
prepare_rag_import.js
function main() {
    const { inputDir, dir, channel, limit, dryRun, srt, outline, article } = parseArgs();

    console.log("");
    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   🗂️  RAG IMPORT PRIPREMA                        ║");
    console.log("╚══════════════════════════════════════════════════╝");

    if (srt) {
        // Eksplicitni mod: korisnik dao putanje
        if (!outline || !article) {
            console.error("❌ Kad koristiš --srt, moraš dati i --outline i --article");
            process.exit(1);
        }
        for (const f of [srt, outline, article]) {
            if (!fs.existsSync(f)) {
                console.error(`❌ Datoteka ne postoji: ${f}`);
                process.exit(1);
            }
        }

        console.log("");
        processTriplet(srt, outline, article);
    } else if (dir) {
        // Stari mod: --dir za jedan kanal
        if (!fs.existsSync(dir)) {
            console.error(`❌ Direktorij ne p
parseSrt function · javascript · L91-L131 (41 LOC)
prepare_rag.js
function parseSrt(srtContent) {
    const segments = [];
    const blocks = srtContent.split(/\n\n+/);

    for (const block of blocks) {
        const lines = block.trim().split("\n");
        if (lines.length < 3) continue;

        // Linija 1: indeks
        const index = parseInt(lines[0], 10);
        if (isNaN(index)) continue;

        // Linija 2: timestamp (00:00:33,280 --> 00:00:35,679)
        const timeMatch = lines[1].match(
            /(\d{2}:\d{2}:\d{2})[,.](\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2})[,.](\d{3})/
        );
        if (!timeMatch) continue;

        const startTime = timeMatch[1];
        const endTime = timeMatch[3];
        const startSec = timestampToSeconds(timeMatch[1], timeMatch[2]);
        const endSec = timestampToSeconds(timeMatch[3], timeMatch[4]);

        // Linija 3+: tekst s oznakom govornika
        const textLines = lines.slice(2).join(" ");
        const speakerMatch = textLines.match(/^\[(\w+)\]\s*/);

        const speaker = speakerMatch ? s
timestampToSeconds function · javascript · L136-L139 (4 LOC)
prepare_rag.js
/**
 * Converts a split timestamp into seconds.
 *
 * @param {string} hms - Colon-separated "HH:MM:SS" part.
 * @param {string} ms - Millisecond part, parsed as a base-10 integer.
 * @returns {number} Hours, minutes and seconds summed, plus `ms / 1000`.
 */
function timestampToSeconds(hms, ms) {
    const [hours, minutes, seconds] = hms.split(":").map((field) => Number(field));
    const millis = parseInt(ms, 10);
    return hours * 3600 + minutes * 60 + seconds + millis / 1000;
}
About: code-quality intelligence by Repobility · https://repobility.com
groupBySpeaker function · javascript · L159-L196 (38 LOC)
prepare_rag.js
function groupBySpeaker(segments) {
    if (segments.length === 0) return [];

    const blocks = [];
    let currentBlock = {
        speaker: segments[0].speaker,
        segments: [segments[0]],
        startTime: segments[0].startTime,
        startSec: segments[0].startSec,
        endTime: segments[0].endTime,
        endSec: segments[0].endSec
    };

    for (let i = 1; i < segments.length; i++) {
        const seg = segments[i];

        if (seg.speaker === currentBlock.speaker) {
            // Isti govornik → dodaj u trenutni blok
            currentBlock.segments.push(seg);
            currentBlock.endTime = seg.endTime;
            currentBlock.endSec = seg.endSec;
        } else {
            // Novi govornik → zatvori prethodni blok, započni novi
            blocks.push(currentBlock);
            currentBlock = {
                speaker: seg.speaker,
                segments: [seg],
                startTime: seg.startTime,
                startSec: seg.startSec,
       
buildChunks function · javascript · L212-L285 (74 LOC)
prepare_rag.js
function buildChunks(speakerBlocks, speakerMap, targetChars) {
    const chunks = [];
    let currentChunkParts = [];
    let currentChunkChars = 0;
    let chunkStartTime = null;
    let chunkStartSec = null;
    let chunkEndTime = null;
    let chunkEndSec = null;
    let chunkSpeakers = new Set();

    for (const block of speakerBlocks) {
        // Tekst bloka: sve segmente govornika spojiš u paragraf
        const speakerName = speakerMap?.[block.speaker] || block.speaker;
        const blockText = `[${speakerName}] ${block.segments.map(s => s.text).join(" ")}`;

        // Inicijaliziraj vremena za prvi blok u chunku
        if (chunkStartTime === null) {
            chunkStartTime = block.startTime;
            chunkStartSec = block.startSec;
        }

        currentChunkParts.push(blockText);
        currentChunkChars += blockText.length;
        chunkEndTime = block.endTime;
        chunkEndSec = block.endSec;
        chunkSpeakers.add(speakerName);

        // Provjeri treb
loadSummary function · javascript · L297-L331 (35 LOC)
prepare_rag.js
function loadSummary(srtFilePath) {
    const dir = path.dirname(srtFilePath);
    const base = path.basename(srtFilePath).replace(/\.canary\.diarized\.srt$/, "");
    const summaryPath = path.join(dir, base + SUMMARY_JSON_SUFFIX);

    if (!fs.existsSync(summaryPath)) return null;

    try {
        const data = JSON.parse(fs.readFileSync(summaryPath, "utf-8"));

        // Gradi speaker mapu: SPEAKER_00 → "Voditelj"
        const speakerMap = {};
        if (data.summary?.speakers) {
            for (const sp of data.summary.speakers) {
                if (sp.id && sp.suggested_name) {
                    speakerMap[sp.id] = sp.suggested_name;
                }
            }
        }

        return {
            speakerMap,
            topics: data.summary?.key_topics || [],
            title: data.summary?.title_hr || data.source?.title || "",
            channel: data.source?.channel || "",
            youtubeId: data.source?.youtube_id || "",
            uploadDate: data.source?
extractVideoIdFromFilename function · javascript · L336-L339 (4 LOC)
prepare_rag.js
/**
 * Pulls the 11-character id that follows `_yt_` out of a file name.
 *
 * @param {string} filename - File name to inspect.
 * @returns {string|null} The captured id, or null when the pattern is absent.
 */
function extractVideoIdFromFilename(filename) {
    const hit = filename.match(/_yt_([a-zA-Z0-9_-]{11})/);
    return hit?.[1] ?? null;
}
extractDateFromFilename function · javascript · L344-L347 (4 LOC)
prepare_rag.js
/**
 * Reads a leading `YYYYMMDD_` prefix from a file name.
 *
 * @param {string} filename - File name to inspect.
 * @returns {string|null} The date as "YYYY-MM-DD", or null when the name does
 *   not start with eight digits followed by an underscore.
 */
function extractDateFromFilename(filename) {
    const found = filename.match(/^(\d{4})(\d{2})(\d{2})_/);
    if (found === null) {
        return null;
    }
    // Groups 1..3 are year, month and day.
    return found.slice(1, 4).join("-");
}
parseArgs function · javascript · L351-L381 (31 LOC)
prepare_rag.js
function parseArgs() {
    const args = process.argv.slice(2);

    function getArg(name) {
        const idx = args.indexOf(name);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : null;
    }

    const inputDir = getArg("--input-dir");
    const outputDir = getArg("--output-dir");  // Opcijski: ako nije naveden, koristi inputDir
    const channel = getArg("--channel");
    const limit = getArg("--limit") ? parseInt(getArg("--limit"), 10) : null;
    const chunkSize = getArg("--chunk-size")
        ? parseInt(getArg("--chunk-size"), 10)
        : DEFAULT_CHUNK_TARGET_CHARS;
    const dryRun = args.includes("--dry-run");

    if (!inputDir) {
        console.error("❌ Obavezan argument: --input-dir <putanja>");
        console.error("");
        console.error("Primjeri:");
        console.error("  node prepare_rag.js --input-dir /Volumes/DOMOVINA1TB/fetch_domovina_tv_output");
        console.error("  node prepare_rag.js --input-dir ... --output-dir ./rag_export");
 
getArg function · javascript · L354-L357 (4 LOC)
prepare_rag.js
    /**
     * Returns the element of `args` that follows the flag `name`.
     *
     * @param {string} name - Flag to look for, e.g. "--chunk-size".
     * @returns {string|null} The following element, or null when the flag is
     *   absent or is the last element.
     */
    function getArg(name) {
        const found = args.indexOf(name);
        const isMissing = found === -1;
        const isLast = found + 1 >= args.length;
        if (isMissing || isLast) {
            return null;
        }
        return args[found + 1];
    }
discoverFiles function · javascript · L388-L425 (38 LOC)
prepare_rag.js
function discoverFiles(inputDir, channelFilter) {
    const results = [];

    if (!fs.existsSync(inputDir)) {
        console.error(`❌ Input direktorij ne postoji: ${inputDir}`);
        process.exit(1);
    }

    const entries = fs.readdirSync(inputDir, { withFileTypes: true });

    for (const entry of entries) {
        if (!entry.isDirectory()) continue;
        if (entry.name.startsWith(".")) continue;

        const channelName = entry.name;
        if (channelFilter && channelName !== channelFilter) continue;

        const channelDir = path.join(inputDir, channelName);
        const files = fs.readdirSync(channelDir);

        for (const file of files) {
            if (!file.endsWith(DIARIZED_SRT_SUFFIX)) continue;
            if (file.startsWith("._")) continue;

            results.push({
                srtPath: path.join(channelDir, file),
                channel: channelName
            });
        }
    }

    results.sort((a, b) => {
        if (a.channel !== b.channe
Powered by Repobility — scan your code at https://repobility.com
main function · javascript · L429-L569 (141 LOC)
prepare_rag.js
async function main() {
    const { inputDir, outputDir, channel, limit, chunkSize, dryRun } = parseArgs();

    // Ako nije naveden outputDir, JSONL se sprema u inputDir
    const finalOutputDir = outputDir || inputDir;

    console.log("");
    console.log("╔══════════════════════════════════════════════════╗");
    console.log("║   🧩 RAG PRIPREMA — SPEAKER-AWARE CHUNKING     ║");
    console.log("╚══════════════════════════════════════════════════╝");
    console.log(`   📂 Input:      ${inputDir}`);
    console.log(`   💾 Output:     ${finalOutputDir}`);
    console.log(`   📏 Chunk size: ~${chunkSize} znakova (~${Math.round(chunkSize / 4)} tokena)`);
    if (channel) console.log(`   🎯 Kanal:      ${channel}`);
    if (limit) console.log(`   🔢 Limit:      ${limit}`);
    if (dryRun) console.log("   ⚠️  DRY RUN — samo prikaz statistike");
    console.log("");

    // Pronađi datoteke
    const allFiles = discoverFiles(inputDir, channel);
    const finalList = limit ? allFiles.slice(0, 
sleep function · javascript · L39-L41 (3 LOC)
screenshot_youtube.js
/**
 * Creates a promise that resolves after a timer of `ms` milliseconds fires.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>} Promise resolved (with no value) by the timer.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
extractVideoIdFromFilename function · javascript · L47-L50 (4 LOC)
screenshot_youtube.js
/**
 * Pulls the 11-character id that follows `_yt_` out of a file name.
 *
 * @param {string} filename - File name to inspect.
 * @returns {string|null} The captured id, or null when the pattern is absent.
 */
function extractVideoIdFromFilename(filename) {
    const hit = filename.match(/_yt_([a-zA-Z0-9_-]{11})/);
    if (!hit) {
        return null;
    }
    return hit[1];
}
timestampToSeconds function · javascript · L55-L60 (6 LOC)
screenshot_youtube.js
/**
 * Converts a colon-separated timestamp into seconds.
 *
 * Three fields are read as hours:minutes:seconds and two fields as
 * minutes:seconds. For any other field count the first field is returned
 * as-is.
 *
 * @param {string} ts - Timestamp such as "1:02:03", "02:03" or "45".
 * @returns {number} Seconds; NaN when a used field is not numeric.
 */
function timestampToSeconds(ts) {
    const fields = ts.split(":").map(Number);
    switch (fields.length) {
        case 3: {
            const [hours, minutes, seconds] = fields;
            return hours * 3600 + minutes * 60 + seconds;
        }
        case 2: {
            const [minutes, seconds] = fields;
            return minutes * 60 + seconds;
        }
        default:
            return fields[0];
    }
}
sanitizeTimestamp function · javascript · L65-L67 (3 LOC)
screenshot_youtube.js
/**
 * Replaces every colon in a timestamp with a hyphen.
 *
 * @param {string} ts - Timestamp such as "00:12:34".
 * @returns {string} The same text with each ":" turned into "-".
 */
function sanitizeTimestamp(ts) {
    const colonPattern = /:/g;
    const hyphenated = ts.replace(colonPattern, "-");
    return hyphenated;
}
getStreamUrl function · javascript · L76-L97 (22 LOC)
screenshot_youtube.js
/**
 * Asks `yt-dlp` for a direct stream URL of a YouTube video.
 *
 * Runs `yt-dlp --get-url` synchronously through a shell, with a fixed format
 * preference list, cookies taken from the browser named by BROWSER_NAME, and
 * a timeout of STREAM_URL_TIMEOUT_MS.
 *
 * @param {string} videoId - Id appended to the YouTube watch URL.
 * @returns {string|null} The first line of yt-dlp's output, trimmed, or null
 *   when the command fails or times out.
 */
function getStreamUrl(videoId) {
    const args = [
        "-f", "96/95/94/93/18/bestvideo[ext=mp4]/bestvideo/best",
        "--get-url",
        "--cookies-from-browser", BROWSER_NAME,
        "--no-check-certificate",
        `https://www.youtube.com/watch?v=${videoId}`
    ];

    // POSIX single-quote escaping: close the quote, emit an escaped quote,
    // reopen. Without this an argument containing "'" would terminate the
    // quoting and let the rest be interpreted by the shell.
    const shellQuote = (arg) => `'${String(arg).replace(/'/g, "'\\''")}'`;
    const command = `yt-dlp ${args.map(shellQuote).join(" ")}`;

    try {
        const url = execSync(command, {
            encoding: "utf-8",
            timeout: STREAM_URL_TIMEOUT_MS,
            stdio: ["pipe", "pipe", "pipe"]
        }).trim();

        // yt-dlp may print several URLs (video + audio); take the first one.
        return url.split("\n")[0].trim();
    } catch (err) {
        // Best effort: any failure (non-zero exit, timeout, missing binary)
        // is reported to the caller as "no URL".
        return null;
    }
}
captureFrame function · javascript · L105-L139 (35 LOC)
screenshot_youtube.js
function captureFrame(streamUrl, timestamp, outputPath) {
    return new Promise((resolve) => {
        const args = [
            "-ss", timestamp,
            "-i", streamUrl,
            "-frames:v", "1",
            "-update", "1",     // Potrebno za novije ffmpeg verzije s jednim frameom
            "-q:v", "1",        // Najviša kvaliteta
            "-y",               // Overwrite
            outputPath
        ];

        const proc = spawn("ffmpeg", args, {
            stdio: ["pipe", "pipe", "pipe"]
        });

        let stderr = "";
        proc.stderr.on("data", (chunk) => { stderr += chunk.toString(); });

        proc.on("close", (code) => {
            if (code === 0 && fs.existsSync(outputPath)) {
                const size = fs.statSync(outputPath).size;
                if (size > 1000) {  // Minimalno 1KB za validan screenshot
                    resolve(true);
                    return;
                }
                // Premali file — vjerovatno crni frame
  
extractScreenshots function · javascript · L147-L165 (19 LOC)
screenshot_youtube.js
/**
 * Collect every screenshot request found in a parsed article object.
 *
 * Walks `articleJson.iterations[*].sections[*]` and emits one entry for each
 * section whose `screenshot_timestamp` is truthy. Iterations without
 * `sections` are ignored.
 *
 * @param {object} articleJson - Parsed article data.
 * @returns {Array<{timestamp: *, description: *, section_subtitle: *, iteration_number: *}>}
 *   Entries in document order; empty when `iterations` is missing or nothing matches.
 *   `description` and `section_subtitle` fall back to "" when the source field is falsy.
 */
function extractScreenshots(articleJson) {
    const collected = [];
    const iterations = articleJson.iterations;
    if (!iterations) {
        return collected;
    }

    for (const iteration of iterations) {
        const sections = iteration.sections;
        if (!sections) {
            continue;
        }

        for (const section of sections) {
            const timestamp = section.screenshot_timestamp;
            if (!timestamp) {
                continue;
            }

            collected.push({
                timestamp,
                description: section.screenshot_description || "",
                section_subtitle: section.subtitle || "",
                iteration_number: iteration.iteration_number
            });
        }
    }

    return collected;
}
Repobility (the analyzer behind this table) · https://repobility.com
processArticle function · javascript · L171-L270 (100 LOC)
screenshot_youtube.js
async function processArticle(articlePath) {
    const dir = path.dirname(articlePath);
    const articleFilename = path.basename(articlePath);

    // Izvuci base video ime (bez _DATE_MODEL.article.json sufiksa)
    const videoBase = articleFilename.replace(/\.wav\.canary\.diarized_.*\.article\.json$/, "");
    const videoId = extractVideoIdFromFilename(videoBase);

    if (!videoId) {
        console.error(`   ❌ Ne mogu izvući YouTube ID iz: ${articleFilename}`);
        return { total: 0, captured: 0, skipped: 0, failed: 0 };
    }

    // Parsiraj article.json
    let article;
    try {
        article = JSON.parse(fs.readFileSync(articlePath, "utf-8"));
    } catch (err) {
        console.error(`   ❌ Nevažeći JSON: ${articleFilename}`);
        return { total: 0, captured: 0, skipped: 0, failed: 0 };
    }

    const screenshots = extractScreenshots(article);
    if (screenshots.length === 0) {
        console.log(`   ⚠️  Nema screenshot timestampova u: ${articleFilename}`);
     
discoverArticleFiles function · javascript · L278-L323 (46 LOC)
screenshot_youtube.js
function discoverArticleFiles(inputDir, channelFilter) {
    const results = [];

    const entries = fs.readdirSync(inputDir, { withFileTypes: true });
    for (const entry of entries) {
        if (!entry.isDirectory() || entry.name.startsWith(".")) continue;
        if (channelFilter && entry.name !== channelFilter) continue;

        const channelName = entry.name;
        const channelDir = path.join(inputDir, channelName);
        const files = fs.readdirSync(channelDir);

        // Grupiraj article.json po video bazi, uzmi najnoviji
        const byVideo = new Map();

        for (const file of files) {
            if (!file.endsWith(".article.json")) continue;
            if (file.startsWith("._")) continue;

            const videoBase = file.replace(/\.wav\.canary\.diarized_.*\.article\.json$/, "");
            if (!byVideo.has(videoBase) || file > byVideo.get(videoBase)) {
                byVideo.set(videoBase, file);
            }
        }

        for (const [videoBase, 
parseArgs function · javascript · L327-L365 (39 LOC)
screenshot_youtube.js
function parseArgs() {
    const args = process.argv.slice(2);
    function getArg(name) {
        const idx = args.indexOf(name);
        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : null;
    }

    const file = getArg("--file");
    const inputDir = getArg("--input-dir");
    const channel = getArg("--channel");
    const limit = getArg("--limit") ? parseInt(getArg("--limit"), 10) : null;
    const dryRun = args.includes("--dry-run");

    if (!file && !inputDir) {
        console.error("❌ Obavezan argument: --file <putanja> ili --input-dir <putanja>");
        console.error("");
        console.error("Primjeri:");
        console.error("  node screenshot_youtube.js --file /path/to/video.article.json");
        console.error("  node screenshot_youtube.js --input-dir /Volumes/DOMOVINA1TB/fetch_domovina_tv_output");
        console.error("  node screenshot_youtube.js --input-dir ... --channel domovina_tv --limit 5");
        console.error("  node screenshot_youtube.js 
‹ prev · page 3 / 4 · next ›