← back to dany2048__morningside-xml-pipeline

Function bodies 59 total

_get_client function · python · L11-L15 (5 LOC)

transcribe.py

def _get_client():
    """Create an OpenAI client using the key in ``OPENAI_API_KEY``.

    Returns:
        An ``OpenAI`` client constructed with the key read from the environment.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return OpenAI(api_key=key)
    # Fail early with a clear message instead of letting the SDK complain later.
    raise RuntimeError("OPENAI_API_KEY not set in environment")

transcribe_chunk_api function · python · L18-L37 (20 LOC)

transcribe.py

def transcribe_chunk_api(chunk_path: str, offset_seconds: float) -> list[dict]:
    """Transcribe one audio chunk through the OpenAI ``whisper-1`` endpoint.

    Args:
        chunk_path: Path of the audio file to upload; opened in binary mode.
        offset_seconds: Value added to every word's ``start`` and ``end``.

    Returns:
        A list of ``{"word", "start", "end"}`` dicts, one per entry in the
        response's ``words`` field. The list is empty when that field is
        missing or falsy.
    """

    def _field(entry, name):
        # Word entries may be attribute-style objects or plain mappings.
        return getattr(entry, name) if hasattr(entry, name) else entry[name]

    client = _get_client()
    with open(chunk_path, "rb") as audio:
        response = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio,
            response_format="verbose_json",
            timestamp_granularities=["word"],
        )

    raw_words = getattr(response, "words", []) or []
    return [
        {
            "word": _field(entry, "word").strip(),
            "start": _field(entry, "start") + offset_seconds,
            "end": _field(entry, "end") + offset_seconds,
        }
        for entry in raw_words
    ]

transcribe_chunk_local function · python · L40-L54 (15 LOC)

transcribe.py

# Loaded local Whisper models, keyed by model name, so a multi-chunk run
# loads the weights only once per process.
_WHISPER_MODELS: dict = {}


def _load_whisper_model(name: str):
    """Return the local Whisper model called *name*, loading it on first use only."""
    model = _WHISPER_MODELS.get(name)
    if model is None:
        # Imported lazily so the dependency is only needed for local transcription.
        import whisper

        model = whisper.load_model(name)
        _WHISPER_MODELS[name] = model
    return model


def transcribe_chunk_local(chunk_path: str, offset_seconds: float) -> list[dict]:
    """Transcribe via local openai-whisper model. Returns list of {word, start, end}.

    The model named by ``WHISPER_MODEL`` is loaded once and reused for later
    chunks instead of being reloaded on every call.

    Args:
        chunk_path: Path of the audio file handed to ``model.transcribe``.
        offset_seconds: Value added to every word's ``start`` and ``end``.

    Returns:
        A flat list of ``{"word", "start", "end"}`` dicts gathered from every
        segment's ``words`` list, in segment order.
    """
    model = _load_whisper_model(WHISPER_MODEL)
    result = model.transcribe(chunk_path, word_timestamps=True, language="en")

    return [
        {
            "word": w["word"].strip(),
            "start": w["start"] + offset_seconds,
            "end": w["end"] + offset_seconds,
        }
        for segment in result.get("segments", [])
        for w in segment.get("words", [])
    ]

transcribe_all function · python · L57-L92 (36 LOC)

transcribe.py

def transcribe_all(chunks: list[tuple[str, float]], use_local: bool = False) -> list[dict]:
    """Transcribe all chunks and merge, deduplicating overlap regions."""
    all_words = []
    transcribe_fn = transcribe_chunk_local if use_local else transcribe_chunk_api

    for i, (chunk_path, offset) in enumerate(chunks):
        chunk_size_mb = os.path.getsize(chunk_path) / (1024 * 1024)
        print(f"  Transcribing chunk {i+1}/{len(chunks)} ({chunk_size_mb:.1f} MB)...")
        words = transcribe_fn(chunk_path, offset)

        if i == 0 or not all_words:
            all_words.extend(words)
            continue

        # Deduplicate overlap region
        overlap_start = offset
        overlap_end = offset + CHUNK_OVERLAP_SECONDS

        new_words = []
        for w in words:
            if w["start"] >= overlap_end:
                new_words.append(w)
            elif w["start"] >= overlap_start:
                is_dup = False
                for existing in all_words[-20:]:

_match_fps function · python · L10-L23 (14 LOC)

xml_gen.py

def _match_fps(fps: float) -> tuple[float, int]:
    """Snap *fps* to the closest standard frame rate.

    Returns:
        ``(matched_fps, timebase)`` for the table entry whose rate has the
        smallest absolute difference from *fps*. On a tie the entry listed
        first wins.
    """
    # (frame rate, timebase) pairs; order matters for tie-breaking.
    standard_rates = (
        (23.976, 24),
        (24.0, 24),
        (25.0, 25),
        (29.97, 30),
        (30.0, 30),
        (50.0, 50),
        (59.94, 60),
        (60.0, 60),
    )

    def distance(entry):
        return abs(entry[0] - fps)

    return min(standard_rates, key=distance)

_add_rate_element function · python · L34-L37 (4 LOC)

xml_gen.py

def _add_rate_element(parent: ET.Element, timebase: int, ntsc: bool):
    """Append a ``<rate>`` child to *parent* with ``<timebase>`` and ``<ntsc>`` inside.

    ``<timebase>`` holds ``str(timebase)``; ``<ntsc>`` holds ``"TRUE"`` when
    *ntsc* is truthy and ``"FALSE"`` otherwise. Nothing is returned.
    """
    rate_el = ET.SubElement(parent, "rate")

    timebase_el = ET.SubElement(rate_el, "timebase")
    timebase_el.text = str(timebase)

    ntsc_el = ET.SubElement(rate_el, "ntsc")
    if ntsc:
        ntsc_el.text = "TRUE"
    else:
        ntsc_el.text = "FALSE"

_build_file_element function · python · L40-L72 (33 LOC)

xml_gen.py

def _build_file_element(parent: ET.Element, source_filename: str, source_path: str | None,
                        timebase: int, ntsc: bool, total_frames: int,
                        width: int, height: int, sample_rate: int, audio_channels: int,
                        file_id: str = "file-1", define: bool = True) -> ET.Element:
    """Build a <file> element. If define=True, includes full metadata. Otherwise just a reference."""
    f_el = ET.SubElement(parent, "file", id=file_id)
    if not define:
        return f_el

    ET.SubElement(f_el, "name").text = source_filename
    if source_path:
        from urllib.parse import quote
        encoded = quote(source_path, safe="/:")
        ET.SubElement(f_el, "pathurl").text = f"file://localhost{encoded}"
    else:
        ET.SubElement(f_el, "pathurl").text = f"file://localhost/RELINK_ME/{source_filename}"
    _add_rate_element(f_el, timebase, ntsc)
    ET.SubElement(f_el, "duration").text = str(total_frames)

    f_media = ET.SubElem

Generated by Repobility's multi-pass static-analysis pipeline (https://repobility.com)

_build_source_nest function · python · L75-L149 (75 LOC)

xml_gen.py

def _build_source_nest(parent: ET.Element, source_filename: str, source_path: str | None,
                       timebase: int, ntsc: bool, total_frames: int,
                       width: int, height: int, sample_rate: int, audio_channels: int,
                       nest_id: str = "source-nest") -> ET.Element:
    """Build a full-length nested sequence containing the source clip + empty track for external audio.

    The editor drops external mic audio into this nest. Cuts in the main
    timeline reference this nest, so both audio sources stay synced.
    """
    nest = ET.SubElement(parent, "sequence", id=nest_id)
    ET.SubElement(nest, "name").text = f"NEST - {source_filename}"
    ET.SubElement(nest, "duration").text = str(total_frames)
    _add_rate_element(nest, timebase, ntsc)

    tc = ET.SubElement(nest, "timecode")
    _add_rate_element(tc, timebase, ntsc)
    ET.SubElement(tc, "string").text = "00:00:00:00"
    ET.SubElement(tc, "frame").text = "0"
    ET.SubElement(tc, "

generate_fcpxml function · python · L152-L293 (142 LOC)

xml_gen.py

def generate_fcpxml(
    segments: list[dict],
    metadata: dict,
    source_filename: str,
    output_path: str,
    source_path: str | None = None,
) -> str:
    """Generate FCP 7 XML (Premiere Pro compatible) from segments and video metadata.

    Output contains two sequences:
    1. "NEST - filename" — full-length source clip with empty audio track for external mic
    2. "Clean Cut - filename" — the cut timeline referencing the nest

    Workflow: drop external mic audio into the nest, cuts propagate automatically.
    """
    fps = metadata["fps"]
    width = metadata["width"]
    height = metadata["height"]
    total_duration = metadata["duration_seconds"]
    audio_channels = metadata.get("audio_channels", 2)
    sample_rate = metadata.get("sample_rate", 48000)

    matched_fps, timebase = _match_fps(fps)
    ntsc = _is_ntsc(matched_fps)
    total_frames = _seconds_to_frames(total_duration, matched_fps)

    # Root
    xmeml = ET.Element("xmeml", version="5")

    # --- Sourc

‹ prevpage 2 / 2