Function bodies 161 total

weighted_query_with_provenance function · python · L171-L185 (15 LOC)

dex_weights.py

def weighted_query_with_provenance(
    query_text: str,
    n_results: int = 5,
    include_external: bool = False
) -> tuple[list[dict], str]:
    """Run a weighted query and summarise which labels its results carry.

    The three arguments are passed straight through to ``weighted_query``.
    Each returned result is expected to be a mapping with a ``"label"`` key;
    the number of results per label is tallied into a one-line summary.

    Returns:
        A ``(results, provenance)`` pair. ``results`` is the list returned by
        ``weighted_query``; ``provenance`` is a string of the form
        ``"[Sources: 2×a | 1×b]"`` with the labels in sorted order.
    """
    hits = weighted_query(query_text, n_results, include_external)

    # Tally how many hits came from each label.
    tally: dict[str, int] = {}
    for hit in hits:
        name = hit["label"]
        if name in tally:
            tally[name] += 1
        else:
            tally[name] = 1

    # Labels are unique dict keys, so sorting the keys orders the summary.
    summary = " | ".join(f"{tally[name]}×{name}" for name in sorted(tally))

    return hits, f"[Sources: {summary}]"

print_weight_stats function · python · L189-L200 (12 LOC)

dex_weights.py

def print_weight_stats():
    """Print a table of effective source weights to stdout.

    For every entry in ``COLLECTIONS`` the entry's ``"base_weight"`` is
    multiplied by each multiplier in ``FILE_TYPE_WEIGHTS``. Rows within a
    collection run from the largest multiplier to the smallest, and a blank
    line separates one collection from the next.
    """
    heavy_rule = "  " + "=" * 55
    light_rule = "  " + "-" * 55

    print("\n  DEX JR. SOURCE WEIGHT TABLE")
    print(heavy_rule)
    print(f"  {'Collection':<15} {'File Type':<20} {'Weight':>8}")
    print(light_rule)

    for collection_name, settings in COLLECTIONS.items():
        base_weight = settings["base_weight"]
        # Negating the multiplier gives a descending sort.
        ranked = sorted(FILE_TYPE_WEIGHTS.items(), key=lambda item: -item[1])
        for file_type, multiplier in ranked:
            weight = round(base_weight * multiplier, 4)
            print(f"  {collection_name:<15} {file_type:<20} {weight:>8.4f}")
        print()

build_header function · python · L35-L56 (22 LOC)

dex-whisper.py

def build_header(filename, language, duration_seconds, model_used):
    """Build the comment-style metadata header that precedes a transcript.

    Args:
        filename: Written to the ``source_file`` field.
        language: Written to the ``language`` field.
        duration_seconds: Numeric duration, rendered with one decimal place.
        model_used: Written to the ``whisper_model`` field.

    Returns:
        A multi-line string of ``#``-prefixed lines followed by one blank
        line. ``transcribed_at`` is the local time of the call in ISO format;
        all remaining fields are fixed text.
    """
    rule = "# ====================================================================="
    stamp = datetime.datetime.now().isoformat()

    lines = [
        rule,
        "# DEX JR. VIDEO TRANSCRIPT",
        rule,
        f"# source_file:       {filename}",
        "# source_type:       video_transcript",
        "# origin:            tiktok_public",
        "# platform:          tiktok",
        "# account:           @dropdownlogistics",
        "# era:               pre_dexverse",
        "# confidence:        whisper_auto",
        "# processing_stage:  unstructured",
        f"# language:          {language}",
        f"# duration_seconds:  {duration_seconds:.1f}",
        f"# whisper_model:     {model_used}",
        f"# transcribed_at:    {stamp}",
        "# tier:              foundation",
        "# status:            pre_canonical",
        rule,
    ]

    # The closing rule is followed by a newline and then one empty line.
    return "\n".join(lines) + "\n\n"

main function · python · L61-L159 (99 LOC)

dex-whisper.py

def main():
    parser = argparse.ArgumentParser(description="Dex Jr Whisper Batch Transcription Pipeline")
    parser.add_argument("--dir",           default=DEFAULT_INPUT_DIR,  help="Directory containing video files")
    parser.add_argument("--output",        default=DEFAULT_OUTPUT_DIR, help="Output directory for transcripts")
    parser.add_argument("--model",         default=DEFAULT_MODEL,      help="Whisper model: tiny, base, small, medium, large")
    parser.add_argument("--skip-existing", action="store_true",        help="Skip files that already have a transcript")
    parser.add_argument("--language",      default=None,               help="Force language (e.g. en). Auto-detect if not set.")
    args = parser.parse_args()

    input_dir  = args.dir
    output_dir = args.output

    os.makedirs(output_dir, exist_ok=True)

    # Scan for video files
    files = [
        f for f in os.listdir(input_dir)
        if os.path.splitext(f)[1].lower() in SUPPORTED_EXTENSIONS
    ]
    f

xlsx_to_text function · python · L50-L123 (74 LOC)

dex-xlsx.py

def xlsx_to_text(filepath, preview=False):
    """Convert an xlsx file to structured text."""
    try:
        wb = load_workbook(filepath, read_only=True, data_only=True)
    except Exception as e:
        print(f"  ERROR reading {filepath}: {e}")
        return None

    filename = os.path.basename(filepath)
    output_parts = []

    # Header
    output_parts.append(f"XLSX CONVERSION: {filename}")
    output_parts.append(f"Source: {filepath}")
    output_parts.append(f"Converted: {datetime.datetime.now().isoformat()}")
    output_parts.append(f"Sheets: {len(wb.sheetnames)}")
    output_parts.append(f"Sheet names: {', '.join(wb.sheetnames)}")
    output_parts.append("=" * 60)

    total_rows = 0
    total_cells = 0

    for sheet_name in wb.sheetnames:
        ws = wb[sheet_name]
        output_parts.append(f"\n{'─' * 60}")
        output_parts.append(f"SHEET: {sheet_name}")
        output_parts.append(f"{'─' * 60}\n")

        row_count = 0
        for row in ws.iter_rows(values_onl

save_text function · python · L128-L138 (11 LOC)

dex-xlsx.py

def save_text(result, output_dir):
    """Write a conversion result's text into ``output_dir``.

    The output file is named after the source workbook: the extension of
    ``result["filename"]`` is dropped and ``_xlsx.txt`` is appended.
    ``output_dir`` is created if it does not exist yet.

    Args:
        result: Mapping with ``"filename"``, ``"text"`` and ``"char_count"``.
        output_dir: Directory that receives the text file.

    Returns:
        The path of the file that was written.
    """
    os.makedirs(output_dir, exist_ok=True)

    stem, _ext = os.path.splitext(result["filename"])
    destination = os.path.join(output_dir, f"{stem}_xlsx.txt")

    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(result["text"])

    size = result["char_count"]
    print(f"  Saved: {destination} ({size:,} chars)")
    return destination

ingest_text function · python · L140-L151 (12 LOC)

dex-xlsx.py

def ingest_text(result):
    """Write a conversion result's text into ``CANON_DIR`` for ingestion.

    The file is named ``XLSX_<stem>_<YYYYmmdd_HHMMSS>.txt``, where the stem
    is ``result["filename"]`` without its extension and the timestamp is the
    local time of the call. ``CANON_DIR`` is created if it does not exist.

    Args:
        result: Mapping with ``"filename"``, ``"text"`` and ``"char_count"``.

    Returns:
        The path of the file that was written.
    """
    os.makedirs(CANON_DIR, exist_ok=True)

    stem, _ext = os.path.splitext(result["filename"])
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    destination = os.path.join(CANON_DIR, f"XLSX_{stem}_{stamp}.txt")

    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(result["text"])

    size = result["char_count"]
    print(f"  Ingested: {destination} ({size:,} chars)")
    return destination

Repobility analyzer · published findings · https://repobility.com

find_xlsx_files function · python · L156-L162 (7 LOC)

dex-xlsx.py

def find_xlsx_files(folder):
    """Find all .xlsx files directly inside ``folder`` (non-recursive).

    The extension is matched case-insensitively, so ``REPORT.XLSX`` is found
    as well as ``report.xlsx``. Names starting with ``~$`` are skipped; that
    prefix marks the temporary owner files Excel keeps next to an open
    workbook, which are not real workbooks.

    Args:
        folder: Directory to scan.

    Returns:
        ``folder`` joined with each matching name, sorted so that repeated
        runs process the files in the same order (``os.listdir`` itself
        returns entries in arbitrary order).

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be
            listed.
    """
    return sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(".xlsx") and not name.startswith("~$")
    )

find_xlsx_recursive function · python · L164-L171 (8 LOC)

dex-xlsx.py

def find_xlsx_recursive(folder):
    """Find all .xlsx files in ``folder`` and every folder below it.

    The extension is matched case-insensitively, so ``REPORT.XLSX`` is found
    as well as ``report.xlsx``. Names starting with ``~$`` are skipped; that
    prefix marks the temporary owner files Excel keeps next to an open
    workbook, which are not real workbooks.

    Args:
        folder: Root directory of the search.

    Returns:
        Sorted list of paths to the matching files. Directories that cannot
        be listed (including a ``folder`` that does not exist) contribute
        nothing, because ``os.walk`` ignores listing errors by default.
    """
    matches = []
    for root, _dirs, filenames in os.walk(folder):
        matches.extend(
            os.path.join(root, name)
            for name in filenames
            if name.lower().endswith(".xlsx") and not name.startswith("~$")
        )
    # Sort so repeated runs process files in the same order.
    return sorted(matches)

log_conversion function · python · L176-L191 (16 LOC)

dex-xlsx.py

def log_conversion(filepath, result, action):
    """Append one JSON line describing a conversion to ``LOG_FILE``.

    Logging is best-effort: if the entry cannot be serialised or the log
    file cannot be written, a warning is printed and the function returns
    normally so the conversion itself is not interrupted.

    Args:
        filepath: Path of the source workbook, stored under ``"source"``.
        result: Mapping with ``"filename"``, ``"sheets"``, ``"total_rows"``,
            ``"total_cells"`` and ``"char_count"``.
        action: Stored under ``"action"``.

    Raises:
        KeyError: If ``result`` lacks one of the keys listed above.
    """
    entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "source": filepath,
        "filename": result["filename"],
        "sheets": result["sheets"],
        "total_rows": result["total_rows"],
        "total_cells": result["total_cells"],
        "char_count": result["char_count"],
        "action": action,
    }
    try:
        # Serialise before opening so a bad entry never touches the file.
        line = json.dumps(entry, ensure_ascii=False)
        with open(LOG_FILE, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except (OSError, TypeError, ValueError) as e:
        # Only I/O and serialisation failures are tolerated. A bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        print(f"  WARNING: could not write conversion log: {e}")

main function · python · L196-L288 (93 LOC)

dex-xlsx.py

def main():
    parser = argparse.ArgumentParser(description="Dex Jr XLSX Converter v1.0")
    parser.add_argument("path", help="Path to .xlsx file or folder")
    parser.add_argument("--all", action="store_true", help="Convert all .xlsx in folder")
    parser.add_argument("--recursive", action="store_true", help="Search subfolders too")
    parser.add_argument("--ingest", action="store_true", help="Save to canon for ingestion")
    parser.add_argument("--save", default=None, help="Save to specified folder")
    parser.add_argument("--preview", action="store_true", help="Show first 50 rows per sheet")

    args = parser.parse_args()

    # Single file mode
    if not args.all and os.path.isfile(args.path):
        print(f"\n  Converting: {args.path}")
        result = xlsx_to_text(args.path, preview=args.preview)
        if not result:
            return

        print(f"  Sheets: {result['sheets']} ({', '.join(result['sheet_names'])})")
        print(f"  Rows: {result['total_rows']:,}

‹ prev · page 4 / 4