← back to dkd-dobberkau__Hiraku

Function bodies 3 total

All specs Real LLM only Function bodies
parse_document function · python · L36-L93 (58 LOC)
main.py
async def parse_document(request: Request, file: UploadFile = File(...)):
    # Validierung: Dateityp
    ext = Path(file.filename).suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        return templates.TemplateResponse("partials/error.html", {
            "request": request,
            "message": f"Das Format {ext} wird nicht unterstützt.",
            "allowed": ", ".join(sorted(ALLOWED_EXTENSIONS)),
        })

    # Validierung: Dateigröße
    content = await file.read()
    if len(content) > MAX_UPLOAD_SIZE:
        return templates.TemplateResponse("partials/error.html", {
            "request": request,
            "message": f"Datei ist zu groß (max. {MAX_UPLOAD_SIZE // (1024 * 1024)} MB).",
            "allowed": ", ".join(sorted(ALLOWED_EXTENSIONS)),
        })

    # Datei temporär speichern
    job_id = str(uuid.uuid4())
    filepath = UPLOAD_DIR / f"{job_id}{ext}"
    filepath.write_bytes(content)

    try:
        start = time.monotonic()
        result = parser.p
download_text function · python · L97-L112 (16 LOC)
main.py
async def download_text(job_id: str):
    """Return the extracted text of a job as a downloadable ``.txt`` file.

    The stored entry for ``job_id`` is looked up in ``results``; its
    ``"text"`` value is written to ``UPLOAD_DIR / "<job_id>.txt"`` and that
    file is handed to ``FileResponse``.

    Args:
        job_id: Key of the entry in ``results`` to export.

    Returns:
        A ``FileResponse`` for the written file, offered to the client as
        ``<stem of the stored filename>_extracted.txt``.

    Raises:
        HTTPException: With status 404 when ``job_id`` is not in ``results``.
    """
    # Reject unknown job ids before touching the file system.
    if job_id not in results:
        raise HTTPException(status_code=404, detail="Ergebnis nicht gefunden")

    record = results[job_id]

    # The download name reuses the stored filename without its extension.
    download_name = f"{Path(record['filename']).stem}_extracted.txt"

    # FileResponse is given a path, so the text is written to disk first.
    export_file = UPLOAD_DIR / f"{job_id}.txt"
    export_file.write_text(record["text"], encoding="utf-8")

    return FileResponse(
        path=str(export_file),
        filename=download_name,
        media_type="text/plain; charset=utf-8",
    )
download_markdown function · python · L116-L145 (30 LOC)
main.py
async def download_markdown(job_id: str):
    """Return the extracted content of a job as a downloadable Markdown file.

    The document starts with a level-1 heading holding the stored filename.
    When the entry has more than one item in ``"pages_data"``, each item
    contributes a ``## Seite <pageNum>`` heading followed by its stripped
    text; otherwise the entry's ``"text"`` value is used as the body. The
    result is written to ``UPLOAD_DIR / "<job_id>.md"`` and handed to
    ``FileResponse``.

    Args:
        job_id: Key of the entry in ``results`` to export.

    Returns:
        A ``FileResponse`` for the written file, offered to the client as
        ``<stem of the stored filename>_extracted.md``.

    Raises:
        HTTPException: With status 404 when ``job_id`` is not in ``results``.
    """
    # Reject unknown job ids before touching the file system.
    if job_id not in results:
        raise HTTPException(status_code=404, detail="Ergebnis nicht gefunden")

    record = results[job_id]
    stem = Path(record["filename"]).stem
    pages = record.get("pages_data", [])

    # Assemble the Markdown document, beginning with the title heading.
    sections = [f"# {record['filename']}\n"]

    if not pages or len(pages) <= 1:
        # Zero or one page: no per-page structure, emit the full text as is.
        sections.append(record["text"])
    else:
        for page in pages:
            # Heading, page body, then an empty entry that becomes a blank
            # separator line once the parts are joined.
            sections.extend(
                (f"## Seite {page.pageNum}\n", page.text.strip(), "")
            )

    markdown = "\n".join(sections)
    download_name = f"{stem}_extracted.md"

    # FileResponse is given a path, so the Markdown is written to disk first.
    export_file = UPLOAD_DIR / f"{job_id}.md"
    export_file.write_text(markdown, encoding="utf-8")

    return FileResponse(
        path=str(export_file),
        filename=download_name,
        media_type="text/markdown; charset=utf-8",
    )