Function bodies 3 total
parse_document function · python · L36-L93 (58 LOC)main.py
async def parse_document(request: Request, file: UploadFile = File(...)):
# Validierung: Dateityp
ext = Path(file.filename).suffix.lower()
if ext not in ALLOWED_EXTENSIONS:
return templates.TemplateResponse("partials/error.html", {
"request": request,
"message": f"Das Format {ext} wird nicht unterstützt.",
"allowed": ", ".join(sorted(ALLOWED_EXTENSIONS)),
})
# Validierung: Dateigröße
content = await file.read()
if len(content) > MAX_UPLOAD_SIZE:
return templates.TemplateResponse("partials/error.html", {
"request": request,
"message": f"Datei ist zu groß (max. {MAX_UPLOAD_SIZE // (1024 * 1024)} MB).",
"allowed": ", ".join(sorted(ALLOWED_EXTENSIONS)),
})
# Datei temporär speichern
job_id = str(uuid.uuid4())
filepath = UPLOAD_DIR / f"{job_id}{ext}"
filepath.write_bytes(content)
try:
start = time.monotonic()
result = parser.pdownload_text function · python · L97-L112 (16 LOC)main.py
async def download_text(job_id: str):
    """Serve the extracted text of a stored job as a ``.txt`` download.

    The job is looked up in the module-level ``results`` mapping. Its text is
    written UTF-8 encoded to ``UPLOAD_DIR/<job_id>.txt`` and that file is
    returned, offered to the client as ``<stem of original name>_extracted.txt``.

    Args:
        job_id: Key of the job in ``results``.

    Returns:
        A ``FileResponse`` pointing at the freshly written text file.

    Raises:
        HTTPException: With status 404 when ``job_id`` is not in ``results``.

    NOTE(review): the written file is not removed anywhere in this function.
    """
    if job_id not in results:
        raise HTTPException(status_code=404, detail="Ergebnis nicht gefunden")

    job = results[job_id]

    # Name shown to the client is derived from the uploaded file's name;
    # the copy on disk is keyed by the job id instead.
    uploaded_stem = Path(job["filename"]).stem
    target = UPLOAD_DIR / f"{job_id}.txt"
    target.write_text(job["text"], encoding="utf-8")

    response_kwargs = {
        "path": str(target),
        "filename": f"{uploaded_stem}_extracted.txt",
        "media_type": "text/plain; charset=utf-8",
    }
    return FileResponse(**response_kwargs)


# download_markdown function · python · L116-L145 (30 LOC) main.py
async def download_markdown(job_id: str):
    """Serve the extracted content of a stored job as a Markdown download.

    The document starts with a level-1 heading holding the original filename.
    When the job carries more than one entry in ``pages_data``, every page
    gets its own ``## Seite <pageNum>`` section followed by the page's
    stripped text and an empty line; otherwise the job's full ``text`` is
    appended as-is. The result is written UTF-8 encoded to
    ``UPLOAD_DIR/<job_id>.md`` and returned as
    ``<stem of original name>_extracted.md``.

    Args:
        job_id: Key of the job in ``results``.

    Returns:
        A ``FileResponse`` pointing at the freshly written Markdown file.

    Raises:
        HTTPException: With status 404 when ``job_id`` is not in ``results``.

    NOTE(review): the written file is not removed anywhere in this function.
    """
    if job_id not in results:
        raise HTTPException(status_code=404, detail="Ergebnis nicht gefunden")

    job = results[job_id]
    uploaded_stem = Path(job["filename"]).stem
    pages = job.get("pages_data", [])

    # Build the Markdown with per-page structure where available
    sections = [f"# {job['filename']}\n"]
    if not (pages and len(pages) > 1):
        # Zero or one page: no per-page headings, just the whole text
        sections.append(job["text"])
    else:
        for page in pages:
            sections.extend(
                (
                    f"## Seite {page.pageNum}\n",
                    page.text.strip(),
                    "",  # blank line separating this page from the next heading
                )
            )

    target = UPLOAD_DIR / f"{job_id}.md"
    target.write_text("\n".join(sections), encoding="utf-8")

    response_kwargs = {
        "path": str(target),
        "filename": f"{uploaded_stem}_extracted.md",
        "media_type": "text/markdown; charset=utf-8",
    }
    return FileResponse(**response_kwargs)