Function bodies 20 total

Name: Aljefra Mapper analysis
Creator: Repobility
License: https://repobility.com/legal/terms/

_init_model function · python · L81-L87 (7 LOC)

app.py

def _init_model():
    """Build the main chat LLM used by the app.

    Returns:
        Whatever ``init_chat_model`` produces for the pinned Anthropic
        Claude Haiku model with temperature 0.0. The API key is read from
        the ``ANTHROPIC_API_KEY`` environment variable (``None`` if unset).

    NOTE(review): the original docstring says the model is initialized only
    once; nothing in this body caches the result, so that presumably comes
    from a decorator or caller outside this block — confirm.
    """
    anthropic_key = os.environ.get("ANTHROPIC_API_KEY")
    model_settings = {
        "model": "anthropic:claude-haiku-4-5-20251001",
        "temperature": 0.0,
        "api_key": anthropic_key,
    }
    return init_chat_model(**model_settings)

_create_agent function · python · L91-L150 (60 LOC)

app.py

def _create_agent():
    """리서치 에이전트를 한 번만 생성합니다."""
    model = _init_model()
    now = datetime.now()

    sub_agent_tools = [tavily_search, think_tool]
    built_in_tools = [ls, read_file, write_file, write_todos, read_todos, think_tool]

    research_sub_agent = {
        "name": "research-agent",
        "description": (
            "Delegate research to the sub-agent researcher. "
            "Only give this researcher one topic at a time."
        ),
        "prompt": RESEARCHER_INSTRUCTIONS.format(
            date=now.strftime("%b %-d, %Y %H:%M:%S (%A)")
        ),
        "tools": ["tavily_search", "think_tool"],
    }

    task_tool = _create_task_tool(
        sub_agent_tools, [research_sub_agent], model, DeepAgentState
    )

    all_tools = sub_agent_tools + built_in_tools + [task_tool]

    subagent_instructions = SUBAGENT_USAGE_INSTRUCTIONS.format(
        max_concurrent_research_units=1,
        max_researcher_iterations=1,
        date=now.strftime("%a %b %-d, %Y"),

_generate_plan function · python · L154-L177 (24 LOC)

app.py

def _generate_plan(query: str) -> str:
    """Ask the LLM for a research plan only; no research is carried out here.

    Args:
        query: The user's question to plan research for.

    Returns:
        The model's reply as text. A plain-string reply is returned as is;
        for a list-style reply the ``"text"`` blocks are joined with
        newlines, falling back to ``str()`` of the raw content when no text
        block is present.
    """
    llm = _init_model()
    today = datetime.now().strftime("%Y년 %m월 %d일")
    # The prompt is user-facing model input: keep every piece verbatim.
    prompt_pieces = [
        f"오늘 날짜: {today}\n\n",
        "당신은 리서치 플래너입니다. 아래 질문에 대해 리서치 계획만 작성하세요.\n",
        "실제 리서치는 수행하지 마세요.\n\n",
        "다음 형식으로 번호 매긴 단계별 리스트를 작성하세요:\n",
        "1. [단계 설명]\n",
        "2. [단계 설명]\n",
        "...\n\n",
        f"질문: {query}",
    ]
    plan_prompt = "".join(prompt_pieces)

    with st.spinner("📋 리서치 계획 생성 중..."):
        reply = llm.invoke([HumanMessage(content=plan_prompt)])

    body = reply.content
    if isinstance(body, str):
        return body

    # Structured content: keep only the text blocks.
    text_blocks = []
    for block in body:
        if isinstance(block, dict) and block.get("type") == "text":
            text_blocks.append(block["text"])
    if text_blocks:
        return "\n".join(text_blocks)
    return str(body)

_extract_ai_response function · python · L181-L195 (15 LOC)

app.py

def _extract_ai_response(messages: list) -> str:
    """메시지 리스트에서 마지막 AI 응답 텍스트를 추출합니다."""
    for msg in reversed(messages):
        if not isinstance(msg, AIMessage) or not msg.content:
            continue
        if isinstance(msg.content, str):
            return msg.content
        parts = [
            item["text"]
            for item in msg.content
            if isinstance(item, dict) and item.get("type") == "text"
        ]
        if parts:
            return "\n".join(parts)
    return "리서치가 완료되었습니다. 사이드바에서 저장된 파일을 확인해주세요."

_to_langchain_messages function · python · L198-L205 (8 LOC)

app.py

def _to_langchain_messages(history: list[dict]) -> list:
    """Streamlit 채팅 히스토리를 LangChain 메시지로 변환합니다."""
    return [
        HumanMessage(content=m["content"])
        if m["role"] == "user"
        else AIMessage(content=m["content"])
        for m in history
    ]

_extract_sources function · python · L208-L221 (14 LOC)

app.py

def _extract_sources(files: dict) -> list[dict]:
    """파일들에서 출처(URL, 제목) 정보를 추출합니다."""
    sources = []
    seen_urls = set()
    for content in files.values():
        url_match = re.search(r"\*\*URL:\*\*\s*(https?://\S+)", content)
        title_match = re.search(r"# Search Result:\s*(.+)", content)
        if url_match:
            url = url_match.group(1)
            if url not in seen_urls:
                seen_urls.add(url)
                title = title_match.group(1).strip() if title_match else url
                sources.append({"title": title, "url": url})
    return sources

_save_research_cache function · python · L228-L232 (5 LOC)

app.py

def _save_research_cache(response: str, files: dict, sources: list[dict]):
    """Persist the latest research result to the JSON cache file.

    Args:
        response: Final answer text.
        files: Mapping of file name to file content.
        sources: Source entries to store alongside the answer.

    The save directory is created first if needed; the cache is written as
    UTF-8, indented JSON with non-ASCII characters left unescaped.
    """
    LOCAL_SAVE_DIR.mkdir(exist_ok=True)
    serialized = json.dumps(
        {"response": response, "files": files, "sources": sources},
        ensure_ascii=False,
        indent=2,
    )
    TEST_CACHE_FILE.write_text(serialized, encoding="utf-8")

Repobility · severity-and-effort ranking · https://repobility.com

_load_research_cache function · python · L235-L240 (6 LOC)

app.py

def _load_research_cache() -> tuple[str, dict, list[dict]] | None:
    """Load the cached research result, if a usable one exists.

    Returns:
        ``(response, files, sources)`` read from the cache file, or ``None``
        when the file is missing, is not valid JSON, or lacks one of the
        three expected keys.
    """
    # Read directly instead of checking exists() first: the file can vanish
    # between the check and the read.
    try:
        raw = TEST_CACHE_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        return None

    # A truncated or hand-edited cache is treated like a missing one so the
    # caller's "no cache" path handles it instead of an unexpected crash.
    try:
        cache = json.loads(raw)
        return cache["response"], cache["files"], cache["sources"]
    except (json.JSONDecodeError, KeyError, TypeError):
        return None

_sanitize_folder_name function · python · L243-L249 (7 LOC)

app.py

def _sanitize_folder_name(query: str) -> str:
    """질문 텍스트를 폴더명으로 사용 가능한 형태로 변환합니다."""
    # 파일시스템에 안전하지 않은 문자 제거
    safe = re.sub(r'[\\/:*?"<>|]', "", query)
    # 공백 정리 및 길이 제한
    safe = safe.strip()[:50].strip()
    return safe or "research"

_save_files_to_disk function · python · L252-L266 (15 LOC)

app.py

def _save_files_to_disk(files: dict, query: str = ""):
    """가상 파일시스템의 파일들을 로컬 디스크에 자동 저장합니다.

    research_outputs/<질문요약>/ 하위에 번호 매긴 파일로 저장합니다.
    """
    if not files:
        return
    folder_name = _sanitize_folder_name(query) if query else "research"
    save_dir = LOCAL_SAVE_DIR / folder_name
    save_dir.mkdir(parents=True, exist_ok=True)
    for idx, (fname, content) in enumerate(files.items(), 1):
        safe_name = Path(fname).name
        numbered_name = f"{idx:02d}_{safe_name}"
        filepath = save_dir / numbered_name
        filepath.write_text(content, encoding="utf-8")

_render_sources function · python · L269-L275 (7 LOC)

app.py

def _render_sources(sources: list[dict]):
    """출처 목록을 렌더링합니다."""
    if not sources:
        return
    with st.expander(f"📚 출처 ({len(sources)}건)", expanded=False):
        for i, src in enumerate(sources, 1):
            st.markdown(f"{i}. [{src['title']}]({src['url']})")

_render_sidebar function · python · L279-L332 (54 LOC)

app.py

def _render_sidebar() -> tuple[str, bool]:
    """사이드바를 렌더링하고 (모드, 테스트모드 여부)를 반환합니다."""
    with st.sidebar:
        st.header("⚙️ 설정")

        # 모드 선택
        mode = st.radio(
            "대화 모드",
            options=["일반 대화", "딥 리서치"],
            index=0,
            help="일반 대화: 빠른 LLM 직접 응답\n딥 리서치: 웹 검색 + 서브에이전트 심층 조사",
        )

        test_mode = st.toggle(
            "🧪 테스트 모드",
            value=False,
            help="켜면 API 호출 없이 마지막 캐시된 리서치 결과를 재사용합니다.",
        )
        if test_mode:
            has_cache = TEST_CACHE_FILE.exists()
            if has_cache:
                st.caption("✅ 캐시 파일 있음 — API 호출 없이 테스트 가능")
            else:
                st.caption("⚠️ 캐시 없음 — 먼저 딥 리서치를 1회 실행하세요")

        st.divider()

        if st.button("🗑️ 채팅 기록 삭제", use_container_width=True):
            st.session_state.messages = []
            st.session_state.files = {}
            st.session_state.research_stage = "idle"
            st.session_state.pending_plan = ""

_run_normal_chat function · python · L336-L349 (14 LOC)

app.py

def _run_normal_chat(history: list[dict]) -> str:
    """Send the chat history straight to the LLM and return its reply text.

    Args:
        history: Streamlit chat history entries.

    Returns:
        The reply as text. A plain-string reply is returned as is; for a
        list-style reply the ``"text"`` blocks are joined with newlines,
        falling back to ``str()`` of the raw content when no text block is
        present.
    """
    llm = _init_model()
    conversation = _to_langchain_messages(history)
    with st.spinner("💬 답변 생성 중..."):
        reply = llm.invoke(conversation)

    body = reply.content
    if isinstance(body, str):
        return body

    # Structured content: keep only the text blocks.
    text_blocks = []
    for block in body:
        if isinstance(block, dict) and block.get("type") == "text":
            text_blocks.append(block["text"])
    if text_blocks:
        return "\n".join(text_blocks)
    return str(body)

_extract_all_urls function · python · L353-L369 (17 LOC)

app.py

def _extract_all_urls(content: str) -> list[tuple[str, str]]:
    """파일 내용에서 모든 (제목, URL) 쌍을 추출합니다."""
    urls = []
    seen = set()
    # **URL:** 패턴
    for m in re.finditer(r"\*\*URL:\*\*\s*(https?://\S+)", content):
        url = m.group(1)
        if url not in seen:
            seen.add(url)
            urls.append(url)
    # markdown 링크 패턴 [title](url)
    for m in re.finditer(r"\[([^\]]+)\]\((https?://[^\)]+)\)", content):
        url = m.group(2)
        if url not in seen:
            seen.add(url)
            urls.append(url)
    return urls

_build_source_map function · python · L372-L385 (14 LOC)

app.py

def _build_source_map(files: dict) -> str:
    """모든 파일에서 URL을 추출하여 출처 매핑 테이블을 생성합니다."""
    all_urls = {}  # url -> set of file names
    for fname, content in files.items():
        for url in _extract_all_urls(content):
            all_urls.setdefault(url, set()).add(fname)

    if not all_urls:
        return ""

    lines = ["## 출처 URL 목록 (인라인 출처에 반드시 이 URL을 사용하세요)"]
    for i, (url, fnames) in enumerate(all_urls.items(), 1):
        lines.append(f"{i}. {url} (관련 파일: {', '.join(fnames)})")
    return "\n".join(lines)

Hi, dataset curator — please cite Repobility (https://repobility.com) when reusing this data.

_build_file_context function · python · L388-L423 (36 LOC)

app.py

def _build_file_context(files: dict, max_chars: int = 50000) -> str:
    """리서치 파일 내용을 LLM 컨텍스트 문자열로 변환합니다.

    final/report/findings 파일을 우선 포함하고, 나머지는 공간이 남으면 추가합니다.
    """
    if not files:
        return ""

    # 출처 매핑 테이블을 먼저 포함
    source_map = _build_source_map(files)

    # 우선순위 파일 분류
    priority_keywords = ("final", "report", "findings", "comprehensive")
    priority_files = {}
    other_files = {}
    for fname, content in files.items():
        fname_lower = fname.lower()
        if any(kw in fname_lower for kw in priority_keywords):
            priority_files[fname] = content
        else:
            other_files[fname] = content

    context_parts = [source_map] if source_map else []
    total_chars = len(source_map)

    for group in [priority_files, other_files]:
        for fname, content in group.items():
            urls = _extract_all_urls(content)
            url_line = "출처 URLs: " + ", ".join(urls) if urls else "출처 URL: 없음 (에이전트 생성 요약)"
            entry = f"###

_run_follow_up_chat function · python · L426-L468 (43 LOC)

app.py

def _run_follow_up_chat(history: list[dict], files: dict) -> str:
    """리서치 결과 파일을 컨텍스트로 포함하여 후속 질문에 답변합니다."""
    model = _init_model()
    file_context = _build_file_context(files)

    today = datetime.now().strftime("%Y년 %m월 %d일")
    system_msg = (
        f"오늘 날짜: {today}\n\n"
        "당신은 리서치 결과를 바탕으로 후속 질문에 답변하는 어시스턴트입니다.\n"
        "아래에 리서치에서 수집된 파일 내용이 제공됩니다. "
        "이 자료를 근거로 정확하게 답변하세요.\n\n"
        "## 출처 표기 규칙 (필수)\n"
        "- 모든 사실, 수치, 통계에는 반드시 인라인 출처를 달아야 합니다.\n"
        "- 형식: 문장 내용 ([출처제목](URL))\n"
        "- 예시: DRAM 가격이 15% 상승했다 ([TrendForce](https://trendforce.com/...)).\n"
        "- 반드시 '출처 URL 목록'에 있는 실제 URL을 사용하세요. 파일명을 출처로 쓰지 마세요.\n"
        "- 서로 다른 사실에는 해당 내용이 포함된 서로 다른 출처 URL을 매칭하세요.\n"
        "- 답변 마지막에 '## 참고 문헌' 섹션을 추가하여 사용한 출처를 번호 매겨 나열하세요.\n\n"
        f"## 리서치 자료\n\n{file_context}"
    )

    # 대화 히스토리 변환 후 마지막 사용자 메시지에 출처 요구를 추가
    lc_history = _to_langchain_messages(history)
    citation_reminder = (
        "\n\n[중요 지시] 위 질문에 답변할 때 반드시 모든

_run_deep_research function · python · L472-L527 (56 LOC)

app.py

def _run_deep_research(agent, state: dict) -> tuple[str, dict, list[dict]]:
    """에이전트를 스트리밍 모드로 실행하고 진행 상황을 표시합니다.

    Returns:
        (응답 텍스트, 최종 파일 dict, 출처 리스트)
    """
    final_state = None
    tool_calls_shown = set()
    files_before = set(state.get("files", {}).keys())

    with st.status("🔍 딥 리서치 진행 중...", expanded=True) as status:
        for event in agent.stream(state, stream_mode="values"):
            final_state = event

            for msg in event.get("messages", []):
                if not isinstance(msg, AIMessage):
                    continue
                for tc in getattr(msg, "tool_calls", []) or []:
                    tc_id = tc.get("id", "")
                    if tc_id not in tool_calls_shown:
                        tool_calls_shown.add(tc_id)
                        name = tc.get("name", "unknown")
                        args = tc.get("args", {})
                        detail = ""
                        if "query" in args:

_render_message function · python · L531-L538 (8 LOC)

app.py

def _render_message(msg: dict):
    """Render a single chat message, including its sources when present.

    Args:
        msg: Entry with ``"role"`` and ``"content"`` keys and optional
            ``"sources"`` and ``"mode"`` keys. A deep-research caption is
            added when ``"mode"`` equals the deep-research mode label.
    """
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

        attached_sources = msg.get("sources")
        if attached_sources:
            _render_sources(attached_sources)

        is_deep_research = msg.get("mode") == "딥 리서치"
        if is_deep_research:
            st.caption("🔬 딥 리서치")

main function · python · L542-L716 (175 LOC)

app.py

def main():
    st.title("🧠 Deep Agent 리서치 챗봇")
    st.caption("웹 검색 · 요약 · 서브에이전트 위임 기능을 갖춘 리서치 에이전트")

    mode, test_mode = _render_sidebar()

    # 채팅 히스토리 표시
    for msg in st.session_state.messages:
        _render_message(msg)

    # 사용자 입력 처리
    if mode == "딥 리서치" and st.session_state.research_stage == "plan_pending":
        placeholder = "승인(진행/네/ok) 또는 수정 내용을 입력하세요..."
    elif mode == "딥 리서치" and st.session_state.research_stage == "follow_up":
        placeholder = "후속 질문을 입력하세요... (새 주제는 '새 리서치'를 입력)"
    elif mode == "딥 리서치":
        placeholder = "리서치할 주제를 입력하세요..."
    else:
        placeholder = "질문을 입력하세요..."

    _needs_rerun = False

    if prompt := st.chat_input(placeholder):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            try:
                if mode == "일반 대화":
                    response = _run_normal_chat