From 6b49f8d425f962308d5b8585306b380df0679109 Mon Sep 17 00:00:00 2001 From: Junseo1026 Date: Mon, 27 Oct 2025 13:20:14 +0900 Subject: [PATCH 1/2] 1027 13:20 --- routers/note.py | 429 +++++++++++++++-------------------- utils/llm.py | 590 +++++++++++++++++++++++++----------------------- 2 files changed, 496 insertions(+), 523 deletions(-) diff --git a/routers/note.py b/routers/note.py index 446e3a6..f0ab49a 100644 --- a/routers/note.py +++ b/routers/note.py @@ -1,40 +1,47 @@ import os +import re +import json +import difflib +from datetime import datetime +from typing import List, Optional + from dotenv import load_dotenv from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Query, Request from fastapi.responses import StreamingResponse from sqlalchemy.orm import Session -from typing import List -from datetime import datetime -import traceback -import re -import json -from db import get_db, SessionLocal +from db import get_db from models.note import Note from models.file import File as FileModel from schemas.note import NoteCreate, NoteUpdate, NoteResponse, FavoriteUpdate, NoteFile from utils.jwt_utils import get_current_user -from utils.llm import stream_summary_with_langchain, _strip_top_level_h1_outside_code, _hf_generate_once, _system_prompt -from utils.llm import _hf_generate_once, _system_prompt +from utils.llm import ( + stream_summary_with_langchain, + _strip_top_level_h1_outside_code, + _hf_generate_once, + _system_prompt, + count_slides, + normalize_and_renumber_slides, +) load_dotenv() -HF_TOKEN = os.getenv("HF_API_TOKEN") router = APIRouter(prefix="/api/v1", tags=["Notes"]) - -# 환경변수에서 BASE_API_URL 가져와 파일 다운로드 URL 구성 BASE_API_URL = os.getenv("BASE_API_URL", "http://localhost:8000") +HF_MAX_NEW_TOKENS_LONG = int(os.getenv("HF_MAX_NEW_TOKENS_LONG", "32000")) +HF_MAP_MAX_NEW_TOKENS = int(os.getenv("HF_MAP_MAX_NEW_TOKENS", "12000")) +ENSURE_COMPLETION_PASSES = int(os.getenv("ENSURE_COMPLETION_PASSES", "3")) +SLIDES_MIN = int(os.getenv("SUMMARY_SLIDES_MIN", "8")) +SLIDES_MAX = int(os.getenv("SUMMARY_SLIDES_MAX", "40")) +SUMMARY_CHUNK_CHARS = int(os.getenv("SUMMARY_CHUNK_CHARS", "12000")) +SUMMARY_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", "1200")) + # ───────────────────────────────────────────── -# 공통: Note → NoteResponse 직렬화 + files 채우기 +# 직렬화 # ───────────────────────────────────────────── def serialize_note(db: Session, note: Note, base_url: str) -> NoteResponse: - """ - Note ORM → NoteResponse 수동 매핑. - 관계(note.files)로 인해 Pydantic가 ORM 객체를 바로 검증하려다 실패하는 문제를 피하기 위해 - 기본 스칼라 필드만 직접 채우고, files는 별도 쿼리로 구성한다. - """ files = ( db.query(FileModel) .filter(FileModel.note_id == note.id, FileModel.user_id == note.user_id) @@ -66,8 +73,10 @@ def serialize_note(db: Session, note: Note, base_url: str) -> NoteResponse: ) +# ───────────────────────────────────────────── +# 간단 추출 요약 (백업용) +# ───────────────────────────────────────────── def _fallback_extractive_summary(text: str) -> str: - """Simple extractive fallback: pick leading sentences and format as TL;DR + bullets.""" if not text: return "## TL;DR\n요약할 내용이 없습니다." sents = re.split(r"(?<=[.!?。])\s+|\n+", text) @@ -84,36 +93,40 @@ def _fallback_extractive_summary(text: str) -> str: def _is_summary_complete(s: str) -> bool: - """Heuristic: check presence of key sections and reasonable length.""" if not s or not s.strip(): return False low = s.lower() - # require TL;DR or 핵심 요점 and some detail if ('## tl;dr' in low or '## 핵심' in low or '## 핵심 요점' in low) and len(s) > 300: return True - # if contains multiple section headers, consider complete headers = len(re.findall(r"^##\s+", s, flags=re.M)) if headers >= 2 and len(s) > 200: return True - # otherwise likely incomplete return False +def _similarity_ratio(a: str, b: str) -> float: + a_norm = re.sub(r"\s+", " ", (a or "")).strip() + b_norm = re.sub(r"\s+", " ", (b or "")).strip() + if not a_norm or not b_norm: + return 0.0 + return difflib.SequenceMatcher(None, a_norm, b_norm).ratio() + + async def _ensure_completion(full: str, domain: str | None = None, length: str = 'long') -> str: - """If `full` looks truncated, attempt up to 3 continuation passes to complete it.""" try: - for i in range(3): + for _ in range(ENSURE_COMPLETION_PASSES): if _is_summary_complete(full) and re.search(r"[\.\!\?]\s*$", full.strip()): return full - # build continuation prompt sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length=length) - cont_prompt = "The following summary appears incomplete. Continue and finish the summary without repeating previous text:\n\n" + full + "\n\nContinue:" + cont_prompt = ( + "The following summary appears incomplete. Continue and finish the summary **without repeating previous text**.\n\n" + + full + "\n\nContinue:" + ) try: - cont = await _hf_generate_once(sys_prompt, cont_prompt, max_new_tokens=int(os.getenv('HF_MAX_NEW_TOKENS_LONG', '32000'))) + cont = await _hf_generate_once(sys_prompt, cont_prompt, max_new_tokens=HF_MAX_NEW_TOKENS_LONG) except Exception: cont = '' if cont and cont.strip(): - # append continuation full = (full + "\n\n" + cont.strip()).strip() else: break @@ -122,28 +135,80 @@ async def _ensure_completion(full: str, domain: str | None = None, length: str = return full -# 1) 모든 노트 조회 +async def _ensure_slide_coverage(full: str, target_slides: int, source_text: str, domain: str | None = None) -> str: + try: + for _ in range(ENSURE_COMPLETION_PASSES): + cur = count_slides(full) + if cur >= target_slides: + return full + + next_idx = cur + 1 + sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length='long') + cont_user = ( + "아래는 기존 요약입니다. '## 슬라이드' 섹션의 슬라이드 수가 목표보다 적습니다.\n" + f"목표 슬라이드 수: {target_slides}\n" + f"현재 슬라이드 수: {cur}\n\n" + "요청: 이전 내용을 반복하지 말고, **'## 슬라이드' 섹션만** 이어서 작성하세요. " + f"번호는 '### 슬라이드 {next_idx}'부터 연속으로 증가시키세요. " + "각 슬라이드는 제목 + 3–6개 불릿로 작성하고, 아직 다루지 않은 원문 토픽을 중심으로 추가하세요.\n\n" + "=== 기존 요약(참고) ===\n" + full[-12000:] + "\n\n" + "=== 원문(발췌; 필요시) ===\n" + (source_text[:12000] if source_text else "") + ) + try: + extra = await _hf_generate_once(sys_prompt, cont_user, max_new_tokens=HF_MAX_NEW_TOKENS_LONG) + except Exception: + extra = "" + + if extra and extra.strip(): + full = (full.rstrip() + "\n\n" + extra.strip()).strip() + else: + break + except Exception: + pass + return full + + +async def _force_compress_if_similar(full: str, source: str, domain: str | None = None) -> str: + try: + ratio = _similarity_ratio(full, source) + if ratio >= 0.85 or len(full.strip()) >= max(300, int(len(source.strip()) * 0.95)): + sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length='medium') + user = ( + "다음 원문을 20–40% 길이로 정확하게 요약해. 절대 원문을 그대로 복사하지 말고, " + "출력은 반드시 '## TL;DR', '## 핵심 요점', '## 상세 설명', '## 슬라이드' 섹션을 포함하라.\n\n" + + (source[:80000] if source else "") + ) + try: + compressed = await _hf_generate_once(sys_prompt, user, max_new_tokens=HF_MAX_NEW_TOKENS_LONG) + except Exception: + compressed = _fallback_extractive_summary(source) + if compressed and compressed.strip(): + return compressed + except Exception: + pass + return full + + +# ───────────────────────────────────────────── +# 목록/CRUD +# ───────────────────────────────────────────── @router.get("/notes", response_model=List[NoteResponse]) def list_notes( request: Request, - q: str | None = Query(default=None, description="Optional search query (title or content)"), + q: Optional[str] = Query(default=None, description="Optional search query (title or content)"), db: Session = Depends(get_db), user = Depends(get_current_user) ): - """List notes for the current user. If `q` is provided, filter by title or content (case-insensitive). - """ query = db.query(Note).filter(Note.user_id == user.u_id) if q and q.strip(): like = f"%{q.strip()}%" query = query.filter((Note.title.ilike(like)) | (Note.content.ilike(like))) notes = query.order_by(Note.created_at.desc()).all() - # 각 노트의 files도 채워 반환 base_url = os.getenv("BASE_API_URL") or str(request.base_url).rstrip('/') return [serialize_note(db, n, base_url) for n in notes] -# 2) 최근 접근한 노트 조회 (상위 10개) @router.get("/notes/recent", response_model=List[NoteResponse]) def recent_notes( request: Request, @@ -161,7 +226,6 @@ def recent_notes( return [serialize_note(db, n, base_url) for n in notes] -# 3) 노트 생성 @router.post("/notes", response_model=NoteResponse) def create_note( request: Request, @@ -182,7 +246,6 @@ def create_note( return serialize_note(db, note, base_url) -# 4) 노트 수정 (제목/내용/폴더) @router.patch("/notes/{note_id}", response_model=NoteResponse) def update_note( request: Request, @@ -210,8 +273,6 @@ def update_note( base_url = os.getenv("BASE_API_URL") or str(request.base_url).rstrip('/') return serialize_note(db, note, base_url) - -# 5) 노트 단일 조회 (마지막 접근 시간 업데이트 포함) @router.get("/notes/{note_id}", response_model=NoteResponse) def get_note( request: Request, @@ -230,7 +291,6 @@ def get_note( return serialize_note(db, note, base_url) -# 6) 노트 삭제 @router.delete("/notes/{note_id}") def delete_note( note_id: int, @@ -246,7 +306,6 @@ def delete_note( return {"message": "Note deleted successfully"} -# 7) 즐겨찾기 토글 @router.patch("/notes/{note_id}/favorite", response_model=NoteResponse) def toggle_favorite( request: Request, @@ -268,160 +327,101 @@ def toggle_favorite( # ───────────────────────────────────────────── -# (참고) 요약 스트리밍 API - 완료 후에도 serialize_note 사용 안 함 -# (요약은 새 노트를 생성하고 SSE로 알림만 보냄) +# 요약 (동기, 긴 문서 완전 지원) +# ───────────────────────────────────────────── # ───────────────────────────────────────────── -@router.post("/notes/{note_id}/summarize") -async def summarize_stream_langchain( +# 요약 (HF 비활성 환경 대응 - TextRank 기반) +# ───────────────────────────────────────────── +@router.post("/notes/{note_id}/summarize_sync", response_model=NoteResponse) +async def summarize_sync( note_id: int, - background_tasks: BackgroundTasks, - domain: str | None = Query(default=None, description="meeting | code | paper | general | auto(None)"), - longdoc: bool = Query(default=True, description="Enable long-document map→reduce"), + domain: str | None = Query(default=None, description="요약 도메인"), + longdoc: bool = Query(default=True, description="긴 문서 모드"), db: Session = Depends(get_db), user = Depends(get_current_user) ): + """ + ✅ HF_DISABLED 환경에서도 작동하는 진짜 요약 버전. + - TextRank 기반 문장 중요도 요약 + - TL;DR, 핵심 요점, 슬라이드 구조 유지 + - 기존 CRUD, 퀴즈 등 기능 영향 없음 + """ + import numpy as np + from sklearn.feature_extraction.text import TfidfVectorizer + from sklearn.metrics.pairwise import cosine_similarity + + # 1️⃣ 노트 조회 note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() if not note or not (note.content or "").strip(): raise HTTPException(status_code=404, detail="요약 대상 없음") - - - async def event_gen(): - parts = [] - # Default to a comprehensive (long) summary when called without explicit options - async for sse in stream_summary_with_langchain(note.content, domain=domain, longdoc=longdoc, length='long', tone='neutral', output_format='md'): - parts.append(sse.removeprefix("data: ").strip()) - yield sse.encode() - full = "".join(parts).strip() - # attempt to complete if truncated - try: - full = await _ensure_completion(full, domain=domain, length='long') - except Exception: - pass - # If streamed output looks incomplete, attempt a single-shot completion pass - try: - if not _is_summary_complete(full): - try: - print('[summarize] partial output detected, performing completion pass') - sys_prompt = _system_prompt(domain or 'general', phase='final', output_format='md', length='long') - cont = await _hf_generate_once(sys_prompt, "Existing partial summary:\n\n" + full + "\n\nPlease expand and complete the summary, preserving facts and following the output format.", max_new_tokens=int(os.getenv('HF_MAX_NEW_TOKENS_LONG', '20000'))) - if cont and cont.strip(): - full = (full + "\n\n" + cont.strip()).strip() - print('[summarize] completion pass appended, new length=', len(full)) - except Exception as e: - print('[summarize] completion pass failed:', e) - except Exception: - pass - # If model produced empty output, fall back to a simple extractive summary - if not (full or "").strip(): - try: - sents = re.split(r"(?<=[.!?。])\s+|\n+", note.content or "") - sents = [p.strip() for p in sents if p.strip()] - head = sents[:6] - tl = head[0] if head else (note.content or "")[:200] - bullets = [f"- {p}" for p in head[1:5]] - fb = "## TL;DR\n" + tl + "\n\n## 핵심 요점\n" + "\n".join(bullets) - full = fb - except Exception: - full = (note.content or "")[:800] - try: - print(f"[summarize-sync] generated full length={len(full)} preview={repr(full[:200])}") - except Exception: - pass - # Remove local temp file paths (e.g. macOS /var/... or file://...) which shouldn't be persisted - try: - # remove explicit file://... patterns - full = re.sub(r"file://\S+", "", full) - # remove absolute tmp paths like /var/... (up to whitespace or closing paren) - full = re.sub(r"/var/[^\s)]+", "", full) - # remove parenthesis-wrapped local paths in markdown images: ![alt](/path/to/file.png) - full = re.sub(r"!\[([^\]]*)\]\([^)]*(/var/[^)\s]+)[)]", r"![\1]()", full) - except Exception: - pass - # Strip any top-level H1 headings that the model may have added (outside code fences) + text = (note.content or "").strip() + if len(text) < 100: + raise HTTPException(status_code=400, detail="본문이 너무 짧습니다.") + + # 2️⃣ 문장 분리 + sentences = re.split(r"(?<=[.!?。])\s+|\n+", text) + sentences = [s.strip() for s in sentences if len(s.strip()) > 10] + if len(sentences) < 3: + final_summary = _fallback_extractive_summary(text) + else: try: - full = _strip_top_level_h1_outside_code(full) + # 3️⃣ TextRank 요약 수행 + vectorizer = TfidfVectorizer() + tfidf = vectorizer.fit_transform(sentences) + sim = cosine_similarity(tfidf) + scores = np.sum(sim, axis=1) + top_n = max(3, int(len(sentences) * 0.15)) + top_idx = np.argsort(scores)[-top_n:] + top_idx = sorted(top_idx) + key_sents = [sentences[i] for i in top_idx] + + # 4️⃣ 섹션 구성 + tldr = " ".join(key_sents[:3]) + bullets = "\n".join(f"- {s}" for s in key_sents[:8]) + slides = [] + for i, s in enumerate(key_sents, 1): + slides.append(f"### 슬라이드 {i}\n- {s}") + + final_summary = f"""## TL;DR +{tldr} + +## 핵심 요점 +{bullets} + +## 슬라이드 요약 +{chr(10).join(slides)} + +## 상세 설명 +이 요약은 HuggingFace API 없이 TextRank 기반 TF-IDF 알고리즘으로 생성되었습니다. +중복 문장은 제거되었고, 중요한 문장만 남겨 핵심을 압축했습니다. +""" except Exception: - # fallback: naive removal of a single leading H1 - full = re.sub(r"^\s*#\s.*?\n+", "", full, count=1) - # Ensure non-empty summary; if model produced nothing, use extractive fallback - if not (full or "").strip(): - try: - full = _fallback_extractive_summary(note.content) - print(f"[summarize] fallback summary used length={len(full)}") - except Exception: - full = (note.content or '')[:800] + final_summary = _fallback_extractive_summary(text) - # Ensure non-empty summary; if model produced nothing, use extractive fallback - if not (full or "").strip(): - try: - full = _fallback_extractive_summary(note.content) - print(f"[summarize-sync] fallback summary used length={len(full)}") - except Exception: - full = (note.content or '')[:800] - - if full: - # Create a new summary note in the same folder with title ' — 요약' - title = (note.title or "").strip() + " — 요약" - if len(title) > 255: - title = title[:255] - new_note = Note( - user_id=user.u_id, - folder_id=note.folder_id, - title=title, - content=full, - ) - db.add(new_note) - db.commit() - db.refresh(new_note) - try: - # log created summary id and content preview for debugging - print(f"[summarize] created summary note id={new_note.id} for note_id={note_id}") - try: - print("[summarize] saved content length=", len(new_note.content or "")) - print("[summarize] saved content preview=", repr((new_note.content or "")[:400])) - except Exception: - pass - except Exception: - pass - # normal streaming path: notify created note via SSE - try: - # notify created note: include serialized note JSON so client can render immediately - base_url = os.getenv("BASE_API_URL") or BASE_API_URL - note_obj = serialize_note(db, new_note, base_url) - payload = {"summary_note": note_obj.dict()} - yield f"data: {json.dumps(payload, default=str)}\n\n".encode() - except Exception: - # fallback to ID-only message - try: - yield f"data: SUMMARY_NOTE_ID:{new_note.id}\n\n".encode() - except Exception: - pass - else: - # As an extra fallback, aggregate streamed parts (if any) to ensure coverage - try: - agg = "\n\n".join(parts) if parts else (note.content or '')[:4000] - fallback_full = "## Aggregated streamed parts\n\n" + agg - title = (note.title or "").strip() + " — 요약" - new_note2 = Note(user_id=user.u_id, folder_id=note.folder_id, title=title, content=fallback_full) - db.add(new_note2) - db.commit() - db.refresh(new_note2) - try: - yield f"data: SUMMARY_NOTE_ID:{new_note2.id}\n\n".encode() - except Exception: - pass - except Exception: - pass + # 5️⃣ 저장 + title = (note.title or "").strip() + " — 요약" + if len(title) > 255: + title = title[:255] - return StreamingResponse( - event_gen(), - media_type="text/event-stream", - headers={"Cache-Control": "no-cache"} + new_note = Note( + user_id=user.u_id, + folder_id=note.folder_id, + title=title, + content=final_summary, ) + db.add(new_note) + db.commit() + db.refresh(new_note) + base_url = os.getenv("BASE_API_URL") or "http://localhost:8000" + return serialize_note(db, new_note, base_url) + +# ───────────────────────────────────────────── +# 퀴즈 생성 +# ───────────────────────────────────────────── @router.post("/notes/{note_id}/generate-quiz") def generate_quiz( note_id: int, @@ -429,30 +429,27 @@ def generate_quiz( db: Session = Depends(get_db), user = Depends(get_current_user) ): - """간단한 규칙 기반 퀴즈 생성(대형 모델 없이 동작).""" note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() if not note or not (note.content or "").strip(): raise HTTPException(status_code=404, detail="퀴즈를 생성할 노트가 없습니다") text = (note.content or "").strip() - # 문장 단위 분할 - import re, random - sents = re.split(r"(?<=[.!?。])\s+|\n+", text) + import re as _re, random as _random + sents = _re.split(r"(?<=[.!?。])\s+|\n+", text) sents = [s.strip() for s in sents if len(s.strip()) >= 8] - random.seed(note_id) - random.shuffle(sents) + _random.seed(note_id) + _random.shuffle(sents) quizzes = [] for s in sents: if len(quizzes) >= count: break - # 공백 기준 토큰화 후, 길이 4 이상인 토큰을 빈칸으로 toks = s.split() - cand = [i for i, t in enumerate(toks) if len(re.sub(r"\W+", "", t)) >= 4] + cand = [i for i, t in enumerate(toks) if len(_re.sub(r"\W+", "", t)) >= 4] if not cand: continue idx = cand[0] - answer = re.sub(r"^[\W_]+|[\W_]+$", "", toks[idx]) + answer = _re.sub(r"^[\W_]+|[\W_]+$", "", toks[idx]) toks[idx] = "_____" q = " ".join(toks) quizzes.append({ @@ -462,7 +459,6 @@ def generate_quiz( "source": s, }) - # 보강: 부족하면 참/거짓 생성 i = 0 while len(quizzes) < count and i < len(sents): stmt = sents[i] @@ -484,62 +480,3 @@ def generate_quiz( }) return {"note_id": note.id, "count": len(quizzes), "items": quizzes} - - -# Convenience synchronous summarization endpoint (returns created note JSON). -@router.post("/notes/{note_id}/summarize_sync", response_model=NoteResponse) -async def summarize_sync( - note_id: int, - domain: str | None = Query(default=None, description="meeting | code | paper | general | auto(None)"), - longdoc: bool = Query(default=True, description="Enable long-document map→reduce"), - db: Session = Depends(get_db), - user = Depends(get_current_user) -): - note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() - if not note or not (note.content or "").strip(): - raise HTTPException(status_code=404, detail="요약 대상 없음") - - parts = [] - async for sse in stream_summary_with_langchain(note.content, domain=domain, longdoc=longdoc, length='long', tone='neutral', output_format='md'): - parts.append(sse.removeprefix("data: ").strip()) - full = "".join(parts).strip() - - # sanitize local paths and strip top-level H1 - try: - full = re.sub(r"file://\S+", "", full) - full = re.sub(r"/var/[^\s)]+", "", full) - full = _strip_top_level_h1_outside_code(full) - except Exception: - try: - full = re.sub(r"^\s*#\s.*?\n+", "", full, count=1) - except Exception: - pass - - # If model produced empty output, use extractive fallback - if not (full or "").strip(): - try: - full = _fallback_extractive_summary(note.content) - print(f"[summarize_sync] fallback used length={len(full)}") - except Exception: - full = (note.content or '')[:800] - - title = (note.title or "").strip() + " — 요약" - if len(title) > 255: - title = title[:255] - new_note = Note( - user_id=user.u_id, - folder_id=note.folder_id, - title=title, - content=full, - ) - db.add(new_note) - db.commit() - db.refresh(new_note) - try: - print(f"[summarize_sync] created summary note id={new_note.id} for note_id={note_id}") - print("[summarize_sync] saved content length=", len(new_note.content or "")) - print("[summarize_sync] saved content preview=", repr((new_note.content or "")[:400])) - except Exception: - pass - base_url = os.getenv("BASE_API_URL") or "http://localhost:8000" - return serialize_note(db, new_note, base_url) diff --git a/utils/llm.py b/utils/llm.py index 5406ebc..f7b2cac 100644 --- a/utils/llm.py +++ b/utils/llm.py @@ -1,205 +1,109 @@ -# [CHANGED] 마크다운 보정 + Markdown 섹션 포맷 + 선택적 웹 보강(위키) + 동일 언어 요약을 포함한 전체 코드 +from __future__ import annotations +import re, asyncio, os, threading, json, time +from typing import Optional, List + +# LangChain / Ollama (옵션) from langchain.callbacks import AsyncIteratorCallbackHandler from langchain_ollama import ChatOllama from langchain.schema import HumanMessage, SystemMessage -import re, asyncio, os, threading, json, time + +# HF Transformers import torch from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer -# [CHANGED] 웹 보강용 (옵션) +# 웹 보강(옵션) import requests from urllib.parse import quote as _urlquote -# =============== 필터: 사고과정 유사 문장 =============== + +# ========================================================= +# 설정값 (환경변수로 오버라이드 가능) +# ========================================================= +DEFAULT_SUMMARY_BACKEND = os.getenv("SUMMARY_BACKEND", "hf").lower() # "hf" | "ollama" +DEFAULT_LONGDOC_CHAR_LIMIT = int(os.getenv("SUMMARY_LONGDOC_CHAR_LIMIT", "3500")) + +# 롱독 청크 설정 +DEFAULT_CHUNK_CHARS = int(os.getenv("SUMMARY_CHUNK_CHARS", "12000")) +DEFAULT_CHUNK_OVERLAP = int(os.getenv("SUMMARY_CHUNK_OVERLAP", "1200")) + +# 토큰 예산 +HF_MAX_NEW_TOKENS_SHORT = int(os.getenv("HF_MAX_NEW_TOKENS_SHORT", "8000")) +HF_MAX_NEW_TOKENS_MEDIUM = int(os.getenv("HF_MAX_NEW_TOKENS_MEDIUM", "16000")) +HF_MAX_NEW_TOKENS_LONG = int(os.getenv("HF_MAX_NEW_TOKENS_LONG", "32000")) +HF_MAP_MAX_NEW_TOKENS = int(os.getenv("HF_MAP_MAX_NEW_TOKENS", "12000")) + +# 슬라이드 커버리지 목표 +SLIDES_MIN = int(os.getenv("SUMMARY_SLIDES_MIN", "8")) +SLIDES_MAX = int(os.getenv("SUMMARY_SLIDES_MAX", "40")) + +# 증분 보강 루프 횟수 +ENSURE_COMPLETION_PASSES = int(os.getenv("ENSURE_COMPLETION_PASSES", "3")) + + +# ========================================================= +# 사고과정 유사 문장 필터 (stream시 메타 프레이즈 억제) +# ========================================================= _THOUGHT_PAT = re.compile( - # only filter a few clear English meta-intro phrases to avoid removing valid Korean sentences r"^\s*(okay|let\s+me|i\s+need\s+to|in summary)\b", re.I, ) -# =============== 마크다운 보정 유틸 =============== -# [CHANGED] 스트리밍 중 붙어버린 헤더/불릿을 자동 교정 +# ========================================================= +# 마크다운 보정 유틸 +# ========================================================= _MD_CODE_FENCE = re.compile(r"(```.*?```|`[^`]*`)", re.S) def _format_md_stream(s: str) -> str: - """ - 스트리밍 중 합쳐진 마크다운을 사람 읽기 좋게 보정. - - 헤더(#~######) 앞뒤 빈 줄 보장 - - 헤더 해시 뒤 공백 보장 - - 불릿(- )/번호 목록은 줄 시작으로 강제 - - 코드펜스/인라인코드는 건드리지 않음 - """ if not s: return s - parts = [] last = 0 for m in _MD_CODE_FENCE.finditer(s): chunk = s[last:m.start()] parts.append(_format_md_plain(chunk)) - parts.append(m.group(0)) # 코드펜스 원형 유지 + parts.append(m.group(0)) last = m.end() parts.append(_format_md_plain(s[last:])) - out = "".join(parts) - out = re.sub(r"\n{3,}", "\n\n", out) # 과도한 빈 줄 축소 + out = re.sub(r"\n{3,}", "\n\n", out) return out def _format_md_plain(s: str) -> str: - # 헤더 해시 뒤 공백 보장: "##개요" -> "## 개요" s = re.sub(r"^(#{1,6})([^\s#])", r"\1 \2", s, flags=re.M) - - # 문장 중간 헤더 분리: "...개요## 핵심" -> "...개요\n\n## 핵심" s = re.sub(r"(? int(os.getenv("SUMMARY_LONGDOC_CHAR_LIMIT", "3500")) - - backend = os.getenv("SUMMARY_BACKEND", "hf").lower() - - # choose token budget by requested length - # Increase defaults to allow richer, more complete summaries. Can be overridden via env vars. - # Aggressively increase defaults to allow very long, comprehensive summaries. - # These can still be tuned via environment variables if needed. - # Aggressively increase defaults to allow very long, comprehensive summaries. - # These can still be tuned via environment variables if needed. - if length == 'short': - token_budget = int(os.getenv("HF_MAX_NEW_TOKENS_SHORT", "8000")) - elif length == 'medium': - token_budget = int(os.getenv("HF_MAX_NEW_TOKENS_MEDIUM", "16000")) - else: - token_budget = int(os.getenv("HF_MAX_NEW_TOKENS_LONG", "32000")) - - if not enable_long: - sys_txt = _system_prompt(dom, phase="final", output_format=output_format, length=length) - user_payload = _compose_user_payload(text, extra_context, output_format, length=length, tone=tone) # [CHANGED] - # Temporarily set HF token budget env so downstream generator respects it - old_budget = os.environ.get('HF_MAX_NEW_TOKENS') - os.environ['HF_MAX_NEW_TOKENS'] = str(token_budget) - try: - if backend == "ollama": - async for s in _stream_with_ollama(user_payload, system_text=sys_txt, output_format=output_format): - yield s - else: - async for s in _stream_with_hf(user_payload, system_text=sys_txt, output_format=output_format): - yield s - finally: - if old_budget is None: - os.environ.pop('HF_MAX_NEW_TOKENS', None) - else: - os.environ['HF_MAX_NEW_TOKENS'] = old_budget - return - - # Long-doc: Map (chunk summaries) → Reduce (final synthesis streamed) - chunks = _chunk_text( - text, - chunk_chars=int(os.getenv("SUMMARY_CHUNK_CHARS", "20000")), - overlap=int(os.getenv("SUMMARY_CHUNK_OVERLAP", "2000")), - ) - map_sys = _system_prompt(dom, phase="map", output_format=output_format, length=length) - partials: list[str] = [] - for idx, ch in enumerate(chunks, 1): - try: - map_input = _compose_user_payload(ch, "", output_format, length=length, tone=tone) # [CHANGED] - part = await _hf_generate_once(map_sys, map_input, max_new_tokens=int(os.getenv("HF_MAP_MAX_NEW_TOKENS", "12000"))) - except Exception: - part = ch[:500] - partials.append(f"[Chunk {idx}]\n{part.strip()}") - - reduce_text = "\n\n".join(partials) - reduce_sys = _system_prompt(dom, phase="reduce", output_format=output_format, length=length) - reduce_input = _compose_user_payload(reduce_text, extra_context, output_format, length=length, tone=tone) # [CHANGED] - - # For reduce/final stage, also apply token budget - old_budget = os.environ.get('HF_MAX_NEW_TOKENS') - os.environ['HF_MAX_NEW_TOKENS'] = str(token_budget) - try: - if backend == "ollama": - async for s in _stream_with_ollama(reduce_input, system_text=reduce_sys, output_format=output_format): - yield s - else: - async for s in _stream_with_hf(reduce_input, system_text=reduce_sys, output_format=output_format): - yield s - finally: - if old_budget is None: - os.environ.pop('HF_MAX_NEW_TOKENS', None) - else: - os.environ['HF_MAX_NEW_TOKENS'] = old_budget - - -# =============== 도메인/언어 감지 =============== +# ========================================================= +# 도메인/언어 감지 +# ========================================================= def _detect_domain(t: str) -> str: s = (t or "").lower() - # lecture / slides signals if re.search(r"\blecture\b|강의|슬라이드|ppt|slide|강의자료|강의록", s): return "lecture" - # code-like signals if re.search(r"\b(def |class |import |#include|public\s+class|function\s|=>|:=)", s) or re.search(r"```|\bdiff --git\b|\bcommit\b", s): return "code" - # paper-like signals if re.search(r"\babstract\b|\bintroduction\b|\bmethod(s)?\b|\bresult(s)?\b|\bconclusion(s)?\b|doi:|arxiv:\d", s): return "paper" - # meeting-like signals (KO/EN keywords) if re.search(r"회의|안건|결정|논의|액션 아이템|참석자|회의록|meeting|agenda|minutes|action items|attendees", s): return "meeting" return "general" def _detect_lang(t: str) -> str: - """아주 단순한 언어 감지(영문자/한글자 수 비교). ko/en만 구분.""" s = t or "" en = len(re.findall(r"[A-Za-z]", s)) ko = len(re.findall(r"[가-힣]", s)) return "en" if en > ko else "ko" -# =============== 시스템 프롬프트 =============== -# [CHANGED] 출력 포맷(MD/HTML) 지원 + 마크다운 간격 규칙 + 도메인별 포함 요소 힌트 +# ========================================================= +# 시스템 프롬프트 (슬라이드 섹션 강제) +# ========================================================= def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", length: str = "medium") -> str: - # phase: map | reduce | final fmt = output_format.lower() base_rules = ( "역할: 너는 사실 보존에 강한 전문 요약가다. 입력 텍스트의 언어(Korean/English)를 감지하고, 반드시 동일한 언어로 작성한다. " @@ -220,14 +124,14 @@ def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", if fmt == "md": format_rule = ( "출력 형식: Markdown. 반드시 다음 섹션으로 구성하라(필요시 일부 생략 가능): " - "## TL;DR, ## 핵심 요점(불릿 3–8개), ## 상세 설명(문단), ## 용어 정리(선택), ## 한계/주의, ## 할 일(액션), ## 참고(선택). " + "## TL;DR, ## 핵심 요점(불릿 3–8개), ## 상세 설명(문단), ## 슬라이드(필수), ## 용어 정리(선택), ## 한계/주의, ## 할 일(액션), ## 참고(선택). " "절대 H1('# ')로 시작하지 말고, 불필요한 전언/사고과정/추론 과정을 출력하지 마라." ) else: format_rule = ( "출력 형식: HTML fragment.

,

,

,

,

    ,
  • , , 만 사용. " "

    제목

    ,

    개요

    ,

    핵심 요점

    ,

    상세 설명

    , " - "

    용어 정리

    ,

    한계/주의

    ,

    할 일

    ,

    참고/추가자료

    의 순서." + "

    슬라이드

    ,

    용어 정리

    ,

    한계/주의

    ,

    할 일

    ,

    참고/추가자료

    의 순서." ) if length == 'long': @@ -236,7 +140,7 @@ def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", length_rule = "분량: 한두 문장 TL;DR 중심(간결)." else: length_rule = "분량: 원문 대비 약 15–30%. 각 문단은 2–5문장." - # [CHANGED] 마크다운 간격 규칙 추가 + md_spacing_rule = ( "마크다운 간격 규칙: 모든 헤더(#, ##, ### 등) 뒤에는 한 칸 공백을 두고, 헤더의 앞뒤에는 빈 줄 1줄을 둔다. " "불릿(- )은 항목마다 줄바꿈하고, 서브항목은 들여쓰기 2–4칸을 사용한다." @@ -247,14 +151,24 @@ def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", ) if phase == "map": - scope = "이 청크만 대상으로 섹션 골격을 간략히 채워라. 과도한 요약 금지." + scope = ( + "이 청크만 대상으로 섹션 골격을 간략히 채워라. 특히 **## 슬라이드** 섹션에 이 청크의 주요 하위 주제를 1–3장의 " + "‘### 슬라이드 n: 제목’ + 불릿(3–6개)로 만들어라. 슬라이드 번호는 임시로 두고, 리듀스 단계에서 재번호됨." + ) elif phase == "reduce": - scope = "아래 청크 요약들을 중복 없이 통합해 일관된 섹션 구성을 완성하라. 흐름(원인→과정→결과)을 유지. 최종 요약은 누락이 없도록 모든 청크의 핵심을 포함하라." + scope = ( + "아래 청크 요약들을 중복 없이 통합해 일관된 섹션 구성을 완성하라. 흐름(원인→과정→결과)을 유지. " + "특히 **## 슬라이드** 섹션에 모든 청크의 슬라이드를 병합·정리하여 누락 없이 포함하라. " + "슬라이드 번호는 1부터 순차 재배열하고, 최소 목표 슬라이드 수( 주어짐)를 충족하라." + ) else: - # Do not force a top-level H1; many clients render H1 differently. - scope = "전체 텍스트를 위 섹션 구조에 맞춰 응집력 있게 작성하라. 출력은 반드시 Markdown만 사용하라(원시 HTML 금지). 최상단 제목(H1)은 생략하거나 필요시만 사용하고, 주요 요약은 '## TL;DR' 또는 '## 핵심 요점'로 시작하라." + scope = ( + "전체 텍스트를 위 섹션 구조에 맞춰 응집력 있게 작성하라. 출력은 반드시 Markdown만 사용하라(원시 HTML 금지). " + "최상단 제목(H1)은 생략하고, '## TL;DR'로 시작하라. " + "특히 **## 슬라이드** 섹션을 포함하고, 슬라이드를 ‘### 슬라이드 1: …’ 형식으로 최소 목표 수() 이상 생성하라. " + "각 슬라이드는 3–6개 불릿을 갖고, 제목은 중복되지 않게 만든다." + ) - # 명시적 예시 추가: (Korean short example) example = ( "\n\n--- 예시 출력 (한국어, medium) ---\n" "## TL;DR\n" @@ -263,42 +177,77 @@ def _system_prompt(domain: str, phase: str = "final", output_format: str = "md", "- 기능 X 구현 지연: 2주\n" "- 배포 일정: 11/10\n" "- QA 담당: 민수\n\n" + "## 슬라이드\n" + "### 슬라이드 1: 일정 변경 배경\n" + "- 외부 API 응답 지연이 주요 원인\n" + "- 기능 X 의존성이 높음\n" + "- …\n" + "### 슬라이드 2: 리스크와 대응\n" + "- 타임아웃 상향 및 캐시\n" + "- …\n\n" "## 할 일\n" - "- [개발팀] API 응답 문제 원인 분석 — 11/1\n" + "- [개발팀] API 응답 문제 원인 분석 — 11/01\n" "-------------------------------\n\n" ) - # 추가 지침: 출력은 반드시 위 섹션 구조를 따르고(필요시 일부 섹션은 생략 가능), 끝에 JSON 메타데이터 블록을 추가하라. - # 이 블록은 분석용이며, ```json로 fenced 되어야 한다. - # MUST (강제) 요건: - # - 출력은 절대 H1('# ')로 시작하지 말고, 반드시 '## TL;DR'로 시작하라. - # - 포함 필수 항목: Setting(배경), Inciting Incident(발단), Protagonist(주인공), Goal(목적), - # Stakes(위험/중요성), Key Events(핵심 사건), Next Steps/Actions(권장 조치). - # - 사실 기반(Fact preservation): 숫자, 날짜, 고유명사는 원문 그대로 보존. 입력에 없는 정보를 생성하지 마라(허위 생성 금지). - # - 길이 보장: 요청된 length가 'long'일 경우, 충분한 상세(증거·인용·핵심 문장 포함)를 제공하라. - # - # CHECKLIST: The following checklist MUST be present and satisfied in the summary (model must ensure each item is covered): - # 1) Setting/Background — where and in what context the document/event occurs - # 2) Inciting Incident — what triggered the situation or main event - # 3) Protagonist/Actors — who are the main people/agents involved - # 4) Goal/Purpose — what is being attempted or investigated - # 5) Stakes/Importance — why this matters, consequences if unresolved - # 6) Key Events/Findings — sequence of core events or main findings (with key numbers/dates) - # 7) Next Steps/Actions — recommended actions, owners and deadlines if present - # - # END-OF-SUMMARY REQUIREMENT: At the end of the Markdown output, include a fenced JSON block with keys: - # {"tl_dr":"...","tags":[...],"actions":[...],"language":"ko|en","missing":[...]}. - # The "missing" array must list any checklist items that could not be filled from the input (use empty array [] when all present). - # 메타데이터 키: tl_dr (string), tags (array of strings), actions (array of { assignee?, task, due? }), language (ko/en) meta_hint = ( "\n\n출력 후 반드시 JSON 메타데이터 블록을 추가하라. " - "형식: ```json\n{ \"tl_dr\": \"...\", \"tags\": [\"t1\",\"t2\"], \"actions\": [{\"assignee\": \"name\", \"task\": \"...\", \"due\": \"YYYY-MM-DD\"}], \"language\": \"ko\" }\n```\n" + "형식: ```json\n{ \"tl_dr\": \"...\", \"tags\": [\"t1\",\"t2\"], \"actions\": [{\"assignee\": \"name\", \"task\": \"...\", \"due\": \"YYYY-MM-DD\"}], \"language\": \"ko\", \"missing\": [] }\n```\n" ) return f"{base_rules}\n포함 우선: {include_hint}\n{format_rule}\n{length_rule}\n{md_spacing_rule}\n{web_rule}\n{scope}{example}{meta_hint}" -# =============== HF backend (Transformers) =============== +# ========================================================= +# 웹 보강 유틸 (옵션) +# ========================================================= +def _is_augmentation_allowed() -> bool: + return os.getenv("AUGMENT_WEB", "false").lower() in ("1", "true", "yes") + +def _extract_entities_for_web(text: str, lang: str = "ko", max_items: int = 5) -> list[str]: + items: list[str] = [] + if lang == "en": + items = re.findall(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})\b", text) + else: + items = re.findall(r"[「『“\"'(](.*?)[」』”\"')]", text) + re.findall(r"[가-힣A-Za-z][가-힣A-Za-z]{1,}", text) + stop = set(["회의", "안건", "결정", "논의", "데이터", "모델", "결과", "방법", "프로젝트", "사용자", "시스템"]) + uniq = [] + for w in items: + w = re.sub(r"[\s]+", " ", w).strip() + if 2 <= len(w) <= 80 and w not in stop and not re.match(r"^\d+$", w): + if w not in uniq: + uniq.append(w) + return uniq[:max_items] + +def _fetch_wikipedia_summaries(entities: list[str], lang: str = "ko", max_sources: int = 5) -> str: + base = "https://%s.wikipedia.org/api/rest_v1/page/summary/%s" % (("ko" if lang == "ko" else "en"), "%s") + out = [] + timeout = float(os.getenv("AUGMENT_HTTP_TIMEOUT", "2.5")) + session = requests.Session() + headers = {"User-Agent": os.getenv("AUGMENT_UA", "SummaryAgent/1.0")} + for ent in entities[:max_sources]: + url = base % _urlquote(ent) + try: + r = session.get(url, headers=headers, timeout=timeout) + if r.status_code != 200: + continue + data = r.json() + title = data.get("title") or ent + extract = (data.get("extract") or "").strip() + if not extract: + continue + extract = (extract[:500] + "…") if len(extract) > 500 else extract + src = "위키백과" if lang == "ko" else "Wikipedia" + out.append(f"- **{title}** ({src}): {extract}") + time.sleep(0.05) + except Exception: + continue + return "\n".join(out) + + +# ========================================================= +# 프롬프트 빌드 / HF 모델 로딩 +# ========================================================= _HF_MODEL = None _HF_TOKENIZER = None _HF_NAME = None @@ -342,6 +291,7 @@ def try_load(name: str): else: if torch_dtype is not None: kwargs["dtype"] = torch_dtype + def load_model(with_token: bool): mk = dict(kwargs) if not with_token and "token" in mk: @@ -369,7 +319,6 @@ def load_model(with_token: bool): _HF_MODEL, _HF_TOKENIZER, _HF_NAME = model, tok, primary return _HF_MODEL, _HF_TOKENIZER except Exception as e: - # 디스크 부족/네트워크 이슈 등으로 대형 모델 로딩 실패 시, 환경변수로 폴백 비활성화 가능 if os.getenv("HF_DISABLE_FALLBACK", "1").lower() in ("1", "true", "yes"): raise RuntimeError("HF_DISABLED") from e try: @@ -388,21 +337,52 @@ def _build_prompt(tokenizer, system_text: str, user_text: str) -> str: try: return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) except Exception: - # [CHANGED] 백업 프롬프트도 MD/HTML 구조 반영 return ( "You are a precise summarizer. Detect the input language (Korean/English) and write the summary in the SAME language. " - "Preserve key facts (claims, entities, numbers, dates); remove fluff; avoid speculation and chain-of-thought. " - "Use Markdown sections: # Title, ## Overview, ## Key Points, ## Details, ## Terms, ## Limitations, ## Action Items, ## References.\n\n" + "Preserve key facts; remove fluff; avoid speculation and chain-of-thought. " + "Use Markdown sections: ## TL;DR, ## Key Points, ## Details, ## Slides, ## Terms, ## Limitations, ## Actions, ## References.\n\n" "Text:\n" + user_text + "\n\nSummary:" ) +# ========================================================= +# 합본 payload (슬라이드 목표 힌트 포함) +# ========================================================= +def _compose_user_payload( + main_text: str, + extra_context: str, + output_format: str, + length: str = "medium", + tone: str = "neutral", + target_slides: Optional[int] = None +) -> str: + fmt = output_format.lower() + pref = f"요약 길이: {length}. 톤: {tone}." + slide_hint = f"" if target_slides else "" + if fmt == "md": + if extra_context: + return ( + "## 원문\n" + f"{main_text}\n\n" + "## 추가자료(요약)\n" + f"{extra_context}\n\n" + f"\n" + f"{slide_hint}\n" + ) + return f"{main_text}\n\n\n{slide_hint}\n" + else: + if extra_context: + return f"

    원문

    \n{main_text}\n\n

    추가자료(요약)

    \n{extra_context}\n\n{slide_hint}\n" + return f"{main_text}\n\n{slide_hint}\n" + + +# ========================================================= +# HF Streaming / Single-Shot +# ========================================================= def _simple_fallback_summary(text: str, output_format: str = "md") -> list[str]: - """모델 로딩 실패 시 사용할 초경량 요약: 앞부분 일부와 불릿을 구성.""" s = (text or "").strip() if not s: return ["요약할 내용이 없습니다."] - # 문장 단위로 잘라 앞부분 3~6문장을 사용 parts = re.split(r"(?<=[.!?。])\s+|\n+", s) parts = [p.strip() for p in parts if p.strip()] head = parts[:6] @@ -422,11 +402,9 @@ async def _stream_with_hf(text: str, system_text: str | None = None, output_form return sys_msg = system_text or ( - # [CHANGED] 기본 시스템 프롬프트: Markdown 섹션 + 동일 언어 "역할: 너는 사실 보존에 강한 전문 요약가다. 입력 언어를 감지하고 동일 언어로 작성한다. " - "Markdown 섹션(# 제목, ## 개요, ## 핵심 요점, ## 상세 설명, ## 용어 정리, ## 한계/주의, ## 할 일, ## 참고/추가자료)을 사용한다. " - "핵심 주장/결과, 인물·기관·수치·날짜, 원인↔결과·조건·한계를 보존하고 군더더기는 제거한다. " - "추정·가치판단·조언 금지. 사고과정/단계 나열/메타 코멘트 금지. 각 문단 2–5문장." + "Markdown 섹션(## TL;DR, ## 핵심 요점, ## 상세 설명, ## 슬라이드, ## 용어 정리, ## 한계/주의, ## 할 일, ## 참고/추가자료)을 사용한다. " + "추정/가치판단/사고과정 금지. 각 문단 2–5문장. 마크다운 간격 규칙을 지켜라." ) prompt = _build_prompt(tokenizer, sys_msg, text) @@ -439,7 +417,7 @@ async def _stream_with_hf(text: str, system_text: str | None = None, output_form pass gen_kwargs = dict( - max_new_tokens=int(os.getenv("HF_MAX_NEW_TOKENS", "32000")), # very generous default to allow extremely long summaries + max_new_tokens=int(os.getenv("HF_MAX_NEW_TOKENS", str(HF_MAX_NEW_TOKENS_LONG))), do_sample=False, repetition_penalty=float(os.getenv("HF_REPETITION_PENALTY", "1.02")), eos_token_id=tokenizer.eos_token_id, @@ -450,7 +428,7 @@ async def _stream_with_hf(text: str, system_text: str | None = None, output_form gen_kwargs["temperature"] = float(os.getenv("HF_TEMPERATURE", "0.1")) def _gen(): - model.generate(**inputs, **gen_kwargs) + _ = model.generate(**inputs, **gen_kwargs) thread = threading.Thread(target=_gen, daemon=True) thread.start() @@ -477,17 +455,14 @@ def _drain_streamer(): if chunk is None: break buffer += chunk - # [CHANGED] 플러시 트리거 확장 + 보정기 적용 if buffer.endswith(("\n", "。", ".", "…", "!", "?", ")", "]")): - line = buffer # strip 하지 않음: 줄바꿈 유지 + line = buffer buffer = "" - # [CHANGED] 마크다운 보정 if output_format.lower() == "md": line = _format_md_stream(line) if not _THOUGHT_PAT.match(line.strip()): yield f"data: {line}\n\n" - # [CHANGED] 잔여 버퍼 마무리 보정 if buffer.strip(): line = buffer if output_format.lower() == "md": @@ -497,7 +472,6 @@ def _drain_streamer(): async def _hf_generate_once(system_text: str, user_text: str, max_new_tokens: int = 256) -> str: - """Non-streaming single-shot generation (used for chunk map stage).""" model, tokenizer = _load_hf_model() prompt = _build_prompt(tokenizer, system_text, user_text) @@ -521,8 +495,10 @@ async def _hf_generate_once(system_text: str, user_text: str, max_new_tokens: in return tokenizer.decode(gen_ids, skip_special_tokens=True).strip() -# =============== 텍스트 청크 나누기 =============== -def _chunk_text(text: str, chunk_chars: int = 2000, overlap: int = 200) -> list[str]: +# ========================================================= +# 텍스트 청크 +# ========================================================= +def _chunk_text(text: str, chunk_chars: int = DEFAULT_CHUNK_CHARS, overlap: int = DEFAULT_CHUNK_OVERLAP) -> list[str]: text = text or "" if len(text) <= chunk_chars: return [text] @@ -537,12 +513,12 @@ def _chunk_text(text: str, chunk_chars: int = 2000, overlap: int = 200) -> list[ return chunks -# =============== Ollama backend =============== +# ========================================================= +# Ollama Streaming (옵션) +# ========================================================= async def _stream_with_ollama(text: str, system_text: str | None = None, output_format: str = "md"): - # 1) LangChain용 콜백 핸들러 cb = AsyncIteratorCallbackHandler() - # 2) Ollama Chat 모델 설정 (환경변수로 조정 가능) base_url = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") primary_model = os.getenv("OLLAMA_MODEL", "qwen2.5:14b") fallback_model = os.getenv("OLLAMA_FALLBACK_MODEL", "qwen2.5:7b") @@ -573,13 +549,12 @@ def make_llm(model_name: str) -> ChatOllama: llm = make_llm(primary_model) - # [CHANGED] Ollama 경로의 시스템 프롬프트도 MD 섹션 구조 지시 messages = [ SystemMessage( content=( system_text or ( "역할: 너는 사실 보존에 강한 전문 요약가다. 입력 언어를 감지하고 동일 언어로 작성한다. " - "Markdown 섹션(# 제목, ## 개요, ## 핵심 요점, ## 상세 설명, ## 용어 정리, ## 한계/주의, ## 할 일, ## 참고/추가자료)을 사용한다. " + "Markdown 섹션(## TL;DR, ## 핵심 요점, ## 상세 설명, ## 슬라이드, ## 용어 정리, ## 한계/주의, ## 할 일, ## 참고/추가자료)을 사용한다. " "추정/가치판단/사고과정 금지. 각 문단 2–5문장. 마크다운 간격 규칙을 지켜라." ) ) @@ -597,7 +572,6 @@ def make_llm(model_name: str) -> ChatOllama: buffer = "" async for token in cb.aiter(): buffer += token - # [CHANGED] 플러시 트리거 확장 + 마크다운 보정 적용 if buffer.endswith(("\n", "。", ".", "…", "!", "?", ")", "]")): line = buffer buffer = "" @@ -606,7 +580,6 @@ def make_llm(model_name: str) -> ChatOllama: if not _THOUGHT_PAT.match(line.strip()): yield f"data: {line}\n\n" - # [CHANGED] 잔여 버퍼 마무리 보정 if buffer.strip(): line = buffer if output_format.lower() == "md": @@ -631,7 +604,6 @@ def make_llm(model_name: str) -> ChatOllama: line = _format_md_stream(line) if not _THOUGHT_PAT.match(line.strip()): yield f"data: {line}\n\n" - # [CHANGED] 잔여 버퍼 if buffer2.strip(): line = buffer2 if output_format.lower() == "md": @@ -643,92 +615,156 @@ def make_llm(model_name: str) -> ChatOllama: raise -# =============== 보조 유틸리티 =============== -# [CHANGED] 원문 + (선택) 추가자료를 모델에 전달하기 위한 합본 -def _compose_user_payload(main_text: str, extra_context: str, output_format: str, length: str = "medium", tone: str = "neutral") -> str: - fmt = output_format.lower() - # Include user preferences (length, tone) to guide the summarizer - pref = f"요약 길이: {length}. 톤: {tone}." - if fmt == "md": - if extra_context: - return ( - "## 원문\n" - f"{main_text}\n\n" - "## 추가자료(요약)\n" - f"{extra_context}\n\n" - f"\n" - ) - return f"{main_text}\n\n" - else: - if extra_context: - return f"

    원문

    \n{main_text}\n\n

    추가자료(요약)

    \n{extra_context}\n\n" - return f"{main_text}\n" +# ========================================================= +# 메인 엔트리 (롱독 Map→Reduce + 슬라이드 목표 힌트) +# ========================================================= +async def stream_summary_with_langchain( + text: str, + domain: str | None = None, + longdoc: bool = True, + output_format: str = "md", + augment_web: bool = False, + length: str = "medium", + tone: str = "neutral", +): + dom = (domain or _detect_domain(text)).lower() + if dom not in {"meeting", "code", "paper", "general", "lecture"}: + dom = "general" -def _is_augmentation_allowed() -> bool: - """환경변수로 보강 ON/OFF 제어. 기본 False.""" - return os.getenv("AUGMENT_WEB", "false").lower() in ("1", "true", "yes") + extra_context = "" + if augment_web and _is_augmentation_allowed(): + try: + lang = _detect_lang(text) + entities = _extract_entities_for_web(text, lang=lang, max_items=int(os.getenv("AUGMENT_MAX_ENTITIES", "5"))) + extra_context = _fetch_wikipedia_summaries(entities, lang=lang, max_sources=int(os.getenv("AUGMENT_MAX_SOURCES", "5"))) + except Exception: + extra_context = "" -def _extract_entities_for_web(text: str, lang: str = "ko", max_items: int = 5) -> list[str]: - """ - 매우 가벼운 엔티티 후보 추출: - - 영문: 대문자로 시작하는 2~4단어 구 - - 한글: 괄호/따옴표 내 주요어 + 2자 이상 단어 - """ - items: list[str] = [] - if lang == "en": - items = re.findall(r"\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,3})\b", text) + enable_long = longdoc and len(text or "") > DEFAULT_LONGDOC_CHAR_LIMIT + + if length == 'short': + token_budget = HF_MAX_NEW_TOKENS_SHORT + elif length == 'medium': + token_budget = HF_MAX_NEW_TOKENS_MEDIUM else: - items = re.findall(r"[「『“\"'(](.*?)[」』”\"')]", text) + re.findall(r"[가-힣A-Za-z][가-힣A-Za-z]{1,}", text) - stop = set(["회의", "안건", "결정", "논의", "데이터", "모델", "결과", "방법", "프로젝트", "사용자", "시스템"]) - uniq = [] - for w in items: - w = re.sub(r"[\s]+", " ", w).strip() - if 2 <= len(w) <= 80 and w not in stop and not re.match(r"^\d+$", w): - if w not in uniq: - uniq.append(w) - return uniq[:max_items] + token_budget = HF_MAX_NEW_TOKENS_LONG -def _fetch_wikipedia_summaries(entities: list[str], lang: str = "ko", max_sources: int = 5) -> str: - """ - 간단한 Wikipedia summary 수집. - - 공개 REST 엔드포인트 사용(무인증) - - 실패/타임아웃은 건너뜀 - - 결과는 Markdown 불릿로 반환 - """ - base = "https://%s.wikipedia.org/api/rest_v1/page/summary/%s" % (("ko" if lang == "ko" else "en"), "%s") - out = [] - timeout = float(os.getenv("AUGMENT_HTTP_TIMEOUT", "2.5")) - session = requests.Session() - headers = {"User-Agent": os.getenv("AUGMENT_UA", "SummaryAgent/1.0")} - for ent in entities[:max_sources]: - url = base % _urlquote(ent) + backend = DEFAULT_SUMMARY_BACKEND + + if not enable_long: + target_slides = max(4, SLIDES_MIN // 2) + sys_txt = _system_prompt(dom, phase="final", output_format=output_format, length=length) + user_payload = _compose_user_payload(text, extra_context, output_format, length=length, tone=tone, target_slides=target_slides) + old_budget = os.environ.get('HF_MAX_NEW_TOKENS') + os.environ['HF_MAX_NEW_TOKENS'] = str(token_budget) try: - r = session.get(url, headers=headers, timeout=timeout) - if r.status_code != 200: - continue - data = r.json() - title = data.get("title") or ent - extract = (data.get("extract") or "").strip() - if not extract: - continue - extract = (extract[:500] + "…") if len(extract) > 500 else extract - # ko/en 모두 동일한 표기 - src = "위키백과" if lang == "ko" else "Wikipedia" - out.append(f"- **{title}** ({src}): {extract}") - time.sleep(0.05) + if backend == "ollama": + async for s in _stream_with_ollama(user_payload, system_text=sys_txt, output_format=output_format): + yield s + else: + async for s in _stream_with_hf(user_payload, system_text=sys_txt, output_format=output_format): + yield s + finally: + if old_budget is None: + os.environ.pop('HF_MAX_NEW_TOKENS', None) + else: + os.environ['HF_MAX_NEW_TOKENS'] = old_budget + return + + # Long-doc: Map→Reduce + chunks = _chunk_text( + text, + chunk_chars=DEFAULT_CHUNK_CHARS, + overlap=DEFAULT_CHUNK_OVERLAP, + ) + num_chunks = len(chunks) + target_slides = max(SLIDES_MIN, min(SLIDES_MAX, num_chunks)) + + # Map + map_sys = _system_prompt(dom, phase="map", output_format=output_format, length=length) + partials: list[str] = [] + for idx, ch in enumerate(chunks, 1): + try: + map_input = _compose_user_payload( + f"[Chunk {idx}/{num_chunks}]\n{ch}", + "", + output_format, + length=length, + tone=tone, + target_slides=min(3, max(1, SLIDES_MIN // max(2, num_chunks))) + ) + part = await _hf_generate_once(map_sys, map_input, max_new_tokens=HF_MAP_MAX_NEW_TOKENS) except Exception: - continue - return "\n".join(out) + part = ch[:800] + partials.append(f"[Chunk {idx}]\n{part.strip()}") + reduce_text = "\n\n".join(partials) + reduce_sys = _system_prompt(dom, phase="reduce", output_format=output_format, length=length) + reduce_input = _compose_user_payload(reduce_text, extra_context, output_format, length=length, tone=tone, target_slides=target_slides) + old_budget = os.environ.get('HF_MAX_NEW_TOKENS') + os.environ['HF_MAX_NEW_TOKENS'] = str(token_budget) + try: + if backend == "ollama": + async for s in _stream_with_ollama(reduce_input, system_text=reduce_sys, output_format=output_format): + yield s + else: + async for s in _stream_with_hf(reduce_input, system_text=reduce_sys, output_format=output_format): + yield s + finally: + if old_budget is None: + os.environ.pop('HF_MAX_NEW_TOKENS', None) + else: + os.environ['HF_MAX_NEW_TOKENS'] = old_budget + + +# ========================================================= +# H1 제거 유틸 (저장 전 위생 처리) +# ========================================================= def _strip_top_level_h1_outside_code(s: str) -> str: - """Remove top-level H1 lines (lines starting with '# ') outside of code fences. - Preserves content inside ```code fences```. - """ if not s: return s parts = re.split(r'(```[\s\S]*?```)', s) for i in range(0, len(parts), 2): - # only operate on non-code parts (even indices) parts[i] = re.sub(r'(?m)^[ \t]*#\s+.*\n?', '', parts[i]) return ''.join(parts) + + +# ========================================================= +# (엔드포인트에서 재사용) 슬라이드/정규화 헬퍼 +# ========================================================= +def count_slides(md: str) -> int: + if not md: + return 0 + return len(re.findall(r'(?mi)^###\s*슬라이드\s*\d+', md)) + +def normalize_and_renumber_slides(md: str) -> str: + """'# 슬라이드1' 같은 난형식도 '### 슬라이드 n: 제목'으로 통일하고 번호 재배열.""" + if not md: + return md + lines = md.splitlines() + out = [] + has_section = any(re.match(r'(?mi)^##\s*슬라이드\s*$', ln.strip()) for ln in lines) + inserted_section = False + + slide_idx = 0 + header_pat = re.compile(r'(?mi)^\s*#{1,3}\s*슬라이드\s*(\d+)?\s*[::]?\s*(.*)$') + + for ln in lines: + m = header_pat.match(ln.strip()) + if m: + if not has_section and not inserted_section: + out.append("## 슬라이드") + out.append("") + inserted_section = True + slide_idx += 1 + title = (m.group(2) or "").strip() + if not title: + title = "요약" + out.append(f"### 슬라이드 {slide_idx}: {title}") + else: + out.append(ln) + + if (not has_section) and (slide_idx == 0): + return md + return "\n".join(out) From 98bea6922e35b53fa433f795b5af0dfb6a50aefe Mon Sep 17 00:00:00 2001 From: Junseo1026 Date: Mon, 27 Oct 2025 16:40:14 +0900 Subject: [PATCH 2/2] 1027 1640 --- logs/hf_summary.log | 10 +++ routers/note.py | 150 ++++++++++++++++++++++++++++---------------- 2 files changed, 106 insertions(+), 54 deletions(-) create mode 100644 logs/hf_summary.log diff --git a/logs/hf_summary.log b/logs/hf_summary.log new file mode 100644 index 0000000..36aa330 --- /dev/null +++ b/logs/hf_summary.log @@ -0,0 +1,10 @@ + +[2025-10-27T04:49:15.827939] ▶ HF Request to Qwen2.5-7B-Instruct +Payload preview: 다음 텍스트를 자세하고 명확하게 한국어로 요약해주세요. 결과는 Markdown 형식으로 작성하고, '## 요약', '## 핵심 요점', '## 슬라이드 요약', '## 상세 설명' 섹션을 반드시 포함하세요. + +# 슬라이드1 + +학생 여러분! 안녕하세요 건국대학교 컴퓨터공학과 이철원 교수입니다 자, 이번 시간에는 사 다시 일주차 파이썬 프로그래밍 리스트의 이해와 활용이라는 주제로 여러분을 만나뵙게 되었습니다 지금까지 우리는 변수와 기본적인 자료형, 각종 연산자들에 대해 배웠죠? 이제 파이썬의 꽃이라고 할 수 있는 '자료 구조' 중 하나... +Response status: 404 +Response text preview: Not Found +-------------------------------------------------------------------------------- diff --git a/routers/note.py b/routers/note.py index f0ab49a..41ff3e8 100644 --- a/routers/note.py +++ b/routers/note.py @@ -325,81 +325,123 @@ def toggle_favorite( base_url = os.getenv("BASE_API_URL") or str(request.base_url).rstrip('/') return serialize_note(db, note, base_url) - -# ───────────────────────────────────────────── -# 요약 (동기, 긴 문서 완전 지원) -# ───────────────────────────────────────────── # ───────────────────────────────────────────── -# 요약 (HF 비활성 환경 대응 - TextRank 기반) +# 요약 (로컬 Qwen 모델 기반, ChatGPT 스타일 자연요약) # ───────────────────────────────────────────── @router.post("/notes/{note_id}/summarize_sync", response_model=NoteResponse) async def summarize_sync( note_id: int, - domain: str | None = Query(default=None, description="요약 도메인"), - longdoc: bool = Query(default=True, description="긴 문서 모드"), db: Session = Depends(get_db), - user = Depends(get_current_user) + user=Depends(get_current_user) ): """ - ✅ HF_DISABLED 환경에서도 작동하는 진짜 요약 버전. - - TextRank 기반 문장 중요도 요약 - - TL;DR, 핵심 요점, 슬라이드 구조 유지 - - 기존 CRUD, 퀴즈 등 기능 영향 없음 + ✅ ChatGPT 스타일 요약 + 요약 완료 후 메모리 해제 """ + import torch import numpy as np + import gc + from transformers import AutoTokenizer, AutoModelForCausalLM from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity - # 1️⃣ 노트 조회 note = db.query(Note).filter(Note.id == note_id, Note.user_id == user.u_id).first() if not note or not (note.content or "").strip(): raise HTTPException(status_code=404, detail="요약 대상 없음") - text = (note.content or "").strip() - if len(text) < 100: + source = note.content.strip() + if len(source) < 50: raise HTTPException(status_code=400, detail="본문이 너무 짧습니다.") - # 2️⃣ 문장 분리 - sentences = re.split(r"(?<=[.!?。])\s+|\n+", text) - sentences = [s.strip() for s in sentences if len(s.strip()) > 10] - if len(sentences) < 3: - final_summary = _fallback_extractive_summary(text) - else: + full_summary = "" + failed = False + + try: + print("[summarize_sync] 🚀 Qwen2.5-7B-Instruct 로드 중...") + model_name = "Qwen/Qwen2.5-7B-Instruct" + tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float16, + device_map="auto", + trust_remote_code=True + ) + + messages = [ + { + "role": "system", + "content": ( + "당신은 전문적인 과학기술 문서 요약가입니다. " + "텍스트를 자연스럽고 명확하게 요약하세요. " + "결과는 Markdown 형식으로 작성하고, 다음 구조를 유지하세요:\n\n" + "## 요약\n\n" + "## 핵심 요점\n\n" + "## 상세 설명\n" + ), + }, + { + "role": "user", + "content": f"아래 내용을 ChatGPT처럼 깔끔하고 자연스럽게 요약해줘:\n\n{source}", + }, + ] + + inputs = tokenizer.apply_chat_template( + messages, + tokenize=True, + add_generation_prompt=True, + return_tensors="pt", + return_dict=True, + ).to(model.device) + + print("[summarize_sync] 🧠 요약 생성 중...") + with torch.no_grad(): + outputs = model.generate(**inputs, max_new_tokens=1500, temperature=0.4, top_p=0.9) + generated = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True) + full_summary = generated.strip() + + print("[summarize_sync] ✅ 요약 완료") + + except Exception as e: + print(f"[summarize_sync] ❌ 모델 요약 실패: {e}") + failed = True + + finally: + # ✅ 메모리 해제 try: - # 3️⃣ TextRank 요약 수행 - vectorizer = TfidfVectorizer() - tfidf = vectorizer.fit_transform(sentences) - sim = cosine_similarity(tfidf) - scores = np.sum(sim, axis=1) - top_n = max(3, int(len(sentences) * 0.15)) - top_idx = np.argsort(scores)[-top_n:] - top_idx = sorted(top_idx) - key_sents = [sentences[i] for i in top_idx] - - # 4️⃣ 섹션 구성 - tldr = " ".join(key_sents[:3]) - bullets = "\n".join(f"- {s}" for s in key_sents[:8]) - slides = [] - for i, s in enumerate(key_sents, 1): - slides.append(f"### 슬라이드 {i}\n- {s}") - - final_summary = f"""## TL;DR -{tldr} - -## 핵심 요점 -{bullets} - -## 슬라이드 요약 -{chr(10).join(slides)} - -## 상세 설명 -이 요약은 HuggingFace API 없이 TextRank 기반 TF-IDF 알고리즘으로 생성되었습니다. -중복 문장은 제거되었고, 중요한 문장만 남겨 핵심을 압축했습니다. -""" + del model + del tokenizer + gc.collect() + if torch.cuda.is_available(): + torch.cuda.empty_cache() + print("[summarize_sync] 🧹 모델 메모리 해제 완료") + except Exception as e: + print(f"[summarize_sync] ⚠️ 메모리 해제 실패: {e}") + + # ─────────────── + # Fallback (TextRank) + # ─────────────── + if failed or not full_summary: + print("[summarize_sync] ⚠️ TextRank 백업 사용") + try: + sents = re.split(r"(?<=[.!?。])\s+|\n+", source) + sents = [s.strip() for s in sents if len(s.strip()) > 10] + if len(sents) < 3: + full_summary = _fallback_extractive_summary(source) + else: + vec = TfidfVectorizer() + tfidf = vec.fit_transform(sents) + sim = cosine_similarity(tfidf) + scores = np.sum(sim, axis=1) + top_n = max(3, int(len(sents) * 0.2)) + top_idx = np.argsort(scores)[-top_n:] + key_sents = [sents[i] for i in sorted(top_idx)] + bullets = "\n".join(f"- {s}" for s in key_sents[:5]) + full_summary = f"## 요약\n{' '.join(key_sents[:2])}\n\n## 핵심 요점\n{bullets}\n\n## 상세 설명\n이 요약은 TextRank 기반 로컬 요약입니다." except Exception: - final_summary = _fallback_extractive_summary(text) + full_summary = _fallback_extractive_summary(source) - # 5️⃣ 저장 + # ─────────────── + # DB 저장 + # ─────────────── title = (note.title or "").strip() + " — 요약" if len(title) > 255: title = title[:255] @@ -408,7 +450,7 @@ async def summarize_sync( user_id=user.u_id, folder_id=note.folder_id, title=title, - content=final_summary, + content=full_summary, ) db.add(new_note) db.commit()