import os import streamlit as st from typing import Dict, Any from api.pdf_processor import PDFProcessor from api.summarizer import BookSummarizer DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "sshleifer/distilbart-cnn-12-6") AVAILABLE_MODELS = BookSummarizer(DEFAULT_MODEL).get_available_models() st.set_page_config( page_title="Book Summarizer", page_icon="📚", layout="wide", initial_sidebar_state="expanded", ) @st.cache_resource def get_pdf_processor() -> PDFProcessor: return PDFProcessor() @st.cache_resource def get_summarizer(model_name: str) -> BookSummarizer: summarizer = BookSummarizer(model_name=model_name) summarizer.load_model() return summarizer def summarize_pdf( uploaded_file, model_name: str, max_length: int, min_length: int, chunk_size: int, overlap: int, ) -> Dict[str, Any]: pdf_bytes = uploaded_file.getvalue() processor = get_pdf_processor() validation = processor.validate_pdf(pdf_bytes) if not validation["valid"]: raise ValueError(validation["message"]) metadata = processor.get_pdf_metadata(pdf_bytes) extraction = processor.extract_text_from_pdf(pdf_bytes) if not extraction["success"]: raise RuntimeError(extraction["message"]) summarizer = get_summarizer(model_name) summary_result = summarizer.summarize_book( text=extraction["text"], chunk_size=chunk_size, overlap=overlap, max_length=max_length, min_length=min_length, ) if not summary_result["success"]: raise RuntimeError(summary_result.get("error", "Summarization failed")) return { "metadata": metadata, "validation": validation, "extraction": extraction, "summary": summary_result, } def sidebar_controls(): st.header("Settings") model_names = [m["name"] for m in AVAILABLE_MODELS] model_descriptions = {m["name"]: m["description"] for m in AVAILABLE_MODELS} selected_model = st.selectbox( "Model", model_names, index=model_names.index(DEFAULT_MODEL) if DEFAULT_MODEL in model_names else 0, help="Free, locally run Hugging Face models. First run downloads weights.", ) st.caption(model_descriptions.get(selected_model, "")) max_length = st.slider( "Maximum summary length (words)", min_value=50, max_value=250, value=140, step=10, ) min_length_limit = min(120, max_length - 10) min_length = st.slider( "Minimum summary length (words)", min_value=20, max_value=min_length_limit, value=min(50, max_length - 20), step=5, ) chunk_size = st.slider( "Chunk size (characters)", min_value=600, max_value=2000, value=1200, step=50, help="Longer chunks preserve context but take longer.", ) overlap = st.slider( "Chunk overlap (characters)", min_value=50, max_value=300, value=120, step=10, ) return { "model": selected_model, "max_length": max_length, "min_length": min_length, "chunk_size": chunk_size, "overlap": overlap, } def show_file_info(uploaded_file): size_mb = len(uploaded_file.getvalue()) / (1024 * 1024) st.info(f"Selected: **{uploaded_file.name}** ({size_mb:.1f} MB)") def show_results(result: Dict[str, Any]): summary_text = result["summary"]["summary"] stats = result["summary"]["statistics"] original_stats = result["extraction"]["statistics"] st.success("Summary ready!") col1, col2, col3, col4 = st.columns(4) col1.metric("Pages", result["validation"]["pages"]) col2.metric("Original words", f"{original_stats.get('total_words', 0):,}") col3.metric("Summary words", f"{stats.get('final_summary_length', 0):,}") compression = stats.get("overall_compression_ratio", 0) col4.metric("Compression", f"{compression:.1%}" if compression else "N/A") st.subheader("Summary") st.text_area("Generated summary", value=summary_text, height=400, label_visibility="collapsed") st.download_button( label="Download summary", data=summary_text.encode("utf-8"), file_name=f"{result['metadata'].get('title', 'summary').replace(' ', '_')}.txt", mime="text/plain", ) st.subheader("Book snapshot") preview = result["extraction"]["text"][:1500] if len(result["extraction"]["text"]) > 1500: preview += " ..." st.text_area("First 1500 characters", value=preview, height=220, label_visibility="collapsed") def main(): st.title("📚 AI-Powered Book Summarizer") st.write( "Upload a PDF (under 50MB) to generate a concise summary locally with free, open models. " "No paid API keys required—first run will download model weights." ) st.divider() with st.sidebar: controls = sidebar_controls() uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"]) if uploaded_file: show_file_info(uploaded_file) if st.button("Generate summary", type="primary"): with st.spinner("Extracting text and generating summary..."): try: result = summarize_pdf( uploaded_file=uploaded_file, model_name=controls["model"], max_length=controls["max_length"], min_length=controls["min_length"], chunk_size=controls["chunk_size"], overlap=controls["overlap"], ) show_results(result) except Exception as exc: st.error(f"Could not summarize this PDF: {exc}") else: st.info("Upload a small/medium PDF to get started. Scans or image-only PDFs will not work well.") if __name__ == "__main__": main()