ND06-25 committed on
Commit
be5f84f
·
1 Parent(s): effc43f
Files changed (7) hide show
  1. Dockerfile +25 -0
  2. api/utils.py +3 -3
  3. app.py +184 -301
  4. requirements.txt +1 -7
  5. start.bat +8 -17
  6. start.py +23 -122
  7. start.sh +9 -19
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # System deps for PDF parsing
4
+ RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ build-essential \
6
+ poppler-utils \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ WORKDIR /app
10
+
11
+ # Install Python deps first for better cache hits
12
+ COPY requirements.txt /app/requirements.txt
13
+ RUN pip install --no-cache-dir -r requirements.txt
14
+
15
+ # Copy application code
16
+ COPY . /app
17
+
18
+ # Default model can be overridden in Space settings
19
+ ENV DEFAULT_MODEL=t5-small
20
+ ENV PORT=7860
21
+
22
+ EXPOSE 7860
23
+
24
+ # Start Streamlit on the expected port/interface
25
+ CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
api/utils.py CHANGED
@@ -1,5 +1,4 @@
1
  import re
2
- import nltk
3
  from typing import List, Dict, Any
4
  import logging
5
 
@@ -113,7 +112,8 @@ def get_text_statistics(text: str) -> Dict[str, Any]:
113
  Get basic statistics about the text.
114
  """
115
  words = text.split()
116
- sentences = nltk.sent_tokenize(text)
 
117
 
118
  return {
119
  'total_characters': len(text),
@@ -121,4 +121,4 @@ def get_text_statistics(text: str) -> Dict[str, Any]:
121
  'total_sentences': len(sentences),
122
  'average_words_per_sentence': len(words) / len(sentences) if sentences else 0,
123
  'estimated_reading_time_minutes': len(words) / 200 # Average reading speed
124
- }
 
1
  import re
 
2
  from typing import List, Dict, Any
3
  import logging
4
 
 
112
  Get basic statistics about the text.
113
  """
114
  words = text.split()
115
+ # Lightweight sentence split to avoid NLTK downloads
116
+ sentences = [s.strip() for s in re.split(r'(?<=[\.\!\?])\s+', text) if s.strip()]
117
 
118
  return {
119
  'total_characters': len(text),
 
121
  'total_sentences': len(sentences),
122
  'average_words_per_sentence': len(words) / len(sentences) if sentences else 0,
123
  'estimated_reading_time_minutes': len(words) / 200 # Average reading speed
124
+ }
app.py CHANGED
@@ -1,314 +1,197 @@
 
1
  import streamlit as st
2
- import requests
3
- import json
4
- import time
5
- from typing import Dict, Any, Optional
6
- import io
 
 
 
7
 
8
- # Page configuration
9
  st.set_page_config(
10
- page_title="Book Summarizer AI",
11
  page_icon="πŸ“š",
12
  layout="wide",
13
- initial_sidebar_state="expanded"
14
  )
15
 
16
- # API configuration
17
- API_BASE_URL = "http://localhost:8000"
18
 
19
- def main():
20
- # Custom CSS for better styling
21
- st.markdown("""
22
- <style>
23
- .main-header {
24
- font-size: 3rem;
25
- font-weight: bold;
26
- text-align: center;
27
- color: #1f77b4;
28
- margin-bottom: 2rem;
29
- }
30
- .sub-header {
31
- font-size: 1.5rem;
32
- color: #666;
33
- text-align: center;
34
- margin-bottom: 2rem;
35
- }
36
- .success-box {
37
- background-color: #d4edda;
38
- border: 1px solid #c3e6cb;
39
- border-radius: 5px;
40
- padding: 1rem;
41
- margin: 1rem 0;
42
- }
43
- .error-box {
44
- background-color: #f8d7da;
45
- border: 1px solid #f5c6cb;
46
- border-radius: 5px;
47
- padding: 1rem;
48
- margin: 1rem 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  }
50
- .info-box {
51
- background-color: #d1ecf1;
52
- border: 1px solid #bee5eb;
53
- border-radius: 5px;
54
- padding: 1rem;
55
- margin: 1rem 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
57
- </style>
58
- """, unsafe_allow_html=True)
59
-
60
- # Header
61
- st.markdown('<h1 class="main-header">πŸ“š Book Summarizer AI</h1>', unsafe_allow_html=True)
62
- st.markdown('<p class="sub-header">Transform your PDF books into intelligent summaries using AI</p>', unsafe_allow_html=True)
63
-
64
- # Sidebar
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  with st.sidebar:
66
- st.header("βš™οΈ Settings")
67
-
68
- # Model selection
69
- st.subheader("AI Model")
70
- try:
71
- models_response = requests.get(f"{API_BASE_URL}/models")
72
- if models_response.status_code == 200:
73
- models_data = models_response.json()
74
- models = models_data.get('models', [])
75
- current_model = models_data.get('current_model', '')
76
-
77
- model_names = [model['name'] for model in models]
78
- selected_model = st.selectbox(
79
- "Choose AI Model",
80
- model_names,
81
- index=model_names.index(current_model) if current_model in model_names else 0
82
- )
83
-
84
- # Show model description
85
- selected_model_info = next((m for m in models if m['name'] == selected_model), None)
86
- if selected_model_info:
87
- st.info(f"**{selected_model_info['description']}**")
88
- else:
89
- st.error("Failed to load models")
90
- selected_model = "facebook/bart-large-cnn"
91
- except Exception as e:
92
- st.error(f"Error loading models: {str(e)}")
93
- selected_model = "facebook/bart-large-cnn"
94
-
95
- # Summary settings
96
- st.subheader("Summary Settings")
97
- max_length = st.slider("Maximum Summary Length", 50, 500, 150, help="Maximum number of words in the summary")
98
- min_length = st.slider("Minimum Summary Length", 10, 200, 50, help="Minimum number of words in the summary")
99
-
100
- # Advanced settings
101
- with st.expander("Advanced Settings"):
102
- chunk_size = st.slider("Chunk Size", 500, 2000, 1000, help="Size of text chunks for processing")
103
- overlap = st.slider("Chunk Overlap", 50, 200, 100, help="Overlap between text chunks")
104
-
105
- # API status
106
- st.subheader("API Status")
107
- try:
108
- health_response = requests.get(f"{API_BASE_URL}/health")
109
- if health_response.status_code == 200:
110
- st.success("βœ… API Connected")
111
- else:
112
- st.error("❌ API Error")
113
- except:
114
- st.error("❌ API Unavailable")
115
-
116
- # Main content
117
- tab1, tab2, tab3 = st.tabs(["πŸ“– Summarize Book", "πŸ“Š Text Analysis", "ℹ️ About"])
118
-
119
- with tab1:
120
- st.header("πŸ“– Book Summarization")
121
-
122
- # File upload
123
- uploaded_file = st.file_uploader(
124
- "Choose a PDF book file",
125
- type=['pdf'],
126
- help="Upload a PDF file (max 50MB)"
127
- )
128
-
129
- if uploaded_file is not None:
130
- # File info
131
- file_size = len(uploaded_file.getvalue()) / (1024 * 1024) # MB
132
- st.info(f"πŸ“„ **File:** {uploaded_file.name} ({file_size:.1f} MB)")
133
-
134
- # Validate file
135
- if st.button("πŸ” Validate PDF", type="secondary"):
136
- with st.spinner("Validating PDF..."):
137
- try:
138
- files = {"file": uploaded_file.getvalue()}
139
- response = requests.post(f"{API_BASE_URL}/upload-pdf", files=files)
140
-
141
- if response.status_code == 200:
142
- data = response.json()
143
- st.success(f"βœ… {data['message']}")
144
-
145
- # Display metadata
146
- metadata = data.get('metadata', {})
147
- col1, col2, col3 = st.columns(3)
148
- with col1:
149
- st.metric("Pages", data['pages'])
150
- with col2:
151
- st.metric("Size", f"{data['size_mb']:.1f} MB")
152
- with col3:
153
- st.metric("Title", metadata.get('title', 'Unknown'))
154
- else:
155
- st.error(f"❌ Validation failed: {response.json().get('detail', 'Unknown error')}")
156
- except Exception as e:
157
- st.error(f"❌ Error: {str(e)}")
158
-
159
- # Summarize button
160
- if st.button("πŸš€ Generate Summary", type="primary"):
161
- if uploaded_file is not None:
162
- with st.spinner("Processing your book..."):
163
- try:
164
- # Prepare request
165
- files = {"file": uploaded_file.getvalue()}
166
- data = {
167
- "max_length": max_length,
168
- "min_length": min_length,
169
- "chunk_size": chunk_size,
170
- "overlap": overlap,
171
- "model_name": selected_model
172
- }
173
-
174
- # Send request
175
- response = requests.post(f"{API_BASE_URL}/summarize", files=files, data=data)
176
-
177
- if response.status_code == 200:
178
- result = response.json()
179
-
180
- # Display success message
181
- st.success("βœ… Summary generated successfully!")
182
-
183
- # Display statistics
184
- col1, col2, col3, col4 = st.columns(4)
185
- stats = result.get('statistics', {})
186
- orig_stats = result.get('original_statistics', {})
187
-
188
- with col1:
189
- st.metric("Original Words", f"{orig_stats.get('total_words', 0):,}")
190
- with col2:
191
- st.metric("Summary Words", f"{stats.get('final_summary_length', 0):,}")
192
- with col3:
193
- compression = stats.get('overall_compression_ratio', 0)
194
- st.metric("Compression", f"{compression:.1%}")
195
- with col4:
196
- st.metric("Chunks Processed", stats.get('total_chunks', 0))
197
-
198
- # Display summary
199
- st.subheader("πŸ“ Generated Summary")
200
- summary = result.get('summary', '')
201
- st.text_area(
202
- "Summary",
203
- value=summary,
204
- height=400,
205
- disabled=True
206
- )
207
-
208
- # Download button
209
- summary_bytes = summary.encode('utf-8')
210
- st.download_button(
211
- label="πŸ“₯ Download Summary",
212
- data=summary_bytes,
213
- file_name=f"{uploaded_file.name.replace('.pdf', '')}_summary.txt",
214
- mime="text/plain"
215
- )
216
-
217
- else:
218
- error_msg = response.json().get('detail', 'Unknown error')
219
- st.error(f"❌ Summarization failed: {error_msg}")
220
-
221
- except Exception as e:
222
- st.error(f"❌ Error: {str(e)}")
223
-
224
- with tab2:
225
- st.header("πŸ“Š Text Analysis")
226
-
227
- if uploaded_file is not None:
228
- if st.button("πŸ“Š Analyze Text"):
229
- with st.spinner("Analyzing text..."):
230
- try:
231
- files = {"file": uploaded_file.getvalue()}
232
- response = requests.post(f"{API_BASE_URL}/extract-text", files=files)
233
-
234
- if response.status_code == 200:
235
- data = response.json()
236
- stats = data.get('statistics', {})
237
-
238
- # Display statistics
239
- col1, col2, col3, col4 = st.columns(4)
240
-
241
- with col1:
242
- st.metric("Total Words", f"{stats.get('total_words', 0):,}")
243
- with col2:
244
- st.metric("Total Sentences", f"{stats.get('total_sentences', 0):,}")
245
- with col3:
246
- st.metric("Avg Words/Sentence", f"{stats.get('average_words_per_sentence', 0):.1f}")
247
- with col4:
248
- st.metric("Reading Time", f"{stats.get('estimated_reading_time_minutes', 0):.1f} min")
249
-
250
- # Text preview
251
- st.subheader("πŸ“„ Text Preview")
252
- text_response = requests.post(f"{API_BASE_URL}/extract-text", files=files)
253
- if text_response.status_code == 200:
254
- text_data = text_response.json()
255
- preview_text = text_data.get('text', '')[:1000] + "..." if len(text_data.get('text', '')) > 1000 else text_data.get('text', '')
256
- st.text_area("First 1000 characters:", value=preview_text, height=200, disabled=True)
257
- else:
258
- st.error(f"❌ Analysis failed: {response.json().get('detail', 'Unknown error')}")
259
- except Exception as e:
260
- st.error(f"❌ Error: {str(e)}")
261
- else:
262
- st.info("πŸ“„ Please upload a PDF file to analyze its text.")
263
-
264
- with tab3:
265
- st.header("ℹ️ About")
266
-
267
- st.markdown("""
268
- ## πŸ€– Book Summarizer AI
269
-
270
- This application uses advanced AI models to automatically summarize PDF books.
271
- It processes the text in chunks and generates comprehensive summaries while
272
- maintaining the key information and context.
273
-
274
- ### ✨ Features
275
-
276
- - **PDF Text Extraction**: Advanced PDF processing with fallback methods
277
- - **AI Summarization**: State-of-the-art transformer models
278
- - **Configurable Settings**: Adjust summary length and processing parameters
279
- - **Multiple Models**: Choose from different AI models for various use cases
280
- - **Text Analysis**: Detailed statistics about the book content
281
-
282
- ### πŸ› οΈ Technology Stack
283
-
284
- - **Frontend**: Streamlit
285
- - **Backend**: FastAPI
286
- - **AI Models**: Hugging Face Transformers (BART, T5)
287
- - **PDF Processing**: PyPDF2, pdfplumber
288
- - **Text Processing**: NLTK
289
-
290
- ### πŸ“‹ How It Works
291
-
292
- 1. **Upload**: Select a PDF book file (max 50MB)
293
- 2. **Extract**: The system extracts and cleans text from the PDF
294
- 3. **Chunk**: Large texts are split into manageable chunks
295
- 4. **Summarize**: AI models process each chunk and generate summaries
296
- 5. **Combine**: Individual summaries are combined into a final summary
297
- 6. **Download**: Get your summary in text format
298
-
299
- ### πŸš€ Getting Started
300
-
301
- 1. Make sure the API server is running (`uvicorn api.main:app --reload`)
302
- 2. Upload a PDF book file
303
- 3. Configure your preferred settings
304
- 4. Click "Generate Summary" and wait for processing
305
- 5. Download your AI-generated summary
306
-
307
- ### πŸ“ž Support
308
-
309
- For issues or questions, please check the API documentation at `/docs`
310
- when the server is running.
311
- """)
312
 
313
  if __name__ == "__main__":
314
- main()
 
1
+ import os
2
  import streamlit as st
3
+ from typing import Dict, Any
4
+
5
+ from api.pdf_processor import PDFProcessor
6
+ from api.summarizer import BookSummarizer
7
+
8
+ DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "sshleifer/distilbart-cnn-12-6")
9
+ AVAILABLE_MODELS = BookSummarizer(DEFAULT_MODEL).get_available_models()
10
+
11
 
 
12
  st.set_page_config(
13
+ page_title="Book Summarizer",
14
  page_icon="πŸ“š",
15
  layout="wide",
16
+ initial_sidebar_state="expanded",
17
  )
18
 
 
 
19
 
20
@st.cache_resource
def get_pdf_processor() -> PDFProcessor:
    """Create the PDF processor; ``st.cache_resource`` keeps one shared instance."""
    processor = PDFProcessor()
    return processor
23
+
24
+
25
@st.cache_resource
def get_summarizer(model_name: str) -> BookSummarizer:
    """Return a ``BookSummarizer`` for ``model_name`` with its model loaded.

    The function is wrapped in ``st.cache_resource``, so the construction and
    ``load_model()`` call are cached per ``model_name`` argument.
    """
    loaded = BookSummarizer(model_name=model_name)
    loaded.load_model()
    return loaded
30
+
31
+
32
def summarize_pdf(
    uploaded_file,
    model_name: str,
    max_length: int,
    min_length: int,
    chunk_size: int,
    overlap: int,
) -> Dict[str, Any]:
    """Validate, extract and summarize an uploaded PDF.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PDF bytes.
        model_name: Name passed to ``get_summarizer``.
        max_length: Forwarded to ``summarize_book`` as ``max_length``.
        min_length: Forwarded to ``summarize_book`` as ``min_length``.
        chunk_size: Forwarded to ``summarize_book`` as ``chunk_size``.
        overlap: Forwarded to ``summarize_book`` as ``overlap``.

    Returns:
        A dict with the keys ``metadata``, ``validation``, ``extraction`` and
        ``summary`` holding the raw results of each processing step.

    Raises:
        ValueError: If the validation result is not marked ``valid``.
        RuntimeError: If text extraction or summarization reports failure.
    """
    raw_pdf = uploaded_file.getvalue()
    pdf_tools = get_pdf_processor()

    # Step 1: refuse anything the processor does not consider a valid PDF.
    check = pdf_tools.validate_pdf(raw_pdf)
    if not check["valid"]:
        raise ValueError(check["message"])

    # Step 2: collect metadata, then pull the text out.
    info = pdf_tools.get_pdf_metadata(raw_pdf)
    extracted = pdf_tools.extract_text_from_pdf(raw_pdf)
    if not extracted["success"]:
        raise RuntimeError(extracted["message"])

    # Step 3: summarize the extracted text with the selected model.
    outcome = get_summarizer(model_name).summarize_book(
        text=extracted["text"],
        chunk_size=chunk_size,
        overlap=overlap,
        max_length=max_length,
        min_length=min_length,
    )
    if not outcome["success"]:
        raise RuntimeError(outcome.get("error", "Summarization failed"))

    return dict(
        metadata=info,
        validation=check,
        extraction=extracted,
        summary=outcome,
    )
70
+
71
+
72
def sidebar_controls() -> Dict[str, Any]:
    """Render the settings widgets and return the chosen values.

    Returns:
        A dict with the keys ``model``, ``max_length``, ``min_length``,
        ``chunk_size`` and ``overlap``.
    """
    st.header("Settings")

    model_names = [m["name"] for m in AVAILABLE_MODELS]
    model_descriptions = {m["name"]: m["description"] for m in AVAILABLE_MODELS}

    selected_model = st.selectbox(
        "Model",
        model_names,
        index=model_names.index(DEFAULT_MODEL) if DEFAULT_MODEL in model_names else 0,
        help="Free, locally run Hugging Face models. First run downloads weights.",
    )
    st.caption(model_descriptions.get(selected_model, ""))

    max_length = st.slider(
        "Maximum summary length (words)",
        min_value=50,
        max_value=250,
        value=140,
        step=10,
    )
    # The minimum-length slider is capped below the chosen maximum. The
    # original bound this cap with an unparenthesized walrus inside the
    # keyword argument, which is a SyntaxError; the name was never used,
    # so the plain expression is passed instead.
    min_length = st.slider(
        "Minimum summary length (words)",
        min_value=20,
        max_value=min(120, max_length - 10),
        value=min(50, max_length - 20),
        step=5,
    )

    chunk_size = st.slider(
        "Chunk size (characters)",
        min_value=600,
        max_value=2000,
        value=1200,
        step=50,
        help="Longer chunks preserve context but take longer.",
    )
    overlap = st.slider(
        "Chunk overlap (characters)",
        min_value=50,
        max_value=300,
        value=120,
        step=10,
    )

    return {
        "model": selected_model,
        "max_length": max_length,
        "min_length": min_length,
        "chunk_size": chunk_size,
        "overlap": overlap,
    }
124
+
125
+
126
def show_file_info(uploaded_file):
    """Show the uploaded file's name and its size in megabytes in an info box."""
    byte_count = len(uploaded_file.getvalue())
    size_mb = byte_count / (1024 * 1024)
    st.info(f"Selected: **{uploaded_file.name}** ({size_mb:.1f} MB)")
129
+
130
+
131
def show_results(result: Dict[str, Any]):
    """Render the summary, headline metrics, a download button and a text preview.

    Args:
        result: Dict with the keys ``summary``, ``extraction``, ``validation``
            and ``metadata``, as returned by ``summarize_pdf``.
    """
    summary_text = result["summary"]["summary"]
    stats = result["summary"]["statistics"]
    original_stats = result["extraction"]["statistics"]

    st.success("Summary ready!")

    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Pages", result["validation"]["pages"])
    col2.metric("Original words", f"{original_stats.get('total_words', 0):,}")
    col3.metric("Summary words", f"{stats.get('final_summary_length', 0):,}")
    compression = stats.get("overall_compression_ratio", 0)
    col4.metric("Compression", f"{compression:.1%}" if compression else "N/A")

    st.subheader("Summary")
    st.text_area("Generated summary", value=summary_text, height=400, label_visibility="collapsed")

    # ``dict.get`` only applies its default when the key is absent. A title
    # that is present but None would crash on ``.replace`` and an empty one
    # would produce ".txt", so fall back to "summary" for any falsy title.
    # NOTE(review): assumes the title may be None/empty for PDFs without
    # metadata - confirm against PDFProcessor.get_pdf_metadata.
    title = result["metadata"].get("title") or "summary"
    st.download_button(
        label="Download summary",
        data=summary_text.encode("utf-8"),
        file_name=f"{str(title).replace(' ', '_')}.txt",
        mime="text/plain",
    )

    st.subheader("Book snapshot")
    preview = result["extraction"]["text"][:1500]
    if len(result["extraction"]["text"]) > 1500:
        preview += " ..."
    st.text_area("First 1500 characters", value=preview, height=220, label_visibility="collapsed")
160
+
161
+
162
def main():
    """Lay out the page: intro text, sidebar settings, uploader and summary flow."""
    st.title("πŸ“š AI-Powered Book Summarizer")
    st.write(
        "Upload a PDF (under 50MB) to generate a concise summary locally with free, open models. "
        "No paid API keys requiredβ€”first run will download model weights."
    )

    st.divider()

    with st.sidebar:
        controls = sidebar_controls()

    uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

    # Nothing uploaded yet: show the hint and stop here.
    if not uploaded_file:
        st.info("Upload a small/medium PDF to get started. Scans or image-only PDFs will not work well.")
        return

    show_file_info(uploaded_file)
    if not st.button("Generate summary", type="primary"):
        return

    with st.spinner("Extracting text and generating summary..."):
        try:
            result = summarize_pdf(
                uploaded_file=uploaded_file,
                model_name=controls["model"],
                max_length=controls["max_length"],
                min_length=controls["min_length"],
                chunk_size=controls["chunk_size"],
                overlap=controls["overlap"],
            )
            show_results(result)
        except Exception as exc:
            # UI boundary: report any failure to the user instead of crashing.
            st.error(f"Could not summarize this PDF: {exc}")
194
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
  if __name__ == "__main__":
197
+ main()
requirements.txt CHANGED
@@ -1,12 +1,6 @@
1
  streamlit==1.28.1
2
- fastapi==0.104.1
3
- uvicorn==0.24.0
4
- python-multipart==0.0.6
5
  PyPDF2==3.0.1
6
  pdfplumber==0.10.3
7
  transformers==4.35.2
8
  torch>=2.2.0
9
- nltk==3.8.1
10
- requests==2.31.0
11
- python-dotenv==1.0.0
12
- pydantic==2.5.0
 
1
  streamlit==1.28.1
 
 
 
2
  PyPDF2==3.0.1
3
  pdfplumber==0.10.3
4
  transformers==4.35.2
5
  torch>=2.2.0
6
+ sentencepiece>=0.1.99
 
 
 
start.bat CHANGED
@@ -1,32 +1,23 @@
1
  @echo off
2
- echo πŸ“š Book Summarizer AI - Windows Startup
3
- echo ======================================
4
 
5
- echo.
6
- echo πŸ”§ Checking Python installation...
7
  python --version >nul 2>&1
8
  if errorlevel 1 (
9
- echo ❌ Python is not installed or not in PATH
10
- echo Please install Python from https://python.org
11
  pause
12
  exit /b 1
13
  )
14
 
15
- echo βœ… Python found
16
-
17
- echo.
18
- echo πŸ“¦ Installing dependencies...
19
  pip install -r requirements.txt
20
  if errorlevel 1 (
21
- echo ❌ Failed to install dependencies
22
  pause
23
  exit /b 1
24
  )
25
 
26
- echo βœ… Dependencies installed
27
-
28
- echo.
29
- echo πŸš€ Starting Book Summarizer AI...
30
- python start.py
31
 
32
- pause
 
1
  @echo off
2
+ echo Book Summarizer - Windows Startup
3
+ echo ================================
4
 
 
 
5
  python --version >nul 2>&1
6
  if errorlevel 1 (
7
+ echo Python is not installed or not in PATH.
 
8
  pause
9
  exit /b 1
10
  )
11
 
12
+ echo Installing dependencies (if needed)...
 
 
 
13
  pip install -r requirements.txt
14
  if errorlevel 1 (
15
+ echo Failed to install dependencies.
16
  pause
17
  exit /b 1
18
  )
19
 
20
+ echo Launching Streamlit...
21
+ python -m streamlit run app.py --server.port 8501 --server.address 0.0.0.0
 
 
 
22
 
23
+ pause
start.py CHANGED
@@ -1,135 +1,36 @@
1
  #!/usr/bin/env python3
2
  """
3
- Startup script for Book Summarizer AI
4
- This script helps you start both the FastAPI backend and Streamlit frontend.
5
  """
6
 
7
  import subprocess
8
  import sys
9
- import time
10
- import requests
11
- import os
12
- from pathlib import Path
13
 
14
- def check_dependencies():
15
- """Check if required packages are installed."""
16
- required_packages = [
17
- 'streamlit', 'fastapi', 'uvicorn', 'transformers',
18
- 'torch', 'PyPDF2', 'pdfplumber', 'nltk'
19
- ]
20
-
21
- missing_packages = []
22
- for package in required_packages:
23
- try:
24
- __import__(package)
25
- except ImportError:
26
- missing_packages.append(package)
27
-
28
- if missing_packages:
29
- print("❌ Missing required packages:")
30
- for package in missing_packages:
31
- print(f" - {package}")
32
- print("\nπŸ“¦ Install them with: pip install -r requirements.txt")
33
- return False
34
-
35
- print("βœ… All dependencies are installed")
36
- return True
37
 
38
- def download_nltk_data():
39
- """Download required NLTK data."""
40
- try:
41
- import nltk
42
- nltk.download('punkt', quiet=True)
43
- nltk.download('stopwords', quiet=True)
44
- print("βœ… NLTK data downloaded")
45
- except Exception as e:
46
- print(f"⚠️ Warning: Could not download NLTK data: {e}")
47
-
48
- def check_api_health():
49
- """Check if the API is running and healthy."""
50
- try:
51
- response = requests.get("http://localhost:8000/health", timeout=5)
52
- return response.status_code == 200
53
- except:
54
- return False
55
-
56
- def start_api():
57
- """Start the FastAPI backend."""
58
- print("πŸš€ Starting FastAPI backend...")
59
-
60
- # Check if API is already running
61
- if check_api_health():
62
- print("βœ… API is already running")
63
- return True
64
-
65
- try:
66
- # Start the API server
67
- api_process = subprocess.Popen([
68
- sys.executable, "-m", "uvicorn",
69
- "api.main:app",
70
- "--reload",
71
- "--port", "8000",
72
- "--host", "0.0.0.0"
73
- ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
74
-
75
- # Wait for API to start
76
- print("⏳ Waiting for API to start...")
77
- for i in range(30): # Wait up to 30 seconds
78
- time.sleep(1)
79
- if check_api_health():
80
- print("βœ… API started successfully")
81
- return True
82
-
83
- print("❌ API failed to start within 30 seconds")
84
- return False
85
-
86
- except Exception as e:
87
- print(f"❌ Error starting API: {e}")
88
- return False
89
-
90
- def start_frontend():
91
- """Start the Streamlit frontend."""
92
- print("🌐 Starting Streamlit frontend...")
93
-
94
  try:
95
- # Start Streamlit
96
- subprocess.run([
97
- sys.executable, "-m", "streamlit", "run", "app.py",
98
- "--server.port", "8501",
99
- "--server.address", "0.0.0.0"
100
- ])
 
 
 
 
 
 
 
 
 
 
 
101
  except KeyboardInterrupt:
102
- print("\nπŸ‘‹ Shutting down...")
103
- except Exception as e:
104
- print(f"❌ Error starting frontend: {e}")
105
 
106
- def main():
107
- """Main startup function."""
108
- print("πŸ“š Book Summarizer AI - Startup")
109
- print("=" * 40)
110
-
111
- # Check dependencies
112
- if not check_dependencies():
113
- sys.exit(1)
114
-
115
- # Download NLTK data
116
- download_nltk_data()
117
-
118
- print("\nπŸ”§ Starting services...")
119
-
120
- # Start API
121
- if not start_api():
122
- print("❌ Failed to start API. Please check the logs.")
123
- sys.exit(1)
124
-
125
- print("\nπŸŽ‰ Ready! Opening the application...")
126
- print("πŸ“– Frontend: http://localhost:8501")
127
- print("πŸ”Œ API: http://localhost:8000")
128
- print("πŸ“š API Docs: http://localhost:8000/docs")
129
- print("\nπŸ’‘ Press Ctrl+C to stop the application")
130
-
131
- # Start frontend
132
- start_frontend()
133
 
134
  if __name__ == "__main__":
135
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ Minimal launcher for the Streamlit book summarizer.
 
4
  """
5
 
6
  import subprocess
7
  import sys
 
 
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def main():
    """Launch ``app.py`` with Streamlit on port 8501, bound to 0.0.0.0.

    Exits with Streamlit's return code if it terminates with an error;
    a keyboard interrupt just prints a short stop message.
    """
    print("Starting Streamlit app...")
    print("If dependencies are missing, install with: pip install -r requirements.txt\n")

    # Run Streamlit through the current interpreter's ``-m`` entry point.
    command = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        "app.py",
        "--server.port",
        "8501",
        "--server.address",
        "0.0.0.0",
    ]

    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as exc:
        print(f"Streamlit exited with an error: {exc}")
        sys.exit(exc.returncode)
    except KeyboardInterrupt:
        print("\nStopping Streamlit...")
 
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  if __name__ == "__main__":
36
+ main()
start.sh CHANGED
@@ -1,28 +1,18 @@
1
  #!/bin/bash
2
 
3
- echo "πŸ“š Book Summarizer AI - Unix/Linux/Mac Startup"
4
- echo "=============================================="
5
 
6
- echo ""
7
- echo "πŸ”§ Checking Python installation..."
8
  if ! command -v python3 &> /dev/null; then
9
- echo "❌ Python 3 is not installed or not in PATH"
10
- echo "Please install Python 3 from https://python.org"
11
  exit 1
12
  fi
13
 
14
- echo "βœ… Python 3 found"
15
-
16
- echo ""
17
- echo "πŸ“¦ Installing dependencies..."
18
- pip3 install -r requirements.txt
19
- if [ $? -ne 0 ]; then
20
- echo "❌ Failed to install dependencies"
21
  exit 1
22
- fi
23
-
24
- echo "βœ… Dependencies installed"
25
 
26
- echo ""
27
- echo "πŸš€ Starting Book Summarizer AI..."
28
- python3 start.py
 
1
  #!/bin/bash
2
 
3
+ echo "Book Summarizer - Startup"
4
+ echo "========================="
5
 
 
 
6
  if ! command -v python3 &> /dev/null; then
7
+ echo "Python 3 is not installed or not in PATH."
 
8
  exit 1
9
  fi
10
 
11
+ echo "Installing dependencies (if needed)..."
12
+ pip3 install -r requirements.txt || {
13
+ echo "Failed to install dependencies."
 
 
 
 
14
  exit 1
15
+ }
 
 
16
 
17
+ echo "Launching Streamlit..."
18
+ python3 -m streamlit run app.py --server.port 8501 --server.address 0.0.0.0