File size: 6,735 Bytes
6880cd9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f393828
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Dict, Any, Optional
import logging
import asyncio
from .pdf_processor import PDFProcessor
from .summarizer import BookSummarizer

# Configure root logging at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application object; the route decorators below register on it.
app = FastAPI(
    title="Book Summarizer API",
    description="AI-powered book summarization service",
    version="1.0.0"
)

# CORS: every origin, method and header is currently allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is
# generally discouraged for credentialed requests -- confirm the intended
# frontend origin(s) and restrict this list before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify your frontend URL
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level singletons shared by every request handler in this file.
# The summarizer's model is loaded later, in the startup event handler.
pdf_processor = PDFProcessor()
summarizer = BookSummarizer()

# Pydantic models
# Tunable options for the /summarize endpoint. The handler forwards
# max_length, min_length, chunk_size and overlap unchanged to
# summarizer.summarize_book().
class SummaryRequest(BaseModel):
    # Upper / lower bounds passed to the summarizer as max_length / min_length.
    max_length: int = 150
    min_length: int = 50
    # Chunking parameters passed to the summarizer.
    # NOTE(review): units (characters vs. tokens) are not visible here -- confirm.
    chunk_size: int = 1000
    overlap: int = 100
    # When set (truthy), /summarize calls summarizer.change_model() with this
    # name before summarizing; None keeps the current model.
    model_name: Optional[str] = None

# Response shape with the success / summary / statistics / message keys that
# /summarize returns (that handler additionally returns "original_statistics").
# NOTE(review): no handler visible in this file declares this class as its
# response_model -- confirm whether it is used elsewhere.
class SummaryResponse(BaseModel):
    success: bool
    summary: str
    statistics: Dict[str, Any]
    message: str

@app.on_event("startup")
async def startup_event():
    """Load the summarization model when the application starts.

    Startup is best-effort: if ``summarizer.load_model()`` raises, the error
    is logged together with its traceback and the application keeps starting,
    so the remaining endpoints (including ``/health``) stay reachable.
    """
    logger.info("Starting Book Summarizer API...")
    try:
        # Load the summarization model
        summarizer.load_model()
    except Exception as e:
        # Deliberately not re-raised (best-effort startup). logger.exception
        # records the full traceback, which a plain error message would lose.
        logger.exception("Error during startup: %s", e)
    else:
        logger.info("API startup completed successfully")

@app.get("/")
async def root():
    """Return a short identification payload for the API root."""
    service_info = dict(
        message="Book Summarizer API",
        version="1.0.0",
        status="running",
    )
    return service_info

@app.get("/health")
async def health_check():
    """Report service health and whether ``summarizer.summarizer`` is set."""
    # The model counts as loaded once the summarizer holds a non-None pipeline.
    model_ready = summarizer.summarizer is not None
    return {"status": "healthy", "model_loaded": model_ready}

@app.post("/upload-pdf")
async def upload_pdf(file: UploadFile = File(...)):
    """
    Upload and validate a PDF file.

    Args:
        file: The uploaded file; its name must end in ``.pdf``
            (case-insensitive).

    Returns:
        A dict with ``success``, ``filename``, ``size_mb``, ``pages``,
        ``metadata`` and ``message`` keys.

    Raises:
        HTTPException: 400 if the filename is missing or not ``.pdf``, or if
            ``pdf_processor.validate_pdf`` rejects the content; 500 on any
            unexpected error.
    """
    # Check file type. An upload can arrive without a filename; treat that as
    # an unsupported file (400) instead of failing on ``None.lower()`` (500).
    filename = file.filename or ""
    if not filename.lower().endswith('.pdf'):
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    try:
        # Read file content
        content = await file.read()

        # Validate PDF
        validation_result = pdf_processor.validate_pdf(content)
        if not validation_result['valid']:
            raise HTTPException(status_code=400, detail=validation_result['message'])

        # Extract metadata
        metadata = pdf_processor.get_pdf_metadata(content)

        return {
            "success": True,
            "filename": file.filename,
            "size_mb": validation_result['size_mb'],
            "pages": validation_result['pages'],
            "metadata": metadata,
            "message": "PDF uploaded and validated successfully"
        }

    except HTTPException:
        # Client errors raised above must pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error uploading PDF: %s", e)
        raise HTTPException(status_code=500, detail=f"Error processing PDF: {str(e)}") from e

@app.post("/extract-text")
async def extract_text(file: UploadFile = File(...)):
    """
    Extract text from uploaded PDF.

    Args:
        file: The uploaded file whose bytes are handed to
            ``pdf_processor.extract_text_from_pdf``.

    Returns:
        A dict with ``success``, ``text_length`` (length of the extracted
        text), ``statistics``, ``pages`` and ``message`` keys. The extracted
        text itself is not returned.

    Raises:
        HTTPException: 400 if extraction reports failure; 500 on any
            unexpected error.
    """
    try:
        # Read file content
        content = await file.read()

        # Extract text
        result = pdf_processor.extract_text_from_pdf(content)
        if not result['success']:
            raise HTTPException(status_code=400, detail=result['message'])

        return {
            "success": True,
            "text_length": len(result['text']),
            "statistics": result['statistics'],
            "pages": result['pages'],
            "message": result['message']
        }

    except HTTPException:
        # Client errors raised above must pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error extracting text: %s", e)
        raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}") from e

@app.post("/summarize")
async def summarize_book(
    file: UploadFile = File(...),
    request: SummaryRequest = SummaryRequest()
):
    """
    Summarize a book from uploaded PDF.

    Args:
        file: The uploaded PDF.
        request: Summarization options; defaults are used when omitted.
            NOTE(review): this combines a file upload with a pydantic body
            model -- confirm that clients can actually supply these options
            alongside a multipart upload.

    Returns:
        A dict with ``success``, ``summary``, ``statistics``,
        ``original_statistics`` and ``message`` keys.

    Raises:
        HTTPException: 400 if the options are inconsistent or text extraction
            fails; 500 if summarization fails or on any unexpected error.
    """
    # Reject inconsistent options before doing any expensive work, so the
    # caller gets a clear 400 rather than an opaque failure further down.
    if request.max_length <= 0 or not 0 <= request.min_length <= request.max_length:
        raise HTTPException(
            status_code=400,
            detail="max_length must be positive and min_length must be between 0 and max_length"
        )
    if request.chunk_size <= 0 or not 0 <= request.overlap < request.chunk_size:
        raise HTTPException(
            status_code=400,
            detail="chunk_size must be positive and overlap must be between 0 and chunk_size - 1"
        )

    try:
        # Read file content
        content = await file.read()

        # Extract text
        extraction_result = pdf_processor.extract_text_from_pdf(content)
        if not extraction_result['success']:
            raise HTTPException(status_code=400, detail=extraction_result['message'])

        # Change model if specified.
        # NOTE(review): this mutates the shared module-level summarizer, and
        # unlike the /change-model endpoint it does not call load_model()
        # afterwards -- confirm that change_model() alone is sufficient here.
        if request.model_name:
            summarizer.change_model(request.model_name)

        # Summarize the book
        summary_result = summarizer.summarize_book(
            text=extraction_result['text'],
            chunk_size=request.chunk_size,
            overlap=request.overlap,
            max_length=request.max_length,
            min_length=request.min_length
        )

        if not summary_result['success']:
            raise HTTPException(status_code=500, detail=summary_result.get('error', 'Summarization failed'))

        return {
            "success": True,
            "summary": summary_result['summary'],
            "statistics": summary_result['statistics'],
            "original_statistics": extraction_result['statistics'],
            "message": "Book summarized successfully"
        }

    except HTTPException:
        # Errors raised deliberately above must pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error summarizing book: %s", e)
        raise HTTPException(status_code=500, detail=f"Error summarizing book: {str(e)}") from e

@app.get("/models")
async def get_available_models():
    """
    Get list of available summarization models.

    Returns:
        A dict with ``success``, ``models`` (whatever
        ``summarizer.get_available_models()`` returns) and ``current_model``
        (``summarizer.model_name``).

    Raises:
        HTTPException: 500 if the summarizer raises.
    """
    try:
        models = summarizer.get_available_models()
        return {
            "success": True,
            "models": models,
            "current_model": summarizer.model_name
        }
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error getting models: %s", e)
        raise HTTPException(status_code=500, detail=f"Error getting models: {str(e)}") from e

@app.post("/change-model")
async def change_model(model_name: str):
    """
    Change the summarization model.

    Args:
        model_name: Name of the model to switch to; must not be blank.

    Returns:
        A dict with ``success``, ``message`` and ``current_model`` keys.

    Raises:
        HTTPException: 400 if ``model_name`` is blank; 500 if switching or
            loading the model fails.
    """
    # A blank name can never identify a model; report it as a client error
    # instead of letting the summarizer fail with an opaque 500.
    if not model_name or not model_name.strip():
        raise HTTPException(status_code=400, detail="model_name must not be empty")

    try:
        summarizer.change_model(model_name)
        summarizer.load_model()

        return {
            "success": True,
            "message": f"Model changed to {model_name}",
            "current_model": model_name
        }
    except Exception as e:
        # logger.exception keeps the traceback for diagnosis.
        logger.exception("Error changing model: %s", e)
        raise HTTPException(status_code=500, detail=f"Error changing model: {str(e)}") from e

if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 8000.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)