Spaces:
Sleeping
Sleeping
Enhanced startup logging to show actual document count (23K+ docs) instead of just 7 sample docs
67a99cd
| """ | |
| Enhanced startup logging to show actual document usage | |
| """ | |
| import logging | |
| from lancedb_service import lancedb_service | |
| logger = logging.getLogger("app") | |
async def log_document_status() -> int:
    """Log comprehensive document status during startup.

    Lists the database tables once, then for each of ``rajasthan_documents``
    and ``documents`` that exists logs its row count. Afterwards logs the
    combined total, a coverage rating derived from that total, and a preview
    (filename plus the first 100 characters of content) of up to three rows
    from ``rajasthan_documents``.

    The function contains no ``await``; every database call runs
    synchronously on the caller's event loop.

    Returns:
        The combined row count of the tables that exist. ``0`` is returned
        when listing, opening, or counting the tables raises. A failure while
        building the sample preview does not affect the returned count.
    """
    raj_table = None
    raj_count = 0
    try:
        logger.info("π Document Database Status Check...")
        total_documents = 0

        # Ask the database for its table list a single time and reuse it for
        # every membership test below.
        table_names = lancedb_service.db.table_names()

        # Check rajasthan_documents table (main voice bot documents)
        if "rajasthan_documents" in table_names:
            raj_table = lancedb_service.db.open_table("rajasthan_documents")
            raj_count = raj_table.count_rows()
            total_documents += raj_count
            logger.info(f"ποΈ Rajasthan Documents: {raj_count:,} (Voice Bot Primary Source)")

        # Check general documents table
        if "documents" in table_names:
            doc_table = lancedb_service.db.open_table("documents")
            doc_count = doc_table.count_rows()
            total_documents += doc_count
            logger.info(f"π General Documents: {doc_count:,}")

        # Summary
        logger.info(f"π― TOTAL AVAILABLE DOCUMENTS: {total_documents:,}")
        if total_documents >= 1000:
            logger.info("β Voice Bot has EXCELLENT document coverage (1000+ docs)")
        elif total_documents >= 100:
            logger.info("β οΈ Voice Bot has GOOD document coverage (100+ docs)")
        elif total_documents >= 10:
            logger.info("β οΈ Voice Bot has LIMITED document coverage (<100 docs)")
        else:
            logger.info("β Voice Bot has MINIMAL document coverage")
    except Exception as e:
        # Startup boundary: report the failure (with traceback) and fall back
        # to 0 instead of propagating.
        logger.exception(f"β Error checking document status: {e}")
        return 0

    # Show sample document topics if available. The preview is informational
    # only, so it gets its own handler: a malformed row must not turn an
    # already-computed total into 0.
    if raj_table is not None and raj_count > 0:
        try:
            sample = raj_table.head(3).to_pylist()
            logger.info("π Sample document topics available:")
            for i, doc in enumerate(sample, 1):
                filename = doc.get('filename', 'Unknown')
                # `or ''` covers rows where the content key is present but
                # holds None, which cannot be sliced.
                content_preview = (doc.get('content') or '')[:100] + "..."
                logger.info(f" {i}. {filename}: {content_preview}")
        except Exception as e:
            logger.warning(f"Could not log sample document topics: {e}")

    return total_documents