# PensionBot / document_status_logger.py
# ChAbhishek28's picture
# Enhanced startup logging to show actual document count (23K+ docs) instead of just 7 sample docs
# 67a99cd
"""
Enhanced startup logging to show actual document usage
"""
import logging
from lancedb_service import lancedb_service
# Module-level logger; it is looked up under the fixed name "app" rather than
# this module's __name__, so records are emitted through the "app" logger.
logger = logging.getLogger("app")
def _log_sample_topics(raj_table):
    """Log the filename and a 100-character content preview for up to three rows.

    This is best-effort decoration for the startup log: any failure while
    reading or formatting the sample rows is logged as a warning and swallowed,
    so a malformed row cannot invalidate a document count that the caller has
    already computed.
    """
    try:
        sample = raj_table.head(3).to_pylist()
        logger.info("πŸ“„ Sample document topics available:")
        for i, doc in enumerate(sample, 1):
            filename = doc.get('filename', 'Unknown')
            # `.get(key, default)` only falls back when the key is missing; a row
            # whose `content` is present but None would break the slice, and a
            # non-string value would break the "+" below, hence `or ''` and `str`.
            content_preview = str(doc.get('content') or '')[:100] + "..."
            logger.info(f" {i}. {filename}: {content_preview}")
    except Exception as e:
        logger.warning(f"Could not load sample document topics: {e}")


async def log_document_status():
    """Log comprehensive document status during startup.

    Counts the rows of the ``rajasthan_documents`` and ``documents`` tables
    (whichever exist), logs each count, the combined total and a coverage
    rating, then logs a short preview of up to three ``rajasthan_documents``
    rows.

    Returns:
        The combined row count of the tables that exist, or 0 if listing,
        opening or counting the tables raised an exception (the error is
        logged, not propagated). A failure in the sample preview alone does
        not affect the returned count.
    """
    # NOTE(review): several log prefixes below look like mis-decoded UTF-8
    # emoji; they are kept byte-for-byte here - confirm the file's encoding.
    raj_table = None
    raj_count = 0
    try:
        logger.info("πŸ“Š Document Database Status Check...")
        db = lancedb_service.db
        # List the tables once and reuse the result for every existence check.
        table_names = db.table_names()
        total_documents = 0

        # Check rajasthan_documents table (main voice bot documents)
        if "rajasthan_documents" in table_names:
            raj_table = db.open_table("rajasthan_documents")
            raj_count = raj_table.count_rows()
            total_documents += raj_count
            logger.info(f"πŸ›οΈ Rajasthan Documents: {raj_count:,} (Voice Bot Primary Source)")

        # Check general documents table
        if "documents" in table_names:
            doc_count = db.open_table("documents").count_rows()
            total_documents += doc_count
            logger.info(f"πŸ“š General Documents: {doc_count:,}")

        # Summary
        logger.info(f"🎯 TOTAL AVAILABLE DOCUMENTS: {total_documents:,}")
        if total_documents >= 1000:
            logger.info("βœ… Voice Bot has EXCELLENT document coverage (1000+ docs)")
        elif total_documents >= 100:
            logger.info("⚠️ Voice Bot has GOOD document coverage (100+ docs)")
        elif total_documents >= 10:
            logger.info("⚠️ Voice Bot has LIMITED document coverage (<100 docs)")
        else:
            logger.info("❌ Voice Bot has MINIMAL document coverage")
    except Exception as e:
        logger.error(f"❌ Error checking document status: {e}")
        return 0

    # Show sample document topics if available. This runs outside the counting
    # try-block so that a preview failure cannot turn a valid total into 0; the
    # already-opened table and its count are reused instead of being re-fetched.
    if raj_table is not None and raj_count > 0:
        _log_sample_topics(raj_table)
    return total_documents