PensionBot / check_documents.py
ChAbhishek28's picture
Add 899999999999999999999999
224c593
#!/usr/bin/env python3
"""
Check the number of documents in the LanceDB database
"""
import asyncio
import logging
from lancedb_service import lancedb_service
# Configure the root logger to emit INFO-level (and above) records, and create
# the named logger that this script reports through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("check_docs")
async def check_document_count():
    """Log per-table and total row counts for LanceDB, then smoke-test search.

    Every table name reported by ``lancedb_service.db`` is opened and counted
    in a worker thread via ``asyncio.to_thread``. A table that cannot be
    opened or counted is logged and skipped, so the total only covers the
    tables that were read successfully. Afterwards a fixed set of sample
    queries is run through ``rag_service.search_documents_async`` and the
    number of hits for each query is logged.

    Returns:
        None. All results are reported through the module-level ``logger``.

    Raises:
        Nothing derived from ``Exception``: failures are logged together with
        their traceback instead of being propagated to the caller.
    """
    try:
        logger.info("🔍 Checking LanceDB document count...")

        # Get all tables in the database.
        db = lancedb_service.db
        table_names = await asyncio.to_thread(db.table_names)
        logger.info("📊 Found %s tables in database:", len(table_names))

        total_documents = 0
        for table_name in table_names:
            try:
                # Pass the callable and its argument to to_thread directly
                # instead of wrapping them in a lambda that closes over the
                # loop variable.
                table = await asyncio.to_thread(db.open_table, table_name)
                count = await asyncio.to_thread(table.count_rows)
            except Exception as e:
                # One unreadable table must not hide the counts of the others.
                logger.exception(
                    " ❌ Error checking table %s: %s", table_name, e
                )
                continue
            logger.info(" 📄 %s: %s documents", table_name, count)
            total_documents += count

        logger.info(
            "📈 Total documents across all tables: %s", total_documents
        )

        # Also check that the documents can actually be searched.
        logger.info("\n🔍 Testing document search...")
        # Imported here rather than at module level: an import failure is then
        # logged by the handler below, after the counts have been reported.
        from rag_service import search_documents_async

        for query in ("pension", "salary", "leave", "training"):
            try:
                docs = await search_documents_async(query, limit=3)
            except Exception as e:
                # Keep going so the remaining sample queries are still tried.
                logger.exception(
                    " ❌ Search failed for query '%s': %s", query, e
                )
                continue
            logger.info(
                " Query '%s': Found %s documents",
                query,
                len(docs) if docs else 0,
            )
    except Exception as e:
        # Top-level boundary of this diagnostic script: log with traceback.
        logger.exception("❌ Error checking documents: %s", e)
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(check_document_count())