#!/usr/bin/env python3
"""
Check the number of documents in the LanceDB database
"""

import asyncio
import logging

from lancedb_service import lancedb_service

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("check_docs")


async def check_document_count():
    """Log the row count of every LanceDB table, then run a few test searches.

    The coroutine:

    1. Lists the tables exposed by ``lancedb_service.db`` and logs each
       table's ``count_rows()`` result plus the running total. A failure on
       one table is logged and does not stop the remaining tables.
    2. Calls ``rag_service.search_documents_async`` with a handful of fixed
       queries (``limit=3``) and logs how many results each one returned.

    The LanceDB calls are dispatched through ``asyncio.to_thread`` so they do
    not run on the event-loop thread.

    Returns:
        None. All results are reported through the ``check_docs`` logger.

    Raises:
        Nothing. Any exception is caught and logged with its traceback, so
        the script is a best-effort diagnostic.
    """
    try:
        logger.info("🔍 Checking LanceDB document count...")

        # Get all tables in the database
        db = lancedb_service.db
        table_names = await asyncio.to_thread(db.table_names)

        logger.info(f"📊 Found {len(table_names)} tables in database:")

        total_documents = 0
        for table_name in table_names:
            # Per-table guard: one unreadable table must not hide the counts
            # of the others.
            try:
                # Pass the callable and its argument straight to to_thread
                # instead of wrapping them in a lambda that closes over the
                # loop variable.
                table = await asyncio.to_thread(db.open_table, table_name)
                count = await asyncio.to_thread(table.count_rows)
                logger.info(f" 📄 {table_name}: {count} documents")
                total_documents += count
            except Exception as e:
                # logger.exception keeps the traceback that logger.error drops.
                logger.exception(f" ❌ Error checking table {table_name}: {e}")

        logger.info(f"📈 Total documents across all tables: {total_documents}")

        # Also check if we can search the main documents table
        logger.info("\n🔍 Testing document search...")

        # Imported here rather than at module top, so an import failure in
        # rag_service is caught by the handler below and only surfaces after
        # the table counts have been reported.
        from rag_service import search_documents_async

        test_queries = ["pension", "salary", "leave", "training"]
        for query in test_queries:
            docs = await search_documents_async(query, limit=3)
            # A falsy result (None or empty) is reported as zero hits.
            logger.info(f" Query '{query}': Found {len(docs) if docs else 0} documents")

    except Exception as e:
        # Top-level boundary of a diagnostic script: log with traceback and
        # return normally instead of crashing.
        logger.exception(f"❌ Error checking documents: {e}")


if __name__ == "__main__":
    asyncio.run(check_document_count())