Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Check the number of documents in the LanceDB database | |
| """ | |
| import asyncio | |
| import logging | |
| from lancedb_service import lancedb_service | |
# Named logger used by every message this script emits.
logger = logging.getLogger("check_docs")
# Configure the root logger at INFO level (the script logs at INFO and ERROR).
logging.basicConfig(level=logging.INFO)
async def check_document_count():
    """Log per-table and total row counts for LanceDB, then smoke-test search.

    Reads the connection object from ``lancedb_service.db``, lists its tables
    and logs each table's ``count_rows()`` result followed by the sum over all
    tables that could be counted. A table that fails to open or count is
    logged and skipped; it contributes nothing to the total.

    Afterwards a fixed list of sample queries is passed to
    ``rag_service.search_documents_async`` (with ``limit=3``) and the number of
    returned documents is logged for each query.

    Database calls go through ``asyncio.to_thread`` so they execute in a
    worker thread instead of on the event loop thread.

    Returns:
        None. Everything is reported through the module-level ``logger``.

    Raises:
        No ``Exception`` subclass escapes: failures are logged with their
        traceback and the coroutine returns normally.
    """
    # NOTE(review): the leading glyphs in the log messages ("π", "β") look
    # like mis-encoded emoji. They are runtime output, so they are kept as-is
    # here -- confirm the intended characters against the original file.
    try:
        logger.info("π Checking LanceDB document count...")

        # Get all tables in the database
        db = lancedb_service.db
        table_names = await asyncio.to_thread(db.table_names)
        logger.info(f"π Found {len(table_names)} tables in database:")

        total_documents = 0
        for table_name in table_names:
            try:
                # Pass the callable and its argument directly instead of a
                # lambda that closes over the loop variable.
                table = await asyncio.to_thread(db.open_table, table_name)
                count = await asyncio.to_thread(table.count_rows)
                logger.info(f" π {table_name}: {count} documents")
                total_documents += count
            except Exception as e:
                # Best effort: one unreadable table must not stop the scan.
                # logger.exception keeps the traceback for diagnosis.
                logger.exception(f" β Error checking table {table_name}: {e}")

        logger.info(f"π Total documents across all tables: {total_documents}")

        # Also check if we can search the main documents table
        logger.info("\nπ Testing document search...")
        # Imported here rather than at module top: a failing import is then
        # caught by the enclosing handler, after the counts above have
        # already been logged.
        from rag_service import search_documents_async

        test_queries = ["pension", "salary", "leave", "training"]
        for query in test_queries:
            docs = await search_documents_async(query, limit=3)
            # A falsy result (None or empty) is reported as zero documents.
            logger.info(f" Query '{query}': Found {len(docs) if docs else 0} documents")
    except Exception as e:
        # Top-level boundary of a diagnostic script: log and finish normally.
        logger.exception(f"β Error checking documents: {e}")
if __name__ == "__main__":
    # Script entry point: run the check coroutine to completion.
    asyncio.run(check_document_count())