Spaces:
Sleeping
Sleeping
File size: 1,838 Bytes
224c593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
#!/usr/bin/env python3
"""
Check the number of documents in the LanceDB database
"""
import asyncio
import logging
from lancedb_service import lancedb_service
# Set up logging: configure the root logger at INFO level, then create the
# named "check_docs" logger that check_document_count() reports through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("check_docs")
async def check_document_count():
    """Log per-table row counts for the LanceDB database, then smoke-test search.

    Lists every table exposed by ``lancedb_service.db``, logs each table's
    row count and the grand total, and finally runs a fixed set of queries
    through ``rag_service.search_documents_async`` and logs how many
    documents each query returned.

    Error handling is best-effort: a failure on a single table is logged
    (with traceback) and that table is skipped, so the remaining tables are
    still counted. Any other ``Exception`` is logged (with traceback) and
    swallowed rather than propagated to the caller.

    Returns:
        None. All results are reported through the module-level ``logger``.
    """
    try:
        logger.info("π Checking LanceDB document count...")

        # Get all tables in the database. The database calls are handed to
        # asyncio.to_thread so they run in a worker thread (presumably they
        # are blocking calls -- confirm against lancedb_service).
        db = lancedb_service.db
        table_names = await asyncio.to_thread(db.table_names)
        logger.info(f"π Found {len(table_names)} tables in database:")

        total_documents = 0
        for table_name in table_names:
            try:
                # Pass the callable and its argument directly instead of
                # wrapping them in a lambda that closes over the loop variable.
                table = await asyncio.to_thread(db.open_table, table_name)
                count = await asyncio.to_thread(table.count_rows)
                logger.info(f" π {table_name}: {count} documents")
                total_documents += count
            except Exception as e:
                # Keep going with the other tables; logger.exception records
                # the traceback alongside the original message.
                logger.exception(f" β Error checking table {table_name}: {e}")

        logger.info(f"π Total documents across all tables: {total_documents}")

        # Also check if we can search the main documents table
        logger.info("\nπ Testing document search...")
        # Imported here rather than at module top, as in the original; an
        # import failure is therefore caught and logged by the handler below.
        from rag_service import search_documents_async

        test_queries = ["pension", "salary", "leave", "training"]
        for query in test_queries:
            docs = await search_documents_async(query, limit=3)
            # A falsy result (e.g. None or an empty list) is reported as 0.
            logger.info(f" Query '{query}': Found {len(docs) if docs else 0} documents")
    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback instead of raising out of the coroutine.
        logger.exception(f"β Error checking documents: {e}")
if __name__ == "__main__":
    # Script entry point: run the coroutine to completion on a new event loop.
    asyncio.run(check_document_count())