#!/usr/bin/env python3
"""
Comprehensive analysis of the actual LanceDB database contents
"""

import os
import sys
import traceback
from pathlib import Path

# Make sibling modules importable when the script is run from another directory.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

try:
    import lancedb
    import pandas as pd
except ImportError as e:
    # Keep the module importable; the entry points check for ``None``.
    lancedb = None
    print(f"❌ Missing dependencies: {e}")
    print("Please install: pip install lancedb pandas")
except Exception as e:
    lancedb = None
    print(f"❌ Unexpected error: {e}")
    traceback.print_exc()


# Report-formatting limits.
_MAX_SAMPLE_ROWS = 3
_CONTENT_PREVIEW_CHARS = 200
_FIELD_PREVIEW_CHARS = 100

# Fields that are either printed separately or deliberately left out of the
# generic "other fields" listing.
_HIDDEN_FIELDS = frozenset({"content", "filename", "vector", "id"})


def _preview(value, limit):
    """Return ``str(value)`` cut to ``limit`` chars, with "..." if truncated."""
    text = str(value)
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _size_label(total_documents):
    """Classify a document count as LARGE (>100), MEDIUM (>10) or SMALL."""
    if total_documents > 100:
        return "LARGE"
    if total_documents > 10:
        return "MEDIUM"
    return "SMALL"


def _print_sample_rows(table, count):
    """Print up to ``_MAX_SAMPLE_ROWS`` rows taken from the head of ``table``."""
    sample_size = min(_MAX_SAMPLE_ROWS, count)
    sample_data = table.head(sample_size).to_pylist()
    print(f" 📄 Sample documents ({sample_size}/{count}):")

    for number, row in enumerate(sample_data, start=1):
        print(f" Document {number}:")

        if 'content' in row:
            print(f" Content: {_preview(row['content'], _CONTENT_PREVIEW_CHARS)}")

        if 'filename' in row:
            print(f" Filename: {row['filename']}")

        # Remaining fields; falsy values (None, "", 0, ...) are skipped.
        for key, value in row.items():
            if key not in _HIDDEN_FIELDS and value:
                print(f" {key}: {str(value)[:_FIELD_PREVIEW_CHARS]}")
        print()


def _report_table(table):
    """Print row count, column names and sample rows; return the row count."""
    count = table.count_rows()
    print(f" 📊 Total rows: {count}")

    if count > 0:
        # Schema and sample output are best-effort: a failure in either is
        # reported but must not prevent the table from being counted.
        try:
            schema = table.schema
            print(f" 📝 Columns: {[field.name for field in schema]}")
        except Exception as e:
            print(f" ⚠️ Could not read schema: {e}")

        try:
            _print_sample_rows(table, count)
        except Exception as e:
            print(f" ⚠️ Could not read sample data: {e}")

    return count


def _print_voice_bot_verdict(total_documents):
    """Print the verdict line matching the total document count."""
    print("🤖 Voice Bot Analysis:")
    if total_documents >= 1000:
        print(" ✅ YES - Voice bot has access to 1000+ documents!")
    elif total_documents >= 100:
        print(" ⚠️ PARTIAL - Voice bot has substantial documents but less than 1000")
    elif total_documents >= 10:
        print(" ⚠️ LIMITED - Voice bot has moderate document access")
    else:
        print(" ❌ MINIMAL - Voice bot has very limited document access")


def analyze_lancedb_contents(db_path: str = "./lancedb_data") -> int:
    """Analyze the actual contents of the LanceDB database.

    Prints a per-table report (row count, columns, a few sample rows)
    followed by a summary, all on stdout.

    Args:
        db_path: Location passed to ``lancedb.connect``. Defaults to the
            ``./lancedb_data`` directory relative to the working directory.

    Returns:
        The number of rows summed over every table that could be read, or 0
        when the dependencies are missing or the database cannot be opened.
        Errors are printed rather than raised.
    """
    if lancedb is None:
        print("❌ Missing dependencies: lancedb is not available")
        print("Please install: pip install lancedb pandas")
        return 0

    print("🔍 LanceDB Database Analysis")
    print("=" * 60)

    try:
        db = lancedb.connect(db_path)
        table_names = db.table_names()

        print(f"📊 Found {len(table_names)} tables: {table_names}")
        print()

        # Row count per successfully read table; reused by the summary so
        # tables are not opened and counted a second time.
        row_counts = {}

        for table_name in table_names:
            print(f"📋 Table: {table_name}")
            print("-" * 40)

            try:
                row_counts[table_name] = _report_table(db.open_table(table_name))
            except Exception as e:
                print(f" ❌ Error reading table {table_name}: {e}")
            print()

        total_documents = sum(row_counts.values())

        print("=" * 60)
        print("🎯 SUMMARY:")
        print(f" Total Documents Across All Tables: {total_documents}")
        print(f" Database Size: {_size_label(total_documents)}")

        # Check specifically for voice bot usage
        if 'rajasthan_documents' in row_counts:
            print(
                f" Voice Bot Documents: {row_counts['rajasthan_documents']}"
                " (rajasthan_documents table)"
            )

        if 'documents' in row_counts:
            print(f" General Documents: {row_counts['documents']} (documents table)")

        print()
        _print_voice_bot_verdict(total_documents)

        return total_documents

    except Exception as e:
        print(f"❌ Error connecting to database: {e}")
        traceback.print_exc()
        return 0


# Only run the report when the dependency import above succeeded; otherwise
# the failure has already been reported.
if __name__ == "__main__" and lancedb is not None:
    try:
        total = analyze_lancedb_contents()
        print(f"\n🎯 Final Answer: Your voice bot has access to {total} documents")
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        traceback.print_exc()