File size: 5,394 Bytes
67a99cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3
"""
Comprehensive analysis of the actual LanceDB database contents
"""

import sys
import os
import traceback
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

try:
    import lancedb
    import pandas as pd
    from pathlib import Path
    
    def _print_sample_rows(table, count, sample_size=3):
        """Print a short preview of the first rows of ``table``.

        Args:
            table: An opened LanceDB table; only ``head()`` is called on it.
            count: Row count of ``table``; caps and labels the sample.
            sample_size: Maximum number of rows to preview.
        """
        shown = min(sample_size, count)
        rows = table.head(shown).to_pylist()

        print(f"   πŸ“„ Sample documents ({shown}/{count}):")

        for index, row in enumerate(rows, start=1):
            print(f"      Document {index}:")

            # Content preview: bodies longer than 200 characters are cut
            # and marked with an ellipsis.
            if 'content' in row:
                text = str(row['content'])
                preview = text[:200] + "..." if len(text) > 200 else text
                print(f"         Content: {preview}")

            if 'filename' in row:
                print(f"         Filename: {row['filename']}")

            # Remaining fields: skip the ones printed above, the 'vector'
            # and 'id' columns, and any falsy value.
            for key, value in row.items():
                if key not in ('content', 'filename', 'vector', 'id') and value:
                    print(f"         {key}: {str(value)[:100]}")
            print()

    def analyze_lancedb_contents(db_path="./lancedb_data"):
        """Print a report on every table in a LanceDB database.

        For each table the report shows the row count, the column names and
        a preview of up to three rows, followed by a summary of the total
        number of rows across all tables.

        Args:
            db_path: Location handed to ``lancedb.connect``. Defaults to
                ``"./lancedb_data"``.

        Returns:
            The sum of the row counts of all tables that could be read, or
            0 when connecting, listing the tables, or any other step outside
            the per-table handling raises.
        """
        print("πŸ” LanceDB Database Analysis")
        print("=" * 60)

        try:
            db = lancedb.connect(db_path)
            table_names = db.table_names()

            print(f"πŸ“Š Found {len(table_names)} tables: {table_names}")
            print()

            # Row counts of the tables that were read successfully. The
            # summary reuses these instead of re-opening tables, so a table
            # that already failed cannot abort the summary.
            row_counts = {}

            for table_name in table_names:
                print(f"πŸ“‹ Table: {table_name}")
                print("-" * 40)

                try:
                    table = db.open_table(table_name)
                    count = table.count_rows()
                    row_counts[table_name] = count

                    print(f"   πŸ“Š Total rows: {count}")

                    if count > 0:
                        # Schema and sample are best-effort: a failure is
                        # reported and the rest of the report continues.
                        try:
                            schema = table.schema
                            print(f"   πŸ“ Columns: {[field.name for field in schema]}")
                        except Exception as e:
                            print(f"      ⚠️ Could not read schema: {e}")

                        try:
                            _print_sample_rows(table, count)
                        except Exception as e:
                            print(f"      ⚠️ Could not read sample data: {e}")

                    print()

                except Exception as e:
                    print(f"   ❌ Error reading table {table_name}: {e}")
                    print()

            total_documents = sum(row_counts.values())

            if total_documents > 100:
                size_label = 'LARGE'
            elif total_documents > 10:
                size_label = 'MEDIUM'
            else:
                size_label = 'SMALL'

            print("=" * 60)
            print("🎯 SUMMARY:")
            print(f"   Total Documents Across All Tables: {total_documents}")
            print(f"   Database Size: {size_label}")

            # Call out the two tables of special interest when their counts
            # are available.
            highlighted = (
                ('rajasthan_documents', 'Voice Bot Documents'),
                ('documents', 'General Documents'),
            )
            for name, label in highlighted:
                if name in row_counts:
                    print(f"   {label}: {row_counts[name]} ({name} table)")

            print()
            print("πŸ€– Voice Bot Analysis:")
            if total_documents >= 1000:
                print("   βœ… YES - Voice bot has access to 1000+ documents!")
            elif total_documents >= 100:
                print("   ⚠️ PARTIAL - Voice bot has substantial documents but less than 1000")
            elif total_documents >= 10:
                print("   ⚠️ LIMITED - Voice bot has moderate document access")
            else:
                print("   ❌ MINIMAL - Voice bot has very limited document access")

            return total_documents

        except Exception as e:
            print(f"❌ Error connecting to database: {e}")
            traceback.print_exc()
            return 0
    
    if __name__ == "__main__":
        # Script entry point: run the full report, then restate the grand
        # total returned by the analysis as a closing line.
        document_total = analyze_lancedb_contents()
        print(f"\n🎯 Final Answer: Your voice bot has access to {document_total} documents")
        
except ImportError as e:
    # An import inside the guarded section failed; the hint below names the
    # two third-party packages that section imports.
    print(f"❌ Missing dependencies: {e}")
    print("Please install: pip install lancedb pandas")
except Exception as e:
    # Last-resort handler for any other exception escaping the guarded
    # section: report it together with the full traceback.
    print(f"❌ Unexpected error: {e}")
    traceback.print_exc()