File size: 5,454 Bytes
a2ca191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python3
"""
Test Enhanced Search for Pension Rules Query
Demonstrates improved search results for "What are the pension rules?" with 1500+ documents
"""

import asyncio
import logging
import sys
import os

# Configure logging once for the whole script: basicConfig() sets up the root
# logger at INFO level with a compact "LEVEL: message" line format.
# NOTE(review): the functions visible in this script report through print(),
# so `logger` itself is not used by them — confirm whether it is still needed.
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

def _one_line_preview(text, width=150):
    """Return the first ``width`` characters of ``text`` flattened onto one line.

    Newlines inside the slice become spaces, surrounding whitespace is
    stripped, and '...' is appended when ``text`` is longer than ``width``.
    """
    flattened = text[:width].replace('\n', ' ').strip()
    return f"{flattened}{'...' if len(text) > width else ''}"


def _doc_fields(result):
    """Return ``(content, filename)`` for one search hit.

    A missing key and an explicit ``None`` value are treated the same way:
    content falls back to '' and filename to 'Unknown', so the string
    operations done by the caller cannot fail on a null field.
    """
    return result.get('content') or '', result.get('filename') or 'Unknown'


async def test_pension_search():
    """Compare the enhanced search with the original search for one pension query.

    Awaits ``enhanced_search_service.enhanced_pension_search`` and
    ``lancedb_service.search_documents`` with the same query (``limit=5``) and
    prints at most the first three hits of each. Every hit is read through
    ``.get('content')`` / ``.get('filename')``; null or missing values are
    shown as an empty preview / 'Unknown' instead of aborting the run.

    Returns:
        None. All reporting goes to stdout.

    Raises:
        Nothing: an ImportError prints a hint about the working directory and
        any other exception is printed as a test error. A failure of the
        original search alone is reported and the summary is still printed.
    """
    print("πŸ” Testing Enhanced Search for Large Document Collection (1500+ docs)")
    print("=" * 70)

    # Test query that was giving wrong results
    test_query = "What are the pension rules?"

    try:
        # Imported here rather than at module level so that a missing module
        # is reported by the ImportError handler below.
        from enhanced_search_service import enhanced_search_service
        from lancedb_service import lancedb_service

        print(f"πŸ“ Query: '{test_query}'")
        print("πŸ“Š Document collection size: ~1500 documents")
        print()

        # Test enhanced search
        print("πŸš€ Testing Enhanced Search Strategy:")
        print("-" * 40)

        enhanced_results = await enhanced_search_service.enhanced_pension_search(test_query, limit=5)

        if enhanced_results:
            print(f"βœ… Enhanced search found {len(enhanced_results)} relevant documents:")

            for i, result in enumerate(enhanced_results[:3], 1):
                content, filename = _doc_fields(result)

                # Lines mentioning "pension" make the most useful snippet.
                # Such a line is never empty after strip(), so no further
                # emptiness check is needed before printing it.
                pension_lines = [
                    line.strip()
                    for line in content.split('\n')
                    if 'pension' in line.lower()
                ]

                print(f"\n{i}. Document: {filename}")
                if pension_lines:
                    print("   Pension content preview:")
                    for line in pension_lines[:2]:  # Show first 2 pension-related lines
                        print(f"   β€’ {line[:80]}{'...' if len(line) > 80 else ''}")
                else:
                    # Show general content preview
                    print(f"   Content preview: {_one_line_preview(content)}")
        else:
            print("❌ Enhanced search found no results")

        print("\n" + "=" * 70)

        # Test fallback to original search
        print("⚠️  Original Search Strategy (for comparison):")
        print("-" * 40)

        # Own try block: a failure of the baseline search must not suppress
        # the comparison summary printed afterwards.
        try:
            original_results = await lancedb_service.search_documents(test_query, limit=5)

            if original_results:
                print(f"πŸ“„ Original search found {len(original_results)} documents:")

                for i, result in enumerate(original_results[:3], 1):
                    content, filename = _doc_fields(result)

                    print(f"\n{i}. Document: {filename}")
                    print(f"   Content preview: {_one_line_preview(content)}")

                    # Check if it's actually pension-related
                    if 'pension' in content.lower():
                        print("   βœ… Contains pension content")
                    else:
                        print("   ❌ No pension content detected")

            else:
                print("❌ Original search found no results")

        except Exception as e:
            print(f"❌ Original search failed: {e}")

        print("\n" + "=" * 70)
        print("πŸ“Š Search Comparison Summary:")
        print("   Enhanced Search: Better targeting of pension-specific content")
        print("   Original Search: Generic results that might miss relevant docs")
        print("   Expected Result: Enhanced search should return actual pension rules")

    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("πŸ’‘ Make sure you're running from the PensionBot directory")
    except Exception as e:
        print(f"❌ Test error: {e}")

async def test_query_enhancement():
    """Print a fixed set of pension queries next to their enhanced form.

    Each sample query is passed to ``enhanced_search_service._enhance_query``
    and the original/enhanced pair is written to stdout followed by a blank
    line. Any exception, including a failed import of the service module,
    is caught and printed as a single error line. Returns None.
    """
    print("\n🎯 Testing Query Enhancement Strategies:")
    print("=" * 50)

    sample_queries = (
        "What are the pension rules?",
        "How to calculate pension?",
        "Pension eligibility criteria",
        "Family pension benefits",
        "Commutation of pension",
    )

    try:
        # Deferred import so that a missing module lands in the handler below.
        from enhanced_search_service import enhanced_search_service as service

        for original in sample_queries:
            # Enhance first: if it raises, nothing is printed for this query.
            rewritten = service._enhance_query(original)
            # One write per query: two labelled lines plus a blank separator.
            print(f"Original: {original}\nEnhanced: {rewritten}\n")

    except Exception as err:
        print(f"❌ Query enhancement test error: {err}")

if __name__ == "__main__":
    # Banner: title, one-line description, then a blank separator line.
    print(
        "🎯 Enhanced Search Test for Large Document Collections",
        "Testing improved search for pension rules with 1500+ documents",
        "",
        sep="\n",
    )

    # Run the scenarios in order; each gets its own asyncio.run() call,
    # i.e. a separate event loop per scenario.
    for scenario in (test_pension_search, test_query_enhancement):
        asyncio.run(scenario())