PensionBot / test_enhanced_search.py
ChAbhishek28's picture
Add 89999999999999999999999999999
a2ca191
raw
history blame
5.45 kB
#!/usr/bin/env python3
"""
Test Enhanced Search for Pension Rules Query
Demonstrates improved search results for "What are the pension rules?" with 1500+ documents
"""
import asyncio
import logging
import sys
import os
# Setup logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)
async def test_pension_search():
    """Compare the enhanced pension search with the original LanceDB search.

    Runs the fixed query "What are the pension rules?" through
    ``enhanced_search_service.enhanced_pension_search`` and then through
    ``lancedb_service.search_documents`` (both with ``limit=5``) and prints
    a preview of the first three hits of each so they can be compared by eye.

    Returns:
        None. All results are written to stdout.

    Import errors and any other ``Exception`` are caught and reported on
    stdout rather than propagated.
    """
    print("🔍 Testing Enhanced Search for Large Document Collection (1500+ docs)")
    print("=" * 70)

    # Test query that was giving wrong results
    test_query = "What are the pension rules?"

    try:
        # Imported lazily so that a missing module is reported by the
        # ImportError handler below instead of failing at module import time.
        from enhanced_search_service import enhanced_search_service
        from lancedb_service import lancedb_service

        print(f"📝 Query: '{test_query}'")
        print("📊 Document collection size: ~1500 documents")
        print()

        # Test enhanced search
        print("🚀 Testing Enhanced Search Strategy:")
        print("-" * 40)

        enhanced_results = await enhanced_search_service.enhanced_pension_search(
            test_query, limit=5
        )

        if enhanced_results:
            print(f"✅ Enhanced search found {len(enhanced_results)} relevant documents:")
            for i, result in enumerate(enhanced_results[:3], 1):
                _print_enhanced_hit(i, result)
        else:
            print("❌ Enhanced search found no results")

        print("\n" + "=" * 70)

        # Test fallback to original search
        print("⚠️ Original Search Strategy (for comparison):")
        print("-" * 40)

        # The baseline search gets its own handler so that a failure here
        # still lets the comparison summary below be printed.
        try:
            original_results = await lancedb_service.search_documents(test_query, limit=5)

            if original_results:
                print(f"📄 Original search found {len(original_results)} documents:")
                for i, result in enumerate(original_results[:3], 1):
                    _print_original_hit(i, result)
            else:
                print("❌ Original search found no results")
        except Exception as e:
            print(f"❌ Original search failed: {e}")

        print("\n" + "=" * 70)
        print("📊 Search Comparison Summary:")
        print(" Enhanced Search: Better targeting of pension-specific content")
        print(" Original Search: Generic results that might miss relevant docs")
        print(" Expected Result: Enhanced search should return actual pension rules")

    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("💡 Make sure you're running from the PensionBot directory")
    except Exception as e:
        print(f"❌ Test error: {e}")


def _content_preview(content, limit=150):
    """Return the first *limit* characters of *content* flattened to one line.

    Newlines become spaces, surrounding whitespace is stripped, and ``...``
    is appended when *content* is longer than *limit*.
    """
    preview = content[:limit].replace('\n', ' ').strip()
    return f"{preview}{'...' if len(content) > limit else ''}"


def _print_enhanced_hit(index, result):
    """Print one enhanced-search hit, preferring lines that mention 'pension'."""
    # ``or ''`` also covers a 'content' key that is present but None.
    content = result.get('content') or ''
    filename = result.get('filename', 'Unknown')

    # Show snippet with pension-related content
    pension_lines = [
        line.strip() for line in content.split('\n') if 'pension' in line.lower()
    ]

    print(f"\n{index}. Document: {filename}")
    if pension_lines:
        print(" Pension content preview:")
        for line in pension_lines[:2]:  # Show first 2 pension-related lines
            print(f" • {line[:80]}{'...' if len(line) > 80 else ''}")
    else:
        # Show general content preview
        print(f" Content preview: {_content_preview(content)}")


def _print_original_hit(index, result):
    """Print one baseline-search hit and whether it mentions 'pension'."""
    # ``or ''`` also covers a 'content' key that is present but None.
    content = result.get('content') or ''
    filename = result.get('filename', 'Unknown')

    print(f"\n{index}. Document: {filename}")
    print(f" Content preview: {_content_preview(content)}")

    # Check if it's actually pension-related
    if 'pension' in content.lower():
        print(" ✅ Contains pension content")
    else:
        print(" ❌ No pension content detected")
async def test_query_enhancement():
    """Print each sample query next to its enhanced form.

    Every query in a fixed list of pension-related questions is passed to
    ``enhanced_search_service._enhance_query`` and the original and enhanced
    text are printed one after the other.

    Returns:
        None. All results are written to stdout.

    Import errors and any other ``Exception`` are caught and reported on
    stdout rather than propagated.
    """
    print("\n🎯 Testing Query Enhancement Strategies:")
    print("=" * 50)

    test_queries = [
        "What are the pension rules?",
        "How to calculate pension?",
        "Pension eligibility criteria",
        "Family pension benefits",
        "Commutation of pension",
    ]

    try:
        # Imported lazily so that a missing module is reported below instead
        # of failing at module import time.
        from enhanced_search_service import enhanced_search_service

        for query in test_queries:
            enhanced_query = enhanced_search_service._enhance_query(query)
            print(f"Original: {query}")
            print(f"Enhanced: {enhanced_query}")
            print()

    except ImportError as e:
        # Same report as test_pension_search so a wrong working directory is
        # diagnosed the same way in both tests.
        print(f"❌ Import error: {e}")
        print("💡 Make sure you're running from the PensionBot directory")
    except Exception as e:
        print(f"❌ Query enhancement test error: {e}")
async def _run_all_tests():
    """Run both demonstrations, in order, on a single event loop."""
    await test_pension_search()
    await test_query_enhancement()


if __name__ == "__main__":
    print("🎯 Enhanced Search Test for Large Document Collections")
    print("Testing improved search for pension rules with 1500+ documents")
    print()

    # Run the tests under one asyncio.run() call: each call creates a new
    # event loop and closes it on exit, and the asyncio docs recommend calling
    # it only once per program.
    # NOTE(review): the imported service objects may keep loop-bound state
    # between the two tests - not visible from this file; confirm.
    asyncio.run(_run_all_tests())