#!/usr/bin/env python3
"""
Test Enhanced Search for Pension Rules Query
Demonstrates improved search results for "What are the pension rules?" with 1500+ documents
"""

import asyncio
import logging
import sys
import os

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

# Width of the "=" and "-" separator rules printed between report sections.
_WIDE_RULE = "=" * 70
_NARROW_RULE = "-" * 40


def _truncate(text, limit):
    """Return the first *limit* characters of *text*, plus '...' if it was cut."""
    return f"{text[:limit]}{'...' if len(text) > limit else ''}"


def _content_preview(content, limit=150):
    """Return a single-line preview of the first *limit* characters of *content*.

    Newlines inside the kept prefix become spaces and surrounding whitespace
    is stripped; '...' is appended when *content* is longer than *limit*.
    """
    head = content[:limit].replace('\n', ' ').strip()
    return f"{head}{'...' if len(content) > limit else ''}"


def _result_fields(result):
    """Return ``(content, filename)`` for one search-result mapping.

    A missing or ``None`` content is normalised to ``''`` so the string
    operations performed by the callers cannot fail on it.
    """
    return result.get('content') or '', result.get('filename', 'Unknown')


def _print_enhanced_result(index, result):
    """Print one enhanced-search hit, preferring lines that mention 'pension'."""
    content, filename = _result_fields(result)

    # Stripped lines that mention "pension" (case-insensitive); such a line is
    # never empty after stripping, so no further emptiness check is needed.
    pension_lines = [
        line.strip() for line in content.split('\n') if 'pension' in line.lower()
    ]

    print(f"\n{index}. Document: {filename}")
    if pension_lines:
        print(" Pension content preview:")
        for line in pension_lines[:2]:  # Show first 2 pension-related lines
            print(f" • {_truncate(line, 80)}")
    else:
        # No pension-specific line found: fall back to a general preview.
        print(f" Content preview: {_content_preview(content)}")


def _print_original_result(index, result):
    """Print one original-search hit and whether it mentions 'pension' at all."""
    content, filename = _result_fields(result)

    print(f"\n{index}. Document: {filename}")
    print(f" Content preview: {_content_preview(content)}")

    # Check if it's actually pension-related
    if 'pension' in content.lower():
        print(" ✅ Contains pension content")
    else:
        print(" ❌ No pension content detected")


async def test_pension_search():
    """Compare the enhanced search with the original search for one query.

    Runs "What are the pension rules?" through
    ``enhanced_search_service.enhanced_pension_search`` and then through
    ``lancedb_service.search_documents`` (both with ``limit=5``) and prints
    the first three hits of each. All failures are reported on stdout; the
    coroutine itself never raises for them and returns ``None``.
    """
    print("🔍 Testing Enhanced Search for Large Document Collection (1500+ docs)")
    print(_WIDE_RULE)

    # Test query that was giving wrong results
    test_query = "What are the pension rules?"

    try:
        # Imported lazily so that a missing module is reported through the
        # ImportError handler below instead of failing at module import time.
        from enhanced_search_service import enhanced_search_service
        from lancedb_service import lancedb_service

        print(f"📝 Query: '{test_query}'")
        print("📊 Document collection size: ~1500 documents")
        print()

        # Test enhanced search
        print("🚀 Testing Enhanced Search Strategy:")
        print(_NARROW_RULE)

        enhanced_results = await enhanced_search_service.enhanced_pension_search(
            test_query, limit=5
        )

        if enhanced_results:
            print(f"✅ Enhanced search found {len(enhanced_results)} relevant documents:")
            for i, result in enumerate(enhanced_results[:3], 1):
                _print_enhanced_result(i, result)
        else:
            print("❌ Enhanced search found no results")

        print("\n" + _WIDE_RULE)

        # Test fallback to original search
        print("⚠️ Original Search Strategy (for comparison):")
        print(_NARROW_RULE)

        # The comparison search gets its own handler so that its failure does
        # not suppress the summary printed afterwards.
        try:
            original_results = await lancedb_service.search_documents(test_query, limit=5)

            if original_results:
                print(f"📄 Original search found {len(original_results)} documents:")
                for i, result in enumerate(original_results[:3], 1):
                    _print_original_result(i, result)
            else:
                print("❌ Original search found no results")

        except Exception as e:
            print(f"❌ Original search failed: {e}")
            # Keep the traceback reachable (DEBUG level) without changing stdout.
            logger.debug("Original search failed", exc_info=True)

        print("\n" + _WIDE_RULE)
        print("📊 Search Comparison Summary:")
        print(" Enhanced Search: Better targeting of pension-specific content")
        print(" Original Search: Generic results that might miss relevant docs")
        print(" Expected Result: Enhanced search should return actual pension rules")

    except ImportError as e:
        print(f"❌ Import error: {e}")
        print("💡 Make sure you're running from the PensionBot directory")
    except Exception as e:
        print(f"❌ Test error: {e}")
        # Keep the traceback reachable (DEBUG level) without changing stdout.
        logger.debug("Pension search test failed", exc_info=True)


async def test_query_enhancement():
    """Print the enhanced form of several sample pension queries.

    Each query is passed to ``enhanced_search_service._enhance_query`` and
    printed next to its result. Any failure, including a failed import, is
    reported on stdout rather than raised.
    """
    print("\n🎯 Testing Query Enhancement Strategies:")
    print("=" * 50)

    test_queries = [
        "What are the pension rules?",
        "How to calculate pension?",
        "Pension eligibility criteria",
        "Family pension benefits",
        "Commutation of pension",
    ]

    try:
        from enhanced_search_service import enhanced_search_service

        for query in test_queries:
            enhanced_query = enhanced_search_service._enhance_query(query)
            print(f"Original: {query}")
            print(f"Enhanced: {enhanced_query}")
            print()

    except Exception as e:
        print(f"❌ Query enhancement test error: {e}")
        # Keep the traceback reachable (DEBUG level) without changing stdout.
        logger.debug("Query enhancement test failed", exc_info=True)


async def main():
    """Run both checks, in order, on a single event loop."""
    await test_pension_search()
    await test_query_enhancement()


if __name__ == "__main__":
    print("🎯 Enhanced Search Test for Large Document Collections")
    print("Testing improved search for pension rules with 1500+ documents")
    print()

    # Run the tests on one event loop instead of one asyncio.run() per test.
    asyncio.run(main())