PensionBot / setup_documents.py
ChAbhishek28's picture
Add 899999999999999999999999
224c593
raw
history blame
11.5 kB
"""
Setup script to populate LanceDB with sample documents for the Voice Bot
"""
import os
from lancedb_service import lancedb_service
import asyncio
import logging
# Logger named "voicebot"; no handlers or levels are configured in the visible part of this file.
logger = logging.getLogger("voicebot")
# Sample government documents used to seed the vector store.
# Each entry is a dict with three string keys:
#   "content"  - the document body (this is the text that gets embedded and stored)
#   "filename" - a short file-style identifier for the document
#   "source"   - a human-readable label for where the content comes from
# The six entries cover different topics (leave, salary, transfers, training,
# retirement, procurement) so that different queries match different documents.
SAMPLE_DOCUMENTS = [
# Leave entitlements: casual, earned, medical and maternity/paternity leave.
{
"content": """Government employees are entitled to various types of leave during their service period. The leave rules are governed by the Central Civil Services (Leave) Rules and state-specific adaptations.
Types of Leave Available:
1. Casual Leave (CL): 8 days per calendar year
- Can be taken for urgent personal work
- No medical certificate required
- Cannot be carried forward to next year
2. Earned Leave (EL): 30 days per year
- Can be accumulated up to 300 days
- Encashment allowed at retirement
- Advance grant possible for valid reasons
3. Medical Leave: As per medical requirements
- Medical certificate mandatory
- Can be combined with EL
- Special provisions for chronic illnesses
4. Maternity/Paternity Leave:
- Maternity: 180 days (6 months)
- Paternity: 15 days within 6 months of child birth
Application Process:
- Submit leave application in advance
- Get approval from competent authority
- Maintain proper leave records
- Emergency leave can be regularized later""",
"filename": "leave_rules.txt",
"source": "Government Leave Manual 2024"
},
# Salary structure: pay components, allowances, deductions and pay revision.
{
"content": """Government employee salary structure consists of multiple components designed to provide comprehensive compensation and benefits.
Salary Components:
1. Basic Pay: Core salary amount based on pay scale and grade
2. Dearness Allowance (DA): Currently 50% of basic pay (as of 2024)
3. House Rent Allowance (HRA): Varies by city classification
- X Class cities: 24% of basic pay
- Y Class cities: 16% of basic pay
- Z Class cities: 8% of basic pay
4. Transport Allowance: ₹3,600 per month for most employees
5. Medical Allowance: ₹1,000 per month
Special Allowances:
- City Compensatory Allowance (CCA)
- Special Allowance for difficult postings
- Overtime Allowance (where applicable)
Deductions:
- Income Tax (as per IT rules)
- Provident Fund: 12% of basic pay
- Group Insurance Scheme (GIS)
- Professional Tax (state-specific)
Pay Revision:
- Pay Commission recommendations every 10 years
- Annual increment: Usually 3% of basic pay
- Promotion-based pay upgrades as per rules""",
"filename": "salary_structure.txt",
"source": "Pay Commission Guidelines 2024"
},
# Transfer and posting rules: tenure limits, request/administrative transfers, process.
{
"content": """Transfer and posting policies for government employees are designed to ensure fair distribution of work, prevent corruption, and provide career development opportunities.
Transfer Rules:
1. Tenure-based Transfers:
- Minimum tenure: 3 years in sensitive posts
- Maximum tenure: 5 years in one location (general rule)
- Cooling off period: 2 years before returning to same post
2. Request Transfers:
- Can apply after completing minimum tenure
- Valid grounds: medical, family, educational needs
- Subject to administrative approval and replacement availability
3. Administrative Transfers:
- Based on service requirements
- Immediate transfer in case of disciplinary issues
- Mutual transfers allowed with proper approvals
Posting Guidelines:
- Home district posting: After 5 years of service
- Difficult area posting: Incentives and allowances provided
- Border area posting: Special security clearance required
Transfer Process:
1. Issue of transfer order by competent authority
2. Relieving formalities at current posting
3. Journey time allowance as per rules
4. Joining at new posting within prescribed time
5. Handing/taking over of charge properly
Benefits:
- Transfer TA/DA as per entitlement
- Family accommodation assistance
- School transfer certificates for children""",
"filename": "transfer_posting_rules.txt",
"source": "Administrative Transfer Policy 2024"
},
# Training and development: training categories, institutes, training leave.
# NOTE(review): the text below names the IAS academy "National Academy of
# Administration (NAAN)"; that academy is usually referred to as LBSNAA -
# confirm the intended name before relying on this sample content.
{
"content": """Training and skill development programs are essential for government employees to enhance their capabilities and stay updated with modern administrative practices.
Training Categories:
1. Induction Training:
- Mandatory for all new recruits
- Duration: 3-6 months depending on service
- Covers service rules, conduct rules, and job-specific skills
2. In-Service Training:
- Periodic skill upgradation programs
- Leadership development courses
- Technology and digital literacy training
3. Specialized Training:
- Domain-specific technical training
- Foreign training opportunities for senior officers
- Research and innovation workshops
Training Institutes:
- National Academy of Administration (NAAN) - for IAS officers
- State Administrative Training Institutes
- Sector-specific training centers
- Online learning platforms (iGOT Karmayogi)
Benefits of Training:
1. Career advancement opportunities
2. Better performance and efficiency
3. Exposure to best practices
4. Networking with peers
5. Personal and professional development
Training Leave:
- Special training leave with full pay
- Study leave for higher education
- Deputation opportunities to training institutes
Digital Learning Initiative:
- Mission Karmayogi for capacity building
- Online certification courses
- Competency-based training modules
- Performance-linked training requirements
Training is considered essential for promotion to higher grades and is often a mandatory requirement for career progression in government service.""",
"filename": "training_development.txt",
"source": "Government Training Policy 2024"
},
# Retirement benefits: pension, gratuity, provident fund, commutation, medical cover.
{
"content": """Retirement benefits for government employees include multiple components to ensure financial security post-retirement.
Retirement Benefits Package:
1. Pension: Monthly payment based on last drawn salary and service years
- Calculation: (Last drawn basic pay + DA) × service years ÷ 70
- Minimum pension: ₹9,000 per month
- Maximum pension: No upper limit
2. Gratuity: Lump sum payment at retirement
- Formula: (Basic pay + DA) × 15/26 × years of service
- Maximum: ₹20 lakh (as of 2024)
- Tax exemption available
3. Provident Fund (GPF/CPF):
- Employee + Government contribution throughout service
- Withdrawal allowed at retirement
- Interest rate: Currently 8% per annum
4. Commutation of Pension:
- Option to convert part of pension to lump sum
- Up to 50% of pension can be commuted
- Restoration after 15 years
5. Medical Benefits:
- Continued medical facility post-retirement
- Central Government Health Scheme (CGHS) coverage
- Reimbursement of medical expenses
Retirement Process:
- Apply 6 months before retirement date
- Complete all service formalities
- Obtain clearances from all departments
- Submit required documents and forms
The retirement package is designed to provide comprehensive financial support and maintain dignity of life for retired government employees.""",
"filename": "retirement_benefits.txt",
"source": "Retirement Benefits Manual 2024"
},
# Procurement policy: tender methods, key rules, tender process, documentation.
{
"content": """Government procurement policies and procedures ensure transparency, fairness, and value for money in public purchases.
Procurement Methods:
1. Open Tender:
- Public advertisement required
- Minimum 21 days for bid submission
- Used for high-value procurements above ₹25 lakh
2. Limited Tender:
- Invitation to selected vendors
- For specialized items or urgent requirements
- Proper justification required
3. Single Tender:
- Direct negotiation with one vendor
- Only in exceptional circumstances
- Requires special approval
Key Procurement Rules:
- Preference to Make in India products
- MSME reservation: 25% of procurement
- Minimum 50% local content requirement
- GeM (Government e-Marketplace) mandatory for routine items
Tender Process:
1. Preparation of tender documents
2. Advertisement and bidder registration
3. Pre-bid meetings for clarifications
4. Bid submission and opening
5. Technical and financial evaluation
6. Contract award to lowest compliant bidder
Documentation Required:
- Technical specifications
- Terms and conditions
- Evaluation criteria
- Approval for procurement
- Committee formations for evaluation
This system ensures competitive pricing, quality products, and supports domestic manufacturing while maintaining complete transparency in government spending.""",
"filename": "procurement_policy.txt",
"source": "Government Procurement Guidelines 2024"
}
]
def _seed_rajasthan_table():
    """Create the ``rajasthan_documents`` table from SAMPLE_DOCUMENTS if it is missing.

    Each row holds a fresh UUID ``id``, the document ``content`` and
    ``filename``, and a ``vector`` produced by
    ``lancedb_service.embedding_model.embed_query``.

    The table is created directly from the real rows. An earlier version
    created it from a placeholder row and then tried to remove that row with
    ``id = 'sample'``; the placeholder's id was a random UUID, so the delete
    never matched and the junk row stayed in the table.

    Raises:
        Whatever the embedding model, pandas or the database layer raise;
        the caller decides how to handle it.
    """
    # Imported lazily so that importing this module does not require pandas.
    import uuid

    import pandas as pd

    if "rajasthan_documents" in lancedb_service.db.table_names():
        # An existing table is left untouched, so re-running the script
        # does not append duplicate rows to it.
        return

    rows = [
        {
            "id": str(uuid.uuid4()),
            "content": doc_data["content"],
            "filename": doc_data["filename"],
            "vector": lancedb_service.embedding_model.embed_query(doc_data["content"]),
        }
        for doc_data in SAMPLE_DOCUMENTS
    ]
    lancedb_service.db.create_table("rajasthan_documents", pd.DataFrame(rows))


async def setup_sample_documents():
    """Create sample documents in the database.

    Steps:
      1. Wrap every SAMPLE_DOCUMENTS entry in a minimal object exposing
         ``page_content`` and ``metadata`` and pass the list to
         ``lancedb_service.add_documents`` for the ``government_docs``
         knowledge base.
      2. Best-effort: create and fill the ``rajasthan_documents`` table when
         it does not exist yet. A failure here is logged as a warning and
         does not abort the setup.

    Returns:
        None. Errors are logged rather than propagated to the caller.
    """
    try:
        logger.info("🗂️ Setting up sample government documents...")

        # Lightweight stand-ins that carry just the two attributes set here;
        # presumably all add_documents reads from each item - verify against
        # lancedb_service.
        docs = [
            type('Document', (), {
                'page_content': doc_data["content"],
                'metadata': {"source": doc_data["source"], "filename": doc_data["filename"]},
            })()
            for doc_data in SAMPLE_DOCUMENTS
        ]

        # Add to government_docs knowledge base
        await lancedb_service.add_documents(
            docs=docs,
            user_id="system",
            knowledge_base="government_docs",
            filename="sample_documents.txt"
        )

        # The dedicated rajasthan_documents table is optional: log and move
        # on if it cannot be created.
        try:
            _seed_rajasthan_table()
        except Exception as e:
            logger.warning(f"⚠️ Could not setup rajasthan_documents table: {e}")

        logger.info("✅ Sample documents added successfully")
    except Exception as e:
        # exc_info keeps the traceback, which the message alone would lose.
        logger.error(f"❌ Error setting up documents: {e}", exc_info=True)
# Script entry point: when this file is executed directly (not imported),
# run the seeding coroutine to completion on a fresh event loop.
if __name__ == "__main__":
    seeding_coroutine = setup_sample_documents()
    asyncio.run(seeding_coroutine)