File size: 2,693 Bytes
547fc39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c3ff5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547fc39
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env python3
"""
Download LanceDB data from Hugging Face LFS storage at startup.
This script downloads the Git LFS files that contain the 11,555-document database.
"""
import os
import subprocess
from pathlib import Path

def _report_lancedb_state(lancedb_path):
    """Print diagnostics describing what exists under *lancedb_path* on disk."""
    if not lancedb_path.exists():
        print(f"⚠️ lancedb_data NOT found at: {lancedb_path.absolute()}")
        print(f"πŸ“‚ Current directory: {Path.cwd()}")
        print(f"πŸ“‚ Contents: {list(Path.cwd().iterdir())[:10]}")
        return

    print(f"πŸ“‚ lancedb_data exists at: {lancedb_path.absolute()}")
    table_path = lancedb_path / "rajasthan_documents.lance"
    if not table_path.exists():
        return

    versions_path = table_path / "_versions"
    print(f"πŸ“‚ rajasthan_documents.lance exists: {table_path.absolute()}")
    print(f"πŸ“‚ _versions exists: {versions_path.exists()}")
    if versions_path.exists():
        version_files = list(versions_path.iterdir())
        print(f"πŸ“Š Found {len(version_files)} version files")


def download_lfs_files():
    """Make sure the LanceDB data is available locally, downloading it if needed.

    The data is considered present when
    ``lancedb_data/rajasthan_documents.lance/_versions`` exists relative to the
    current working directory. Otherwise ``lancedb_data/**`` is fetched from the
    Hugging Face Space ``ChAbhishek28/PensionBot`` into the current directory
    via ``huggingface_hub.snapshot_download``.

    Returns:
        bool: True if the ``_versions`` entry exists (already, or after the
        download). False if the download raised, ``huggingface_hub`` could not
        be imported, or the data is still missing after the download.

    Any ``Exception`` raised while downloading or inspecting the result is
    reported on stdout and converted into a False return (best effort).
    """
    print("πŸ”½ Checking for LanceDB data...")

    lancedb_path = Path("lancedb_data")
    table_path = lancedb_path / "rajasthan_documents.lance"
    versions_path = table_path / "_versions"

    # NOTE(review): this only checks that `_versions` exists; it does not read
    # file contents, so it cannot tell real files from Git LFS pointer stubs
    # stored inside that directory - confirm this is sufficient.
    if versions_path.exists():
        print("βœ… LanceDB data already downloaded")
        return True
    if table_path.exists():
        print("⚠️ LanceDB data is LFS pointer - downloading actual files...")

    try:
        # Imported lazily so a missing huggingface_hub is handled like any
        # other download failure instead of breaking module import.
        from huggingface_hub import snapshot_download

        space_id = "ChAbhishek28/PensionBot"
        print(f"πŸ“₯ Downloading LFS files from {space_id}...")

        # Download only the lancedb_data folder.
        # NOTE(review): `local_dir_use_symlinks` is deprecated in recent
        # huggingface_hub releases - confirm against the pinned version.
        snapshot_download(
            repo_id=space_id,
            repo_type="space",
            allow_patterns="lancedb_data/**",
            local_dir=".",
            local_dir_use_symlinks=False,
        )

        print("βœ… LFS files downloaded successfully")

        _report_lancedb_state(lancedb_path)

        # A download call that returned without raising is not proof that the
        # database landed on disk; report success only if it is really there.
        if not versions_path.exists():
            print(f"❌ Failed to download LFS files: {versions_path} is missing after download")
            print("⚠️ Voice Bot will start with minimal documents")
            return False

        return True

    except Exception as e:
        print(f"❌ Failed to download LFS files: {e}")
        print("⚠️ Voice Bot will start with minimal documents")
        return False

# Script entry point: run the download only when this file is executed
# directly, not when it is imported. The returned boolean is not used here.
if __name__ == "__main__":
    download_lfs_files()