"""Restore Docker volumes from backup archives hosted on a remote server.

For every URL in ``DOWNLOADS`` the script:

1. Downloads the archive into ``TMP_DIR`` (skipped when the file is already
   present there).
2. Creates a Docker volume named after the archive and extracts the archive
   into that volume.

After all archives have been processed, the downloaded files are removed
from ``TMP_DIR``.

Backup archives:
    http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz
    http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz
"""

import os
import shutil
import subprocess
import tarfile

import requests
from tqdm import tqdm

DOWNLOADS = [
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_cache_backup.tar.gz",
    "http://172.16.15.118:9557/repository/atalink-hf-models/backups/data_gfpgan_backup.tar.gz",
    # "https://huggingface.co/hynt/F5-TTS-Vietnamese-ViVoice/resolve/main/config.json"
]

# Download directory, relative to the current working directory.
# NOTE(review): the original request mentions /tmp, but the script has always
# used the relative "tmp" folder -- confirm which one is intended.
TMP_DIR = "tmp"
VOLUME_PREFIX = "atalink_"

# (connect, read) timeouts in seconds, so a stalled server cannot hang the script.
REQUEST_TIMEOUT = (10, 60)
CHUNK_SIZE = 1024 * 1024  # 1MB


def download_file(url, dest_folder):
    """Download ``url`` into ``dest_folder`` and return the local file path.

    The file name is the last path component of ``url``. If a file with that
    name already exists in ``dest_folder`` the download is skipped.

    The data is streamed into a ``<name>.part`` file that is renamed to its
    final name only after the download has completed, so an interrupted run
    never leaves a truncated archive that a later run would skip over.

    Args:
        url: HTTP(S) URL of the file to fetch.
        dest_folder: Directory to store the file in; created when missing.

    Returns:
        Path of the downloaded (or already existing) file.

    Raises:
        requests.RequestException: On connection errors, timeouts or a
            non-success HTTP status.
        OSError: If the destination cannot be created or written.
    """
    filename = os.path.basename(url)
    dest_path = os.path.join(dest_folder, filename)
    if os.path.exists(dest_path):
        print(f"✅ [SKIP] {filename} already exists at \033[96m{dest_path}\033[0m, skipping download.")
        return dest_path

    if dest_folder:
        os.makedirs(dest_folder, exist_ok=True)
    part_path = dest_path + ".part"

    print(f"\n📥 [START] Downloading: \033[1;36m{filename}\033[0m → \033[96m{dest_path}\033[0m")
    try:
        with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as r:
            r.raise_for_status()
            # An absent content-length means the size is unknown; tqdm expects
            # None (not 0) in that case.
            total = int(r.headers.get("content-length", 0)) or None
            with open(part_path, "wb") as f, tqdm(
                total=total, unit="B", unit_scale=True, desc=filename
            ) as pbar:
                for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
        os.replace(part_path, dest_path)
    finally:
        # After a successful rename the .part file is gone and this is a
        # no-op; after a failure it discards the incomplete download.
        if os.path.exists(part_path):
            try:
                os.remove(part_path)
            except OSError:
                # Best effort only: never mask the original download error.
                pass
    print(f"✅ [DONE] Downloaded: \033[1;32m{filename}\033[0m → \033[96m{dest_path}\033[0m\n")
    return dest_path


def create_volume_and_extract(tar_path, volume_name):
    """Create the Docker volume ``volume_name`` and unpack ``tar_path`` into it.

    Extraction runs inside a throw-away ``busybox`` container that mounts the
    volume at ``/data`` and the archive's directory at ``/tmpdata``. The
    leading path component of every archive member is stripped.

    Args:
        tar_path: Path of a ``.tar.gz`` archive on the host.
        volume_name: Name of the Docker volume to create and fill.

    Raises:
        subprocess.CalledProcessError: If a docker command exits non-zero.
        FileNotFoundError: If the ``docker`` executable cannot be found.
    """
    archive_name = os.path.basename(tar_path)
    host_dir = os.path.abspath(os.path.dirname(tar_path))

    # Create Docker volume
    subprocess.run(["docker", "volume", "create", volume_name], check=True)

    # Extract tar.gz into the volume using a temporary container
    print(f"\n📦 [EXTRACT] Extracting \033[1;36m{archive_name}\033[0m into Docker volume \033[1;33m{volume_name}\033[0m ...")
    subprocess.run(
        [
            "docker", "run", "--rm",  # temporary container, removed when it exits
            "-v", f"{volume_name}:/data",  # mount the Docker volume at /data
            "-v", f"{host_dir}:/tmpdata:ro",  # mount the archive's host directory read-only at /tmpdata
            "busybox",  # image used to run the container
            # The tar arguments are passed directly (no "sh -c" string), so an
            # archive name with spaces or shell metacharacters cannot break
            # the command.
            "tar", "-xzvf", f"/tmpdata/{archive_name}", "--strip", "1", "-C", "/data",
        ],
        check=True,
    )
    print(f"✅ [DONE] Extracted \033[1;36m{archive_name}\033[0m into volume \033[1;33m{volume_name}\033[0m\n")


def cleanup_tmp(files):
    """Delete every path in ``files``, reporting (not raising) failures.

    Args:
        files: Iterable of file paths to remove.
    """
    for f in files:
        try:
            os.remove(f)
        except OSError as e:
            print(f"⚠️ [CLEANUP] Could not remove \033[96m{f}\033[0m: {e}")
        else:
            print(f"🧹 [CLEANUP] Removed \033[96m{f}\033[0m")


def _volume_name_for(tar_path):
    """Return the volume name for an archive: prefix + file name minus its last two extensions."""
    stem = os.path.splitext(os.path.splitext(os.path.basename(tar_path))[0])[0]
    return VOLUME_PREFIX + stem


def main():
    """Download each backup, restore it into its Docker volume, then clean up."""
    downloaded_files = []
    for url in DOWNLOADS:
        tar_path = download_file(url, TMP_DIR)
        downloaded_files.append(tar_path)
        volume_name = _volume_name_for(tar_path)
        print(f"🚀 [VOLUME] Name: \033[1;33m{volume_name}\033[0m")
        create_volume_and_extract(tar_path, volume_name)
    cleanup_tmp(downloaded_files)


if __name__ == "__main__":
    main()

# Manual test of create_volume_and_extract with an archive already in tmp/:
#
# if __name__ == "__main__":
#     test_tar = os.path.join("tmp", "data_backup.tar.gz")
#     if os.path.exists(test_tar):
#         create_volume_and_extract(test_tar, "atalink_data_backup")
#     else:
#         print("File tmp/test_data_backup.tar.gz does not exist for testing.")