Spaces:
Running
on
Zero
Running
on
Zero
Improve checkpoint caching check and set logdir for easier output finding
Browse files
app.py
CHANGED
|
@@ -51,15 +51,24 @@ def get_checkpoint_path(checkpoint_file, default_local_path, repo_id=None, filen
|
|
| 51 |
# Try downloading from Model Hub if configured
|
| 52 |
if repo_id and filename and HF_HUB_AVAILABLE:
|
| 53 |
try:
|
| 54 |
-
print(f"📥 Downloading checkpoint from {repo_id}...")
|
| 55 |
# Use /workspace if available (persistent), otherwise /tmp
|
| 56 |
cache_dir = "/workspace/checkpoints" if os.path.exists("/workspace") else "/tmp/checkpoints"
|
| 57 |
os.makedirs(cache_dir, exist_ok=True)
|
| 58 |
|
| 59 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
import time
|
| 61 |
start_time = time.time()
|
| 62 |
-
print(f"
|
|
|
|
| 63 |
|
| 64 |
checkpoint_path = hf_hub_download(
|
| 65 |
repo_id=repo_id,
|
|
@@ -67,12 +76,11 @@ def get_checkpoint_path(checkpoint_file, default_local_path, repo_id=None, filen
|
|
| 67 |
cache_dir=cache_dir,
|
| 68 |
local_files_only=False,
|
| 69 |
resume_download=True, # Resume if interrupted
|
| 70 |
-
timeout=600 # 10 minute timeout per request
|
| 71 |
)
|
| 72 |
|
| 73 |
elapsed = time.time() - start_time
|
| 74 |
-
print(f"Download completed in {elapsed:.1f}
|
| 75 |
-
print(f"✅ Checkpoint
|
| 76 |
return checkpoint_path
|
| 77 |
except Exception as e:
|
| 78 |
error_detail = str(e)
|
|
@@ -124,6 +132,7 @@ def generate_image(prompt, aspect_ratio, cfg, seed, checkpoint_file, config_path
|
|
| 124 |
output_dir.mkdir(exist_ok=True)
|
| 125 |
|
| 126 |
# Run sampling command
|
|
|
|
| 127 |
cmd = [
|
| 128 |
"python", "sample.py",
|
| 129 |
"--model_config_path", config_path,
|
|
@@ -137,7 +146,8 @@ def generate_image(prompt, aspect_ratio, cfg, seed, checkpoint_file, config_path
|
|
| 137 |
"--finetuned_vae", "none",
|
| 138 |
"--jacobi", "1",
|
| 139 |
"--jacobi_th", "0.001",
|
| 140 |
-
"--jacobi_block_size", "16"
|
|
|
|
| 141 |
]
|
| 142 |
|
| 143 |
status_msg += "Running generation...\n"
|
|
|
|
| 51 |
# Try downloading from Model Hub if configured
|
| 52 |
if repo_id and filename and HF_HUB_AVAILABLE:
|
| 53 |
try:
|
|
|
|
| 54 |
# Use /workspace if available (persistent), otherwise /tmp
|
| 55 |
cache_dir = "/workspace/checkpoints" if os.path.exists("/workspace") else "/tmp/checkpoints"
|
| 56 |
os.makedirs(cache_dir, exist_ok=True)
|
| 57 |
|
| 58 |
+
# Check if already downloaded
|
| 59 |
+
possible_path = os.path.join(cache_dir, "models--" + repo_id.replace("/", "--"), "snapshots", "*", filename)
|
| 60 |
+
import glob
|
| 61 |
+
existing = glob.glob(possible_path)
|
| 62 |
+
if existing:
|
| 63 |
+
checkpoint_path = existing[0]
|
| 64 |
+
print(f"✅ Using cached checkpoint: {checkpoint_path}")
|
| 65 |
+
return checkpoint_path
|
| 66 |
+
|
| 67 |
+
# Download with progress tracking
|
| 68 |
import time
|
| 69 |
start_time = time.time()
|
| 70 |
+
print(f"📥 Downloading checkpoint from {repo_id}...")
|
| 71 |
+
print(f"File size: ~15.5 GB - This may take 10-30 minutes")
|
| 72 |
|
| 73 |
checkpoint_path = hf_hub_download(
|
| 74 |
repo_id=repo_id,
|
|
|
|
| 76 |
cache_dir=cache_dir,
|
| 77 |
local_files_only=False,
|
| 78 |
resume_download=True, # Resume if interrupted
|
|
|
|
| 79 |
)
|
| 80 |
|
| 81 |
elapsed = time.time() - start_time
|
| 82 |
+
print(f"✅ Download completed in {elapsed/60:.1f} minutes")
|
| 83 |
+
print(f"✅ Checkpoint at: {checkpoint_path}")
|
| 84 |
return checkpoint_path
|
| 85 |
except Exception as e:
|
| 86 |
error_detail = str(e)
|
|
|
|
| 132 |
output_dir.mkdir(exist_ok=True)
|
| 133 |
|
| 134 |
# Run sampling command
|
| 135 |
+
# Set logdir to outputs directory for easier file finding
|
| 136 |
cmd = [
|
| 137 |
"python", "sample.py",
|
| 138 |
"--model_config_path", config_path,
|
|
|
|
| 146 |
"--finetuned_vae", "none",
|
| 147 |
"--jacobi", "1",
|
| 148 |
"--jacobi_th", "0.001",
|
| 149 |
+
"--jacobi_block_size", "16",
|
| 150 |
+
"--logdir", str(output_dir) # Set logdir to outputs
|
| 151 |
]
|
| 152 |
|
| 153 |
status_msg += "Running generation...\n"
|