leoeric committed on
Commit
02a442c
·
1 Parent(s): 3216b1d

Improve checkpoint caching check and set logdir for easier output finding

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -51,15 +51,24 @@ def get_checkpoint_path(checkpoint_file, default_local_path, repo_id=None, filen
51
  # Try downloading from Model Hub if configured
52
  if repo_id and filename and HF_HUB_AVAILABLE:
53
  try:
54
- print(f"📥 Downloading checkpoint from {repo_id}...")
55
  # Use /workspace if available (persistent), otherwise /tmp
56
  cache_dir = "/workspace/checkpoints" if os.path.exists("/workspace") else "/tmp/checkpoints"
57
  os.makedirs(cache_dir, exist_ok=True)
58
 
59
- # Add timeout and better error handling
 
 
 
 
 
 
 
 
 
60
  import time
61
  start_time = time.time()
62
- print(f"Starting download from {repo_id}...")
 
63
 
64
  checkpoint_path = hf_hub_download(
65
  repo_id=repo_id,
@@ -67,12 +76,11 @@ def get_checkpoint_path(checkpoint_file, default_local_path, repo_id=None, filen
67
  cache_dir=cache_dir,
68
  local_files_only=False,
69
  resume_download=True, # Resume if interrupted
70
- timeout=600 # 10 minute timeout per request
71
  )
72
 
73
  elapsed = time.time() - start_time
74
- print(f"Download completed in {elapsed:.1f} seconds")
75
- print(f"✅ Checkpoint downloaded to: {checkpoint_path}")
76
  return checkpoint_path
77
  except Exception as e:
78
  error_detail = str(e)
@@ -124,6 +132,7 @@ def generate_image(prompt, aspect_ratio, cfg, seed, checkpoint_file, config_path
124
  output_dir.mkdir(exist_ok=True)
125
 
126
  # Run sampling command
 
127
  cmd = [
128
  "python", "sample.py",
129
  "--model_config_path", config_path,
@@ -137,7 +146,8 @@ def generate_image(prompt, aspect_ratio, cfg, seed, checkpoint_file, config_path
137
  "--finetuned_vae", "none",
138
  "--jacobi", "1",
139
  "--jacobi_th", "0.001",
140
- "--jacobi_block_size", "16"
 
141
  ]
142
 
143
  status_msg += "Running generation...\n"
 
51
  # Try downloading from Model Hub if configured
52
  if repo_id and filename and HF_HUB_AVAILABLE:
53
  try:
 
54
  # Use /workspace if available (persistent), otherwise /tmp
55
  cache_dir = "/workspace/checkpoints" if os.path.exists("/workspace") else "/tmp/checkpoints"
56
  os.makedirs(cache_dir, exist_ok=True)
57
 
58
+ # Check if already downloaded
59
+ possible_path = os.path.join(cache_dir, "models--" + repo_id.replace("/", "--"), "snapshots", "*", filename)
60
+ import glob
61
+ existing = glob.glob(possible_path)
62
+ if existing:
63
+ checkpoint_path = existing[0]
64
+ print(f"✅ Using cached checkpoint: {checkpoint_path}")
65
+ return checkpoint_path
66
+
67
+ # Download with progress tracking
68
  import time
69
  start_time = time.time()
70
+ print(f"📥 Downloading checkpoint from {repo_id}...")
71
+ print(f"File size: ~15.5 GB - This may take 10-30 minutes")
72
 
73
  checkpoint_path = hf_hub_download(
74
  repo_id=repo_id,
 
76
  cache_dir=cache_dir,
77
  local_files_only=False,
78
  resume_download=True, # Resume if interrupted
 
79
  )
80
 
81
  elapsed = time.time() - start_time
82
+ print(f"✅ Download completed in {elapsed/60:.1f} minutes")
83
+ print(f"✅ Checkpoint at: {checkpoint_path}")
84
  return checkpoint_path
85
  except Exception as e:
86
  error_detail = str(e)
 
132
  output_dir.mkdir(exist_ok=True)
133
 
134
  # Run sampling command
135
+ # Set logdir to outputs directory for easier file finding
136
  cmd = [
137
  "python", "sample.py",
138
  "--model_config_path", config_path,
 
146
  "--finetuned_vae", "none",
147
  "--jacobi", "1",
148
  "--jacobi_th", "0.001",
149
+ "--jacobi_block_size", "16",
150
+ "--logdir", str(output_dir) # Set logdir to outputs
151
  ]
152
 
153
  status_msg += "Running generation...\n"