prithivMLmods committed
Commit 4180f8a · verified · 1 Parent(s): 5b603ac

Update app.py

Files changed (1)
  1. app.py +65 -17
app.py CHANGED
@@ -101,7 +101,6 @@ if torch.cuda.is_available():
 print("Using device:", device)
 
 # --- Imports for Custom Pipeline ---
-# Note: These require the local 'qwenimage' folder to be present
 from diffusers import FlowMatchEulerDiscreteScheduler
 from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
 from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
@@ -129,7 +128,7 @@ pipe.load_lora_weights("tarn59/apply_texture_qwen_image_edit_2509",
                        weight_name="apply_texture_v2_qwen_image_edit_2509.safetensors",
                        adapter_name="texture-edit")
 
-# 2. Fuse Objects (Note: Filename contains non-ascii characters, handled as string)
+# 2. Fuse Objects
 pipe.load_lora_weights("dx8152/Qwen-Image-Edit-2509-Fusion",
                        weight_name="溶图.safetensors",
                        adapter_name="fuse-objects")
@@ -140,7 +139,7 @@ pipe.load_lora_weights("Alissonerdx/BFS-Best-Face-Swap",
                        adapter_name="face-swap")
 
 
-# Attempt to set Flash Attention 3 (Requires H100 or compatible setup)
+# Attempt to set Flash Attention 3
 try:
     pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
     print("Flash Attention 3 Processor set successfully.")
@@ -172,7 +171,7 @@ def update_dimensions_on_upload(image):
 
 @spaces.GPU(duration=60)
 def infer(
-    input_image,
+    input_gallery_items,
     prompt,
     lora_adapter,
     seed,
@@ -181,9 +180,24 @@ def infer(
     steps,
     progress=gr.Progress(track_tqdm=True)
 ):
-    if input_image is None:
+    """
+    Input:
+        input_gallery_items: Since type="pil", this is a List[Tuple[PIL.Image, str]] or List[PIL.Image]
+    """
+    if not input_gallery_items:
         raise gr.Error("Please upload an image to edit.")
 
+    # Extract the image from the Gallery input
+    # When type='pil', Gradio Gallery returns a list of tuples (image, caption) or just images
+    first_item = input_gallery_items[0]
+
+    if isinstance(first_item, tuple):
+        # Format is (PIL.Image, Caption)
+        input_pil = first_item[0]
+    else:
+        # Format is PIL.Image directly
+        input_pil = first_item
+
     # Map Dropdown choices to internal Adapter names
     adapters_map = {
         "Texture Edit": "texture-edit",
@@ -205,7 +219,7 @@ def infer(
     generator = torch.Generator(device=device).manual_seed(seed)
     negative_prompt = "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
 
-    original_image = input_image.convert("RGB")
+    original_image = input_pil.convert("RGB")
     width, height = update_dimensions_on_upload(original_image)
 
     result = pipe(
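For context, a sketch of the seeding that feeds the torch.Generator call above, assuming a MAX_SEED constant and the randomize_seed flag from the infer() signature; the Space's actual randomization code is outside this hunk.

import random
import torch

MAX_SEED = 2**32 - 1  # assumed bound, for illustration only

def make_generator(seed: int, randomize_seed: bool, device: str = "cuda"):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Returning the seed lets the UI display the value actually used.
    return torch.Generator(device=device).manual_seed(seed), seed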
@@ -222,14 +236,25 @@ def infer(
     return result, seed
 
 @spaces.GPU(duration=60)
-def infer_example(input_image, prompt, lora_adapter):
-    if input_image is None:
+def infer_example(input_gallery_items, prompt, lora_adapter):
+    # input_gallery_items will be the list structure from gr.Examples
+    if not input_gallery_items:
         return None, 0
-    input_pil = input_image.convert("RGB")
-    guidance_scale = 4.0 # Slightly higher default for better adherence
-    steps = 30
-    result, seed = infer(input_pil, prompt, lora_adapter, 0, True, guidance_scale, steps)
-    return result, seed
+
+    # When passed from gr.Examples with type="pil" and a Gallery component,
+    # we might need to handle file paths if cache_examples=False or PIL if processed.
+    # However, since we use infer_example as the fn, we mimic the infer logic.
+
+    # For examples with type="pil", gradio usually converts paths to PIL.
+    return infer(
+        input_gallery_items,
+        prompt,
+        lora_adapter,
+        seed=0,
+        randomize_seed=True,
+        guidance_scale=4.0,
+        steps=30
+    )
 
 
 css="""
@@ -247,7 +272,15 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
 
     with gr.Row(equal_height=True):
         with gr.Column():
-            input_image = gr.Gallery(label="Input Images", show_label=False, type="pil", interactive=True)
+            # Changed to Gallery to support potential multi-image flows (conceptually) and match user request
+            input_image = gr.Gallery(
+                label="Input Images",
+                show_label=False,
+                type="pil",
+                interactive=True,
+                height=290,
+                columns=1
+            )
 
             prompt = gr.Text(
                 label="Edit Prompt",
@@ -272,11 +305,26 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
             guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=4.0)
             steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=30)
 
+        # FIX: Correctly formatted inputs for gr.Gallery in Examples.
+        # Each example input corresponding to the Gallery component must be a LIST of images.
         gr.Examples(
             examples=[
-                ["examples/texture_sample.jpg", "Change the material of the object to rusted metal texture.", "Texture Edit"],
-                ["examples/fusion_sample.jpg", "Fuse the product naturally into the background.", "Fuse-Objects"],
-                ["examples/face_sample.jpg", "Swap the face with a cyberpunk robot face.", "Face-Swap"],
+                # Format: [ [Image_List], Prompt, Adapter ]
+                [
+                    ["examples/texture_sample.jpg"],
+                    "Change the material of the object to rusted metal texture.",
+                    "Texture Edit"
+                ],
+                [
+                    ["examples/fusion_sample.jpg"],
+                    "Fuse the product naturally into the background.",
+                    "Fuse-Objects"
+                ],
+                [
+                    ["examples/face_sample.jpg"],
+                    "Swap the face with a cyberpunk robot face.",
+                    "Face-Swap"
+                ],
             ],
             inputs=[input_image, prompt, lora_adapter],
             outputs=[output_image, seed],
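The key fix in this hunk is that every row passed to gr.Examples must supply a list of images for the Gallery slot, not a bare path. A standalone sketch of just that requirement; the image path is hypothetical:

import gradio as gr

with gr.Blocks() as sketch:
    gallery = gr.Gallery(label="Input Images", type="pil", interactive=True)
    prompt = gr.Text(label="Edit Prompt")
    gr.Examples(
        examples=[
            # One row: [ list-of-images-for-the-Gallery, prompt ]
            [["cat.png"], "Apply a rusted metal texture."],  # "cat.png" is illustrative
        ],
        inputs=[gallery, prompt],
    )

sketch.launch()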
 