aiqtech committed (verified)
Commit ee482b5 · Parent(s): 7239641

Update app.py

Switch the Kontext canvas from 768 px to 512 px, round the output size down to multiples of 64 with a 1024 px cap, pass explicit width/height to the Kontext call, and add LoRA-adapter logging and out-of-memory handling.

Files changed (1): app.py (+55 −19)
app.py CHANGED
@@ -80,7 +80,7 @@ def load_pipeline():
 pipe, MODEL_STATUS = load_pipeline()
 print(MODEL_STATUS)
 
-def prepare_images_for_kontext(reference_image, pose_image, target_size=768):
+def prepare_images_for_kontext(reference_image, pose_image, target_size=512):
     """
     Prepare reference and pose images for Kontext processing.
     Following the RefControl format: reference (left) | pose (right)
@@ -175,12 +175,36 @@ def generate_pose_transfer(
     if enhance_pose:
         pose_image = extract_pose_edges(pose_image)
 
-    # Prepare concatenated input
-    concatenated_input = prepare_images_for_kontext(reference_image, pose_image)
+    # Prepare concatenated input with fixed size
+    concatenated_input = prepare_images_for_kontext(reference_image, pose_image, target_size=512)
 
     if concatenated_input is None:
         raise gr.Error("Failed to process images")
 
+    # Ensure dimensions are model-compatible
+    width, height = concatenated_input.size
+    # Round to nearest 64 pixels for stability
+    width = (width // 64) * 64
+    height = (height // 64) * 64
+
+    # Limit maximum size to prevent memory issues
+    max_size = 1024
+    if width > max_size:
+        ratio = max_size / width
+        width = max_size
+        height = int(height * ratio)
+        height = (height // 64) * 64
+
+    if height > max_size:
+        ratio = max_size / height
+        height = max_size
+        width = int(width * ratio)
+        width = (width // 64) * 64
+
+    # Resize if needed
+    if (width, height) != concatenated_input.size:
+        concatenated_input = concatenated_input.resize((width, height), Image.LANCZOS)
+
     # Construct prompt with trigger word
     if prompt:
         full_prompt = f"{TRIGGER_WORD}, {prompt}"
@@ -195,43 +219,55 @@ def generate_pose_transfer(
 
     try:
         # Check if we have LoRA capabilities
-        has_lora = hasattr(pipe, 'set_adapters') and "RefControl" in MODEL_STATUS
+        has_lora = hasattr(pipe, 'set_adapters') and "LoRA" in MODEL_STATUS
 
-        with torch.autocast("cuda"):
-            if has_lora:
-                # Try to set LoRA adapter strength
-                try:
-                    pipe.set_adapters(["refcontrol"], adapter_weights=[lora_scale])
-                except Exception as e:
-                    print(f"Could not set LoRA adapter: {e}")
-
-            # Generate image based on pipeline type
+        # Set LoRA if available
+        if has_lora:
+            try:
+                pipe.set_adapters(["refcontrol"], adapter_weights=[lora_scale])
+                print(f"LoRA adapter set with strength: {lora_scale}")
+            except Exception as e:
+                print(f"LoRA adapter not set: {e}")
+
+        print(f"Generating with size: {width}x{height}")
+        print(f"Prompt: {full_prompt[:100]}...")
+
+        # Generate image
+        with torch.cuda.amp.autocast(dtype=torch.bfloat16):
             if "Kontext" in MODEL_STATUS:
                 # Use Kontext pipeline
                 result = pipe(
                     image=concatenated_input,
                     prompt=full_prompt,
-                    negative_prompt=negative_prompt if negative_prompt else None,
+                    negative_prompt=negative_prompt if negative_prompt else "",
                     guidance_scale=guidance_scale,
                     num_inference_steps=num_inference_steps,
                     generator=generator,
-                    width=concatenated_input.width,
-                    height=concatenated_input.height,
+                    width=width,
+                    height=height,
                 ).images[0]
             else:
-                # Use standard FLUX pipeline (image-to-image)
+                # Use standard FLUX pipeline
                 result = pipe(
                     prompt=full_prompt,
+                    negative_prompt=negative_prompt if negative_prompt else "",
                     image=concatenated_input,
                     guidance_scale=guidance_scale,
                     num_inference_steps=num_inference_steps,
                     generator=generator,
-                    strength=0.85,  # For img2img mode
+                    strength=0.85,
                 ).images[0]
-
+
+        print("Generation successful!")
         return result, seed, concatenated_input
 
+    except RuntimeError as e:
+        if "out of memory" in str(e).lower():
+            raise gr.Error("GPU out of memory. Try reducing image size or inference steps.")
+        else:
+            raise gr.Error(f"Generation failed: {str(e)}")
     except Exception as e:
+        print(f"Error details: {e}")
         raise gr.Error(f"Generation failed: {str(e)}")
 
 # CSS styling
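
The sizing change is the core of this commit: round each dimension of the concatenated canvas down to a multiple of 64, then cap the longer side at 1024 px while preserving the aspect ratio. A minimal standalone sketch of that logic (the helper name fit_dimensions and the example sizes are illustrative, not part of app.py):

    def fit_dimensions(size, multiple=64, max_size=1024):
        """Round (width, height) down to multiples of `multiple` and cap the
        longer side at `max_size`, mirroring the sizing logic added to
        generate_pose_transfer in this commit."""
        width, height = size
        width = (width // multiple) * multiple
        height = (height // multiple) * multiple

        if width > max_size:
            ratio = max_size / width
            width = max_size
            height = (int(height * ratio) // multiple) * multiple

        if height > max_size:
            ratio = max_size / height
            height = max_size
            width = (int(width * ratio) // multiple) * multiple

        return width, height

    # Example: a 1536x768 reference|pose canvas is reduced to 1024x512
    assert fit_dimensions((1536, 768)) == (1024, 512)

In the diff, the resulting size is used twice: to resize concatenated_input with Image.LANCZOS and as the explicit width/height passed to the Kontext pipeline call.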
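
The body of prepare_images_for_kontext is not part of this hunk; its docstring only promises a reference-left / pose-right canvas at target_size. A plausible Pillow sketch of that layout, written purely as an assumption about what the helper does (not the committed implementation):

    from PIL import Image

    def concat_reference_and_pose(reference_image, pose_image, target_size=512):
        # Assumed layout from the docstring: reference (left) | pose (right),
        # both scaled to the same target height.
        def scale_to_height(img):
            ratio = target_size / img.height
            new_width = max(1, int(img.width * ratio))
            return img.convert("RGB").resize((new_width, target_size), Image.LANCZOS)

        ref = scale_to_height(reference_image)
        pose = scale_to_height(pose_image)
        canvas = Image.new("RGB", (ref.width + pose.width, target_size), "white")
        canvas.paste(ref, (0, 0))
        canvas.paste(pose, (ref.width, 0))
        return canvas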