aiqtech committed on
Commit b22198b · verified · 1 Parent(s): 12007e9

Update app.py

Files changed (1)
  1. app.py +274 -134

app.py CHANGED
@@ -3,204 +3,344 @@ import numpy as np
  import spaces
  import torch
  import random
- from PIL import Image
- #from kontext_pipeline import FluxKontextPipeline
  from diffusers import FluxKontextPipeline
  from diffusers.utils import load_image

- # Load Kontext model
  MAX_SEED = np.iinfo(np.int32).max

- pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")

- def concatenate_images(images, direction="horizontal"):
      """
-     Concatenate multiple PIL images either horizontally or vertically.

      Args:
-         images: List of PIL Images
-         direction: "horizontal" or "vertical"

      Returns:
-         PIL Image: Concatenated image
      """
-     if not images:
          return None

-     # Filter out None images
-     valid_images = [img for img in images if img is not None]

-     if not valid_images:
          return None

-     if len(valid_images) == 1:
-         return valid_images[0].convert("RGB")

-     # Convert all images to RGB
-     valid_images = [img.convert("RGB") for img in valid_images]

-     if direction == "horizontal":
-         # Calculate total width and max height
-         total_width = sum(img.width for img in valid_images)
-         max_height = max(img.height for img in valid_images)
-
-         # Create new image
-         concatenated = Image.new('RGB', (total_width, max_height), (255, 255, 255))
-
-         # Paste images
-         x_offset = 0
-         for img in valid_images:
-             # Center image vertically if heights differ
-             y_offset = (max_height - img.height) // 2
-             concatenated.paste(img, (x_offset, y_offset))
-             x_offset += img.width
-
-     else: # vertical
-         # Calculate max width and total height
-         max_width = max(img.width for img in valid_images)
-         total_height = sum(img.height for img in valid_images)
-
-         # Create new image
-         concatenated = Image.new('RGB', (max_width, total_height), (255, 255, 255))
-
-         # Paste images
-         y_offset = 0
-         for img in valid_images:
-             # Center image horizontally if widths differ
-             x_offset = (max_width - img.width) // 2
-             concatenated.paste(img, (x_offset, y_offset))
-             y_offset += img.height
-
-     return concatenated

  @spaces.GPU
- def infer(input_images, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, progress=gr.Progress(track_tqdm=True)):

      if randomize_seed:
          seed = random.randint(0, MAX_SEED)

-     # Handle input_images - it could be a single image or a list of images
-     if input_images is None:
-         raise gr.Error("Please upload at least one image.")

-     # If it's a single image (not a list), convert to list
-     if not isinstance(input_images, list):
-         input_images = [input_images]

-     # Filter out None images
-     valid_images = [img[0] for img in input_images if img is not None]

-     if not valid_images:
-         raise gr.Error("Please upload at least one valid image.")

-     # Concatenate images horizontally
-     concatenated_image = concatenate_images(valid_images, "horizontal")

-     if concatenated_image is None:
-         raise gr.Error("Failed to process the input images.")

-     # original_width, original_height = concatenated_image.size

-     # if original_width >= original_height:
-     #     new_width = 1024
-     #     new_height = int(original_height * (new_width / original_width))
-     #     new_height = round(new_height / 64) * 64
-     # else:
-     #     new_height = 1024
-     #     new_width = int(original_width * (new_height / original_height))
-     #     new_width = round(new_width / 64) * 64

-     #concatenated_image_resized = concatenated_image.resize((new_width, new_height), Image.LANCZOS)

-     final_prompt = f"From the provided reference images, create a unified, cohesive image such that {prompt}. Maintain the identity and characteristics of each subject while adjusting their proportions, scale, and positioning to create a harmonious, naturally balanced composition. Blend and integrate all elements seamlessly with consistent lighting, perspective, and style.the final result should look like a single naturally captured scene where all subjects are properly sized and positioned relative to each other, not assembled from multiple sources."
-
-     image = pipe(
-         image=concatenated_image,
-         prompt=final_prompt,
-         guidance_scale=guidance_scale,
-         width=concatenated_image.size[0],
-         height=concatenated_image.size[1],
-         generator=torch.Generator().manual_seed(seed),
-     ).images[0]
-
-     return image, seed, gr.update(visible=True)

- css="""
  #col-container {
      margin: 0 auto;
-     max-width: 960px;
  }
  """

  with gr.Blocks(css=css) as demo:

      with gr.Column(elem_id="col-container"):
-         gr.Markdown(f"""# FLUX.1 Kontext [dev] - Multi-Image
- Flux Kontext with multiple image input support - compose a new image with elements from multiple images using Kontext [dev]
          """)
          with gr.Row():
-             with gr.Column():
-                 input_images = gr.Gallery(
-                     label="Upload image(s) for editing",
-                     show_label=True,
-                     elem_id="gallery_input",
-                     columns=3,
-                     rows=2,
-                     object_fit="contain",
-                     height="auto",
-                     file_types=['image'],
-                     type='pil'
-                 )
-
                  with gr.Row():
-                     prompt = gr.Text(
-                         label="Prompt",
-                         show_label=False,
-                         info = "describe the desired output composition",
-                         max_lines=1,
-                         placeholder="e.g. the dog from the left image sits on the bench from the right image",
-                         container=False,
                      )
-                     run_button = gr.Button("Run", scale=0)

-                 with gr.Accordion("Advanced Settings", open=False):
-
                      seed = gr.Slider(
                          label="Seed",
                          minimum=0,
                          maximum=MAX_SEED,
                          step=1,
-                         value=0,
                      )

-                     randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                      guidance_scale = gr.Slider(
                          label="Guidance Scale",
-                         minimum=1,
-                         maximum=10,
-                         step=0.1,
-                         value=2.5,
-                     )

-             with gr.Column():
-                 result = gr.Image(label="Result", show_label=False, interactive=False)
-                 reuse_button = gr.Button("Reuse this image", visible=False)
-
-     gr.on(
-         triggers=[run_button.click, prompt.submit],
-         fn = infer,
-         inputs = [input_images, prompt, seed, randomize_seed, guidance_scale],
-         outputs = [result, seed, reuse_button]
      )

      reuse_button.click(
-         fn = lambda image: [image] if image is not None else [], # Convert single image to list for gallery
-         inputs = [result],
-         outputs = [input_images]
      )

  demo.launch()
 
  import spaces
  import torch
  import random
+ from PIL import Image, ImageOps
  from diffusers import FluxKontextPipeline
  from diffusers.utils import load_image

+ # Load Kontext model with Reference Pose LoRA
  MAX_SEED = np.iinfo(np.int32).max

+ # Initialize the pipeline
+ pipe = FluxKontextPipeline.from_pretrained(
+     "black-forest-labs/FLUX.1-Kontext-dev",
+     torch_dtype=torch.bfloat16
+ ).to("cuda")

+ # Load the Reference Pose LoRA (if available)
+ # Note: You'll need to add the actual LoRA loading code here
+ # pipe.load_lora_weights("path/to/refcontrol-pose-lora", adapter_name="refcontrol")
+
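The LoRA load left as a comment above would, with diffusers' standard LoRA API, look roughly like the sketch below; the repository path is the placeholder from the comment, not a published model.

    # Sketch only -- "path/to/refcontrol-pose-lora" is a placeholder path.
    pipe.load_lora_weights(
        "path/to/refcontrol-pose-lora",
        adapter_name="refcontrol",
    )
    pipe.set_adapters(["refcontrol"], adapter_weights=[1.0])  # activate the adapter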
+ def prepare_pose_reference_pair(reference_image, pose_image):
      """
+     Prepare the reference image and pose control map for Kontext processing.

      Args:
+         reference_image: PIL Image - The source image with identity/style to preserve
+         pose_image: PIL Image - The pose/line art control map

      Returns:
+         tuple: (concatenated image with reference on left and pose on right, ref_width, pose_width)
      """
+     if reference_image is None or pose_image is None:
          return None

+     # Convert images to RGB
+     reference_image = reference_image.convert("RGB")
+     pose_image = pose_image.convert("RGB")
+
+     # Resize images to have the same height for better concatenation
+     target_height = 768 # Standard height for Flux
+
+     # Calculate proportional widths
+     ref_ratio = reference_image.width / reference_image.height
+     pose_ratio = pose_image.width / pose_image.height
+
+     ref_width = int(target_height * ref_ratio)
+     pose_width = int(target_height * pose_ratio)
+
+     # Ensure dimensions are divisible by 8 (required for Flux)
+     ref_width = (ref_width // 8) * 8
+     pose_width = (pose_width // 8) * 8
+
+     # Resize images
+     reference_resized = reference_image.resize((ref_width, target_height), Image.LANCZOS)
+     pose_resized = pose_image.resize((pose_width, target_height), Image.LANCZOS)
+
+     # Create concatenated image: reference on left, pose on right
+     total_width = ref_width + pose_width
+     concatenated = Image.new('RGB', (total_width, target_height), (255, 255, 255))
+
+     # Paste images
+     concatenated.paste(reference_resized, (0, 0))
+     concatenated.paste(pose_resized, (ref_width, 0))

+     return concatenated, ref_width, pose_width
+
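The sizing arithmetic above keeps each pane's aspect ratio at a fixed 768-pixel height and floors widths to multiples of 8. A quick illustrative check (the input dimensions are hypothetical, not part of the app):

    # A 1080x1920 portrait reference at target_height = 768:
    target_height = 768
    ref_width = int(target_height * (1080 / 1920))  # int(768 * 0.5625) = 432
    ref_width = (ref_width // 8) * 8                # 432 is already a multiple of 8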
+ def process_pose_image(pose_image):
+     """
+     Process the pose image to enhance line art visibility if needed.
+     """
+     if pose_image is None:
          return None

+     pose_image = pose_image.convert("RGB")

+     # Optional: Enhance contrast for better pose detection
+     # You can add image processing here if the pose needs enhancement

+     return pose_image
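The enhancement step is left as a comment above; one Pillow-only way to realize it, if faint line art needs strengthening, is a sketch like this (ImageOps is already imported at the top of the new file):

    # Sketch: stretch the histogram so faint lines read as near-black on white.
    pose_image = ImageOps.autocontrast(pose_image)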
 
  @spaces.GPU
+ def infer_pose_transfer(
+     reference_image,
+     pose_image,
+     prompt="",
+     seed=42,
+     randomize_seed=False,
+     guidance_scale=3.5,
+     strength=0.85,
+     progress=gr.Progress(track_tqdm=True)
+ ):
+     """
+     Transfer pose from control image to reference image using Flux Kontext.
+     """
+
+     if reference_image is None or pose_image is None:
+         raise gr.Error("Please upload both a reference image and a pose image.")

      if randomize_seed:
          seed = random.randint(0, MAX_SEED)

+     # Process pose image if needed
+     pose_image = process_pose_image(pose_image)

+     # Prepare the concatenated input
+     concatenated_input, ref_width, pose_width = prepare_pose_reference_pair(
+         reference_image,
+         pose_image
+     )

+     if concatenated_input is None:
+         raise gr.Error("Failed to process the input images.")

+     # Construct the prompt with the trigger word
+     base_prompt = "refcontrolpose"

+     if prompt:
+         # User-provided prompt with trigger word
+         full_prompt = f"{base_prompt}, {prompt}"
+     else:
+         # Default prompt for pose transfer
+         full_prompt = f"{base_prompt}, transfer the pose from the right image to the subject in the left image, maintaining the identity, clothing, and style of the original subject while adopting the exact pose and body position shown in the control map"

+     # Add instruction for the model to understand the layout
+     full_prompt += ". The left side shows the reference with identity to preserve, the right side shows the target pose to follow."

+     # Generate the image
+     with torch.autocast("cuda"):
+         result = pipe(
+             image=concatenated_input,
+             prompt=full_prompt,
+             guidance_scale=guidance_scale,
+             num_inference_steps=28,
+             width=concatenated_input.size[0],
+             height=concatenated_input.size[1],
+             generator=torch.Generator("cuda").manual_seed(seed),
+         ).images[0]

+     # Optional: Crop the result to show only the transformed subject
+     # You might want to crop out the concatenated input and show only the result

+     return result, seed, concatenated_input
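Two details in the function above are worth flagging: the strength parameter is accepted but never passed to the pipeline call, and the crop suggested by the trailing comment is unimplemented. If the generated image preserved the side-by-side layout of the input (an untested assumption), the crop could be sketched as:

    # Hypothetical helper: keep only the left, reference-sized region.
    def crop_reference_half(img, ref_width):
        return img.crop((0, 0, ref_width, img.height))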
 
+ def create_pose_from_image(image):
+     """
+     Helper function to extract pose/line art from an image.
+     This is a placeholder - you might want to integrate with OpenPose or similar.
+     """
+     if image is None:
+         return None
+
+     # Placeholder: In production, you'd use OpenPose or similar
+     # For now, we'll just convert to grayscale as a simple edge detection
+     from PIL import ImageFilter, ImageOps
+
+     image = image.convert("L") # Convert to grayscale
+     image = image.filter(ImageFilter.FIND_EDGES) # Simple edge detection
+     image = ImageOps.invert(image) # Invert to get black lines on white
+     image = image.convert("RGB") # Convert back to RGB
+
+     return image
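For the "OpenPose or similar" integration the docstring mentions, a common choice is the controlnet_aux package; a sketch under that assumption (the package is not a dependency of this commit):

    # Assumed dependency: pip install controlnet_aux
    from controlnet_aux import OpenposeDetector

    detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")

    def extract_openpose(image):
        # Returns a PIL image of the detected pose skeleton.
        return detector(image)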
 
+ # CSS styling
+ css = """
  #col-container {
      margin: 0 auto;
+     max-width: 1200px;
+ }
+ .image-container {
+     border: 2px solid #e0e0e0;
+     border-radius: 8px;
+     padding: 10px;
+     background: #f9f9f9;
+ }
+ .result-container {
+     border: 3px solid #4CAF50;
+     border-radius: 8px;
+     padding: 10px;
+     background: #f0f8f0;
  }
  """

+ # Create Gradio interface
  with gr.Blocks(css=css) as demo:

      with gr.Column(elem_id="col-container"):
+         gr.Markdown("""
+         # 🎭 FLUX.1 Kontext Reference Pose Transfer
+
+         Transfer any pose to your subject while preserving their identity and style!
+
+         **How it works:**
+         1. Upload a **reference image** (your subject with identity/style to preserve)
+         2. Upload a **pose image** (line art or pose skeleton to follow)
+         3. The model will generate your subject in the new pose
+
+         Uses the **refcontrolpose** LoRA for precise pose control.
          """)
+
          with gr.Row():
+             with gr.Column(scale=1):
+                 gr.Markdown("### 📸 Input Images")

                  with gr.Row():
+                     with gr.Column():
+                         reference_image = gr.Image(
+                             label="Reference Image (Subject)",
+                             type="pil",
+                             elem_classes=["image-container"],
+                             height=300
+                         )
+                         gr.Markdown("*Upload the image with the subject/style to preserve*")
+
+                     with gr.Column():
+                         pose_image = gr.Image(
+                             label="Pose Control (Line Art)",
+                             type="pil",
+                             elem_classes=["image-container"],
+                             height=300
+                         )
+                         gr.Markdown("*Upload the pose/line art to follow*")
+
+                 # Optional: Add pose extraction tool
+                 with gr.Accordion("🔧 Extract Pose from Image", open=False):
+                     source_for_pose = gr.Image(
+                         label="Source Image for Pose Extraction",
+                         type="pil",
+                         height=200
                      )
+                     extract_pose_btn = gr.Button("Extract Pose", size="sm")
+
+                 prompt = gr.Textbox(
+                     label="Additional Prompt (Optional)",
+                     placeholder="e.g., wearing a red dress, in a garden, professional photography",
+                     info="Add details about the desired output (trigger word 'refcontrolpose' is added automatically)",
+                     lines=2
+                 )
+
+                 with gr.Row():
+                     run_button = gr.Button("🎨 Transfer Pose", variant="primary", scale=2)
+                     clear_button = gr.Button("🗑️ Clear", scale=1)
+
+                 with gr.Accordion("⚙️ Advanced Settings", open=False):

                      seed = gr.Slider(
                          label="Seed",
                          minimum=0,
                          maximum=MAX_SEED,
                          step=1,
+                         value=42,
                      )

+                     randomize_seed = gr.Checkbox(
+                         label="Randomize seed",
+                         value=True
+                     )

                      guidance_scale = gr.Slider(
                          label="Guidance Scale",
+                         minimum=1.0,
+                         maximum=10.0,
+                         step=0.5,
+                         value=3.5,
+                         info="Higher values follow the pose more strictly"
+                     )

+                     strength = gr.Slider(
+                         label="Transformation Strength",
+                         minimum=0.1,
+                         maximum=1.0,
+                         step=0.05,
+                         value=0.85,
+                         info="How much to change from the original"
+                     )
+
+             with gr.Column(scale=1):
+                 gr.Markdown("### 🖼️ Results")
+
+                 result = gr.Image(
+                     label="Generated Result",
+                     elem_classes=["result-container"],
+                     interactive=False,
+                     height=400
+                 )
+
+                 with gr.Accordion("📊 Generation Info", open=False):
+                     used_seed = gr.Number(label="Seed Used", interactive=False)
+                     input_preview = gr.Image(
+                         label="Concatenated Input (Reference | Pose)",
+                         height=200
+                     )
+
+                 with gr.Row():
+                     save_button = gr.Button("💾 Save Result", size="sm")
+                     reuse_button = gr.Button("♻️ Use as Reference", size="sm")

+         # Examples (each row supplies the single prompt input)
+         gr.Markdown("### 💡 Examples")
+         gr.Examples(
+             examples=[
+                 ["A person in business attire, standing confidently"],
+                 ["A dancer in elegant costume, performing a ballet leap"],
+                 ["An athlete in sportswear, doing a martial arts kick"],
+                 ["A model in casual outfit, sitting on a chair"],
+             ],
+             inputs=[prompt],
+             label="Example Prompts"
+         )
+
+     # Event handlers
+     run_button.click(
+         fn=infer_pose_transfer,
+         inputs=[
+             reference_image,
+             pose_image,
+             prompt,
+             seed,
+             randomize_seed,
+             guidance_scale,
+             strength
+         ],
+         outputs=[result, used_seed, input_preview]
+     )
+
+     extract_pose_btn.click(
+         fn=create_pose_from_image,
+         inputs=[source_for_pose],
+         outputs=[pose_image]
      )

      reuse_button.click(
+         fn=lambda img: img,
+         inputs=[result],
+         outputs=[reference_image]
+     )
+
+     clear_button.click(
+         fn=lambda: [None, None, "", None, 42, None],
+         outputs=[reference_image, pose_image, prompt, result, used_seed, input_preview]
      )
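save_button is created in the layout above but never wired to a handler in this commit. One possible wiring, sketched with a hypothetical handler and a gr.File output that would also need to be added to the layout:

    # Hypothetical handler: write the result to a temporary PNG for download.
    import tempfile

    def save_result(img):
        if img is None:
            return None
        path = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
        img.save(path)
        return path

    # save_button.click(fn=save_result, inputs=[result], outputs=[download_file])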

+ # Launch the app
+ demo.queue()
  demo.launch()