Update app.py
app.py
CHANGED
The previous app.py (346 lines) was rewritten in full; the updated file follows.
import gradio as gr
import numpy as np
import torch
import random
import os
import spaces
from PIL import Image, ImageOps, ImageFilter
from diffusers import FluxPipeline, DiffusionPipeline
from diffusers.loaders import LoraLoaderMixin
import requests
from io import BytesIO

# Constants
MAX_SEED = np.iinfo(np.int32).max
HF_TOKEN = os.getenv("HF_TOKEN")

# Model configuration
KONTEXT_MODEL = "black-forest-labs/FLUX.1-Kontext-dev"
LORA_MODEL = "thedeoxen/refcontrol-flux-kontext-reference-pose-lora"
TRIGGER_WORD = "refcontrolpose"

# Initialize pipeline with authentication
print("Loading models...")

try:
    # Load Flux Kontext pipeline with HF token
    if HF_TOKEN:
        from diffusers import FluxKontextPipeline
        pipe = FluxKontextPipeline.from_pretrained(
            KONTEXT_MODEL,
            torch_dtype=torch.bfloat16,
            use_auth_token=HF_TOKEN
        )

        # Load the RefControl LoRA
        pipe.load_lora_weights(
            LORA_MODEL,
            adapter_name="refcontrol",
            use_auth_token=HF_TOKEN
        )

        # Move to GPU
        pipe = pipe.to("cuda")

        MODEL_STATUS = "✅ Flux Kontext + RefControl LoRA loaded successfully"
        print(MODEL_STATUS)

    else:
        raise ValueError("HF_TOKEN not found in environment variables")

except Exception as e:
    print(f"Error loading models: {e}")
    # Fallback to base model without LoRA
    try:
        pipe = DiffusionPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
            use_auth_token=HF_TOKEN if HF_TOKEN else True
        ).to("cuda")
        MODEL_STATUS = "⚠️ Running in fallback mode (FLUX.1-dev without LoRA)"
    except:
        MODEL_STATUS = "❌ Failed to load models. Please check HF_TOKEN"
        pipe = None

def prepare_images_for_kontext(reference_image, pose_image, target_size=768):
    """
    Prepare reference and pose images for Kontext processing.
    Following the RefControl format: reference (left) | pose (right)
    """
    if reference_image is None or pose_image is None:
        return None

    # Convert to RGB
    reference_image = reference_image.convert("RGB")
    pose_image = pose_image.convert("RGB")

    # Calculate dimensions maintaining aspect ratio
    ref_ratio = reference_image.width / reference_image.height
    pose_ratio = pose_image.width / pose_image.height

    # Set heights to target size
    height = target_size
    ref_width = int(height * ref_ratio)
    pose_width = int(height * pose_ratio)

    # Ensure dimensions are divisible by 8 (FLUX requirement)
    ref_width = (ref_width // 8) * 8
    pose_width = (pose_width // 8) * 8
    height = (height // 8) * 8

    # Resize images
    reference_resized = reference_image.resize((ref_width, height), Image.LANCZOS)
    pose_resized = pose_image.resize((pose_width, height), Image.LANCZOS)

    # Concatenate horizontally: reference | pose
    total_width = ref_width + pose_width
    concatenated = Image.new('RGB', (total_width, height))
    concatenated.paste(reference_resized, (0, 0))
    concatenated.paste(pose_resized, (ref_width, 0))

    return concatenated

def extract_pose_edges(image):
    """
    Extract edge/pose information from an image.
    """
    if image is None:
        return None

    # Convert to grayscale
    gray = image.convert("L")

    # Apply edge detection
    edges = gray.filter(ImageFilter.FIND_EDGES)

    # Enhance contrast
    edges = ImageOps.autocontrast(edges)

    # Invert to get black lines on white
    edges = ImageOps.invert(edges)

    # Smooth the result
    edges = edges.filter(ImageFilter.SMOOTH_MORE)

    # Convert back to RGB
    return edges.convert("RGB")

+
@spaces.GPU(duration=60)
|
| 129 |
+
def generate_pose_transfer(
|
| 130 |
+
reference_image,
|
| 131 |
+
pose_image,
|
| 132 |
+
prompt="",
|
| 133 |
+
negative_prompt="",
|
| 134 |
+
seed=42,
|
| 135 |
+
randomize_seed=False,
|
| 136 |
guidance_scale=3.5,
|
| 137 |
+
num_inference_steps=28,
|
| 138 |
+
lora_scale=1.0,
|
| 139 |
+
enhance_pose=False,
|
| 140 |
progress=gr.Progress(track_tqdm=True)
|
| 141 |
):
|
| 142 |
"""
|
| 143 |
+
Main generation function using RefControl LoRA.
|
| 144 |
"""
|
| 145 |
|
| 146 |
+
if pipe is None:
|
| 147 |
+
return None, 0, "Model not loaded. Please check HF_TOKEN"
|
| 148 |
+
|
| 149 |
if reference_image is None or pose_image is None:
|
| 150 |
+
raise gr.Error("Please upload both reference and pose images")
|
| 151 |
|
| 152 |
+
# Randomize seed if requested
|
| 153 |
if randomize_seed:
|
| 154 |
seed = random.randint(0, MAX_SEED)
|
| 155 |
|
| 156 |
+
# Enhance pose if requested
|
| 157 |
+
if enhance_pose:
|
| 158 |
+
pose_image = extract_pose_edges(pose_image)
|
| 159 |
|
| 160 |
+
# Prepare concatenated input
|
| 161 |
+
concatenated_input = prepare_images_for_kontext(reference_image, pose_image)
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
if concatenated_input is None:
|
| 164 |
+
raise gr.Error("Failed to process images")
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
+
# Construct prompt with trigger word
|
| 167 |
if prompt:
|
| 168 |
+
full_prompt = f"{TRIGGER_WORD}, {prompt}"
|
|
|
|
| 169 |
else:
|
| 170 |
+
full_prompt = f"{TRIGGER_WORD}, transfer the pose from the right image to the subject in the left image while maintaining their identity, clothing, and style"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
+
# Add instruction for the model
|
| 173 |
+
full_prompt += ". The left image shows the reference subject, the right image shows the target pose."
|
| 174 |
|
| 175 |
+
# Set generator for reproducibility
|
| 176 |
+
generator = torch.Generator("cuda").manual_seed(seed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
+
try:
|
| 179 |
+
# Generate with LoRA
|
| 180 |
+
with torch.autocast("cuda"):
|
| 181 |
+
if hasattr(pipe, 'set_adapters'):
|
| 182 |
+
# Set LoRA adapter strength
|
| 183 |
+
pipe.set_adapters(["refcontrol"], adapter_weights=[lora_scale])
|
| 184 |
+
|
| 185 |
+
# Generate image
|
| 186 |
+
result = pipe(
|
| 187 |
+
image=concatenated_input,
|
| 188 |
+
prompt=full_prompt,
|
| 189 |
+
negative_prompt=negative_prompt,
|
| 190 |
+
guidance_scale=guidance_scale,
|
| 191 |
+
num_inference_steps=num_inference_steps,
|
| 192 |
+
generator=generator,
|
| 193 |
+
width=concatenated_input.width,
|
| 194 |
+
height=concatenated_input.height,
|
| 195 |
+
).images[0]
|
| 196 |
+
|
| 197 |
+
return result, seed, concatenated_input
|
| 198 |
+
|
| 199 |
+
except Exception as e:
|
| 200 |
+
raise gr.Error(f"Generation failed: {str(e)}")
|
| 201 |
|
# CSS styling
css = """
#col-container {
    margin: 0 auto;
    max-width: 1280px;
}
.header {
    text-align: center;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    padding: 20px;
    border-radius: 12px;
    margin-bottom: 20px;
}
.header h1 {
    color: white;
    margin: 0;
}
.status-box {
    padding: 10px;
    border-radius: 8px;
    margin: 10px 0;
    font-weight: bold;
}
.input-image {
    border: 2px solid #e0e0e0;
    border-radius: 8px;
    overflow: hidden;
}
.result-image {
    border: 3px solid #4CAF50;
    border-radius: 8px;
    overflow: hidden;
}
"""

# Create Gradio interface
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:

    with gr.Column(elem_id="col-container"):
        # Header with authentication
        with gr.Row():
            with gr.Column():
                gr.HTML("""
                <div class="header">
                    <h1>RefControl Flux Kontext - Reference Pose Transfer</h1>
                    <p style="color: white;">Powered by thedeoxen/refcontrol-flux-kontext-reference-pose-lora</p>
                </div>
                """)

                # Model status
                gr.Markdown(f"""
                <div class="status-box" style="background: {'#d4edda' if '✅' in MODEL_STATUS else '#f8d7da'};">
                {MODEL_STATUS}
                </div>
                """)

        # Authentication info
        if not HF_TOKEN:
            gr.Markdown("""
            ### 🔐 Authentication Required
            Please set your Hugging Face token to use this Space:
            1. Go to Settings → Variables and secrets
            2. Add `HF_TOKEN` with your Hugging Face token
            3. Restart the Space
            """)
            gr.LoginButton("Sign in with Hugging Face", size="lg")

        # Main interface
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📥 Input Images")

                # Reference image
                reference_image = gr.Image(
                    label="Reference Image (Subject to transform)",
                    type="pil",
                    elem_classes=["input-image"],
                    height=300
                )

                # Pose image
                pose_image = gr.Image(
                    label="Pose Control (Line art or skeleton)",
                    type="pil",
                    elem_classes=["input-image"],
                    height=300
                )

                # Pose extraction tool
                with gr.Accordion("🔧 Extract Pose from Image", open=False):
                    extract_source = gr.Image(
                        label="Source image for pose extraction",
                        type="pil",
                        height=200
                    )
                    extract_btn = gr.Button("Extract Pose", size="sm")

                # Prompts
                prompt = gr.Textbox(
                    label=f"Prompt (trigger word '{TRIGGER_WORD}' added automatically)",
                    placeholder="e.g., wearing elegant dress, professional photography",
                    lines=2
                )

                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    placeholder="e.g., blurry, low quality, distorted",
                    lines=2
                )

                # Generate button
                generate_btn = gr.Button(
                    "🎨 Generate Pose Transfer",
                    variant="primary",
                    size="lg"
                )

                # Advanced settings
                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    with gr.Row():
                        seed = gr.Slider(
                            label="Seed",
                            minimum=0,
                            maximum=MAX_SEED,
                            step=1,
                            value=42
                        )
                        randomize_seed = gr.Checkbox(
                            label="Randomize",
                            value=True
                        )

                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1.0,
                        maximum=10.0,
                        step=0.5,
                        value=3.5,
                        info="How strictly to follow the pose"
                    )

                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=20,
                        maximum=50,
                        step=1,
                        value=28
                    )

                    lora_scale = gr.Slider(
                        label="LoRA Strength",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.1,
                        value=1.0,
                        info="RefControl LoRA influence"
                    )

                    enhance_pose = gr.Checkbox(
                        label="Auto-enhance pose edges",
                        value=False
                    )

            with gr.Column(scale=1):
                gr.Markdown("### 🖼️ Generated Result")

                # Result image
                result_image = gr.Image(
                    label="Generated Image",
                    elem_classes=["result-image"],
                    interactive=False,
                    height=500
                )

                # Info display
                with gr.Row():
                    seed_used = gr.Number(
                        label="Seed Used",
                        interactive=False
                    )

                # Debug view
                with gr.Accordion("🔍 Debug View", open=False):
                    concat_preview = gr.Image(
                        label="Input Concatenation (Reference | Pose)",
                        height=200
                    )

                # Reuse buttons
                with gr.Row():
                    reuse_ref_btn = gr.Button("♻️ Use as Reference", size="sm")
                    reuse_pose_btn = gr.Button("🔄 Extract & Use as Pose", size="sm")
                    clear_btn = gr.Button("🗑️ Clear All", size="sm")

        # Examples
        gr.Markdown("### 💡 Example Prompts")
        gr.Examples(
            examples=[
                ["professional portrait, studio lighting, high quality"],
                ["wearing red dress, outdoor garden setting"],
                ["business attire, corporate headshot"],
                ["casual streetwear, urban background"],
                ["athletic wear, dynamic action shot"],
                ["elegant evening gown, luxury setting"],
            ],
            inputs=[prompt]
        )

        # Instructions
        with gr.Accordion("📖 How to Use", open=False):
            gr.Markdown("""
            1. **Upload Reference Image**: The person/subject you want to transform
            2. **Upload Pose Image**: Line art or skeleton pose to follow
            3. **Optional**: Add descriptive prompt for style/setting
            4. **Click Generate**: Wait for the magic to happen!

            **Tips:**
            - Use clear, high-contrast pose images for best results
            - The model preserves identity from reference while following pose
            - Adjust LoRA strength to balance identity vs pose adherence
            - Higher guidance scale = stricter pose following
            """)

    # Event handlers
    generate_btn.click(
        fn=generate_pose_transfer,
        inputs=[
            reference_image,
            pose_image,
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
            lora_scale,
            enhance_pose
        ],
        outputs=[result_image, seed_used, concat_preview]
    )

    extract_btn.click(
        fn=extract_pose_edges,
        inputs=[extract_source],
        outputs=[pose_image]
    )

    reuse_ref_btn.click(
        fn=lambda x: x,
        inputs=[result_image],
        outputs=[reference_image]
    )

    reuse_pose_btn.click(
        fn=extract_pose_edges,
        inputs=[result_image],
        outputs=[pose_image]
    )

    clear_btn.click(
        fn=lambda: [None, None, "", "", 42, None, None],
        outputs=[
            reference_image,
            pose_image,
            prompt,
            negative_prompt,
            seed_used,
            result_image,
            concat_preview
        ]
    )

# Launch the app
if __name__ == "__main__":
    demo.queue()
    demo.launch()
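
For quick local experimentation with the input format, the sketch below mirrors the side-by-side layout that prepare_images_for_kontext builds (reference on the left, pose map on the right). It is a minimal standalone example, not part of the Space; the helper name build_refcontrol_input and the file names ref.jpg and pose.png are placeholders, and no model is loaded.

from PIL import Image

def build_refcontrol_input(ref_path, pose_path, target_size=768):
    # Load both images and normalize to RGB
    ref = Image.open(ref_path).convert("RGB")
    pose = Image.open(pose_path).convert("RGB")

    # Match heights, keep aspect ratios, snap all dimensions to multiples of 8
    height = (target_size // 8) * 8
    ref_w = (int(height * ref.width / ref.height) // 8) * 8
    pose_w = (int(height * pose.width / pose.height) // 8) * 8

    # Reference on the left, pose control on the right
    canvas = Image.new("RGB", (ref_w + pose_w, height))
    canvas.paste(ref.resize((ref_w, height), Image.LANCZOS), (0, 0))
    canvas.paste(pose.resize((pose_w, height), Image.LANCZOS), (ref_w, 0))
    return canvas

if __name__ == "__main__":
    # Placeholder paths; replace with your own images
    build_refcontrol_input("ref.jpg", "pose.png").save("kontext_input.png")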
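Once the Space is running, it can also be driven programmatically with the gradio_client package. The sketch below is an assumption-heavy example: the Space id your-username/refcontrol-flux-kontext is a placeholder, and the /generate_pose_transfer endpoint name assumes Gradio's default api_name derived from the handler function; check client.view_api() against the actual deployment before relying on it.

from gradio_client import Client, handle_file

# Placeholder Space id; point this at the actual deployment
client = Client("your-username/refcontrol-flux-kontext")

result = client.predict(
    handle_file("ref.jpg"),                      # reference_image
    handle_file("pose.png"),                     # pose_image
    "professional portrait, studio lighting",    # prompt
    "",                                          # negative_prompt
    42,                                          # seed
    True,                                        # randomize_seed
    3.5,                                         # guidance_scale
    28,                                          # num_inference_steps
    1.0,                                         # lora_scale
    False,                                       # enhance_pose
    api_name="/generate_pose_transfer",          # assumed default endpoint name
)

# Typically a tuple: (generated image path, seed used, concatenated-input preview path)
print(result)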