Update engineers/deformes3D_thinker.py
Browse files
- engineers/deformes3D_thinker.py +19 -51
engineers/deformes3D_thinker.py
CHANGED
|
@@ -30,82 +30,50 @@
|
|
| 30 |
#
|
| 31 |
# Version 2.1.1
|
| 32 |
|
|
|
|
| 33 |
import logging
|
| 34 |
-
from pathlib import Path
|
| 35 |
from PIL import Image
|
| 36 |
import gradio as gr
|
| 37 |
-
import torch
|
| 38 |
-
import numpy as np
|
| 39 |
|
| 40 |
-
|
| 41 |
-
from […module and imported names of this removed line were lost in extraction]
|
| 42 |
-
from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem
|
| 43 |
|
| 44 |
logger = logging.getLogger(__name__)
|
| 45 |
|
| 46 |
class Deformes3DThinker:
|
| 47 |
"""
|
| 48 |
-
The tactical specialist that handles cinematic decision-making by
|
| 49 |
-
|
| 50 |
"""
|
| 51 |
|
| 52 |
def __init__(self):
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
logger.info("Deformes3DThinker initialized and linked to LTX pipeline's enhancement models.")
|
| 59 |
|
| 60 |
def get_enhanced_motion_prompt(self, global_prompt: str, story_history: str,
|
| 61 |
past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
|
| 62 |
past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> str:
|
| 63 |
"""
|
| 64 |
-
|
| 65 |
-
image
|
| 66 |
"""
|
| 67 |
try:
|
| 68 |
-
image_caption_model = self.ltx_pipeline.prompt_enhancer_image_caption_model
|
| 69 |
-
image_caption_processor = self.ltx_pipeline.prompt_enhancer_image_caption_processor
|
| 70 |
-
llm_model = self.ltx_pipeline.prompt_enhancer_llm_model
|
| 71 |
-
llm_tokenizer = self.ltx_pipeline.prompt_enhancer_llm_tokenizer
|
| 72 |
-
|
| 73 |
-
if not all([image_caption_model, image_caption_processor, llm_model, llm_tokenizer]):
|
| 74 |
-
logger.warning("LTX prompt enhancement models not found. Using fallback.")
|
| 75 |
-
return f"A cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
|
| 76 |
-
|
| 77 |
present_image = Image.open(present_keyframe_path).convert("RGB")
|
| 78 |
-
images_list = [present_image]
|
| 79 |
-
|
| 80 |
-
# Gerar a caption da imagem de referência
|
| 81 |
-
inputs = image_caption_processor(
|
| 82 |
-
["<DETAILED_CAPTION>"] * len(images_list), images_list, return_tensors="pt"
|
| 83 |
-
).to(image_caption_model.device)
|
| 84 |
-
generated_ids = image_caption_model.generate(**inputs, max_new_tokens=1024, do_sample=False, num_beams=3)
|
| 85 |
-
image_captions = image_caption_processor.batch_decode(generated_ids, skip_special_tokens=True)
|
| 86 |
-
|
| 87 |
-
# Criar o prompt para o LLM de enhancement
|
| 88 |
-
user_content = f"user_prompt: {future_scene_desc}\nimage_caption: {image_captions[0]}"
|
| 89 |
-
messages = [{"role": "system", "content": I2V_CINEMATIC_PROMPT}, {"role": "user", "content": user_content}]
|
| 90 |
-
|
| 91 |
-
text = llm_tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 92 |
-
model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
|
| 93 |
-
|
| 94 |
-
# Chamar o LLM e decodificar a resposta
|
| 95 |
-
output_ids = llm_model.generate(**model_inputs, max_new_tokens=256)
|
| 96 |
-
input_ids_len = model_inputs.input_ids.shape[1]
|
| 97 |
-
decoded_prompts = llm_tokenizer.batch_decode(output_ids[:, input_ids_len:], skip_special_tokens=True)
|
| 98 |
-
|
| 99 |
-
enhanced_prompt = decoded_prompts[0]
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
except Exception as e:
|
| 105 |
-
logger.error(f"The Film Director (Deformes3D Thinker) failed […remainder of this removed line was lost in extraction]
|
| 106 |
fallback_prompt = f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
|
| 107 |
logger.info(f"Deformes3DThinker Fallback -> Motion Prompt: '{fallback_prompt}'")
|
| 108 |
return fallback_prompt
|
| 109 |
|
| 110 |
# --- Singleton Instance ---
|
| 111 |
-
deformes3d_thinker_singleton = Deformes3DThinker()
|
|
|
|
| 30 |
#
|
| 31 |
# Version 2.1.1
|
| 32 |
|
| 33 |
+
|
| 34 |
import logging
|
|
|
|
| 35 |
from PIL import Image
|
| 36 |
import gradio as gr
|
|
|
|
|
|
|
| 37 |
|
| 38 |
+
# A única dependência agora é o nosso novo manager dedicado
|
| 39 |
+
from managers.prompt_enhancer_manager import prompt_enhancer_manager_singleton
|
|
|
|
| 40 |
|
| 41 |
logger = logging.getLogger(__name__)
|
| 42 |
|
| 43 |
class Deformes3DThinker:
|
| 44 |
"""
|
| 45 |
+
The tactical specialist that handles cinematic decision-making by delegating
|
| 46 |
+
prompt generation to the specialized PromptEnhancerManager.
|
| 47 |
"""
|
| 48 |
|
| 49 |
def __init__(self):
|
| 50 |
+
if not prompt_enhancer_manager_singleton:
|
| 51 |
+
raise RuntimeError("Deformes3DThinker requires the PromptEnhancerManager to be initialized.")
|
| 52 |
+
self.enhancer = prompt_enhancer_manager_singleton
|
| 53 |
+
logger.info("Deformes3DThinker initialized and linked to PromptEnhancerManager.")
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def get_enhanced_motion_prompt(self, global_prompt: str, story_history: str,
|
| 56 |
past_keyframe_path: str, present_keyframe_path: str, future_keyframe_path: str,
|
| 57 |
past_scene_desc: str, present_scene_desc: str, future_scene_desc: str) -> str:
|
| 58 |
"""
|
| 59 |
+
Gets a refined, cinematic motion prompt by delegating to the PromptEnhancerManager.
|
| 60 |
+
It uses the present image as visual context and the future scene description as the goal.
|
| 61 |
"""
|
| 62 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
present_image = Image.open(present_keyframe_path).convert("RGB")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
# Delega a tarefa complexa para o especialista
|
| 66 |
+
enhanced_prompt = self.enhancer.generate_enhanced_prompt(
|
| 67 |
+
image=present_image,
|
| 68 |
+
user_prompt=future_scene_desc
|
| 69 |
+
)
|
| 70 |
+
return enhanced_prompt
|
| 71 |
|
| 72 |
except Exception as e:
|
| 73 |
+
logger.error(f"The Film Director (Deformes3D Thinker) failed: {e}. Using fallback.", exc_info=True)
|
| 74 |
fallback_prompt = f"A smooth, continuous cinematic transition from '{present_scene_desc}' to '{future_scene_desc}'."
|
| 75 |
logger.info(f"Deformes3DThinker Fallback -> Motion Prompt: '{fallback_prompt}'")
|
| 76 |
return fallback_prompt
|
| 77 |
|
| 78 |
# --- Singleton Instance ---
|
| 79 |
+
deformes3d_thinker_singleton = Deformes3DThinker()
|