Spaces:

abiyyufahri
/

GUI-Agent

Sleeping

App Files Files Community

abiyyufahri committed on Jul 24

Commit

1c943af

1 Parent(s): 78565ee

Install error fix attempt 8

Browse files

Files changed (3) hide show

Dockerfile +7 -1
main.py +67 -27
requirements.txt +2 -1

Dockerfile CHANGED Viewed

@@ -10,11 +10,13 @@ USER user
 ENV PATH="/home/user/.local/bin:$PATH"
 WORKDIR /app
 COPY --chown=user requirements.txt ./
 # Install dependencies step by step
 RUN pip install --upgrade pip && \
-    pip install --no-cache-dir packaging ninja wheel setuptools numpy
 # Install PyTorch CPU version
 RUN pip install --no-cache-dir torch==2.2.2+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
@@ -37,6 +39,10 @@ RUN pip install --no-cache-dir \
     opencv-python-headless \
     deepspeed==0.16.0
 COPY --chown=user . .
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

 ENV PATH="/home/user/.local/bin:$PATH"
 WORKDIR /app
+# Copy requirements first for better caching
 COPY --chown=user requirements.txt ./
 # Install dependencies step by step
 RUN pip install --upgrade pip && \
+    pip install --no-cache-dir packaging ninja wheel setuptools "numpy<2.0.0"
 # Install PyTorch CPU version
 RUN pip install --no-cache-dir torch==2.2.2+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
     opencv-python-headless \
     deepspeed==0.16.0
+# Copy all application files
 COPY --chown=user . .
+# Ensure main.py exists and is readable
+RUN ls -la /app/ && cat /app/main.py | head -10
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py CHANGED Viewed

@@ -6,31 +6,55 @@ from io import BytesIO
 import base64
 import torch
 import re
-from transformers import AutoModelForCausalLM, AutoProcessor
-app = FastAPI()
-# Load model untuk CPU
 model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
-# Load processor
-try:
-    processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
-except Exception as e:
-    print(f"Failed to load AutoProcessor: {e}")
-    from transformers import Qwen2VLProcessor
-    processor = Qwen2VLProcessor.from_pretrained(model_name)
-tokenizer = processor.tokenizer
-# Load model dengan CPU support
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float32,  # float32 untuk CPU
-    device_map=None,            # CPU only
-    trust_remote_code=True,     # untuk custom model
-    attn_implementation=None    # skip flash attention
-).eval()
 class Base64Request(BaseModel):
     image_base64: str
@@ -67,7 +91,7 @@ def extract_coordinates(text):
 def cpu_inference(conversation, model, tokenizer, processor):
     """
-    Inference function untuk CPU tanpa GUI-Actor dependencies
     """
     try:
         # Apply chat template
@@ -118,8 +142,27 @@ def cpu_inference(conversation, model, tokenizer, processor):
             "success": False
         }
 @app.post("/click/base64")
 async def predict_click_base64(data: Base64Request):
     try:
         # Decode base64 to image
         image_data = base64.b64decode(data.image_base64.split(",")[-1])
@@ -178,7 +221,8 @@ async def health_check():
         "status": "healthy",
         "model": model_name,
         "device": "cpu",
-        "torch_dtype": "float32"
     }
 @app.post("/click/form")
@@ -187,8 +231,4 @@ async def predict_click_form(
     instruction: str = Form(...)
 ):
     data = Base64Request(image_base64=image_base64, instruction=instruction)
-    return await predict_click_base64(data)
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import base64
 import torch
 import re
+app = FastAPI(title="GUI-Actor API", version="1.0.0")
+# Initialize global variables
+model = None
+processor = None
+tokenizer = None
 model_name = "microsoft/GUI-Actor-2B-Qwen2-VL"
+def load_model():
+    """Load model with proper error handling"""
+    global model, processor, tokenizer
+    try:
+        print("Loading processor...")
+        # Try different approaches to load the processor
+        try:
+            from transformers import Qwen2VLProcessor
+            processor = Qwen2VLProcessor.from_pretrained(model_name)
+            print("Successfully loaded Qwen2VLProcessor")
+        except Exception as e:
+            print(f"Failed to load Qwen2VLProcessor: {e}")
+            from transformers import AutoProcessor
+            processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
+            print("Successfully loaded AutoProcessor")
+        tokenizer = processor.tokenizer
+        print("Loading model...")
+        # Use the correct model class for Qwen2VL
+        from transformers import Qwen2VLForConditionalGeneration
+        model = Qwen2VLForConditionalGeneration.from_pretrained(
+            model_name,
+            torch_dtype=torch.float32,  # float32 untuk CPU
+            device_map=None,            # CPU only
+            trust_remote_code=True,     # untuk custom model
+            attn_implementation=None    # skip flash attention
+        ).eval()
+        print("Model loaded successfully!")
+        return True
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        return False
+# Load model at startup
+model_loaded = load_model()
 class Base64Request(BaseModel):
     image_base64: str
 def cpu_inference(conversation, model, tokenizer, processor):
     """
+    Inference function untuk CPU
     """
     try:
         # Apply chat template
             "success": False
         }
+@app.get("/")
+async def root():
+    return {
+        "message": "GUI-Actor API is running",
+        "status": "healthy",
+        "model_loaded": model_loaded
+    }
 @app.post("/click/base64")
 async def predict_click_base64(data: Base64Request):
+    if not model_loaded:
+        return JSONResponse(
+            content={
+                "error": "Model not loaded properly",
+                "success": False,
+                "x": 0.5,
+                "y": 0.5
+            },
+            status_code=503
+        )
     try:
         # Decode base64 to image
         image_data = base64.b64decode(data.image_base64.split(",")[-1])
         "status": "healthy",
         "model": model_name,
         "device": "cpu",
+        "torch_dtype": "float32",
+        "model_loaded": model_loaded
     }
 @app.post("/click/form")
     instruction: str = Form(...)
 ):
     data = Base64Request(image_base64=image_base64, instruction=instruction)
+    return await predict_click_base64(data)

requirements.txt CHANGED Viewed

@@ -5,11 +5,12 @@ uvicorn[standard]
 transformers>=4.37.0
 datasets
 Pillow
 torch==2.2.2+cpu
 torchvision
 torchaudio
 --index-url https://download.pytorch.org/whl/cpu
 accelerate
 scipy
-numpy
 qwen-vl-utils

 transformers>=4.37.0
 datasets
 Pillow
+# Fix NumPy compatibility issue
+numpy<2.0.0
 torch==2.2.2+cpu
 torchvision
 torchaudio
 --index-url https://download.pytorch.org/whl/cpu
 accelerate
 scipy
 qwen-vl-utils