zain1133604 committed on
Commit
8a7566d
·
1 Parent(s): 46e5a8a

first commit

Files changed (6)
  1. Dockerfile +42 -0
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +243 -0
  4. requirements.txt +49 -0
  5. segformer_b1.onnx +3 -0
  6. yolov8n.pt +3 -0
Dockerfile ADDED
@@ -0,0 +1,42 @@
+ # ==============================================
+ # 🚀 SMART CCTV — Dockerfile
+ # ==============================================
+
+ # ---- Base Image ----
+ # (CUDA-enabled PyTorch runtime image for GPU support; change to "python:3.11-slim" if CPU only)
+ FROM pytorch/pytorch:2.5.1-cuda12.1-cudnn9-runtime
+
+ # ---- System Setup ----
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ # Install system-level deps (for OpenCV, ffmpeg, etc.)
+ RUN apt-get update && apt-get install -y \
+     ffmpeg \
+     libsm6 \
+     libxext6 \
+     libgl1 \
+     && rm -rf /var/lib/apt/lists/*
+
+ # ---- Working Directory ----
+ WORKDIR /app
+
+ # ---- Copy Requirements ----
+ COPY requirements.txt .
+
+ # ---- Install Python Dependencies ----
+ RUN pip install --upgrade pip \
+     && pip install --no-cache-dir -r requirements.txt
+
+ # ---- Copy App Code ----
+ COPY . .
+
+ # ---- Environment Variables ----
+ ENV TRANSFORMERS_CACHE=/tmp/hf_cache
+ RUN mkdir -p /tmp/hf_cache
+
+ # ---- Expose Port ----
+ EXPOSE 7860
+
+ # ---- Command to Run the App ----
+ # (You can change --host 0.0.0.0 --port 7860 to whatever you like)
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "120"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (11.8 kB).
 
app.py ADDED
@@ -0,0 +1,243 @@
+ import torch
+ import cv2
+ import numpy as np
+ import os
+ import tempfile
+ import shutil
+ from ultralytics import YOLO
+ # --- UPDATED IMPORTS for SegFormer PyTorch model ---
+ from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
+ from transformers import VideoMAEForVideoClassification, VideoMAEFeatureExtractor
+ from fastapi import FastAPI, UploadFile, File
+ from fastapi.responses import FileResponse, JSONResponse
+ from starlette.background import BackgroundTask
+
+ # --- Configuration ---
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ print(f"⚙️ Using device: {DEVICE}")
+
+ # Set a writable directory for model caches on Hugging Face Spaces
+ os.environ['TRANSFORMERS_CACHE'] = '/tmp/hf_cache'
+ os.makedirs('/tmp/hf_cache', exist_ok=True)
+
+ # --- 1️⃣ Load Models ---
+
+ # Semantic Segmentation (SegFormer PyTorch) <-- CHANGE HERE
+ SEG_MODEL_NAME = "nvidia/segformer-b1-finetuned-ade-512-512"
+ seg_processor = SegformerImageProcessor.from_pretrained(SEG_MODEL_NAME)
+ seg_model = SegformerForSemanticSegmentation.from_pretrained(
+     SEG_MODEL_NAME
+ ).to(DEVICE).eval()
+
+ # Object Detection + Tracking (YOLOv8 + ByteTrack)
+ detector = YOLO("yolov8n.pt")
+
+ # Behavior Recognition (VideoMAE)
+ act_processor = VideoMAEFeatureExtractor.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics")
+ act_model = VideoMAEForVideoClassification.from_pretrained("MCG-NJU/videomae-base-finetuned-kinetics").to(DEVICE).eval()
+ ACTION_CLIP_LEN = 16
+
+ # This dictionary stores the last predicted label for each ID
+ id_last_labels = {}
+
+ # -------------------------------
+ # 2️⃣ FastAPI Setup
+ # -------------------------------
+ app = FastAPI(title="Smart CCTV Video Processor")
+
+ # -------------------------------
+ # 3️⃣ Utility Functions
+ # -------------------------------
+
+ def cleanup_task(dir_path):
+     """Utility to create a BackgroundTask for deleting the temp directory."""
+     # Ignore errors (like PermissionError on Windows) during cleanup
+     return BackgroundTask(shutil.rmtree, dir_path, ignore_errors=True)
+
+ # --- REMOVED: preprocess_segformer() and run_segformer_onnx() ---
+ # They are replaced by the functions below using the HuggingFace SegFormer pipeline.
+
+ def run_segformer_pytorch(frame_rgb: np.ndarray, original_shape) -> np.ndarray:
+     """
+     Runs PyTorch SegFormer inference and returns the human mask.
+     This replaces the ONNX code.
+     """
+     # 1. Preprocess
+     # The processor handles resizing, normalization, and tensor conversion automatically
+     inputs = seg_processor(images=frame_rgb, return_tensors="pt").to(DEVICE)
+
+     # 2. Inference
+     with torch.no_grad():
+         seg_logits = seg_model(**inputs).logits
+
+     # 3. Post-process
+     # Resize logits to original image size
+     seg_logits = torch.nn.functional.interpolate(
+         seg_logits,
+         size=(original_shape[0], original_shape[1]),  # H, W
+         mode='bilinear',
+         align_corners=False
+     )
+
+     pred_mask = torch.argmax(seg_logits, dim=1).squeeze().cpu().numpy()
+
+     # ADE20K class 12 = person
+     human_mask = (pred_mask == 12).astype(np.uint8)
+
+     return human_mask
+
+ def run_action_recognition(clip_buffer: list) -> str:
+     """Runs VideoMAE on a list of RGB frames (clip_buffer)."""
+     # Resize all frames to 224x224 (VideoMAE input requirement)
+     clip_resized = [cv2.resize(f, (224, 224), interpolation=cv2.INTER_LINEAR) for f in clip_buffer]
+
+     # Convert to tensor input for VideoMAE
+     inputs = act_processor(clip_resized, return_tensors="pt").to(DEVICE)
+
+     with torch.no_grad():
+         outputs = act_model(**inputs)
+     pred = outputs.logits.argmax(-1).item()
+     label = act_model.config.id2label.get(pred, "Unknown Action")
+     return label
+
+ # -------------------------------
+ # 4️⃣ Main API Endpoint
+ # -------------------------------
+
+ @app.get("/")
+ async def root():
+     return {"message": "Smart CCTV Backend: Upload a video to /process-video/"}
+
+ @app.post("/process-video/")
+ async def process_video_endpoint(file: UploadFile = File(...)):
+     global id_last_labels
+
+     if not file.content_type.startswith('video/'):
+         await file.close()
+         return JSONResponse(status_code=400, content={"error": "Invalid file type. Only video files are supported."})
+
+     # MANUAL TEMP DIR SETUP: Fixes PermissionError by controlling cleanup
+     tmpdir = tempfile.mkdtemp()
+     input_path = os.path.join(tmpdir, file.filename)
+     output_path = os.path.join(tmpdir, "smart_cctv_output.mp4")
+
+     cap, out = None, None
+     try:
+         # Save the uploaded file to the temporary directory
+         with open(input_path, "wb") as buffer:
+             # IMPORTANT: Stream the file content to the buffer
+             while content := await file.read(1024 * 1024):  # Read in chunks (1MB)
+                 buffer.write(content)
+
+         await file.close()
+
+         cap = cv2.VideoCapture(input_path)
+
+         if not cap.isOpened():
+             return JSONResponse(status_code=500, content={"error": "Could not open video file."})
+
+         # Video properties
+         fps = cap.get(cv2.CAP_PROP_FPS)
+         width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+         if width <= 0 or height <= 0:
+             return JSONResponse(status_code=500, content={"error": "Invalid video dimensions."})
+
+         # Output Video Setup
+         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+         out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+         # Buffers for Per-ID Action Clips
+         id_clip_buffers = {}
+         id_last_labels.clear()
+         action_clip_len = ACTION_CLIP_LEN
+
+         # Process Each Frame
+         frame_count = 0
+         while cap.isOpened():
+             ret, frame = cap.read()
+             if not ret:
+                 break
+
+             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+             original_shape = frame.shape
+
+             # --- Segmentation (PyTorch) --- <-- CHANGE HERE
+             human_mask = run_segformer_pytorch(frame_rgb, original_shape)
+
+             # --- Detection + Tracking (YOLO + ByteTrack) ---
+             results = detector.track(frame, persist=True, tracker="bytetrack.yaml", verbose=False)
+             boxes = results[0].boxes.xyxy.cpu().numpy() if results[0].boxes is not None else []
+             ids = results[0].boxes.id.cpu().numpy() if results[0].boxes.id is not None else []
+
+             # --- For each tracked person ---
+             for box, track_id in zip(boxes, ids):
+                 x1, y1, x2, y2 = map(int, box)
+                 track_id = int(track_id)
+
+                 # Crop with a slight buffer
+                 buffer_px = 5
+                 y1 = max(0, y1 - buffer_px)
+                 y2 = min(height, y2 + buffer_px)
+                 x1 = max(0, x1 - buffer_px)
+                 x2 = min(width, x2 + buffer_px)
+
+                 person_crop = frame_rgb[y1:y2, x1:x2]
+
+                 if person_crop.size == 0:
+                     continue
+
+                 # Maintain a clip buffer per person ID
+                 id_clip_buffers.setdefault(track_id, [])
+                 id_clip_buffers[track_id].append(person_crop)
+
+                 # Keep only latest N frames
+                 if len(id_clip_buffers[track_id]) > action_clip_len:
+                     id_clip_buffers[track_id] = id_clip_buffers[track_id][-action_clip_len:]
+
+                 # --- Action Recognition when clip ready ---
+                 label = id_last_labels.get(track_id, "Analyzing...")
+
+                 if len(id_clip_buffers[track_id]) == action_clip_len:
+                     # Run inference every N frames to save time/resources
+                     if frame_count % (action_clip_len // 2) == 0:
+                         label = run_action_recognition(id_clip_buffers[track_id])
+                         id_last_labels[track_id] = label
+
+                 # --- Draw box + label ---
+                 color = (0, 255, 0)
+                 cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+                 cv2.putText(frame, f"ID {track_id}: {label}", (x1, y1 - 10),
+                             cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)
+
+             # --- Segmentation Overlay ---
+             # NOTE: We no longer need to resize the mask here as the PyTorch function handles interpolation
+             mask_colored = cv2.applyColorMap((human_mask * 255).astype(np.uint8), cv2.COLORMAP_JET)
+
+             # Blend safely
+             overlay = cv2.addWeighted(frame, 0.7, mask_colored, 0.3, 0)
+
+             out.write(overlay)
+             frame_count += 1
+
+         # --- Cleanup (Before Return) ---
+         cap.release()
+         out.release()
+
+         # --- Return the processed video file ---
+         return FileResponse(
+             path=output_path,
+             media_type='video/mp4',
+             filename=f"processed_{os.path.basename(input_path)}",
+             # Use background task to delete the temporary folder AFTER the response is sent
+             background=cleanup_task(tmpdir)
+         )
+
+     except Exception as e:
+         # Ensure cleanup on failure
+         if cap: cap.release()
+         if out: out.release()
+         # Clean up the manually created temp directory
+         shutil.rmtree(tmpdir, ignore_errors=True)
+         # Re-raise the exception for FastAPI/Uvicorn to handle
+         raise e
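For reference, a minimal client sketch for the /process-video/ endpoint above. It assumes the service is reachable on the port exposed in the Dockerfile (7860) and uses the requests package pinned in requirements.txt; the host URL and the local file name sample.mp4 are placeholders, not part of the commit.

import requests

# Assumed local URL; adjust host/port to wherever the container is running.
BASE_URL = "http://localhost:7860"

# Upload a video as multipart form data; the form field name "file"
# matches the UploadFile parameter of process_video_endpoint in app.py.
with open("sample.mp4", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/process-video/",
        files={"file": ("sample.mp4", f, "video/mp4")},
        timeout=600,  # processing a whole video can take several minutes
    )

resp.raise_for_status()

# The endpoint returns the annotated MP4 written by cv2.VideoWriter.
with open("processed_sample.mp4", "wb") as out_file:
    out_file.write(resp.content)
print("Saved processed_sample.mp4")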
requirements.txt ADDED
@@ -0,0 +1,49 @@
+ # ==========================
+ # 🧠 CORE DEEP LEARNING
+ # ==========================
+ # The +cu121 builds below are hosted on the PyTorch CUDA wheel index, not PyPI
+ --extra-index-url https://download.pytorch.org/whl/cu121
+ torch==2.5.1+cu121
+ torchvision==0.20.1+cu121
+ torchaudio==2.5.1+cu121
+
+ # ==========================
+ # 🤖 TRANSFORMERS + HF HUB
+ # ==========================
+ transformers==4.48.3
+ huggingface-hub==0.34.4
+ timm==0.8.13.dev0
+ safetensors==0.6.2
+
+ # ==========================
+ # 🧩 YOLOv8 + ByteTrack
+ # ==========================
+ ultralytics==8.3.183
+ lap==0.5.12
+ lapx==0.5.11.post1
+ shapely==2.1.2
+
+ # ==========================
+ # 🎥 VISION + UTILITIES
+ # ==========================
+ opencv-python==4.12.0.88
+ numpy==2.1.2
+ tqdm==4.67.1
+ pillow==11.0.0
+ scipy==1.16.1
+ imageio==2.37.0
+
+ # ==========================
+ # 🌐 FASTAPI BACKEND
+ # ==========================
+ fastapi==0.116.1
+ uvicorn==0.35.0
+ starlette==0.47.2
+ python-multipart==0.0.20
+ pydantic==2.11.7
+ pydantic-core==2.33.2
+ typing-extensions==4.14.1
+
+ # ==========================
+ # ⚙️ MISC (Safe to include)
+ # ==========================
+ requests==2.32.5
+ orjson==3.11.1
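A quick sanity-check sketch for an environment built from these pins; it mirrors the DEVICE selection in app.py and only prints versions, assuming the packages installed cleanly.

import torch
import transformers
import ultralytics
import cv2

# Same fallback logic as app.py: use CUDA when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("torch:", torch.__version__, "| device:", device)
print("transformers:", transformers.__version__)
print("ultralytics:", ultralytics.__version__)
print("opencv:", cv2.__version__)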
segformer_b1.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5df6ae360ad1ab7f3085f3a510522f25ae1b6c4a2f4479a40852cb5be64305b4
+ size 55144053
yolov8n.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+ size 6549796