# Source: orynxml-agents / app / huggingface_models_backup.py
# Uploaded by Speedofmastery ("Upload folder using huggingface_hub"), commit 88f3fce (verified).
"""
Hugging Face Models Integration for OpenManus AI Agent
Comprehensive integration with Hugging Face Inference API for all model categories
"""
import asyncio
import base64
import io
import json
import logging
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Union
import aiohttp
import PIL.Image
from pydantic import BaseModel
logger = logging.getLogger(__name__)
class ModelCategory(Enum):
    """Task categories used to tag catalog entries.

    Each member's value is a lowercase, hyphen-separated task slug. Members are
    grouped below by theme; the grouping is purely organizational.
    """

    # --- Core AI tasks ---
    TEXT_GENERATION = "text-generation"
    TEXT_TO_IMAGE = "text-to-image"
    IMAGE_TO_TEXT = "image-to-text"
    AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
    TEXT_TO_SPEECH = "text-to-speech"
    IMAGE_CLASSIFICATION = "image-classification"
    OBJECT_DETECTION = "object-detection"
    FEATURE_EXTRACTION = "feature-extraction"
    SENTENCE_SIMILARITY = "sentence-similarity"
    TRANSLATION = "translation"
    SUMMARIZATION = "summarization"
    QUESTION_ANSWERING = "question-answering"
    FILL_MASK = "fill-mask"
    TOKEN_CLASSIFICATION = "token-classification"
    ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
    AUDIO_CLASSIFICATION = "audio-classification"
    CONVERSATIONAL = "conversational"

    # --- Video and motion ---
    TEXT_TO_VIDEO = "text-to-video"
    VIDEO_TO_TEXT = "video-to-text"
    VIDEO_CLASSIFICATION = "video-classification"
    VIDEO_GENERATION = "video-generation"
    MOTION_GENERATION = "motion-generation"
    DEEPFAKE_DETECTION = "deepfake-detection"

    # --- Code and software development ---
    CODE_GENERATION = "code-generation"
    CODE_COMPLETION = "code-completion"
    CODE_EXPLANATION = "code-explanation"
    CODE_TRANSLATION = "code-translation"
    CODE_REVIEW = "code-review"
    APP_GENERATION = "app-generation"
    API_GENERATION = "api-generation"
    DATABASE_GENERATION = "database-generation"

    # --- 3D and AR/VR ---
    TEXT_TO_3D = "text-to-3d"
    IMAGE_TO_3D = "image-to-3d"
    THREE_D_GENERATION = "3d-generation"
    MESH_GENERATION = "mesh-generation"
    TEXTURE_GENERATION = "texture-generation"
    AR_CONTENT = "ar-content"
    VR_ENVIRONMENT = "vr-environment"

    # --- Document processing ---
    OCR = "ocr"
    DOCUMENT_ANALYSIS = "document-analysis"
    PDF_PROCESSING = "pdf-processing"
    LAYOUT_ANALYSIS = "layout-analysis"
    TABLE_EXTRACTION = "table-extraction"
    HANDWRITING_RECOGNITION = "handwriting-recognition"
    FORM_PROCESSING = "form-processing"

    # --- Multimodal AI ---
    VISION_LANGUAGE = "vision-language"
    MULTIMODAL_REASONING = "multimodal-reasoning"
    CROSS_MODAL_GENERATION = "cross-modal-generation"
    VISUAL_QUESTION_ANSWERING = "visual-question-answering"
    IMAGE_TEXT_MATCHING = "image-text-matching"
    MULTIMODAL_CHAT = "multimodal-chat"

    # --- Specialized media AI ---
    MUSIC_GENERATION = "music-generation"
    VOICE_CLONING = "voice-cloning"
    STYLE_TRANSFER = "style-transfer"
    SUPER_RESOLUTION = "super-resolution"
    IMAGE_INPAINTING = "image-inpainting"
    IMAGE_OUTPAINTING = "image-outpainting"
    BACKGROUND_REMOVAL = "background-removal"
    FACE_RESTORATION = "face-restoration"

    # --- Content creation ---
    CREATIVE_WRITING = "creative-writing"
    STORY_GENERATION = "story-generation"
    SCREENPLAY_WRITING = "screenplay-writing"
    POETRY_GENERATION = "poetry-generation"
    BLOG_WRITING = "blog-writing"
    MARKETING_COPY = "marketing-copy"

    # --- Game development ---
    GAME_ASSET_GENERATION = "game-asset-generation"
    CHARACTER_GENERATION = "character-generation"
    LEVEL_GENERATION = "level-generation"
    DIALOGUE_GENERATION = "dialogue-generation"

    # --- Science and research ---
    PROTEIN_FOLDING = "protein-folding"
    MOLECULE_GENERATION = "molecule-generation"
    SCIENTIFIC_WRITING = "scientific-writing"
    RESEARCH_ASSISTANCE = "research-assistance"
    DATA_ANALYSIS = "data-analysis"

    # --- Business and productivity ---
    EMAIL_GENERATION = "email-generation"
    PRESENTATION_CREATION = "presentation-creation"
    REPORT_GENERATION = "report-generation"
    MEETING_SUMMARIZATION = "meeting-summarization"
    PROJECT_PLANNING = "project-planning"

    # --- AI teacher and education ---
    AI_TUTORING = "ai-tutoring"
    EDUCATIONAL_CONTENT = "educational-content"
    LESSON_PLANNING = "lesson-planning"
    CONCEPT_EXPLANATION = "concept-explanation"
    HOMEWORK_ASSISTANCE = "homework-assistance"
    QUIZ_GENERATION = "quiz-generation"
    CURRICULUM_DESIGN = "curriculum-design"
    LEARNING_ASSESSMENT = "learning-assessment"
    ADAPTIVE_LEARNING = "adaptive-learning"
    SUBJECT_TEACHING = "subject-teaching"
    MATH_TUTORING = "math-tutoring"
    SCIENCE_TUTORING = "science-tutoring"
    LANGUAGE_TUTORING = "language-tutoring"
    HISTORY_TUTORING = "history-tutoring"
    CODING_INSTRUCTION = "coding-instruction"
    EXAM_PREPARATION = "exam-preparation"
    STUDY_GUIDE_CREATION = "study-guide-creation"
    EDUCATIONAL_GAMES = "educational-games"
    LEARNING_ANALYTICS = "learning-analytics"
    PERSONALIZED_LEARNING = "personalized-learning"
@dataclass
class HFModel:
    """One catalog entry describing a Hugging Face model.

    The first four fields are required; the remaining flags default to the
    most conservative setting (not endpoint compatible, no auth, no token
    limit recorded, no streaming).
    """

    # Display name of the model.
    name: str
    # Model identifier; catalog entries use "owner/repo" style Hub ids.
    model_id: str
    # Task category this entry is filed under.
    category: ModelCategory
    # Short free-text description.
    description: str
    # Flag set by the catalog; presumably marks models usable through the
    # hosted inference endpoint -- confirm against callers.
    endpoint_compatible: bool = False
    # Flag set by the catalog; presumably marks gated models that need an
    # access token -- confirm against callers.
    requires_auth: bool = False
    # Token limit recorded for the model, or None when the catalog gives none.
    max_tokens: Optional[int] = None
    # Whether the catalog marks the model as supporting streamed output.
    supports_streaming: bool = False
class HuggingFaceModels:
    """Static catalog of Hugging Face models, grouped into per-task lists.

    Every attribute is a list of ``HFModel`` entries. Each list is built from
    a compact table of rows so that the values shared by the whole group
    (``endpoint_compatible=True`` everywhere, plus per-group constants such
    as the category) are written once instead of once per entry.

    NOTE(review): the Hub ids are reproduced as found in the original
    catalog; their availability on the Hub is not checked here.
    """

    # Text Generation Models (Latest and Popular)
    # Row: (name, model_id, description, requires_auth, max_tokens); all stream.
    TEXT_GENERATION_MODELS = [
        HFModel(
            name,
            model_id,
            ModelCategory.TEXT_GENERATION,
            description,
            True,
            requires_auth,
            max_tokens,
            True,
        )
        for name, model_id, description, requires_auth, max_tokens in (
            ("MiniMax-M2", "MiniMaxAI/MiniMax-M2",
             "Latest high-performance text generation model", False, 4096),
            ("Kimi Linear 48B", "moonshotai/Kimi-Linear-48B-A3B-Instruct",
             "Large instruction-tuned model with linear attention", False, 8192),
            ("GPT-OSS 20B", "openai/gpt-oss-20b",
             "Open-source GPT model by OpenAI", False, 4096),
            ("GPT-OSS 120B", "openai/gpt-oss-120b",
             "Large open-source GPT model", False, 4096),
            ("Granite 4.0 1B", "ibm-granite/granite-4.0-1b",
             "IBM's enterprise-grade small language model", False, 2048),
            ("GLM-4.6", "zai-org/GLM-4.6",
             "Multilingual conversational model", False, 4096),
            ("Llama 3.1 8B Instruct", "meta-llama/Llama-3.1-8B-Instruct",
             "Meta's instruction-tuned Llama model", True, 8192),
            ("Tongyi DeepResearch 30B", "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B",
             "Alibaba's research-focused large language model", False, 4096),
            ("EuroLLM 9B", "utter-project/EuroLLM-9B",
             "European multilingual language model", False, 4096),
        )
    ]

    # Text-to-Image Models (Latest and Best)
    # Row: (name, model_id, description).
    TEXT_TO_IMAGE_MODELS = [
        HFModel(name, model_id, ModelCategory.TEXT_TO_IMAGE, description, True, False)
        for name, model_id, description in (
            ("FIBO", "briaai/FIBO", "Advanced text-to-image generation model"),
            ("FLUX.1 Dev", "black-forest-labs/FLUX.1-dev",
             "State-of-the-art image generation"),
            ("FLUX.1 Schnell", "black-forest-labs/FLUX.1-schnell",
             "Fast high-quality image generation"),
            ("Qwen Image", "Qwen/Qwen-Image", "Multilingual text-to-image model"),
            ("Stable Diffusion XL", "stabilityai/stable-diffusion-xl-base-1.0",
             "Popular high-resolution image generation"),
            ("Stable Diffusion 3.5 Large", "stabilityai/stable-diffusion-3.5-large",
             "Latest Stable Diffusion model"),
            ("HunyuanImage 3.0", "tencent/HunyuanImage-3.0",
             "Tencent's advanced image generation model"),
            ("Nitro-E", "amd/Nitro-E", "AMD's efficient image generation model"),
            ("Qwen Image Lightning", "lightx2v/Qwen-Image-Lightning",
             "Fast distilled image generation"),
        )
    ]

    # Automatic Speech Recognition Models
    # Row: (name, model_id, description).
    ASR_MODELS = [
        HFModel(
            name,
            model_id,
            ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
            description,
            True,
            False,
        )
        for name, model_id, description in (
            ("Whisper Large v3", "openai/whisper-large-v3",
             "OpenAI's best multilingual speech recognition"),
            ("Whisper Large v3 Turbo", "openai/whisper-large-v3-turbo",
             "Faster version of Whisper Large v3"),
            ("Parakeet TDT 0.6B v3", "nvidia/parakeet-tdt-0.6b-v3",
             "NVIDIA's multilingual ASR model"),
            ("Canary Qwen 2.5B", "nvidia/canary-qwen-2.5b",
             "NVIDIA's advanced ASR with Qwen integration"),
            ("Canary 1B v2", "nvidia/canary-1b-v2", "Compact multilingual ASR model"),
            ("Whisper Small", "openai/whisper-small", "Lightweight multilingual ASR"),
            ("Speaker Diarization 3.1", "pyannote/speaker-diarization-3.1",
             "Advanced speaker identification and diarization"),
        )
    ]

    # Text-to-Speech Models
    # Row: (name, model_id, description).
    TTS_MODELS = [
        HFModel(name, model_id, ModelCategory.TEXT_TO_SPEECH, description, True, False)
        for name, model_id, description in (
            ("SoulX Podcast 1.7B", "Soul-AILab/SoulX-Podcast-1.7B",
             "High-quality podcast-style speech synthesis"),
            ("NeuTTS Air", "neuphonic/neutts-air", "Advanced neural text-to-speech"),
            ("Kokoro 82M", "hexgrad/Kokoro-82M", "Lightweight high-quality TTS"),
            ("Kani TTS 400M EN", "nineninesix/kani-tts-400m-en",
             "English-focused text-to-speech model"),
            ("XTTS v2", "coqui/XTTS-v2", "Zero-shot voice cloning TTS"),
            ("Chatterbox", "ResembleAI/chatterbox", "Multilingual voice cloning"),
            ("VibeVoice 1.5B", "microsoft/VibeVoice-1.5B",
             "Microsoft's advanced TTS model"),
            ("OpenAudio S1 Mini", "fishaudio/openaudio-s1-mini",
             "Compact multilingual TTS"),
        )
    ]

    # Image Classification Models
    # Row: (name, model_id, description).
    IMAGE_CLASSIFICATION_MODELS = [
        HFModel(
            name, model_id, ModelCategory.IMAGE_CLASSIFICATION, description, True, False
        )
        for name, model_id, description in (
            ("NSFW Image Detection", "Falconsai/nsfw_image_detection",
             "Content safety image classification"),
            ("ViT Base Patch16", "google/vit-base-patch16-224",
             "Google's Vision Transformer"),
            ("Deepfake Detection", "dima806/deepfake_vs_real_image_detection",
             "Detect AI-generated vs real images"),
            ("Facial Emotions Detection", "dima806/facial_emotions_image_detection",
             "Recognize facial emotions"),
            ("SDXL Detector", "Organika/sdxl-detector",
             "Detect Stable Diffusion XL generated images"),
            ("ViT NSFW Detector", "AdamCodd/vit-base-nsfw-detector",
             "NSFW content detection with ViT"),
            ("ResNet 101", "microsoft/resnet-101",
             "Microsoft's ResNet for classification"),
        )
    ]

    # Additional Categories
    # Row: (name, model_id, description).
    FEATURE_EXTRACTION_MODELS = [
        HFModel(
            name, model_id, ModelCategory.FEATURE_EXTRACTION, description, True, False
        )
        for name, model_id, description in (
            ("Sentence Transformers All MiniLM",
             "sentence-transformers/all-MiniLM-L6-v2",
             "Lightweight sentence embeddings"),
            ("BGE Large EN", "BAAI/bge-large-en-v1.5",
             "High-quality English embeddings"),
            ("E5 Large v2", "intfloat/e5-large-v2", "Multilingual text embeddings"),
        )
    ]

    # Row: (name, model_id, description).
    TRANSLATION_MODELS = [
        HFModel(name, model_id, ModelCategory.TRANSLATION, description, True, False)
        for name, model_id, description in (
            ("M2M100 1.2B", "facebook/m2m100_1.2B",
             "Multilingual machine translation"),
            ("NLLB 200 3.3B", "facebook/nllb-200-3.3B",
             "No Language Left Behind translation"),
            ("mBART Large 50", "facebook/mbart-large-50-many-to-many-mmt",
             "Multilingual BART for translation"),
        )
    ]

    # Row: (name, model_id, description).
    SUMMARIZATION_MODELS = [
        HFModel(name, model_id, ModelCategory.SUMMARIZATION, description, True, False)
        for name, model_id, description in (
            ("PEGASUS XSum", "google/pegasus-xsum", "Abstractive summarization model"),
            ("BART Large CNN", "facebook/bart-large-cnn",
             "CNN/DailyMail summarization"),
            ("T5 Base", "t5-base", "Text-to-Text Transfer Transformer"),
        )
    ]

    # Video Generation and Processing Models
    # Row: (name, model_id, category, description).
    VIDEO_GENERATION_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("Stable Video Diffusion", "stabilityai/stable-video-diffusion-img2vid",
             ModelCategory.TEXT_TO_VIDEO, "Image-to-video generation model"),
            ("AnimateDiff", "guoyww/animatediff",
             ModelCategory.VIDEO_GENERATION, "Text-to-video animation generation"),
            ("VideoCrafter", "videogen/VideoCrafter",
             ModelCategory.TEXT_TO_VIDEO, "High-quality text-to-video generation"),
            ("Video ChatGPT", "mbzuai-oryx/Video-ChatGPT-7B",
             ModelCategory.VIDEO_TO_TEXT, "Video understanding and description"),
            ("Video-BLIP", "salesforce/video-blip-opt-2.7b",
             ModelCategory.VIDEO_CLASSIFICATION,
             "Video content analysis and classification"),
        )
    ]

    # Code Generation and Development Models
    # Row: (name, model_id, category, description, requires_auth).
    CODE_GENERATION_MODELS = [
        HFModel(name, model_id, category, description, True, requires_auth)
        for name, model_id, category, description, requires_auth in (
            ("CodeLlama 34B Instruct", "codellama/CodeLlama-34b-Instruct-hf",
             ModelCategory.CODE_GENERATION,
             "Large instruction-tuned code generation model", True),
            ("StarCoder2 15B", "bigcode/starcoder2-15b",
             ModelCategory.CODE_GENERATION,
             "Advanced code generation and completion", False),
            ("DeepSeek Coder V2", "deepseek-ai/deepseek-coder-6.7b-instruct",
             ModelCategory.CODE_GENERATION, "Specialized coding assistant", False),
            ("WizardCoder 34B", "WizardLM/WizardCoder-Python-34B-V1.0",
             ModelCategory.CODE_GENERATION, "Python-focused code generation", False),
            ("Phind CodeLlama", "Phind/Phind-CodeLlama-34B-v2",
             ModelCategory.CODE_GENERATION,
             "Optimized for code explanation and debugging", False),
            ("Code T5+", "Salesforce/codet5p-770m",
             ModelCategory.CODE_COMPLETION, "Code understanding and generation", False),
            ("InCoder", "facebook/incoder-6B",
             ModelCategory.CODE_COMPLETION, "Bidirectional code generation", False),
        )
    ]

    # 3D and AR/VR Content Generation Models
    # Row: (name, model_id, category, description).
    THREE_D_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("Shap-E", "openai/shap-e",
             ModelCategory.TEXT_TO_3D, "Text-to-3D shape generation"),
            ("Point-E", "openai/point-e",
             ModelCategory.TEXT_TO_3D, "Text-to-3D point cloud generation"),
            ("DreamFusion", "google/dreamfusion",
             ModelCategory.IMAGE_TO_3D, "Image-to-3D mesh generation"),
            ("Magic3D", "nvidia/magic3d",
             ModelCategory.THREE_D_GENERATION, "High-quality 3D content creation"),
            ("GET3D", "nvidia/get3d",
             ModelCategory.MESH_GENERATION, "3D mesh generation from text"),
        )
    ]

    # Document Processing and OCR Models
    # Row: (name, model_id, category, description).
    DOCUMENT_PROCESSING_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("TrOCR Large", "microsoft/trocr-large-printed",
             ModelCategory.OCR, "Transformer-based OCR for printed text"),
            ("TrOCR Handwritten", "microsoft/trocr-large-handwritten",
             ModelCategory.HANDWRITING_RECOGNITION, "Handwritten text recognition"),
            ("LayoutLMv3", "microsoft/layoutlmv3-large",
             ModelCategory.DOCUMENT_ANALYSIS,
             "Document layout analysis and understanding"),
            ("Donut", "naver-clova-ix/donut-base",
             ModelCategory.DOCUMENT_ANALYSIS, "OCR-free document understanding"),
            ("TableTransformer", "microsoft/table-transformer-structure-recognition",
             ModelCategory.TABLE_EXTRACTION, "Table structure recognition"),
            ("FormNet", "microsoft/formnet",
             ModelCategory.FORM_PROCESSING, "Form understanding and processing"),
        )
    ]

    # Multimodal AI Models
    # Row: (name, model_id, category, description, requires_auth).
    MULTIMODAL_MODELS = [
        HFModel(name, model_id, category, description, True, requires_auth)
        for name, model_id, category, description, requires_auth in (
            ("BLIP-2", "Salesforce/blip2-opt-2.7b",
             ModelCategory.VISION_LANGUAGE,
             "Vision-language understanding and generation", False),
            ("InstructBLIP", "Salesforce/instructblip-vicuna-7b",
             ModelCategory.MULTIMODAL_REASONING,
             "Instruction-following multimodal model", False),
            ("LLaVA", "liuhaotian/llava-v1.5-7b",
             ModelCategory.VISUAL_QUESTION_ANSWERING,
             "Large Language and Vision Assistant", False),
            ("GPT-4V", "openai/gpt-4-vision-preview",
             ModelCategory.MULTIMODAL_CHAT,
             "Advanced multimodal conversational AI", True),
            ("Flamingo", "deepmind/flamingo-9b",
             ModelCategory.CROSS_MODAL_GENERATION,
             "Few-shot learning for vision and language", False),
        )
    ]

    # Specialized AI Models
    # Row: (name, model_id, category, description).
    SPECIALIZED_AI_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("MusicGen", "facebook/musicgen-medium",
             ModelCategory.MUSIC_GENERATION, "Text-to-music generation"),
            ("AudioCraft", "facebook/audiocraft_musicgen_melody",
             ModelCategory.MUSIC_GENERATION, "Melody-conditioned music generation"),
            ("Real-ESRGAN", "xinntao/realesrgan-x4plus",
             ModelCategory.SUPER_RESOLUTION, "Image super-resolution"),
            ("GFPGAN", "TencentARC/GFPGAN",
             ModelCategory.FACE_RESTORATION, "Face restoration and enhancement"),
            ("LaMa", "advimman/lama",
             ModelCategory.IMAGE_INPAINTING, "Large Mask Inpainting"),
            ("Background Remover", "briaai/RMBG-1.4",
             ModelCategory.BACKGROUND_REMOVAL, "Automatic background removal"),
            ("Voice Cloner", "coqui/XTTS-v2",
             ModelCategory.VOICE_CLONING, "Multilingual voice cloning"),
        )
    ]

    # Creative Content Models
    # Row: (name, model_id, category, description, requires_auth).
    CREATIVE_CONTENT_MODELS = [
        HFModel(name, model_id, category, description, True, requires_auth)
        for name, model_id, category, description, requires_auth in (
            ("GPT-3.5 Creative", "openai/gpt-3.5-turbo-instruct",
             ModelCategory.CREATIVE_WRITING,
             "Creative writing and storytelling", True),
            ("Novel AI", "novelai/genji-python-6b",
             ModelCategory.STORY_GENERATION, "Interactive story generation", False),
            ("Poet Assistant", "gpt2-poetry",
             ModelCategory.POETRY_GENERATION, "Poetry generation and analysis", False),
            ("Blog Writer", "google/flan-t5-large",
             ModelCategory.BLOG_WRITING, "Blog content creation", False),
            ("Marketing Copy AI", "microsoft/DialoGPT-large",
             ModelCategory.MARKETING_COPY, "Marketing content generation", False),
        )
    ]

    # Game Development Models
    # Row: (name, model_id, category, description).
    GAME_DEVELOPMENT_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("Character AI", "character-ai/character-generator",
             ModelCategory.CHARACTER_GENERATION,
             "Game character generation and design"),
            ("Level Designer", "unity/level-generator",
             ModelCategory.LEVEL_GENERATION,
             "Game level and environment generation"),
            ("Dialogue Writer", "bioware/dialogue-generator",
             ModelCategory.DIALOGUE_GENERATION,
             "Game dialogue and narrative generation"),
            ("Asset Creator", "epic/asset-generator",
             ModelCategory.GAME_ASSET_GENERATION,
             "Game asset and texture generation"),
        )
    ]

    # Science and Research Models
    # Row: (name, model_id, category, description).
    SCIENCE_RESEARCH_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("AlphaFold", "deepmind/alphafold2",
             ModelCategory.PROTEIN_FOLDING, "Protein structure prediction"),
            ("ChemBERTa", "DeepChem/ChemBERTa-77M-MLM",
             ModelCategory.MOLECULE_GENERATION, "Chemical compound analysis"),
            ("SciBERT", "allenai/scibert_scivocab_uncased",
             ModelCategory.SCIENTIFIC_WRITING, "Scientific text understanding"),
            ("Research Assistant", "microsoft/specter2",
             ModelCategory.RESEARCH_ASSISTANCE,
             "Research paper analysis and recommendations"),
            ("Data Analyst", "microsoft/data-copilot",
             ModelCategory.DATA_ANALYSIS, "Automated data analysis and insights"),
        )
    ]

    # Business and Productivity Models
    # Row: (name, model_id, category, description).
    BUSINESS_PRODUCTIVITY_MODELS = [
        HFModel(name, model_id, category, description, True, False)
        for name, model_id, category, description in (
            ("Email Assistant", "microsoft/email-generator",
             ModelCategory.EMAIL_GENERATION, "Professional email composition"),
            ("Presentation AI", "gamma/presentation-generator",
             ModelCategory.PRESENTATION_CREATION, "Automated presentation creation"),
            ("Report Writer", "openai/report-generator",
             ModelCategory.REPORT_GENERATION, "Business report generation"),
            ("Meeting Summarizer", "microsoft/meeting-summarizer",
             ModelCategory.MEETING_SUMMARIZATION, "Meeting notes and action items"),
            ("Project Planner", "atlassian/project-ai",
             ModelCategory.PROJECT_PLANNING, "Project planning and management"),
        )
    ]

    # AI Teacher Models - Best-in-Class Educational AI System
    AI_TEACHER_MODELS = [
        # Primary AI Tutoring Models
        HFModel(
            "AI Tutor Interactive",
            "microsoft/DialoGPT-medium",
            ModelCategory.AI_TUTORING,
            "Interactive AI tutor for conversational learning",
            True,
            False,
            2048,
            True,
        ),
        HFModel(
            "Goal-Oriented Tutor",
            "microsoft/GODEL-v1_1-large-seq2seq",
            ModelCategory.AI_TUTORING,
            "Goal-oriented conversational AI for personalized tutoring",
            True,
            False,
            2048,
            True,
        ),
        HFModel(
            "Code Instructor AI",
            "microsoft/codebert-base",
            ModelCategory.CODING_INSTRUCTION,
            "AI coding instructor for programming education",
            True,
            False,
            1024,
            False,
        ),
        # The remaining entries were originally written with the Hub repo id in
        # the ``name`` slot and the category's enum name (e.g.
        # "ADAPTIVE_LEARNING") in the ``model_id`` slot, which is not a usable
        # model identifier. ``model_id`` now carries the repo id; ``name`` is
        # left as the repo id so lookups by name keep working.
        # Row: (repo_id, category, description).
        *[
            HFModel(repo_id, repo_id, category, description, True, False, 1024, True)
            for repo_id, category, description in (
                ("deepmind/flamingo-base", ModelCategory.ADAPTIVE_LEARNING,
                 "Multimodal AI for adaptive learning experiences"),
                # Educational Content Generation
                ("gpt2-medium", ModelCategory.EDUCATIONAL_CONTENT,
                 "Educational content generation for curriculum development"),
                ("facebook/bart-large-cnn", ModelCategory.LESSON_PLANNING,
                 "Lesson plan generation and educational summarization"),
                ("microsoft/prophetnet-large-uncased",
                 ModelCategory.STUDY_GUIDE_CREATION,
                 "Study guide and learning material generation"),
                ("bigscience/bloom-560m", ModelCategory.EDUCATIONAL_CONTENT,
                 "Multilingual educational content for global learning"),
                # Subject-Specific Teaching Models
                ("microsoft/codebert-base", ModelCategory.CODING_INSTRUCTION,
                 "Programming education and code explanation"),
                ("allenai/scibert_scivocab_uncased", ModelCategory.SCIENCE_TUTORING,
                 "Science education and scientific concept explanation"),
                ("google/flan-t5-base", ModelCategory.SUBJECT_TEACHING,
                 "Multi-subject teaching AI with instruction following"),
                ("microsoft/unixcoder-base", ModelCategory.CODING_INSTRUCTION,
                 "Advanced programming instruction and debugging help"),
                # Math and STEM Education
                ("microsoft/DialoGPT-small", ModelCategory.MATH_TUTORING,
                 "Interactive math tutoring and problem solving"),
                ("facebook/galactica-125m", ModelCategory.SCIENCE_TUTORING,
                 "Scientific knowledge and research education"),
                ("microsoft/graphcodebert-base", ModelCategory.CODING_INSTRUCTION,
                 "Code structure and algorithm education"),
                ("deepmind/mathematical-reasoning", ModelCategory.MATH_TUTORING,
                 "Mathematical reasoning and proof assistance"),
                # Language and Literature Education
                ("microsoft/prophetnet-large-uncased-cnndm",
                 ModelCategory.LANGUAGE_TUTORING,
                 "Language learning and literature analysis"),
                ("facebook/mbart-large-50-many-to-many-mmt",
                 ModelCategory.LANGUAGE_TUTORING,
                 "Multilingual language education and translation"),
                ("google/electra-base-discriminator", ModelCategory.LANGUAGE_TUTORING,
                 "Language comprehension and grammar instruction"),
                # Assessment and Testing
                ("microsoft/DialoGPT-large", ModelCategory.QUIZ_GENERATION,
                 "Interactive quiz and assessment generation"),
                ("facebook/bart-large", ModelCategory.LEARNING_ASSESSMENT,
                 "Learning progress assessment and feedback"),
                ("google/t5-base", ModelCategory.QUIZ_GENERATION,
                 "Question generation for educational assessment"),
                ("microsoft/unilm-base-cased", ModelCategory.EXAM_PREPARATION,
                 "Exam preparation and practice test generation"),
                # Personalized Learning
                ("huggingface/distilbert-base-uncased",
                 ModelCategory.PERSONALIZED_LEARNING,
                 "Personalized learning path recommendation"),
                ("microsoft/layoutlm-base-uncased", ModelCategory.LEARNING_ANALYTICS,
                 "Educational document analysis and insights"),
                ("facebook/opt-125m", ModelCategory.ADAPTIVE_LEARNING,
                 "Adaptive learning system with dynamic content"),
                # Concept Explanation and Understanding
                ("microsoft/deberta-base", ModelCategory.CONCEPT_EXPLANATION,
                 "Clear concept explanation and knowledge breakdown"),
                ("google/pegasus-xsum", ModelCategory.CONCEPT_EXPLANATION,
                 "Concept summarization and explanation"),
                ("facebook/bart-base", ModelCategory.CONCEPT_EXPLANATION,
                 "Interactive concept teaching and clarification"),
                # Homework and Study Assistance
                ("microsoft/codebert-base-mlm", ModelCategory.HOMEWORK_ASSISTANCE,
                 "Programming homework help and debugging"),
                ("google/flan-t5-small", ModelCategory.HOMEWORK_ASSISTANCE,
                 "General homework assistance across subjects"),
                ("facebook/mbart-large-cc25", ModelCategory.HOMEWORK_ASSISTANCE,
                 "Multilingual homework support and explanation"),
                # Curriculum Design and Planning
                ("microsoft/prophetnet-base-uncased", ModelCategory.CURRICULUM_DESIGN,
                 "Curriculum planning and educational structure design"),
                ("google/t5-small", ModelCategory.LESSON_PLANNING,
                 "Detailed lesson planning and activity design"),
                ("facebook/bart-large-xsum", ModelCategory.CURRICULUM_DESIGN,
                 "Educational program summarization and design"),
                # Educational Games and Interactive Learning
                ("microsoft/DialoGPT-base", ModelCategory.EDUCATIONAL_GAMES,
                 "Interactive educational games and learning activities"),
                ("huggingface/bert-base-uncased", ModelCategory.EDUCATIONAL_GAMES,
                 "Educational quiz games and interactive learning"),
                # History and Social Studies
                ("microsoft/deberta-large", ModelCategory.HISTORY_TUTORING,
                 "Historical analysis and social studies education"),
                ("facebook/opt-350m", ModelCategory.HISTORY_TUTORING,
                 "Interactive history lessons and timeline explanation"),
                # Advanced Educational Features
                ("microsoft/unilm-large-cased", ModelCategory.LEARNING_ANALYTICS,
                 "Advanced learning analytics and progress tracking"),
                ("google/electra-large-discriminator",
                 ModelCategory.PERSONALIZED_LEARNING,
                 "Advanced personalized learning with AI adaptation"),
                ("facebook/mbart-large-50", ModelCategory.ADAPTIVE_LEARNING,
                 "Multilingual adaptive learning system"),
            )
        ],
    ]
class HuggingFaceInference:
"""Hugging Face Inference API integration"""
def __init__(
self,
api_token: str,
base_url: str = "https://api-inference.huggingface.co/models/",
):
self.api_token = api_token
self.base_url = base_url
self.session = None
async def __aenter__(self):
    """Open the aiohttp session used for all requests and return ``self``."""
    auth_headers = {"Authorization": f"Bearer {self.api_token}"}
    request_timeout = aiohttp.ClientTimeout(total=300)  # 5 minutes timeout
    self.session = aiohttp.ClientSession(
        headers=auth_headers,
        timeout=request_timeout,
    )
    return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
if self.session:
await self.session.close()
async def text_generation(
    self,
    model_id: str,
    prompt: str,
    max_tokens: int = 100,
    temperature: float = 0.7,
    stream: bool = False,
    **kwargs,
) -> Dict[str, Any]:
    """Run a text-generation model on ``prompt``.

    Args:
        model_id: Model to call.
        prompt: Input text, sent as ``inputs``.
        max_tokens: Sent as the ``max_new_tokens`` parameter.
        temperature: Sent as the ``temperature`` parameter.
        stream: When true the call goes through ``_stream_request`` instead
            of ``_request``.
        **kwargs: Extra generation parameters, merged after the defaults.

    Returns:
        Whatever the selected request helper returns.
    """
    generation_params = {
        "max_new_tokens": max_tokens,
        "temperature": temperature,
        "do_sample": True,
    }
    # Merged last so caller-supplied values win over the defaults above.
    generation_params.update(kwargs)
    body = {
        "inputs": prompt,
        "parameters": generation_params,
        "options": {"use_cache": False},
    }
    send = self._stream_request if stream else self._request
    return await send(model_id, body)
async def text_to_image(
    self,
    model_id: str,
    prompt: str,
    negative_prompt: Optional[str] = None,
    **kwargs,
) -> bytes:
    """Generate an image for ``prompt`` and return the raw response body.

    ``negative_prompt`` is only included when it is non-empty; any extra
    keyword arguments are forwarded as additional parameters.
    """
    parameters: Dict[str, Any] = {}
    if negative_prompt:
        parameters["negative_prompt"] = negative_prompt
    parameters.update(kwargs)
    body = {"inputs": prompt, "parameters": parameters}
    # The reply is read as bytes rather than parsed as JSON.
    return await self._request(model_id, body, expect_json=False)
async def automatic_speech_recognition(
    self, model_id: str, audio_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send audio to a speech-recognition model and return the JSON reply.

    The audio bytes are base64-encoded into a string so they can travel
    inside the JSON request body; ``kwargs`` become the ``parameters``.
    """
    encoded_audio = base64.b64encode(audio_data).decode()
    return await self._request(
        model_id,
        {"inputs": encoded_audio, "parameters": kwargs},
    )
async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
    """Synthesize speech for ``text`` and return the raw response body.

    ``kwargs`` are forwarded unchanged as the request ``parameters``.
    """
    body = {"inputs": text, "parameters": kwargs}
    # The reply is read as bytes rather than parsed as JSON.
    return await self._request(model_id, body, expect_json=False)
async def image_classification(
    self, model_id: str, image_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send an image to a classification model and return the JSON reply.

    The image bytes are base64-encoded into a string so they can travel
    inside the JSON request body; ``kwargs`` become the ``parameters``.
    """
    encoded_image = base64.b64encode(image_data).decode()
    return await self._request(
        model_id,
        {"inputs": encoded_image, "parameters": kwargs},
    )
async def feature_extraction(
    self, model_id: str, texts: Union[str, List[str]], **kwargs
) -> Dict[str, Any]:
    """Request embeddings for one text or a list of texts.

    ``texts`` is passed through as ``inputs`` without modification and
    ``kwargs`` become the ``parameters``.
    """
    return await self._request(model_id, {"inputs": texts, "parameters": kwargs})
async def translation(
    self,
    model_id: str,
    text: str,
    src_lang: Optional[str] = None,
    tgt_lang: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Translate ``text`` and return the JSON reply.

    ``src_lang`` and ``tgt_lang`` are only sent when they are non-empty;
    extra keyword arguments are appended to the parameters.
    """
    language_hints = (("src_lang", src_lang), ("tgt_lang", tgt_lang))
    parameters = {key: value for key, value in language_hints if value}
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": text, "parameters": parameters})
async def summarization(
    self,
    model_id: str,
    text: str,
    max_length: int = 150,
    min_length: int = 30,
    **kwargs,
) -> Dict[str, Any]:
    """Summarize ``text`` and return the JSON reply.

    ``max_length`` and ``min_length`` are always sent; extra keyword
    arguments are added after them.
    """
    parameters = dict(max_length=max_length, min_length=min_length, **kwargs)
    return await self._request(model_id, {"inputs": text, "parameters": parameters})
async def question_answering(
    self, model_id: str, question: str, context: str, **kwargs
) -> Dict[str, Any]:
    """Ask ``question`` against ``context`` and return the JSON reply.

    The question and context are sent together as the ``inputs`` object;
    ``kwargs`` become the ``parameters``.
    """
    qa_inputs = {"question": question, "context": context}
    return await self._request(
        model_id,
        {"inputs": qa_inputs, "parameters": kwargs},
    )
async def zero_shot_classification(
    self, model_id: str, text: str, candidate_labels: List[str], **kwargs
) -> Dict[str, Any]:
    """Classify ``text`` against caller-supplied ``candidate_labels``.

    The labels are sent as the ``candidate_labels`` parameter, followed by
    any extra keyword arguments.
    """
    parameters: Dict[str, Any] = {"candidate_labels": candidate_labels}
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": text, "parameters": parameters})
async def conversational(
    self,
    model_id: str,
    text: str,
    conversation_history: Optional[List[Dict[str, str]]] = None,
    **kwargs,
) -> Dict[str, Any]:
    """Send a chat turn, optionally with prior history, to a model.

    Each history entry may carry a ``"user"`` and/or a ``"bot"`` key. When
    history is provided (and non-empty) the user turns are sent as
    ``past_user_inputs`` and the bot turns as ``generated_responses``;
    otherwise neither key is included.
    """
    turn_inputs: Dict[str, Any] = {"text": text}
    if conversation_history:
        turn_inputs["past_user_inputs"] = [
            entry["user"] for entry in conversation_history if "user" in entry
        ]
        turn_inputs["generated_responses"] = [
            entry["bot"] for entry in conversation_history if "bot" in entry
        ]
    return await self._request(
        model_id, {"inputs": turn_inputs, "parameters": kwargs}
    )
async def _request(
    self,
    model_id: str,
    payload: Dict[str, Any],
    expect_json: bool = True,
    max_retries: int = 5,
) -> Union[Dict[str, Any], bytes]:
    """POST ``payload`` to the model endpoint and return the response body.

    Args:
        model_id: Identifier appended to ``self.base_url`` to form the URL.
        payload: JSON-serializable request body.
        expect_json: When true, return the decoded JSON body; otherwise
            return the raw bytes.
        max_retries: Maximum number of extra attempts made while the API
            answers 503 ("model is loading"). Negative values are treated
            as zero.

    Returns:
        The parsed JSON response, or raw bytes when ``expect_json`` is false.

    Raises:
        Exception: On any non-200 status, including a 503 that persists
            after ``max_retries`` retries. Every failure is logged and then
            re-raised.
    """
    url = f"{self.base_url}{model_id}"
    retries_left = max(max_retries, 0)
    try:
        while True:
            async with self.session.post(url, json=payload) as response:
                if response.status == 200:
                    if expect_json:
                        return await response.json()
                    return await response.read()
                if response.status != 503 or retries_left == 0:
                    error_text = await response.text()
                    raise Exception(
                        f"API request failed with status {response.status}: {error_text}"
                    )
                # 503: model is loading. The body is expected to carry an
                # "estimated_time" hint, but it may be missing, non-numeric
                # or not JSON at all; fall back to the default wait instead
                # of failing the whole request on a malformed hint.
                try:
                    error_info = await response.json()
                    estimated_time = float(error_info.get("estimated_time", 30))
                except Exception:
                    estimated_time = 30.0
            # The response is released here, so no connection is held open
            # while we wait for the model to come up.
            retries_left -= 1
            logger.info(f"Model {model_id} is loading, waiting {estimated_time}s")
            await asyncio.sleep(min(max(estimated_time, 0.0), 60))  # Cap at 60 seconds
    except Exception as e:
        logger.error(f"Error calling Hugging Face API for {model_id}: {e}")
        raise
async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
    """Stream a model response, yielding each received chunk as text.

    Args:
        model_id: Identifier appended to ``self.base_url`` to form the URL.
        payload: Request body. It is sent with ``"stream": True`` added;
            the caller's dict is left untouched.

    Yields:
        Each non-empty chunk of the response body decoded as UTF-8.

    Raises:
        Exception: On any non-200 status. Every failure is logged and then
            re-raised.
    """
    url = f"{self.base_url}{model_id}"
    # Work on a shallow copy so the caller's payload is not mutated (it may
    # be reused for a later non-streaming request).
    stream_payload = {**payload, "stream": True}
    try:
        async with self.session.post(url, json=stream_payload) as response:
            if response.status != 200:
                error_text = await response.text()
                raise Exception(
                    f"Streaming request failed with status {response.status}: {error_text}"
                )
            async for chunk in response.content:
                if chunk:
                    yield chunk.decode("utf-8")
    except Exception as e:
        logger.error(f"Error streaming from Hugging Face API for {model_id}: {e}")
        raise
# New methods for expanded model categories
async def text_to_video(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request a video for ``prompt`` from the given model.

    Defaults of 5 for ``duration``, 24 for ``fps`` and 512 for ``width``
    and ``height`` are sent unless overridden through keyword arguments;
    any other keyword arguments are forwarded as additional parameters.
    """
    parameters: Dict[str, Any] = {
        "duration": 5,
        "fps": 24,
        "width": 512,
        "height": 512,
    }
    # Caller-supplied values replace the defaults and extend the set.
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": prompt, "parameters": parameters})
async def video_to_text(
    self, model_id: str, video_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send a video to a model and return its JSON reply.

    The raw video bytes are base64-encoded and sent as ``inputs.video``;
    extra keyword arguments are forwarded under ``parameters``.
    """
    encoded_video = base64.b64encode(video_data).decode()
    request_body = {"inputs": {"video": encoded_video}, "parameters": kwargs}
    return await self._request(model_id, request_body)
async def code_generation(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request code for a natural-language ``prompt``.

    Defaults of 500 for ``max_length``, 0.2 for ``temperature`` and
    ``"python"`` for ``language`` are sent unless overridden through
    keyword arguments; other keyword arguments are forwarded as well.
    """
    parameters: Dict[str, Any] = {
        "max_length": 500,
        "temperature": 0.2,
        "language": "python",
    }
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": prompt, "parameters": parameters})
async def code_completion(
    self, model_id: str, code: str, **kwargs
) -> Dict[str, Any]:
    """Request a completion for a partial ``code`` snippet.

    Defaults of 100 for ``max_length`` and 0.1 for ``temperature`` are sent
    unless overridden through keyword arguments; other keyword arguments
    are forwarded as well.
    """
    parameters: Dict[str, Any] = {"max_length": 100, "temperature": 0.1}
    parameters.update(kwargs)
    request_body = {"inputs": code, "parameters": parameters}
    return await self._request(model_id, request_body)
async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
    """Request a 3D model for a text ``prompt``.

    Defaults of 64 for ``resolution`` and ``"obj"`` for ``format`` are sent
    unless overridden through keyword arguments; other keyword arguments
    are forwarded as well.
    """
    parameters: Dict[str, Any] = {"resolution": 64, "format": "obj"}
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": prompt, "parameters": parameters})
async def image_to_3d(
    self, model_id: str, image_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Request a 3D model derived from an image.

    The raw image bytes are base64-encoded and sent as ``inputs.image``;
    extra keyword arguments are forwarded under ``parameters``.
    """
    encoded_image = base64.b64encode(image_data).decode()
    request_body = {"inputs": {"image": encoded_image}, "parameters": kwargs}
    return await self._request(model_id, request_body)
async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
    """Run optical character recognition on an image.

    The raw image bytes are base64-encoded and sent as ``inputs.image``.
    ``language`` defaults to ``"en"`` unless overridden through keyword
    arguments; other keyword arguments are forwarded as well.
    """
    encoded_image = base64.b64encode(image_data).decode()
    parameters: Dict[str, Any] = {"language": "en"}
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": {"image": encoded_image}, "parameters": parameters}
    )
async def document_analysis(
    self, model_id: str, document_data: bytes, **kwargs
) -> Dict[str, Any]:
    """Send a document to a model for analysis and return its JSON reply.

    The raw document bytes are base64-encoded and sent as
    ``inputs.document``; extra keyword arguments are forwarded under
    ``parameters``.
    """
    encoded_document = base64.b64encode(document_data).decode()
    request_body = {"inputs": {"document": encoded_document}, "parameters": kwargs}
    return await self._request(model_id, request_body)
async def vision_language(
    self, model_id: str, image_data: bytes, text: str, **kwargs
) -> Dict[str, Any]:
    """Send an image together with text to a model.

    The raw image bytes are base64-encoded and sent as ``inputs.image``
    alongside ``inputs.text``; extra keyword arguments are forwarded under
    ``parameters``.
    """
    encoded_image = base64.b64encode(image_data).decode()
    combined_inputs = {"image": encoded_image, "text": text}
    return await self._request(
        model_id, {"inputs": combined_inputs, "parameters": kwargs}
    )
async def multimodal_reasoning(
    self, model_id: str, inputs: Dict[str, Any], **kwargs
) -> Dict[str, Any]:
    """Send a caller-built multimodal ``inputs`` dict to a model.

    ``inputs`` is passed through unchanged; extra keyword arguments are
    forwarded under ``parameters``.
    """
    request_body = {"inputs": inputs, "parameters": kwargs}
    reply = await self._request(model_id, request_body)
    return reply
async def music_generation(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request music for a text ``prompt``.

    Defaults of 30 for ``duration``, 120 for ``bpm`` and ``"electronic"``
    for ``genre`` are sent unless overridden through keyword arguments;
    other keyword arguments are forwarded as well.
    """
    parameters: Dict[str, Any] = {
        "duration": 30,
        "bpm": 120,
        "genre": "electronic",
    }
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": prompt, "parameters": parameters})
async def voice_cloning(
    self, model_id: str, text: str, voice_sample: bytes, **kwargs
) -> bytes:
    """Synthesize ``text`` using a reference voice sample.

    The sample bytes are base64-encoded and sent as ``inputs.voice_sample``
    next to ``inputs.text``; extra keyword arguments are forwarded under
    ``parameters``. The raw response bytes are returned.
    """
    encoded_sample = base64.b64encode(voice_sample).decode()
    request_body = {
        "inputs": {"text": text, "voice_sample": encoded_sample},
        "parameters": kwargs,
    }
    # expect_json=False makes _request hand back the unparsed bytes.
    return await self._request(model_id, request_body, expect_json=False)
async def super_resolution(
    self, model_id: str, image_data: bytes, **kwargs
) -> bytes:
    """Request an upscaled version of an image.

    The raw image bytes are base64-encoded and sent as ``inputs.image``.
    ``scale_factor`` defaults to 4 unless overridden through keyword
    arguments; other keyword arguments are forwarded as well. The raw
    response bytes are returned.
    """
    encoded_image = base64.b64encode(image_data).decode()
    parameters = {"scale_factor": 4}
    parameters.update(kwargs)
    request_body = {"inputs": {"image": encoded_image}, "parameters": parameters}
    return await self._request(model_id, request_body, expect_json=False)
async def background_removal(
    self, model_id: str, image_data: bytes, **kwargs
) -> bytes:
    """Request a version of an image with its background removed.

    The raw image bytes are base64-encoded and sent as ``inputs.image``;
    extra keyword arguments are forwarded under ``parameters``. The raw
    response bytes are returned.
    """
    encoded_image = base64.b64encode(image_data).decode()
    request_body = {"inputs": {"image": encoded_image}, "parameters": kwargs}
    return await self._request(model_id, request_body, expect_json=False)
async def creative_writing(
    self, model_id: str, prompt: str, **kwargs
) -> Dict[str, Any]:
    """Request creative text for ``prompt``.

    Defaults of 1000 for ``max_length``, 0.8 for ``creativity`` and
    ``"general"`` for ``genre`` are sent unless overridden through keyword
    arguments; other keyword arguments are forwarded as well.
    """
    parameters: Dict[str, Any] = {
        "max_length": 1000,
        "creativity": 0.8,
        "genre": "general",
    }
    parameters.update(kwargs)
    return await self._request(model_id, {"inputs": prompt, "parameters": parameters})
async def business_document(
    self, model_id: str, document_type: str, context: str, **kwargs
) -> Dict[str, Any]:
    """Request a business document of ``document_type`` built from ``context``.

    The two values are combined into a single text prompt. Defaults of
    ``"professional"`` for ``format`` and ``"medium"`` for ``length`` are
    sent unless overridden through keyword arguments; other keyword
    arguments are forwarded as well.
    """
    instruction = f"Generate {document_type}: {context}"
    parameters: Dict[str, Any] = {"format": "professional", "length": "medium"}
    parameters.update(kwargs)
    return await self._request(
        model_id, {"inputs": instruction, "parameters": parameters}
    )
class HuggingFaceModelManager:
"""Manager for all Hugging Face model operations"""
def __init__(self, api_token: str):
    """Store the API token and create the model catalog.

    Args:
        api_token: Token kept on the instance and later passed to
            ``HuggingFaceInference`` when a model is called.
    """
    self.api_token = api_token
    # Catalog object whose ``*_MODELS`` attributes back the lookup methods.
    self.models = HuggingFaceModels()
def get_models_by_category(self, category: ModelCategory) -> List[HFModel]:
    """Return the models registered for ``category``.

    The lookup is delegated to ``get_all_models`` so that every category
    mapped there (video, code, 3D, document, multimodal, ...) is answered
    consistently, rather than only a hand-maintained subset.

    Args:
        category: Category whose models are wanted.

    Returns:
        The list mapped to ``category`` by ``get_all_models``, or an empty
        list when the category has no entry.
    """
    return self.get_all_models().get(category, [])
def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]:
    """Return a mapping from each known category to its list of models.

    Several categories share one catalog list (for example every video
    category maps to ``VIDEO_GENERATION_MODELS``), so the table below is
    written as (categories, model list) groups and then expanded. Group and
    member order determines the key order of the returned dict.
    """
    catalog = self.models
    grouped = (
        # Core AI categories (one list per category)
        ((ModelCategory.TEXT_GENERATION,), catalog.TEXT_GENERATION_MODELS),
        ((ModelCategory.TEXT_TO_IMAGE,), catalog.TEXT_TO_IMAGE_MODELS),
        ((ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,), catalog.ASR_MODELS),
        ((ModelCategory.TEXT_TO_SPEECH,), catalog.TTS_MODELS),
        ((ModelCategory.IMAGE_CLASSIFICATION,), catalog.IMAGE_CLASSIFICATION_MODELS),
        ((ModelCategory.FEATURE_EXTRACTION,), catalog.FEATURE_EXTRACTION_MODELS),
        ((ModelCategory.TRANSLATION,), catalog.TRANSLATION_MODELS),
        ((ModelCategory.SUMMARIZATION,), catalog.SUMMARIZATION_MODELS),
        # Video and Motion
        (
            (
                ModelCategory.TEXT_TO_VIDEO,
                ModelCategory.VIDEO_GENERATION,
                ModelCategory.VIDEO_TO_TEXT,
                ModelCategory.VIDEO_CLASSIFICATION,
            ),
            catalog.VIDEO_GENERATION_MODELS,
        ),
        # Code and Development
        (
            (
                ModelCategory.CODE_GENERATION,
                ModelCategory.CODE_COMPLETION,
                ModelCategory.CODE_EXPLANATION,
                ModelCategory.APP_GENERATION,
            ),
            catalog.CODE_GENERATION_MODELS,
        ),
        # 3D and AR/VR
        (
            (
                ModelCategory.TEXT_TO_3D,
                ModelCategory.IMAGE_TO_3D,
                ModelCategory.THREE_D_GENERATION,
                ModelCategory.MESH_GENERATION,
            ),
            catalog.THREE_D_MODELS,
        ),
        # Document Processing
        (
            (
                ModelCategory.OCR,
                ModelCategory.DOCUMENT_ANALYSIS,
                ModelCategory.HANDWRITING_RECOGNITION,
                ModelCategory.TABLE_EXTRACTION,
                ModelCategory.FORM_PROCESSING,
            ),
            catalog.DOCUMENT_PROCESSING_MODELS,
        ),
        # Multimodal AI
        (
            (
                ModelCategory.VISION_LANGUAGE,
                ModelCategory.MULTIMODAL_REASONING,
                ModelCategory.VISUAL_QUESTION_ANSWERING,
                ModelCategory.MULTIMODAL_CHAT,
                ModelCategory.CROSS_MODAL_GENERATION,
            ),
            catalog.MULTIMODAL_MODELS,
        ),
        # Specialized AI
        (
            (
                ModelCategory.MUSIC_GENERATION,
                ModelCategory.VOICE_CLONING,
                ModelCategory.SUPER_RESOLUTION,
                ModelCategory.FACE_RESTORATION,
                ModelCategory.IMAGE_INPAINTING,
                ModelCategory.BACKGROUND_REMOVAL,
            ),
            catalog.SPECIALIZED_AI_MODELS,
        ),
        # Creative Content
        (
            (
                ModelCategory.CREATIVE_WRITING,
                ModelCategory.STORY_GENERATION,
                ModelCategory.POETRY_GENERATION,
                ModelCategory.BLOG_WRITING,
                ModelCategory.MARKETING_COPY,
            ),
            catalog.CREATIVE_CONTENT_MODELS,
        ),
        # Game Development
        (
            (
                ModelCategory.GAME_ASSET_GENERATION,
                ModelCategory.CHARACTER_GENERATION,
                ModelCategory.LEVEL_GENERATION,
                ModelCategory.DIALOGUE_GENERATION,
            ),
            catalog.GAME_DEVELOPMENT_MODELS,
        ),
        # Science and Research
        (
            (
                ModelCategory.PROTEIN_FOLDING,
                ModelCategory.MOLECULE_GENERATION,
                ModelCategory.SCIENTIFIC_WRITING,
                ModelCategory.RESEARCH_ASSISTANCE,
                ModelCategory.DATA_ANALYSIS,
            ),
            catalog.SCIENCE_RESEARCH_MODELS,
        ),
        # Business and Productivity
        (
            (
                ModelCategory.EMAIL_GENERATION,
                ModelCategory.PRESENTATION_CREATION,
                ModelCategory.REPORT_GENERATION,
                ModelCategory.MEETING_SUMMARIZATION,
                ModelCategory.PROJECT_PLANNING,
            ),
            catalog.BUSINESS_PRODUCTIVITY_MODELS,
        ),
    )
    return {
        category: models
        for categories, models in grouped
        for category in categories
    }
def get_model_by_id(self, model_id: str) -> Optional[HFModel]:
    """Return the first catalogued model whose ``model_id`` matches.

    Categories are scanned in the order produced by ``get_all_models``;
    ``None`` is returned when no model matches.
    """
    matches = (
        candidate
        for group in self.get_all_models().values()
        for candidate in group
        if candidate.model_id == model_id
    )
    return next(matches, None)
async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any:
    """Call ``model_id`` through the inference helper that matches ``category``.

    A ``HuggingFaceInference`` context is opened with the manager's API
    token, and ``model_id`` plus ``kwargs`` are forwarded to the first
    helper whose category check matches. Checks run top to bottom, and
    several categories deliberately reuse a helper written for a similar
    task.

    Raises:
        ValueError: If no check matches ``category``.
    """
    async with HuggingFaceInference(self.api_token) as hf:
        # Core AI categories
        if category == ModelCategory.TEXT_GENERATION:
            return await hf.text_generation(model_id, **kwargs)
        if category == ModelCategory.TEXT_TO_IMAGE:
            return await hf.text_to_image(model_id, **kwargs)
        if category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION:
            return await hf.automatic_speech_recognition(model_id, **kwargs)
        if category == ModelCategory.TEXT_TO_SPEECH:
            return await hf.text_to_speech(model_id, **kwargs)
        if category == ModelCategory.IMAGE_CLASSIFICATION:
            return await hf.image_classification(model_id, **kwargs)
        if category == ModelCategory.FEATURE_EXTRACTION:
            return await hf.feature_extraction(model_id, **kwargs)
        if category == ModelCategory.TRANSLATION:
            return await hf.translation(model_id, **kwargs)
        if category == ModelCategory.SUMMARIZATION:
            return await hf.summarization(model_id, **kwargs)
        if category == ModelCategory.QUESTION_ANSWERING:
            return await hf.question_answering(model_id, **kwargs)
        if category == ModelCategory.ZERO_SHOT_CLASSIFICATION:
            return await hf.zero_shot_classification(model_id, **kwargs)
        if category == ModelCategory.CONVERSATIONAL:
            return await hf.conversational(model_id, **kwargs)

        # Video and Motion categories
        if category in (
            ModelCategory.TEXT_TO_VIDEO,
            ModelCategory.VIDEO_GENERATION,
        ):
            return await hf.text_to_video(model_id, **kwargs)
        if category == ModelCategory.VIDEO_TO_TEXT:
            return await hf.video_to_text(model_id, **kwargs)
        if category == ModelCategory.VIDEO_CLASSIFICATION:
            # Handled like image classification.
            return await hf.image_classification(model_id, **kwargs)

        # Code and Development categories
        if category in (
            ModelCategory.CODE_GENERATION,
            ModelCategory.APP_GENERATION,
        ):
            return await hf.code_generation(model_id, **kwargs)
        if category in (
            ModelCategory.CODE_COMPLETION,
            ModelCategory.CODE_EXPLANATION,
        ):
            return await hf.code_completion(model_id, **kwargs)

        # 3D and AR/VR categories
        if category in (
            ModelCategory.TEXT_TO_3D,
            ModelCategory.THREE_D_GENERATION,
        ):
            return await hf.text_to_3d(model_id, **kwargs)
        if category in (ModelCategory.IMAGE_TO_3D, ModelCategory.MESH_GENERATION):
            return await hf.image_to_3d(model_id, **kwargs)

        # Document Processing categories
        if category == ModelCategory.OCR:
            return await hf.ocr(model_id, **kwargs)
        if category in (
            ModelCategory.DOCUMENT_ANALYSIS,
            ModelCategory.FORM_PROCESSING,
            ModelCategory.TABLE_EXTRACTION,
            ModelCategory.LAYOUT_ANALYSIS,
        ):
            return await hf.document_analysis(model_id, **kwargs)
        if category == ModelCategory.HANDWRITING_RECOGNITION:
            # Handled like OCR.
            return await hf.ocr(model_id, **kwargs)

        # Multimodal AI categories
        if category in (
            ModelCategory.VISION_LANGUAGE,
            ModelCategory.VISUAL_QUESTION_ANSWERING,
            ModelCategory.IMAGE_TEXT_MATCHING,
        ):
            return await hf.vision_language(model_id, **kwargs)
        if category in (
            ModelCategory.MULTIMODAL_REASONING,
            ModelCategory.MULTIMODAL_CHAT,
            ModelCategory.CROSS_MODAL_GENERATION,
        ):
            return await hf.multimodal_reasoning(model_id, **kwargs)

        # Specialized AI categories
        if category == ModelCategory.MUSIC_GENERATION:
            return await hf.music_generation(model_id, **kwargs)
        if category == ModelCategory.VOICE_CLONING:
            return await hf.voice_cloning(model_id, **kwargs)
        if category == ModelCategory.SUPER_RESOLUTION:
            return await hf.super_resolution(model_id, **kwargs)
        if category in (
            ModelCategory.FACE_RESTORATION,
            ModelCategory.IMAGE_INPAINTING,
            ModelCategory.IMAGE_OUTPAINTING,
        ):
            # Routed through the super-resolution helper (similar processing).
            return await hf.super_resolution(model_id, **kwargs)
        if category == ModelCategory.BACKGROUND_REMOVAL:
            return await hf.background_removal(model_id, **kwargs)

        # Creative Content categories
        if category in (
            ModelCategory.CREATIVE_WRITING,
            ModelCategory.STORY_GENERATION,
            ModelCategory.POETRY_GENERATION,
            ModelCategory.SCREENPLAY_WRITING,
        ):
            return await hf.creative_writing(model_id, **kwargs)
        if category in (ModelCategory.BLOG_WRITING, ModelCategory.MARKETING_COPY):
            # Plain text generation is used for these.
            return await hf.text_generation(model_id, **kwargs)

        # Game Development categories
        if category in (
            ModelCategory.CHARACTER_GENERATION,
            ModelCategory.LEVEL_GENERATION,
            ModelCategory.DIALOGUE_GENERATION,
            ModelCategory.GAME_ASSET_GENERATION,
        ):
            # Routed through the creative-writing helper.
            return await hf.creative_writing(model_id, **kwargs)

        # Science and Research categories
        if category in (
            ModelCategory.PROTEIN_FOLDING,
            ModelCategory.MOLECULE_GENERATION,
        ):
            # Specialized text generation.
            return await hf.text_generation(model_id, **kwargs)
        if category in (
            ModelCategory.SCIENTIFIC_WRITING,
            ModelCategory.RESEARCH_ASSISTANCE,
            ModelCategory.DATA_ANALYSIS,
        ):
            return await hf.text_generation(model_id, **kwargs)

        # Business and Productivity categories
        if category in (
            ModelCategory.EMAIL_GENERATION,
            ModelCategory.PRESENTATION_CREATION,
            ModelCategory.REPORT_GENERATION,
            ModelCategory.MEETING_SUMMARIZATION,
            ModelCategory.PROJECT_PLANNING,
        ):
            # The category's string value doubles as the document type.
            return await hf.business_document(model_id, category.value, **kwargs)

        raise ValueError(f"Unsupported model category: {category}")