# syntax=docker/dockerfile:1

# Pin the base image for reproducible builds. The previously-commented pin
# (0.12.3) is restored in place of `:latest`; bump this tag deliberately.
FROM ollama/ollama:0.12.3

# Ollama server configuration reference (runtime-tunable via `docker run -e`):
#   OLLAMA_DEBUG            Show additional debug information (e.g. OLLAMA_DEBUG=1)
#   OLLAMA_HOST             IP address for the ollama server (default 127.0.0.1:11434)
#   OLLAMA_CONTEXT_LENGTH   Context length to use unless otherwise specified (default: 4096)
#   OLLAMA_KEEP_ALIVE       The duration that models stay loaded in memory (default "5m")
#   OLLAMA_MAX_LOADED_MODELS Maximum number of loaded models per GPU
#   OLLAMA_MAX_QUEUE        Maximum number of queued requests
#   OLLAMA_MODELS           The path to the models directory
#   OLLAMA_NUM_PARALLEL     Maximum number of parallel requests
#   OLLAMA_NOPRUNE          Do not prune model blobs on startup
#   OLLAMA_ORIGINS          A comma separated list of allowed origins
#   OLLAMA_SCHED_SPREAD     Always schedule model across all GPUs
#   OLLAMA_FLASH_ATTENTION  Enabled flash attention
#   OLLAMA_KV_CACHE_TYPE    Quantization type for the K/V cache (default: f16)
#   OLLAMA_LLM_LIBRARY      Set LLM library to bypass autodetection
#   OLLAMA_GPU_OVERHEAD     Reserve a portion of VRAM per GPU (bytes)
#   OLLAMA_LOAD_TIMEOUT     Timeout for model loading

# Defaults baked into this image: keep models resident for 24h, listen on all
# interfaces at port 7861, and allow long model-load times.
ENV OLLAMA_KEEP_ALIVE="24h" \
    OLLAMA_HOST=0.0.0.0:7861 \
    OLLAMA_LOAD_TIMEOUT="24h"

# One layer for all OS package work: `apt-get update` must share a layer with
# `install` (a lone `update` layer serves stale indexes on rebuild), and the
# apt lists are removed in the same layer so they never persist in the image.
# (Previous duplicate `apt update` / `apt upgrade -y` layers removed — `apt`
# is not intended for scripts; `apt-get` is used throughout.)
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        g++ \
        git \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes. --break-system-packages is required to
# install into the distro-managed Python (PEP 668).
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt --break-system-packages

# Model-pull helper scripts. The build-time invocations were already disabled
# in the original; they are kept here (commented) for reference.
COPY pull06.sh pull06.sh
COPY pull17.sh pull17.sh
COPY pull4.sh pull4.sh
COPY pull8.sh pull8.sh
COPY pull14.sh pull14.sh
# RUN /bin/bash -x pull06.sh
# RUN /bin/bash -x pull8.sh
# RUN /bin/bash -x pull14.sh

# NOTE(review): relative VOLUME paths resolve against the working directory,
# which is "/" here — confirm /vol1 and /vol2 are the intended mount points
# and consider making these absolute.
VOLUME vol1 vol2

# Application code last: edits here invalidate only these final layers.
COPY main.py main.py
COPY util.py util.py
COPY start.sh start.sh

# start.sh wraps the server (original direct entrypoint kept for reference).
# It is found relative to the working directory "/".
#ENTRYPOINT ["/usr/bin/ollama", "serve"]
ENTRYPOINT ["/bin/bash", "-x", "start.sh"]