# syntax=docker/dockerfile:1

# Pin the base image for reproducible builds. The previously-commented pin
# (0.12.3) is restored in place of `:latest`; bump this tag deliberately.
FROM ollama/ollama:0.12.3

# Ollama server configuration reference (runtime-tunable via `docker run -e`):
#   OLLAMA_DEBUG            Show additional debug information (e.g. OLLAMA_DEBUG=1)
#   OLLAMA_HOST             IP address for the ollama server (default 127.0.0.1:11434)
#   OLLAMA_CONTEXT_LENGTH   Context length to use unless otherwise specified (default: 4096)
#   OLLAMA_KEEP_ALIVE       The duration that models stay loaded in memory (default "5m")
#   OLLAMA_MAX_LOADED_MODELS Maximum number of loaded models per GPU
#   OLLAMA_MAX_QUEUE        Maximum number of queued requests
#   OLLAMA_MODELS           The path to the models directory
#   OLLAMA_NUM_PARALLEL     Maximum number of parallel requests
#   OLLAMA_NOPRUNE          Do not prune model blobs on startup
#   OLLAMA_ORIGINS          A comma separated list of allowed origins
#   OLLAMA_SCHED_SPREAD     Always schedule model across all GPUs
#   OLLAMA_FLASH_ATTENTION  Enabled flash attention
#   OLLAMA_KV_CACHE_TYPE    Quantization type for the K/V cache (default: f16)
#   OLLAMA_LLM_LIBRARY      Set LLM library to bypass autodetection
#   OLLAMA_GPU_OVERHEAD     Reserve a portion of VRAM per GPU (bytes)
#   OLLAMA_LOAD_TIMEOUT     Timeout for model loading

# Defaults baked into this image: keep models resident for 24h, listen on all
# interfaces at port 7861, and allow long model-load times.
ENV OLLAMA_KEEP_ALIVE="24h" \
    OLLAMA_HOST=0.0.0.0:7861 \
    OLLAMA_LOAD_TIMEOUT="24h"

# One layer for all OS package work: `apt-get update` must share a layer with
# `install` (a lone `update` layer serves stale indexes on rebuild), and the
# apt lists are removed in the same layer so they never persist in the image.
# (Previous duplicate `apt update` / `apt upgrade -y` layers removed — `apt`
# is not intended for scripts; `apt-get` is used throughout.)
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        g++ \
        git \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the pip layer stays cached until
# requirements.txt itself changes. --break-system-packages is required to
# install into the distro-managed Python (PEP 668).
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt --break-system-packages

# Model-pull helper scripts. The build-time invocations were already disabled
# in the original; they are kept here (commented) for reference.
COPY pull06.sh pull06.sh
COPY pull17.sh pull17.sh
COPY pull4.sh pull4.sh
COPY pull8.sh pull8.sh
COPY pull14.sh pull14.sh
# RUN /bin/bash -x pull06.sh
# RUN /bin/bash -x pull8.sh
# RUN /bin/bash -x pull14.sh

# NOTE(review): relative VOLUME paths resolve against the working directory,
# which is "/" here — confirm /vol1 and /vol2 are the intended mount points
# and consider making these absolute.
VOLUME vol1 vol2

# Application code last: edits here invalidate only these final layers.
COPY main.py main.py
COPY util.py util.py
COPY start.sh start.sh

# start.sh wraps the server (original direct entrypoint kept for reference).
# It is found relative to the working directory "/".
#ENTRYPOINT ["/usr/bin/ollama", "serve"]
ENTRYPOINT ["/bin/bash", "-x", "start.sh"]