diff --git a/vLLM/NVIDIA-SPARK/old/docker-compose.yml_00 b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_00
new file mode 100644
index 0000000..851d7c8
--- /dev/null
+++ b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_00
@@ -0,0 +1,37 @@
+services:
+  qwen-36:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-qwen
+    restart: always
+    runtime: nvidia
+    ipc: host
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+    ports:
+      - "8001:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve /root/.cache/huggingface/hub/models--Qwen--Qwen3.6-35B-A3B-FP8/snapshots/61a5771f218894aaacf97551e24a25b866750fc2
+      --quantization fp8
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --gpu-memory-utilization 0.40
+      --trust-remote-code
+      --served-model-name qwen-3.6-blackwell
+
+  gpt-oss:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-gpt-oss
+    restart: always
+    runtime: nvidia
+    ipc: host
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+    ports:
+      - "8000:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve openai/gpt-oss-20b
+      --gpu-memory-utilization 0.40
diff --git a/vLLM/NVIDIA-SPARK/old/docker-compose.yml_01 b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_01
new file mode 100644
index 0000000..56a03f0
--- /dev/null
+++ b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_01
@@ -0,0 +1,39 @@
+services:
+  qwen-36:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-qwen
+    restart: always
+    runtime: nvidia
+    ipc: host
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - HF_HUB_ENABLE_HF_TRANSFER=1  # Speeds up loading if the model has to be downloaded
+    ports:
+      - "8001:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve /root/.cache/huggingface/hub/models--Qwen--Qwen3.6-35B-A3B-FP8/snapshots/61a5771f218894aaacf97551e24a25b866750fc2
+      --quantization fp8
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --gpu-memory-utilization 0.40
+      --trust-remote-code
+      --served-model-name qwen-3.6-blackwell
+
+  gpt-oss:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-gpt-oss
+    restart: always
+    runtime: nvidia
+    ipc: host
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+    ports:
+      - "8000:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve openai/gpt-oss-20b
+      --gpu-memory-utilization 0.40
+      --served-model-name gpt-oss-20b
diff --git a/vLLM/NVIDIA-SPARK/old/docker-compose.yml_02 b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_02
new file mode 100644
index 0000000..05ae3c1
--- /dev/null
+++ b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_02
@@ -0,0 +1,50 @@
+services:
+  # First model: GPT-OSS (faster to load)
+  gpt-oss:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-gpt-oss
+    restart: always
+    runtime: nvidia
+    ipc: host
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+    ports:
+      - "8000:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve openai/gpt-oss-20b
+      --gpu-memory-utilization 0.40
+      --served-model-name gpt-oss-20b
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:8000/health || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 10
+      start_period: 60s
+
+  # Second model: Qwen (starts only once GPT-OSS is ready)
+  qwen-36:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-qwen
+    restart: always
+    runtime: nvidia
+    ipc: host
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - HF_HUB_ENABLE_HF_TRANSFER=1  # Speeds up loading if the model has to be downloaded
+    depends_on:
+      gpt-oss:
+        condition: service_healthy
+    ports:
+      - "8001:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve /root/.cache/huggingface/hub/models--Qwen--Qwen3.6-35B-A3B-FP8/snapshots/61a5771f218894aaacf97551e24a25b866750fc2
+      --quantization fp8
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --gpu-memory-utilization 0.40
+      --trust-remote-code
+      --served-model-name qwen-3.6-blackwell
diff --git a/vLLM/NVIDIA-SPARK/old/docker-compose.yml_04 b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_04
new file mode 100644
index 0000000..2b04d5b
--- /dev/null
+++ b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_04
@@ -0,0 +1,49 @@
+services:
+  gpt-oss:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-gpt-oss
+    restart: always
+    ipc: host
+    shm_size: '16gb'
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    ports:
+      - "8000:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    command: >
+      vllm serve openai/gpt-oss-20b
+      --gpu-memory-utilization 0.35
+      --trust-remote-code
+
+  qwen-36:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-qwen
+    restart: always
+    ipc: host
+    shm_size: '16gb'
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    depends_on:
+      - gpt-oss
+    environment:
+      - NVIDIA_VISIBLE_DEVICES=all
+      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
+    ports:
+      - "8001:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    entrypoint: /bin/sh -c "sleep 300 && vllm serve /root/.cache/huggingface/hub/models--Qwen--Qwen3.6-35B-A3B-FP8/snapshots/61a5771f218894aaacf97551e24a25b866750fc2 --quantization fp8 --kv-cache-dtype fp8 --max-model-len 32768 --gpu-memory-utilization 0.40 --trust-remote-code --served-model-name qwen-3.6-blackwell"
diff --git a/vLLM/NVIDIA-SPARK/old/docker-compose.yml_05 b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_05
new file mode 100644
index 0000000..b06cde4
--- /dev/null
+++ b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_05
@@ -0,0 +1,62 @@
+services:
+  gpt-oss:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-gpt-oss
+    restart: always
+    ipc: host
+    environment:
+      - HF_MODEL_HANDLE=openai/gpt-oss-20b
+    ports:
+      - "8000:8000"
+    volumes:
+      - /root/.cache/huggingface/hub:/root/.cache/huggingface/hub
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    command: >
+      vllm serve openai/gpt-oss-20b
+      --gpu-memory-utilization 0.40
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --trust-remote-code
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 300s
+
+  qwen-blackwell:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-qwen-blackwell
+    restart: always
+    ipc: host
+    depends_on:
+      gpt-oss:
+        condition: service_healthy  # Wait for the first service to report "healthy"
+    ports:
+      - "8001:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    command: >
+      vllm serve /root/.cache/huggingface/hub/models--Qwen--Qwen3.6-35B-A3B-FP8/snapshots/61a5771f218894aaacf97551e24a25b866750fc2
+      --quantization fp8
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --gpu-memory-utilization 0.40
+      --trust-remote-code
+      --enable-auto-tool-choice
+      --tool-call-parser hermes
+      --served-model-name qwen-3.6-blackwell
+
diff --git a/vLLM/NVIDIA-SPARK/old/docker-compose.yml_ok b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_ok
new file mode 100644
index 0000000..b06cde4
--- /dev/null
+++ b/vLLM/NVIDIA-SPARK/old/docker-compose.yml_ok
@@ -0,0 +1,62 @@
+services:
+  gpt-oss:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-gpt-oss
+    restart: always
+    ipc: host
+    environment:
+      - HF_MODEL_HANDLE=openai/gpt-oss-20b
+    ports:
+      - "8000:8000"
+    volumes:
+      - /root/.cache/huggingface/hub:/root/.cache/huggingface/hub
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    command: >
+      vllm serve openai/gpt-oss-20b
+      --gpu-memory-utilization 0.40
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --trust-remote-code
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 300s
+
+  qwen-blackwell:
+    image: nvcr.io/nvidia/vllm:26.03.post1-py3
+    container_name: vllm-qwen-blackwell
+    restart: always
+    ipc: host
+    depends_on:
+      gpt-oss:
+        condition: service_healthy  # Wait for the first service to report "healthy"
+    ports:
+      - "8001:8000"
+    volumes:
+      - /root/.cache/huggingface:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    command: >
+      vllm serve /root/.cache/huggingface/hub/models--Qwen--Qwen3.6-35B-A3B-FP8/snapshots/61a5771f218894aaacf97551e24a25b866750fc2
+      --quantization fp8
+      --kv-cache-dtype fp8
+      --max-model-len 32768
+      --gpu-memory-utilization 0.40
+      --trust-remote-code
+      --enable-auto-tool-choice
+      --tool-call-parser hermes
+      --served-model-name qwen-3.6-blackwell
+
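
Usage note: every variant above exposes two OpenAI-compatible vLLM endpoints on the host, gpt-oss on port 8000 and Qwen on port 8001. The snippet below is a minimal smoke-test sketch, not part of the diff, assuming the docker-compose.yml_ok stack is running on localhost and the third-party requests package is installed; in that file gpt-oss is served without --served-model-name, so it keeps its Hugging Face ID, while Qwen is served as qwen-3.6-blackwell.

# Minimal smoke test for the two vLLM endpoints defined above (sketch,
# assumes the docker-compose.yml_ok stack is up on localhost).
import requests

ENDPOINTS = {
    "openai/gpt-oss-20b": "http://localhost:8000",   # gpt-oss service
    "qwen-3.6-blackwell": "http://localhost:8001",   # qwen-blackwell service
}

for model, base_url in ENDPOINTS.items():
    # Same endpoint the compose healthchecks poll with curl.
    requests.get(f"{base_url}/health", timeout=10).raise_for_status()

    # Minimal OpenAI-compatible chat completion request.
    resp = requests.post(
        f"{base_url}/v1/chat/completions",
        json={
            "model": model,
            "messages": [{"role": "user", "content": "Reply with one word."}],
            "max_tokens": 16,
        },
        timeout=120,
    )
    resp.raise_for_status()
    print(model, "->", resp.json()["choices"][0]["message"]["content"])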