):
+     logger.warning(
+         "vLLM only supports Linux platform (including WSL) and MacOS."
+         "Building on %s, "
diff --git a/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py
new file mode 100644
index 000000000000..cebb4198f919
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py
@@ -0,0 +1,35 @@
+--- vllm/distributed/parallel_state.py.orig	2026-04-07 18:19:15 UTC
++++ vllm/distributed/parallel_state.py
+@@ -24,6 +24,7 @@ import contextlib
+ """
+ 
+ import contextlib
++import sys
+ import gc
+ import pickle
+ import weakref
+@@ -341,8 +342,13 @@ class GroupCoordinator:
+             )
+             # a group with `gloo` backend, to allow direct coordination between
+             # processes through the CPU.
++            # On FreeBSD, gloo TCP transport is unavailable; use fake backend.
++            _cpu_backend = "gloo"
++            if sys.platform.startswith("freebsd"):
++                import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg")
++                _cpu_backend = "fake"
+             with suppress_stdout():
+-                cpu_group = torch.distributed.new_group(ranks, backend="gloo")
++                cpu_group = torch.distributed.new_group(ranks, backend=_cpu_backend)
+             if self.rank in ranks:
+                 self.ranks = ranks
+                 self.world_size = len(ranks)
+@@ -1419,6 +1425,9 @@ def init_distributed_environment(
+             )
+             backend = "gloo"
+         # this backend is used for WORLD
++        # On FreeBSD, register the 'fake' backend before use.
++        if sys.platform.startswith("freebsd") and backend == "fake":
++            import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg")
+         torch.distributed.init_process_group(
+             backend=backend,
+             init_method=distributed_init_method,
diff --git a/misc/py-vllm/files/patch-vllm_platforms_____init____.py b/misc/py-vllm/files/patch-vllm_platforms_____init____.py
new file mode 100644
index 000000000000..ab6f660d7de3
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_platforms_____init____.py
@@ -0,0 +1,29 @@
+--- vllm/platforms/__init__.py.orig	2026-04-07 17:26:12 UTC
++++ vllm/platforms/__init__.py
+@@ -58,6 +58,11 @@ def cuda_platform_plugin() -> str | None:
+ 
+ 
+ def cuda_platform_plugin() -> str | None:
++    import sys
++    if sys.platform.startswith("freebsd"):
++        # CUDA extensions are not built on FreeBSD; use CPU platform instead.
++        logger.debug("CUDA platform disabled on FreeBSD.")
++        return None
+     is_cuda = False
+     logger.debug("Checking if CUDA platform is available.")
+     try:
+@@ -172,10 +177,12 @@ def cpu_platform_plugin() -> str | None:
+         if not is_cpu:
+             import sys
+ 
+-            is_cpu = sys.platform.startswith("darwin")
++            is_cpu = sys.platform.startswith("darwin") or sys.platform.startswith(
++                "freebsd"
++            )
+             if is_cpu:
+                 logger.debug(
+-                    "Confirmed CPU platform is available because the machine is MacOS."
++                    "Confirmed CPU platform is available because the machine is MacOS or FreeBSD."
+                 )
+ 
+     except Exception as e:
diff --git a/misc/py-vllm/files/patch-vllm_platforms_cpu.py b/misc/py-vllm/files/patch-vllm_platforms_cpu.py
new file mode 100644
index 000000000000..ae1db6392125
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_platforms_cpu.py
@@ -0,0 +1,42 @@
+--- vllm/platforms/cpu.py.orig	2026-04-03 01:57:10 UTC
++++ vllm/platforms/cpu.py
+@@ -74,7 +74,8 @@ class CpuPlatform(Platform):
+     device_name: str = "cpu"
+     device_type: str = "cpu"
+     dispatch_key: str = "CPU"
+-    dist_backend: str = "gloo"
++    # FreeBSD lacks gloo TCP transport (epoll-based); use fake backend.
++    dist_backend: str = "fake" if sys.platform.startswith("freebsd") else "gloo"
+     device_control_env_var = "CPU_VISIBLE_MEMORY_NODES"
+ 
+     @property
+@@ -378,7 +379,28 @@ class CpuPlatform(Platform):
+ 
+     @classmethod
+     def get_allowed_cpu_core_node_list(cls) -> tuple[list[int], list[LogicalCPUInfo]]:
+-        assert platform.system() == "Linux"
++        assert platform.system() in ("Linux", "FreeBSD")
++
++        if platform.system() == "FreeBSD":
++            # FreeBSD lacks lscpu -J; assume one NUMA node, one core per logical CPU.
++            allowed_cpu_id_set = (
++                os.sched_getaffinity(0)
++                if hasattr(os, "sched_getaffinity")
++                else set(range(os.cpu_count() or 1))
++            )
++            logical_cpu_list = [
++                LogicalCPUInfo(id=cpu_id, physical_core=cpu_id, numa_node=0)
++                for cpu_id in sorted(allowed_cpu_id_set)
++            ]
++            allowed_numa_nodes_list = [0]
++            env_key = CpuPlatform.device_control_env_var
++            if env_key in os.environ and os.environ[env_key] != "":
++                visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
++                allowed_numa_nodes_list = [
++                    x for x in sorted(list(set(visible_nodes)))
++                    if x in allowed_numa_nodes_list
++                ]
++            return allowed_numa_nodes_list, logical_cpu_list
+ 
+         # Init LogicalCPUInfo from lscpu
+         lscpu_output = subprocess.check_output(
diff --git a/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py
new file mode 100644
index 000000000000..e762ac4eafb9
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py
@@ -0,0 +1,12 @@
+--- vllm/v1/worker/cpu_worker.py.orig	2026-04-07 17:26:12 UTC
++++ vllm/v1/worker/cpu_worker.py
+@@ -91,6 +91,9 @@ class CPUWorker(Worker):
+                 self.local_omp_cpuid = self._get_autobind_cpu_ids(lambda cpus: cpus)
+             else:
+                 self.local_omp_cpuid = "nobind"
++        elif omp_cpuids == "auto":
++            # Non-Linux OS: NUMA-based auto-binding not supported, fall back to nobind
++            self.local_omp_cpuid = "nobind"
+         elif omp_cpuids == "nobind":
+             self.local_omp_cpuid = "nobind"
+         else:
diff --git a/misc/py-vllm/pkg-descr b/misc/py-vllm/pkg-descr
new file mode 100644
index 000000000000..1d4993624aea
--- /dev/null
+++ b/misc/py-vllm/pkg-descr
@@ -0,0 +1,13 @@
+vLLM is a fast and easy-to-use library for LLM inference and serving.
+It provides high-throughput and memory-efficient inference for large language
+models (LLMs) using state-of-the-art serving technologies including:
+
+- PagedAttention for efficient KV cache memory management
+- Continuous batching of incoming requests
+- Optimized CUDA kernels (on supported platforms)
+- Hugging Face model compatibility
+- Various decoding algorithms including parallel sampling and beam search
+- OpenAI-compatible API server
+
+On FreeBSD, vLLM runs on the CPU platform with the empty device target
+(VLLM_TARGET_DEVICE=empty): pure Python inference without GPU acceleration.