Date:      Tue, 07 Apr 2026 19:19:19 +0000
From:      Yuri Victorovich <yuri@FreeBSD.org>
To:        ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org
Subject:   git: 919c3600edae - main - misc/py-vllm: New port: High-throughput and memory-efficient LLM inference engine
Message-ID:  <69d558b7.3335d.7eb97e75@gitrepo.freebsd.org>


The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=919c3600edaed3248916e0b75d4249fa9903b904

commit 919c3600edaed3248916e0b75d4249fa9903b904
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2026-04-07 19:18:53 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2026-04-07 19:19:14 +0000

    misc/py-vllm: New port: High-throughput and memory-efficient LLM inference engine
---
 misc/Makefile                                      |   1 +
 misc/py-vllm/Makefile                              | 109 +++++++++++++++++++++
 misc/py-vllm/distinfo                              |   5 +
 .../py-vllm/files/patch-cmake_cpu__extension.cmake |  78 +++++++++++++++
 misc/py-vllm/files/patch-csrc_cpu_shm.cpp          |  12 +++
 misc/py-vllm/files/patch-pyproject.toml            |  26 +++++
 misc/py-vllm/files/patch-setup.py                  |  15 +++
 .../patch-vllm_distributed_parallel__state.py      |  35 +++++++
 .../files/patch-vllm_platforms_____init____.py     |  29 ++++++
 misc/py-vllm/files/patch-vllm_platforms_cpu.py     |  42 ++++++++
 .../files/patch-vllm_v1_worker_cpu__worker.py      |  12 +++
 misc/py-vllm/pkg-descr                             |  13 +++
 12 files changed, 377 insertions(+)

diff --git a/misc/Makefile b/misc/Makefile
index 2dadb25668f2..c09343f97fef 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -623,6 +623,7 @@
     SUBDIR += py-uhi
     SUBDIR += py-uuid-utils
     SUBDIR += py-vaderSentiment
+    SUBDIR += py-vllm
     SUBDIR += py-wandb
     SUBDIR += py-wurlitzer
     SUBDIR += py-xformers
diff --git a/misc/py-vllm/Makefile b/misc/py-vllm/Makefile
new file mode 100644
index 000000000000..1a56e18195a9
--- /dev/null
+++ b/misc/py-vllm/Makefile
@@ -0,0 +1,109 @@
+PORTNAME=	vllm
+DISTVERSION=	0.19.0
+CATEGORIES=	misc python # machine-learning
+MASTER_SITES=	PYPI \
+		https://github.com/uxlfoundation/oneDNN/archive/refs/tags/:onednn_src
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+DISTFILES=	${DISTNAME}${EXTRACT_SUFX} \
+		v3.10${EXTRACT_SUFX}:onednn_src
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	High-throughput and memory-efficient LLM inference engine
+WWW=		https://vllm.ai/ \
+		https://github.com/vllm-project/vllm
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${LOCALBASE}/llvm19/bin/clang:devel/llvm19 \
+		${PYTHON_PKGNAMEPREFIX}Jinja2>=3.0:devel/py-Jinja2@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}ninja>=1.13:devel/py-ninja@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}packaging>=24.2:devel/py-packaging@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pytorch>=2.10.0:misc/py-pytorch@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}setuptools>=63.0:devel/py-setuptools@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}setuptools-scm>=8.0:devel/py-setuptools-scm@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR}
+LIB_DEPENDS=	libabsl_status.so:devel/abseil \
+		libprotobuf.so:devel/protobuf
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}aiohttp>=3.13.3:www/py-aiohttp@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}anthropic>0:misc/py-anthropic@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}blake3>0:security/py-blake3@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}cachetools>0:devel/py-cachetools@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}cbor2>0:devel/py-cbor2@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}cloudpickle>0:devel/py-cloudpickle@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}compressed-tensors>=0.14.0.1:misc/py-compressed-tensors@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}depyf>=0.20.0:devel/py-depyf@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}diskcache>=5.6.3:devel/py-diskcache@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}einops>0:misc/py-einops@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}fastapi>0:www/py-fastapi@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}filelock>=3.16.1:sysutils/py-filelock@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}gguf>=0.17.0:misc/py-gguf@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}ijson>0:devel/py-ijson@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}Jinja2>=3.0:devel/py-Jinja2@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}lark>=1.2.2:devel/py-lark@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}llguidance>=1.3.0:textproc/py-llguidance@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}lm-format-enforcer>=0.11.3:misc/py-lm-format-enforcer@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}mcp>0:misc/py-mcp@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}mistral-common>=1.10.0:misc/py-mistral-common@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}model-hosting-container-standards>=0.1.13:misc/py-model-hosting-container-standards@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}msgspec>0:devel/py-msgspec@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}ninja>=1.13:devel/py-ninja@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}numpy1>=1.25:math/py-numpy1@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}openai>=2.0.0:misc/py-openai@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}openai-harmony>=0.0.3:misc/py-openai-harmony@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}opentelemetry-api>=1.27.0:devel/py-opentelemetry-api@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}opentelemetry-exporter-otlp>=1.27.0:devel/py-opentelemetry-exporter-otlp@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}opentelemetry-sdk>=1.27.0:devel/py-opentelemetry-sdk@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}opentelemetry-semantic-conventions-ai>=0.4.1:devel/py-opentelemetry-semantic-conventions-ai@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}opencv-python-headless>=4.11.0:graphics/py-opencv-python-headless@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}outlines-core>=0.2.11:textproc/py-outlines-core@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}partial-json-parser>0:textproc/py-partial-json-parser@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pillow>=10.0.0:graphics/py-pillow@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}prometheus-client>=0.18.0:net-mgmt/py-prometheus-client@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}prometheus-fastapi-instrumentator>=7.0.0:www/py-prometheus-fastapi-instrumentator@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}protobuf>=5.29.6:devel/py-protobuf@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}psutil>=5.9.0:sysutils/py-psutil@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}py-cpuinfo>0:sysutils/py-py-cpuinfo@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pybase64>0:devel/py-pybase64@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pydantic2>=2.12.0:devel/py-pydantic2@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}python-json-logger>0:devel/py-python-json-logger@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pyyaml>0:devel/py-pyyaml@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pyzmq>=25.0.0:net/py-pyzmq@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}requests>=2.26.0:www/py-requests@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}sentencepiece>0:textproc/py-sentencepiece@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}setproctitle>0:devel/py-setproctitle@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}setuptools>=63.0:devel/py-setuptools@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}tiktoken>=0.6.0:textproc/py-tiktoken@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}tokenizers>=0.21.1:textproc/py-tokenizers@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}tqdm>=4.0:misc/py-tqdm@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}uvloop>=0.20.0:devel/py-uvloop@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}transformers>=4.56.0:misc/py-transformers@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pytorch>=2.10.0:misc/py-pytorch@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}typing-extensions>=4.10:devel/py-typing-extensions@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}watchfiles>0:devel/py-watchfiles@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}xgrammar>=0.1.32:misc/py-xgrammar@${PY_FLAVOR}
+TEST_DEPENDS=	${PYTHON_PKGNAMEPREFIX}datasets>=4.8.2:misc/py-datasets@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}evaluate>=0.4.6:misc/py-evaluate@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}multiprocess>=0.70.19:devel/py-multiprocess@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pytest-asyncio>=1.3.0:devel/py-pytest-asyncio@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}tblib>=3.2.2:devel/py-tblib@${PY_FLAVOR}
+
+USES=		cmake:indirect python
+USE_PYTHON=	pep517 autoplist pytest
+
+# Build the CPU extension using clang (same ABI as PyTorch on FreeBSD).
+# VLLM_TARGET_DEVICE=cpu builds the vllm._C CPU extension.
+# oneDNN (fetched as a distfile) provides optimised GEMM kernels.
+MAKE_ENV+=	VLLM_TARGET_DEVICE=cpu \
+		CMAKE_ARGS="-DCMAKE_C_COMPILER=${LOCALBASE}/llvm19/bin/clang -DCMAKE_CXX_COMPILER=${LOCALBASE}/llvm19/bin/clang++ -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=TRUE" \
+		FETCHCONTENT_SOURCE_DIR_ONEDNN=${WRKDIR}/oneDNN-3.10
+
+TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}:${WRKSRC}/tests/vllm_test_utils:${WRKSRC}/tests/plugins/vllm_add_dummy_stat_logger
+TEST_WRKDIR=	${WRKSRC}/tests
+
+# Tests don't run because:
+# * they depend on imagehash, lm_eval, mteb, pqdm, ray, runai_model_streamer and schemathesis, which are not in FreeBSD ports yet
+# * vllm._C and vllm.v1.worker.gpu.mm.encoder_cudagraph require CUDA/GPU hardware.
+
+.include <bsd.port.mk>
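
As a quick sanity check of the CPU build selected by MAKE_ENV above, something
like the following can be run against the installed package. This is a sketch
only, not part of the port; `current_platform` is vLLM's public platform
accessor, and the assertion relies on the cpu.py patch further down.

    # Sketch: confirm the compiled CPU extension and platform detection on FreeBSD.
    import torch                      # the extension links against libtorch; load it first
    import vllm._C                    # the CPU extension built with VLLM_TARGET_DEVICE=cpu
    from vllm.platforms import current_platform

    assert current_platform.device_type == "cpu"   # CpuPlatform, per the cpu.py patch
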
diff --git a/misc/py-vllm/distinfo b/misc/py-vllm/distinfo
new file mode 100644
index 000000000000..579dac429ed1
--- /dev/null
+++ b/misc/py-vllm/distinfo
@@ -0,0 +1,5 @@
+TIMESTAMP = 1775582925
+SHA256 (vllm-0.19.0.tar.gz) = 81e59cf87175e7a62eb8d9acf5989484bbd17089d5eface353f89067bda282d9
+SIZE (vllm-0.19.0.tar.gz) = 31071745
+SHA256 (v3.10.tar.gz) = ba5834a1fdbb6d1c1b1c065dfd789438e7aa42c03fc52d92c02af85d78d1c75c
+SIZE (v3.10.tar.gz) = 13507701
diff --git a/misc/py-vllm/files/patch-cmake_cpu__extension.cmake b/misc/py-vllm/files/patch-cmake_cpu__extension.cmake
new file mode 100644
index 000000000000..9b7998f407f8
--- /dev/null
+++ b/misc/py-vllm/files/patch-cmake_cpu__extension.cmake
@@ -0,0 +1,78 @@
+--- cmake/cpu_extension.cmake.orig	2026-04-03 01:57:10 UTC
++++ cmake/cpu_extension.cmake
+@@ -20,6 +20,11 @@ set (ENABLE_NUMA TRUE)
+ 
+ set (ENABLE_NUMA TRUE)
+ 
++# FreeBSD does not have libnuma
++if (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
++    set(ENABLE_NUMA OFF)
++endif()
++
+ #
+ # Check the compile flags
+ #
+@@ -33,12 +38,25 @@ if (NOT MACOSX_FOUND)
+ endif()
+ 
+ if (NOT MACOSX_FOUND)
+-    execute_process(COMMAND cat /proc/cpuinfo
+-                    RESULT_VARIABLE CPUINFO_RET
+-                    OUTPUT_VARIABLE CPUINFO)
+-    if (NOT CPUINFO_RET EQUAL 0)
+-        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
++    # Try Linux /proc/cpuinfo first, then the FreeBSD linuxulator path
++    if (EXISTS "/proc/cpuinfo")
++        set(_cpuinfo_path "/proc/cpuinfo")
++    elseif (EXISTS "/compat/linux/proc/cpuinfo")
++        set(_cpuinfo_path "/compat/linux/proc/cpuinfo")
++    else()
++        set(_cpuinfo_path "")
+     endif()
++    if (_cpuinfo_path)
++        execute_process(COMMAND cat ${_cpuinfo_path}
++                        RESULT_VARIABLE CPUINFO_RET
++                        OUTPUT_VARIABLE CPUINFO)
++        if (NOT CPUINFO_RET EQUAL 0)
++            message(FATAL_ERROR "Failed to check CPU features via ${_cpuinfo_path}")
++        endif()
++    else()
++        message(STATUS "No cpuinfo available; relying on CMAKE_SYSTEM_PROCESSOR for ISA detection")
++        set(CPUINFO "")
++    endif()
+ endif()
+ 
+ 
+@@ -91,9 +109,10 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR E
+ 
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
+     set(ENABLE_X86_ISA ON)
+-    if (NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
+-            CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3))
+-        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
++    if (NOT (
++            (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3) OR
++            (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 15.0)))
++        message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3 or clang >= 15.0")
+     endif()
+     list(APPEND CXX_COMPILE_FLAGS "-mf16c")
+     list(APPEND CXX_COMPILE_FLAGS_AVX512 ${CXX_COMPILE_FLAGS})
+@@ -407,9 +426,15 @@ if (ENABLE_X86_ISA)
+     message(STATUS "CPU extension (AVX512F) source files: ${VLLM_EXT_SRC_AVX512}")
+     message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")
+ 
+-    set(_C_LIBS numa dnnl_ext)
+-    set(_C_AVX512_LIBS numa dnnl_ext)
+-    set(_C_AVX2_LIBS numa)
++    if(ENABLE_NUMA)
++        set(_C_LIBS numa dnnl_ext)
++        set(_C_AVX512_LIBS numa dnnl_ext)
++        set(_C_AVX2_LIBS numa)
++    else()
++        set(_C_LIBS dnnl_ext)
++        set(_C_AVX512_LIBS dnnl_ext)
++        set(_C_AVX2_LIBS "")
++    endif()
+ 
+     # AMX + AVX512F + AVX512BF16 + AVX512VNNI
+     define_extension_target(
diff --git a/misc/py-vllm/files/patch-csrc_cpu_shm.cpp b/misc/py-vllm/files/patch-csrc_cpu_shm.cpp
new file mode 100644
index 000000000000..521a3f335840
--- /dev/null
+++ b/misc/py-vllm/files/patch-csrc_cpu_shm.cpp
@@ -0,0 +1,12 @@
+--- csrc/cpu/shm.cpp.orig	2026-04-07 17:37:32 UTC
++++ csrc/cpu/shm.cpp
+@@ -2,6 +2,9 @@
+ 
+ #include <fcntl.h>
+ #include <sys/mman.h>
++#ifndef MAP_POPULATE
++#  define MAP_POPULATE 0
++#endif
+ #include <sys/stat.h>
+ #include <unistd.h>
+ 
diff --git a/misc/py-vllm/files/patch-pyproject.toml b/misc/py-vllm/files/patch-pyproject.toml
new file mode 100644
index 000000000000..5a3bc19a43b2
--- /dev/null
+++ b/misc/py-vllm/files/patch-pyproject.toml
@@ -0,0 +1,26 @@
+--- pyproject.toml.orig	2026-04-06 20:40:36 UTC
++++ pyproject.toml
+@@ -1,12 +1,9 @@ requires = [
+ [build-system]
+ # Should be mirrored in requirements/build.txt
+ requires = [
+-    "cmake>=3.26.1",
+-    "ninja",
+     "packaging>=24.2",
+-    "setuptools>=77.0.3,<81.0.0",
++    "setuptools>=63.0",
+     "setuptools-scm>=8.0",
+-    "torch == 2.10.0",
+     "wheel",
+     "jinja2",
+ ]
+@@ -15,8 +12,7 @@ authors = [{name = "vLLM Team"}]
+ [project]
+ name = "vllm"
+ authors = [{name = "vLLM Team"}]
+-license = "Apache-2.0"
+-license-files = ["LICENSE"]
++license = {text = "Apache-2.0"}
+ readme = "README.md"
+ description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
+ classifiers = [
diff --git a/misc/py-vllm/files/patch-setup.py b/misc/py-vllm/files/patch-setup.py
new file mode 100644
index 000000000000..f05813edcc49
--- /dev/null
+++ b/misc/py-vllm/files/patch-setup.py
@@ -0,0 +1,15 @@
+--- setup.py.orig	2026-04-07 17:26:12 UTC
++++ setup.py
+@@ -42,7 +42,11 @@ if sys.platform.startswith("darwin") and VLLM_TARGET_D
+ if sys.platform.startswith("darwin") and VLLM_TARGET_DEVICE != "cpu":
+     logger.warning("VLLM_TARGET_DEVICE automatically set to `cpu` due to macOS")
+     VLLM_TARGET_DEVICE = "cpu"
+-elif not (sys.platform.startswith("linux") or sys.platform.startswith("darwin")):
++elif not (
++    sys.platform.startswith("linux")
++    or sys.platform.startswith("darwin")
++    or sys.platform.startswith("freebsd")
++):
+     logger.warning(
+         "vLLM only supports Linux platform (including WSL) and MacOS."
+         "Building on %s, "
diff --git a/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py
new file mode 100644
index 000000000000..cebb4198f919
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py
@@ -0,0 +1,35 @@
+--- vllm/distributed/parallel_state.py.orig	2026-04-07 18:19:15 UTC
++++ vllm/distributed/parallel_state.py
+@@ -24,6 +24,7 @@ import contextlib
+ """
+ 
+ import contextlib
++import sys
+ import gc
+ import pickle
+ import weakref
+@@ -341,8 +342,13 @@ class GroupCoordinator:
+             )
+             # a group with `gloo` backend, to allow direct coordination between
+             # processes through the CPU.
++            # On FreeBSD, gloo TCP transport is unavailable; use fake backend.
++            _cpu_backend = "gloo"
++            if sys.platform.startswith("freebsd"):
++                import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg")
++                _cpu_backend = "fake"
+             with suppress_stdout():
+-                cpu_group = torch.distributed.new_group(ranks, backend="gloo")
++                cpu_group = torch.distributed.new_group(ranks, backend=_cpu_backend)
+             if self.rank in ranks:
+                 self.ranks = ranks
+                 self.world_size = len(ranks)
+@@ -1419,6 +1425,9 @@ def init_distributed_environment(
+             )
+             backend = "gloo"
+         # this backend is used for WORLD
++        # On FreeBSD, register the 'fake' backend before use.
++        if sys.platform.startswith("freebsd") and backend == "fake":
++            import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg")
+         torch.distributed.init_process_group(
+             backend=backend,
+             init_method=distributed_init_method,
diff --git a/misc/py-vllm/files/patch-vllm_platforms_____init____.py b/misc/py-vllm/files/patch-vllm_platforms_____init____.py
new file mode 100644
index 000000000000..ab6f660d7de3
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_platforms_____init____.py
@@ -0,0 +1,29 @@
+--- vllm/platforms/__init__.py.orig	2026-04-07 17:26:12 UTC
++++ vllm/platforms/__init__.py
+@@ -58,6 +58,11 @@ def cuda_platform_plugin() -> str | None:
+ 
+ 
+ def cuda_platform_plugin() -> str | None:
++    import sys
++    if sys.platform.startswith("freebsd"):
++        # CUDA extensions are not built on FreeBSD; use CPU platform instead.
++        logger.debug("CUDA platform disabled on FreeBSD.")
++        return None
+     is_cuda = False
+     logger.debug("Checking if CUDA platform is available.")
+     try:
+@@ -172,10 +177,12 @@ def cpu_platform_plugin() -> str | None:
+         if not is_cpu:
+             import sys
+ 
+-            is_cpu = sys.platform.startswith("darwin")
++            is_cpu = sys.platform.startswith("darwin") or sys.platform.startswith(
++                "freebsd"
++            )
+             if is_cpu:
+                 logger.debug(
+-                    "Confirmed CPU platform is available because the machine is MacOS."
++                    "Confirmed CPU platform is available because the machine is MacOS or FreeBSD."
+                 )
+ 
+     except Exception as e:
diff --git a/misc/py-vllm/files/patch-vllm_platforms_cpu.py b/misc/py-vllm/files/patch-vllm_platforms_cpu.py
new file mode 100644
index 000000000000..ae1db6392125
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_platforms_cpu.py
@@ -0,0 +1,42 @@
+--- vllm/platforms/cpu.py.orig	2026-04-03 01:57:10 UTC
++++ vllm/platforms/cpu.py
+@@ -74,7 +74,8 @@ class CpuPlatform(Platform):
+     device_name: str = "cpu"
+     device_type: str = "cpu"
+     dispatch_key: str = "CPU"
+-    dist_backend: str = "gloo"
++    # FreeBSD lacks gloo TCP transport (epoll-based); use fake backend.
++    dist_backend: str = "fake" if sys.platform.startswith("freebsd") else "gloo"
+     device_control_env_var = "CPU_VISIBLE_MEMORY_NODES"
+ 
+     @property
+@@ -378,7 +379,28 @@ class CpuPlatform(Platform):
+ 
+     @classmethod
+     def get_allowed_cpu_core_node_list(cls) -> tuple[list[int], list[LogicalCPUInfo]]:
+-        assert platform.system() == "Linux"
++        assert platform.system() in ("Linux", "FreeBSD")
++
++        if platform.system() == "FreeBSD":
++            # FreeBSD lacks lscpu -J; treat all CPUs as a single NUMA node.
++            allowed_cpu_id_set = (
++                os.sched_getaffinity(0)
++                if hasattr(os, "sched_getaffinity")
++                else set(range(os.cpu_count() or 1))
++            )
++            logical_cpu_list = [
++                LogicalCPUInfo(id=cpu_id, physical_core=cpu_id, numa_node=0)
++                for cpu_id in sorted(allowed_cpu_id_set)
++            ]
++            allowed_numa_nodes_list = [0]
++            env_key = CpuPlatform.device_control_env_var
++            if env_key in os.environ and os.environ[env_key] != "":
++                visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
++                allowed_numa_nodes_list = [
++                    x for x in sorted(list(set(visible_nodes)))
++                    if x in allowed_numa_nodes_list
++                ]
++            return allowed_numa_nodes_list, logical_cpu_list
+ 
+         # Init LogicalCPUInfo from lscpu
+         lscpu_output = subprocess.check_output(
diff --git a/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py
new file mode 100644
index 000000000000..e762ac4eafb9
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py
@@ -0,0 +1,12 @@
+--- vllm/v1/worker/cpu_worker.py.orig	2026-04-07 17:26:12 UTC
++++ vllm/v1/worker/cpu_worker.py
+@@ -91,6 +91,9 @@ class CPUWorker(Worker):
+                 self.local_omp_cpuid = self._get_autobind_cpu_ids(lambda cpus: cpus)
+             else:
+                 self.local_omp_cpuid = "nobind"
++        elif omp_cpuids == "auto":
++            # Non-Linux OS: NUMA-based auto-binding not supported, fall back to nobind
++            self.local_omp_cpuid = "nobind"
+         elif omp_cpuids == "nobind":
+             self.local_omp_cpuid = "nobind"
+         else:
diff --git a/misc/py-vllm/pkg-descr b/misc/py-vllm/pkg-descr
new file mode 100644
index 000000000000..1d4993624aea
--- /dev/null
+++ b/misc/py-vllm/pkg-descr
@@ -0,0 +1,13 @@
+vLLM is a fast and easy-to-use library for LLM inference and serving.
+It provides high-throughput and memory-efficient inference for large language
+models (LLMs) using state-of-the-art serving technologies including:
+
+- PagedAttention for efficient KV cache memory management
+- Continuous batching of incoming requests
+- Optimized CUDA kernels (on supported platforms)
+- Hugging Face model compatibility
+- Various decoding algorithms including parallel sampling and beam search
+- OpenAI-compatible API server
+
+On FreeBSD, vLLM runs in CPU mode (VLLM_TARGET_DEVICE=cpu): inference uses the
+compiled vllm._C CPU extension with oneDNN kernels, without GPU acceleration.
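
Once installed, vLLM's usual offline-inference Python API works in this CPU
mode. A minimal sketch follows; the model name is only an example Hugging Face
identifier fetched at run time, not something the port provides.

    from vllm import LLM, SamplingParams

    # Example model only; any small Hugging Face causal LM will do for a smoke test.
    llm = LLM(model="facebook/opt-125m")
    params = SamplingParams(temperature=0.8, max_tokens=32)

    for out in llm.generate(["FreeBSD is"], params):
        print(out.outputs[0].text)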

