Date: Tue, 07 Apr 2026 19:19:19 +0000 From: Yuri Victorovich <yuri@FreeBSD.org> To: ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org Subject: git: 919c3600edae - main - misc/py-vllm: New port: High-throughput and memory-efficient LLM inference engine Message-ID: <69d558b7.3335d.7eb97e75@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by yuri: URL: https://cgit.FreeBSD.org/ports/commit/?id=919c3600edaed3248916e0b75d4249fa9903b904 commit 919c3600edaed3248916e0b75d4249fa9903b904 Author: Yuri Victorovich <yuri@FreeBSD.org> AuthorDate: 2026-04-07 19:18:53 +0000 Commit: Yuri Victorovich <yuri@FreeBSD.org> CommitDate: 2026-04-07 19:19:14 +0000 misc/py-vllm: New port: High-throughput and memory-efficient LLM inference engine --- misc/Makefile | 1 + misc/py-vllm/Makefile | 109 +++++++++++++++++++++ misc/py-vllm/distinfo | 5 + .../py-vllm/files/patch-cmake_cpu__extension.cmake | 78 +++++++++++++++ misc/py-vllm/files/patch-csrc_cpu_shm.cpp | 12 +++ misc/py-vllm/files/patch-pyproject.toml | 26 +++++ misc/py-vllm/files/patch-setup.py | 15 +++ .../patch-vllm_distributed_parallel__state.py | 35 +++++++ .../files/patch-vllm_platforms_____init____.py | 29 ++++++ misc/py-vllm/files/patch-vllm_platforms_cpu.py | 42 ++++++++ .../files/patch-vllm_v1_worker_cpu__worker.py | 12 +++ misc/py-vllm/pkg-descr | 13 +++ 12 files changed, 377 insertions(+) diff --git a/misc/Makefile b/misc/Makefile index 2dadb25668f2..c09343f97fef 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -623,6 +623,7 @@ SUBDIR += py-uhi SUBDIR += py-uuid-utils SUBDIR += py-vaderSentiment + SUBDIR += py-vllm SUBDIR += py-wandb SUBDIR += py-wurlitzer SUBDIR += py-xformers diff --git a/misc/py-vllm/Makefile b/misc/py-vllm/Makefile new file mode 100644 index 000000000000..1a56e18195a9 --- /dev/null +++ b/misc/py-vllm/Makefile @@ -0,0 +1,109 @@ +PORTNAME= vllm +DISTVERSION= 0.19.0 +CATEGORIES= misc python # machine-learning +MASTER_SITES= PYPI \ + https://github.com/uxlfoundation/oneDNN/archive/refs/tags/:onednn_src +PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} +DISTFILES= ${DISTNAME}${EXTRACT_SUFX} \ + v3.10${EXTRACT_SUFX}:onednn_src + +MAINTAINER= yuri@FreeBSD.org +COMMENT= High-throughput and memory-efficient LLM inference engine +WWW= https://vllm.ai/ \ + https://github.com/vllm-project/vllm + +LICENSE= APACHE20 
+LICENSE_FILE= ${WRKSRC}/LICENSE + +BUILD_DEPENDS= ${LOCALBASE}/llvm19/bin/clang:devel/llvm19 \ + ${PYTHON_PKGNAMEPREFIX}Jinja2>=3.0:devel/py-Jinja2@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}ninja>=1.13:devel/py-ninja@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}packaging>=24.2:devel/py-packaging@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pytorch>=2.10.0:misc/py-pytorch@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}setuptools>=63.0:devel/py-setuptools@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}setuptools-scm>=8.0:devel/py-setuptools-scm@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR} +LIB_DEPENDS= libabsl_status.so:devel/abseil \ + libprotobuf.so:devel/protobuf +RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}aiohttp>=3.13.3:www/py-aiohttp@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}anthropic>0:misc/py-anthropic@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}blake3>0:security/py-blake3@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}cachetools>0:devel/py-cachetools@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}cbor2>0:devel/py-cbor2@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}cloudpickle>0:devel/py-cloudpickle@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}compressed-tensors>=0.14.0.1:misc/py-compressed-tensors@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}depyf>=0.20.0:devel/py-depyf@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}diskcache>=5.6.3:devel/py-diskcache@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}einops>0:misc/py-einops@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}fastapi>0:www/py-fastapi@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}filelock>=3.16.1:sysutils/py-filelock@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}gguf>=0.17.0:misc/py-gguf@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}ijson>0:devel/py-ijson@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}Jinja2>=3.0:devel/py-Jinja2@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}lark>=1.2.2:devel/py-lark@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}llguidance>=1.3.0:textproc/py-llguidance@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}lm-format-enforcer>=0.11.3:misc/py-lm-format-enforcer@${PY_FLAVOR} \ + 
${PYTHON_PKGNAMEPREFIX}mcp>0:misc/py-mcp@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}mistral-common>=1.10.0:misc/py-mistral-common@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}model-hosting-container-standards>=0.1.13:misc/py-model-hosting-container-standards@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}msgspec>0:devel/py-msgspec@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}ninja>=1.13:devel/py-ninja@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}numpy1>=1.25:math/py-numpy1@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}openai>=2.0.0:misc/py-openai@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}openai-harmony>=0.0.3:misc/py-openai-harmony@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}opentelemetry-api>=1.27.0:devel/py-opentelemetry-api@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}opentelemetry-exporter-otlp>=1.27.0:devel/py-opentelemetry-exporter-otlp@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}opentelemetry-sdk>=1.27.0:devel/py-opentelemetry-sdk@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}opentelemetry-semantic-conventions-ai>=0.4.1:devel/py-opentelemetry-semantic-conventions-ai@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}opencv-python-headless>=4.11.0:graphics/py-opencv-python-headless@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}outlines-core>=0.2.11:textproc/py-outlines-core@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}partial-json-parser>0:textproc/py-partial-json-parser@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pillow>=10.0.0:graphics/py-pillow@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}prometheus-client>=0.18.0:net-mgmt/py-prometheus-client@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}prometheus-fastapi-instrumentator>=7.0.0:www/py-prometheus-fastapi-instrumentator@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}protobuf>=5.29.6:devel/py-protobuf@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}psutil>=5.9.0:sysutils/py-psutil@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}py-cpuinfo>0:sysutils/py-py-cpuinfo@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pybase64>0:devel/py-pybase64@${PY_FLAVOR} \ + 
${PYTHON_PKGNAMEPREFIX}pydantic2>=2.12.0:devel/py-pydantic2@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}python-json-logger>0:devel/py-python-json-logger@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pyyaml>0:devel/py-pyyaml@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pyzmq>=25.0.0:net/py-pyzmq@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}requests>=2.26.0:www/py-requests@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}sentencepiece>0:textproc/py-sentencepiece@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}setproctitle>0:devel/py-setproctitle@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}setuptools>=63.0:devel/py-setuptools@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}tiktoken>=0.6.0:textproc/py-tiktoken@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}tokenizers>=0.21.1:textproc/py-tokenizers@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}tqdm>=4.0:misc/py-tqdm@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}uvloop>=0.20.0:devel/py-uvloop@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}transformers>=4.56.0:misc/py-transformers@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pytorch>=2.10.0:misc/py-pytorch@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}typing-extensions>=4.10:devel/py-typing-extensions@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}watchfiles>0:devel/py-watchfiles@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}xgrammar>=0.1.32:misc/py-xgrammar@${PY_FLAVOR} +TEST_DEPENDS= ${PYTHON_PKGNAMEPREFIX}datasets>=4.8.2:misc/py-datasets@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}evaluate>=0.4.6:misc/py-evaluate@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}multiprocess>=0.70.19:devel/py-multiprocess@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}pytest-asyncio>=1.3.0:devel/py-pytest-asyncio@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}tblib>=3.2.2:devel/py-tblib@${PY_FLAVOR} + +USES= cmake:indirect python +USE_PYTHON= pep517 autoplist pytest + +# Build the CPU extension using clang (same ABI as PyTorch on FreeBSD). +# VLLM_TARGET_DEVICE=cpu builds the vllm._C CPU extension. 
+# oneDNN (fetched as a distfile) provides optimised GEMM kernels. +MAKE_ENV+= VLLM_TARGET_DEVICE=cpu \ + CMAKE_ARGS="-DCMAKE_C_COMPILER=${LOCALBASE}/llvm19/bin/clang -DCMAKE_CXX_COMPILER=${LOCALBASE}/llvm19/bin/clang++ -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=TRUE" \ + FETCHCONTENT_SOURCE_DIR_ONEDNN=${WRKDIR}/oneDNN-3.10 + +TEST_ENV= ${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}:${WRKSRC}/tests/vllm_test_utils:${WRKSRC}/tests/plugins/vllm_add_dummy_stat_logger +TEST_WRKDIR= ${WRKSRC}/tests + +# tests don't run because: +# * imagehash, lm_eval, mteb, pqdm, ray, runai_model_streamer, schemathesis which are not in FreeBSD ports yet +# * vllm._C and vllm.v1.worker.gpu.mm.encoder_cudagraph require CUDA/GPU hardware. + +.include <bsd.port.mk> diff --git a/misc/py-vllm/distinfo b/misc/py-vllm/distinfo new file mode 100644 index 000000000000..579dac429ed1 --- /dev/null +++ b/misc/py-vllm/distinfo @@ -0,0 +1,5 @@ +TIMESTAMP = 1775582925 +SHA256 (vllm-0.19.0.tar.gz) = 81e59cf87175e7a62eb8d9acf5989484bbd17089d5eface353f89067bda282d9 +SIZE (vllm-0.19.0.tar.gz) = 31071745 +SHA256 (v3.10.tar.gz) = ba5834a1fdbb6d1c1b1c065dfd789438e7aa42c03fc52d92c02af85d78d1c75c +SIZE (v3.10.tar.gz) = 13507701 diff --git a/misc/py-vllm/files/patch-cmake_cpu__extension.cmake b/misc/py-vllm/files/patch-cmake_cpu__extension.cmake new file mode 100644 index 000000000000..9b7998f407f8 --- /dev/null +++ b/misc/py-vllm/files/patch-cmake_cpu__extension.cmake @@ -0,0 +1,78 @@ +--- cmake/cpu_extension.cmake.orig 2026-04-03 01:57:10 UTC ++++ cmake/cpu_extension.cmake +@@ -20,6 +20,11 @@ set (ENABLE_NUMA TRUE) + + set (ENABLE_NUMA TRUE) + ++# FreeBSD does not have libnuma ++if (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") ++ set(ENABLE_NUMA OFF) ++endif() ++ + # + # Check the compile flags + # +@@ -33,12 +38,25 @@ if (NOT MACOSX_FOUND) + endif() + + if (NOT MACOSX_FOUND) +- execute_process(COMMAND cat /proc/cpuinfo +- RESULT_VARIABLE CPUINFO_RET +- OUTPUT_VARIABLE CPUINFO) +- if (NOT CPUINFO_RET EQUAL 0) 
+- message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo") ++ # Try Linux /proc/cpuinfo first, then the FreeBSD linuxulator path ++ if (EXISTS "/proc/cpuinfo") ++ set(_cpuinfo_path "/proc/cpuinfo") ++ elseif (EXISTS "/compat/linux/proc/cpuinfo") ++ set(_cpuinfo_path "/compat/linux/proc/cpuinfo") ++ else() ++ set(_cpuinfo_path "") + endif() ++ if (_cpuinfo_path) ++ execute_process(COMMAND cat ${_cpuinfo_path} ++ RESULT_VARIABLE CPUINFO_RET ++ OUTPUT_VARIABLE CPUINFO) ++ if (NOT CPUINFO_RET EQUAL 0) ++ message(FATAL_ERROR "Failed to check CPU features via ${_cpuinfo_path}") ++ endif() ++ else() ++ message(STATUS "No cpuinfo available; relying on CMAKE_SYSTEM_PROCESSOR for ISA detection") ++ set(CPUINFO "") ++ endif() + endif() + + +@@ -91,9 +109,10 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR E + + if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA) + set(ENABLE_X86_ISA ON) +- if (NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND +- CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)) +- message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3") ++ if (NOT ( ++ (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3) OR ++ (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 15.0))) ++ message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3 or clang >= 15.0") + endif() + list(APPEND CXX_COMPILE_FLAGS "-mf16c") + list(APPEND CXX_COMPILE_FLAGS_AVX512 ${CXX_COMPILE_FLAGS}) +@@ -407,9 +426,15 @@ if (ENABLE_X86_ISA) + message(STATUS "CPU extension (AVX512F) source files: ${VLLM_EXT_SRC_AVX512}") + message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}") + +- set(_C_LIBS numa dnnl_ext) +- set(_C_AVX512_LIBS numa dnnl_ext) +- set(_C_AVX2_LIBS numa) ++ if(ENABLE_NUMA) ++ set(_C_LIBS numa dnnl_ext) ++ set(_C_AVX512_LIBS numa dnnl_ext) ++ set(_C_AVX2_LIBS numa) ++ else() ++ set(_C_LIBS dnnl_ext) ++ set(_C_AVX512_LIBS dnnl_ext) ++ 
set(_C_AVX2_LIBS "") ++ endif() + + # AMX + AVX512F + AVX512BF16 + AVX512VNNI + define_extension_target( diff --git a/misc/py-vllm/files/patch-csrc_cpu_shm.cpp b/misc/py-vllm/files/patch-csrc_cpu_shm.cpp new file mode 100644 index 000000000000..521a3f335840 --- /dev/null +++ b/misc/py-vllm/files/patch-csrc_cpu_shm.cpp @@ -0,0 +1,12 @@ +--- csrc/cpu/shm.cpp.orig 2026-04-07 17:37:32 UTC ++++ csrc/cpu/shm.cpp +@@ -2,6 +2,9 @@ + + #include <fcntl.h> + #include <sys/mman.h> ++#ifndef MAP_POPULATE ++# define MAP_POPULATE 0 ++#endif + #include <sys/stat.h> + #include <unistd.h> + diff --git a/misc/py-vllm/files/patch-pyproject.toml b/misc/py-vllm/files/patch-pyproject.toml new file mode 100644 index 000000000000..5a3bc19a43b2 --- /dev/null +++ b/misc/py-vllm/files/patch-pyproject.toml @@ -0,0 +1,26 @@ +--- pyproject.toml.orig 2026-04-06 20:40:36 UTC ++++ pyproject.toml +@@ -1,12 +1,9 @@ requires = [ + [build-system] + # Should be mirrored in requirements/build.txt + requires = [ +- "cmake>=3.26.1", +- "ninja", + "packaging>=24.2", +- "setuptools>=77.0.3,<81.0.0", ++ "setuptools>=63.0", + "setuptools-scm>=8.0", +- "torch == 2.10.0", + "wheel", + "jinja2", + ] +@@ -15,8 +12,7 @@ authors = [{name = "vLLM Team"}] + [project] + name = "vllm" + authors = [{name = "vLLM Team"}] +-license = "Apache-2.0" +-license-files = ["LICENSE"] ++license = {text = "Apache-2.0"} + readme = "README.md" + description = "A high-throughput and memory-efficient inference and serving engine for LLMs" + classifiers = [ diff --git a/misc/py-vllm/files/patch-setup.py b/misc/py-vllm/files/patch-setup.py new file mode 100644 index 000000000000..f05813edcc49 --- /dev/null +++ b/misc/py-vllm/files/patch-setup.py @@ -0,0 +1,15 @@ +--- setup.py.orig 2026-04-07 17:26:12 UTC ++++ setup.py +@@ -42,7 +42,11 @@ if sys.platform.startswith("darwin") and VLLM_TARGET_D + if sys.platform.startswith("darwin") and VLLM_TARGET_DEVICE != "cpu": + logger.warning("VLLM_TARGET_DEVICE automatically set to `cpu` due to 
macOS") + VLLM_TARGET_DEVICE = "cpu" +-elif not (sys.platform.startswith("linux") or sys.platform.startswith("darwin")): ++elif not ( ++ sys.platform.startswith("linux") ++ or sys.platform.startswith("darwin") ++ or sys.platform.startswith("freebsd") ++): + logger.warning( + "vLLM only supports Linux platform (including WSL) and MacOS." + "Building on %s, " diff --git a/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py new file mode 100644 index 000000000000..cebb4198f919 --- /dev/null +++ b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py @@ -0,0 +1,35 @@ +--- vllm/distributed/parallel_state.py.orig 2026-04-07 18:19:15 UTC ++++ vllm/distributed/parallel_state.py +@@ -24,6 +24,7 @@ import contextlib + """ + + import contextlib ++import sys + import gc + import pickle + import weakref +@@ -341,8 +342,13 @@ class GroupCoordinator: + ) + # a group with `gloo` backend, to allow direct coordination between + # processes through the CPU. ++ # On FreeBSD, gloo TCP transport is unavailable; use fake backend. ++ _cpu_backend = "gloo" ++ if sys.platform.startswith("freebsd"): ++ import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg") ++ _cpu_backend = "fake" + with suppress_stdout(): +- cpu_group = torch.distributed.new_group(ranks, backend="gloo") ++ cpu_group = torch.distributed.new_group(ranks, backend=_cpu_backend) + if self.rank in ranks: + self.ranks = ranks + self.world_size = len(ranks) +@@ -1419,6 +1425,9 @@ def init_distributed_environment( + ) + backend = "gloo" + # this backend is used for WORLD ++ # On FreeBSD, register the 'fake' backend before use. 
++ if sys.platform.startswith("freebsd") and backend == "fake": ++ import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg") + torch.distributed.init_process_group( + backend=backend, + init_method=distributed_init_method, diff --git a/misc/py-vllm/files/patch-vllm_platforms_____init____.py b/misc/py-vllm/files/patch-vllm_platforms_____init____.py new file mode 100644 index 000000000000..ab6f660d7de3 --- /dev/null +++ b/misc/py-vllm/files/patch-vllm_platforms_____init____.py @@ -0,0 +1,29 @@ +--- vllm/platforms/__init__.py.orig 2026-04-07 17:26:12 UTC ++++ vllm/platforms/__init__.py +@@ -58,6 +58,11 @@ def cuda_platform_plugin() -> str | None: + + + def cuda_platform_plugin() -> str | None: ++ import sys ++ if sys.platform.startswith("freebsd"): ++ # CUDA extensions are not built on FreeBSD; use CPU platform instead. ++ logger.debug("CUDA platform disabled on FreeBSD.") ++ return None + is_cuda = False + logger.debug("Checking if CUDA platform is available.") + try: +@@ -172,10 +177,12 @@ def cpu_platform_plugin() -> str | None: + if not is_cpu: + import sys + +- is_cpu = sys.platform.startswith("darwin") ++ is_cpu = sys.platform.startswith("darwin") or sys.platform.startswith( ++ "freebsd" ++ ) + if is_cpu: + logger.debug( +- "Confirmed CPU platform is available because the machine is MacOS." ++ "Confirmed CPU platform is available because the machine is MacOS or FreeBSD." 
+ ) + + except Exception as e: diff --git a/misc/py-vllm/files/patch-vllm_platforms_cpu.py b/misc/py-vllm/files/patch-vllm_platforms_cpu.py new file mode 100644 index 000000000000..ae1db6392125 --- /dev/null +++ b/misc/py-vllm/files/patch-vllm_platforms_cpu.py @@ -0,0 +1,42 @@ +--- vllm/platforms/cpu.py.orig 2026-04-03 01:57:10 UTC ++++ vllm/platforms/cpu.py +@@ -74,7 +74,8 @@ class CpuPlatform(Platform): + device_name: str = "cpu" + device_type: str = "cpu" + dispatch_key: str = "CPU" +- dist_backend: str = "gloo" ++ # FreeBSD lacks gloo TCP transport (epoll-based); use fake backend. ++ dist_backend: str = "fake" if sys.platform.startswith("freebsd") else "gloo" + device_control_env_var = "CPU_VISIBLE_MEMORY_NODES" + + @property +@@ -378,7 +379,28 @@ class CpuPlatform(Platform): + + @classmethod + def get_allowed_cpu_core_node_list(cls) -> tuple[list[int], list[LogicalCPUInfo]]: +- assert platform.system() == "Linux" ++ assert platform.system() in ("Linux", "FreeBSD") ++ ++ if platform.system() == "FreeBSD": ++ # FreeBSD lacks lscpu -J; treat all CPUs as a single NUMA node. 
++ allowed_cpu_id_set = ( ++ os.sched_getaffinity(0) ++ if hasattr(os, "sched_getaffinity") ++ else set(range(os.cpu_count() or 1)) ++ ) ++ logical_cpu_list = [ ++ LogicalCPUInfo(id=cpu_id, physical_core=cpu_id, numa_node=0) ++ for cpu_id in sorted(allowed_cpu_id_set) ++ ] ++ allowed_numa_nodes_list = [0] ++ env_key = CpuPlatform.device_control_env_var ++ if env_key in os.environ and os.environ[env_key] != "": ++ visible_nodes = [int(s) for s in os.environ[env_key].split(",")] ++ allowed_numa_nodes_list = [ ++ x for x in sorted(list(set(visible_nodes))) ++ if x in allowed_numa_nodes_list ++ ] ++ return allowed_numa_nodes_list, logical_cpu_list + + # Init LogicalCPUInfo from lscpu + lscpu_output = subprocess.check_output( diff --git a/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py new file mode 100644 index 000000000000..e762ac4eafb9 --- /dev/null +++ b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py @@ -0,0 +1,12 @@ +--- vllm/v1/worker/cpu_worker.py.orig 2026-04-07 17:26:12 UTC ++++ vllm/v1/worker/cpu_worker.py +@@ -91,6 +91,9 @@ class CPUWorker(Worker): + self.local_omp_cpuid = self._get_autobind_cpu_ids(lambda cpus: cpus) + else: + self.local_omp_cpuid = "nobind" ++ elif omp_cpuids == "auto": ++ # Non-Linux OS: NUMA-based auto-binding not supported, fall back to nobind ++ self.local_omp_cpuid = "nobind" + elif omp_cpuids == "nobind": + self.local_omp_cpuid = "nobind" + else: diff --git a/misc/py-vllm/pkg-descr b/misc/py-vllm/pkg-descr new file mode 100644 index 000000000000..1d4993624aea --- /dev/null +++ b/misc/py-vllm/pkg-descr @@ -0,0 +1,13 @@ +vLLM is a fast and easy-to-use library for LLM inference and serving. 
+It provides high-throughput and memory-efficient inference for large language +models (LLMs) using state-of-the-art serving technologies including: + +- PagedAttention for efficient KV cache memory management +- Continuous batching of incoming requests +- Optimized CUDA kernels (on supported platforms) +- Hugging Face model compatibility +- Various decoding algorithms including parallel sampling and beam search +- OpenAI-compatible API server + +On FreeBSD, vLLM runs in CPU device mode (VLLM_TARGET_DEVICE=cpu), +providing CPU-only inference (with a natively built CPU extension) without +GPU acceleration.
home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69d558b7.3335d.7eb97e75>
