FreeBSD Mail Archives

Date:      Wed, 08 Apr 2026 02:45:29 +0000
From:      Yuri Victorovich <yuri@FreeBSD.org>
To:        ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org
Subject:   git: c61790d93374 - main - graphics/py-olmocr: New port: PDF and image OCR toolkit using visual language models
Message-ID:  <69d5c149.432cc.e50099a@gitrepo.freebsd.org>

index | next in thread | raw e-mail


The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=c61790d933741f0db49e7715d3a4fe64e689b945

commit c61790d933741f0db49e7715d3a4fe64e689b945
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2026-04-08 01:37:34 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2026-04-08 02:45:26 +0000

    graphics/py-olmocr: New port: PDF and image OCR toolkit using visual language models
---
 graphics/Makefile                                 |  1 +
 graphics/py-olmocr/Makefile                       | 47 ++++++++++++++++++++
 graphics/py-olmocr/distinfo                       |  3 ++
 graphics/py-olmocr/files/patch-olmocr_pipeline.py | 54 +++++++++++++++++++++++
 graphics/py-olmocr/files/patch-pyproject.toml     | 13 ++++++
 graphics/py-olmocr/pkg-descr                      | 10 +++++
 6 files changed, 128 insertions(+)

diff --git a/graphics/Makefile b/graphics/Makefile
index 50a6c9fb5271..341697adaae9 100644
--- a/graphics/Makefile
+++ b/graphics/Makefile
@@ -983,6 +983,7 @@
     SUBDIR += py-napari-console
     SUBDIR += py-napari-plugin-engine
     SUBDIR += py-nwdiag
+    SUBDIR += py-olmocr
     SUBDIR += py-opencolorio
     SUBDIR += py-opencv-python-headless
     SUBDIR += py-openimageio
diff --git a/graphics/py-olmocr/Makefile b/graphics/py-olmocr/Makefile
new file mode 100644
index 000000000000..2150805c4329
--- /dev/null
+++ b/graphics/py-olmocr/Makefile
@@ -0,0 +1,47 @@
+PORTNAME=	olmocr
+DISTVERSION=	0.4.27
+CATEGORIES=	graphics python
+MASTER_SITES=	PYPI
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	yuri@FreeBSD.org
+COMMENT=	PDF and image OCR toolkit using visual language models
+WWW=		https://olmocr.allenai.org/ \
+		https://github.com/allenai/olmocr
+
+LICENSE=	APACHE20
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}setuptools>0:devel/py-setuptools@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR}
+RUN_DEPENDS=	pdftoppm:graphics/poppler-utils \
+		${PYTHON_PKGNAMEPREFIX}bleach>0:www/py-bleach@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}boto3>0:www/py-boto3@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}cached-path>0:devel/py-cached-path@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}cryptography>0:security/py-cryptography@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}filelock>0:sysutils/py-filelock@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}ftfy>0:textproc/py-ftfy@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}httpx>0:www/py-httpx@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}lingua-language-detector>0:textproc/py-lingua-language-detector@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}markdown2>0:textproc/py-markdown2@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}markdownify>0:textproc/py-markdownify@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}orjson>0:devel/py-orjson@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pillow>0:graphics/py-pillow@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pypdf>=5.2.0:print/py-pypdf@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}pypdfium2>0:graphics/py-pypdfium2@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}requests>0:www/py-requests@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}smart-open>0:net/py-smart-open@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}zstandard>0:archivers/py-zstandard@${PY_FLAVOR}
+RUN_DEPENDS_gpu=	\
+		${PYTHON_PKGNAMEPREFIX}pytorch>0:misc/py-pytorch@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}transformers>=4.57.3:misc/py-transformers@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}vllm>=0.11.2:misc/py-vllm@${PY_FLAVOR}
+RUN_DEPENDS+=	${RUN_DEPENDS_gpu}
+TEST_DEPENDS=	${PYTHON_PKGNAMEPREFIX}pytest>0:devel/py-pytest@${PY_FLAVOR}
+
+USES=		python
+USE_PYTHON=	pep517 concurrent autoplist
+
+NO_ARCH=	yes
+
+.include <bsd.port.mk>
diff --git a/graphics/py-olmocr/distinfo b/graphics/py-olmocr/distinfo
new file mode 100644
index 000000000000..504681120d06
--- /dev/null
+++ b/graphics/py-olmocr/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1775589887
+SHA256 (olmocr-0.4.27.tar.gz) = 7da74f37a3e987f966765503c59913c2268289c9ecb14b4c5f40c89a0e8e5393
+SIZE (olmocr-0.4.27.tar.gz) = 410438
diff --git a/graphics/py-olmocr/files/patch-olmocr_pipeline.py b/graphics/py-olmocr/files/patch-olmocr_pipeline.py
new file mode 100644
index 000000000000..ea3871e620f9
--- /dev/null
+++ b/graphics/py-olmocr/files/patch-olmocr_pipeline.py
@@ -0,0 +1,54 @@
+--- olmocr/pipeline.py.orig	2026-03-12 16:30:29 UTC
++++ olmocr/pipeline.py
+@@ -811,7 +811,7 @@ async def vllm_server_task(model_name_or_path, args, u
+         model_name_or_path,
+         "--port",
+         str(args.port),
+-        "--disable-log-requests",
++        "--no-enable-log-requests",
+         "--uvicorn-log-level",
+         "warning",
+         "--served-model-name",
+@@ -833,12 +833,15 @@ async def vllm_server_task(model_name_or_path, args, u
+     if unknown_args:
+         cmd.extend(unknown_args)
+ 
++    if getattr(args, "device", "cpu") == "cpu":
++        cmd.append("--enforce-eager")
++
+     proc = await asyncio.create_subprocess_exec(
+         *cmd,
+         stdout=asyncio.subprocess.PIPE,
+         stderr=asyncio.subprocess.PIPE,
+         # OMP_NUM_THREADS needs to be 1, otherwise you could have contention if you are running multiple copies of olmOCR on a machine with several GPUS
+-        env={**os.environ, "OMP_NUM_THREADS": "1"},
++        env={**os.environ, "OMP_NUM_THREADS": str(os.cpu_count()) if getattr(args, "device", "cpu") == "cpu" else "1"},
+     )
+ 
+     # Ensure the subprocess is terminated on exit
+@@ -1211,7 +1214,7 @@ async def main():
+     parser.add_argument(
+         "--model",
+         help="Path where the model is located, allenai/olmOCR-2-7B-1025-FP8 is the default, can be local, s3, or hugging face.",
+-        default="allenai/olmOCR-2-7B-1025-FP8",
++        default="allenai/olmOCR-2-7B-1025",
+     )
+ 
+     # More detailed config options, usually you shouldn't have to change these
+@@ -1256,6 +1259,7 @@ async def main():
+     vllm_group.add_argument("--tensor-parallel-size", "-tp", type=int, default=1, help="Tensor parallel size for vLLM")
+     vllm_group.add_argument("--data-parallel-size", "-dp", type=int, default=1, help="Data parallel size for vLLM")
+     vllm_group.add_argument("--port", type=int, default=30024, help="Port to use for the VLLM server")
++    vllm_group.add_argument("--device", type=str, default="cpu", help="Device to use for inference (cpu, cuda, etc.)")
+ 
+     # Beaker/job running stuff
+     beaker_group = parser.add_argument_group("beaker/cluster execution")
+@@ -1421,7 +1425,7 @@ async def main():
+ 
+     # If you get this far, then you are doing inference and need a GPU
+     # check_sglang_version()
+-    if use_internal_server:
++    if use_internal_server and args.device != "cpu":
+         check_torch_gpu_available()
+ 
+     logger.info(f"Starting pipeline with PID {os.getpid()}")
diff --git a/graphics/py-olmocr/files/patch-pyproject.toml b/graphics/py-olmocr/files/patch-pyproject.toml
new file mode 100644
index 000000000000..797bc8cd210d
--- /dev/null
+++ b/graphics/py-olmocr/files/patch-pyproject.toml
@@ -0,0 +1,13 @@
+--- pyproject.toml.orig	2026-04-07 19:24:23 UTC
++++ pyproject.toml
+@@ -52,8 +52,8 @@ gpu = [
+ [project.optional-dependencies]
+ gpu = [
+     "torch>=2.7.0",
+-    "transformers==4.57.3",
+-    "vllm==0.11.2"
++    "transformers>=4.57.3",
++    "vllm>=0.11.2"
+ ]
+ 
+ beaker = [
diff --git a/graphics/py-olmocr/pkg-descr b/graphics/py-olmocr/pkg-descr
new file mode 100644
index 000000000000..4e4bda2276ad
--- /dev/null
+++ b/graphics/py-olmocr/pkg-descr
@@ -0,0 +1,10 @@
+olmocr is a toolkit for converting PDFs and other image-based document formats
+into clean, readable, plain text format.
+
+Features:
+* Convert PDF, PNG, and JPEG based documents into clean Markdown
+* Support for equations, tables, handwriting, and complex formatting
+* Automatically removes headers and footers
+* Convert into text with a natural reading order, even in the presence of
+  figures, multi-column layouts, and insets
+* Efficient, less than $200 USD per million pages converted

home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69d5c149.432cc.e50099a>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation