Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 17 Nov 2025 07:30:03 GMT
From:      Kai Knoblich <kai@FreeBSD.org>
To:        ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-branches@FreeBSD.org
Subject:   git: 7dfd37d644fb - 2025Q4 - textproc/py-ocrmypdf: Update to 16.11.1
Message-ID:  <202511170730.5AH7U3Wv076724@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help

The branch 2025Q4 has been updated by kai:

URL: https://cgit.FreeBSD.org/ports/commit/?id=7dfd37d644fbf5ff3acc4dbd3482ecd0278c5d7a

commit 7dfd37d644fbf5ff3acc4dbd3482ecd0278c5d7a
Author:     Kai Knoblich <kai@FreeBSD.org>
AuthorDate: 2025-11-17 07:21:22 +0000
Commit:     Kai Knoblich <kai@FreeBSD.org>
CommitDate: 2025-11-17 07:29:16 +0000

    textproc/py-ocrmypdf: Update to 16.11.1
    
    Backport a workaround for JPEG encoding issues with Ghostscript 10.6.0.
    
    Release 16.12.0 is already available, but it requires at least
    py-pikepdf 10.0.1, which isn't present in the ports tree yet.
    
    Changelog:
    
    https://github.com/ocrmypdf/OCRmyPDF/blob/v16.11.1/docs/release_notes.md
    
    MFH:            2025Q4
    (cherry picked from commit 103146dda2acf8f8d0882baedccfd0124b5be6e1)
---
 textproc/py-ocrmypdf/Makefile                      |  5 +-
 textproc/py-ocrmypdf/distinfo                      |  6 +-
 .../files/patch-src_ocrmypdf_optimize.py           | 66 ++++++++++++++++++++++
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/textproc/py-ocrmypdf/Makefile b/textproc/py-ocrmypdf/Makefile
index 52412e6c56a9..ade0a531faa2 100644
--- a/textproc/py-ocrmypdf/Makefile
+++ b/textproc/py-ocrmypdf/Makefile
@@ -1,5 +1,5 @@
 PORTNAME=	ocrmypdf
-DISTVERSION=	16.11.0
+DISTVERSION=	16.11.1
 CATEGORIES=	textproc python
 MASTER_SITES=	PYPI
 PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
@@ -31,9 +31,10 @@ TEST_DEPENDS=	${PYTHON_PKGNAMEPREFIX}hypothesis>=6.36.0:devel/py-hypothesis@${PY
 USES=		ghostscript:run python:3.10+ shebangfix
 USE_PYTHON=	autoplist concurrent pep517 pytest
 # Skip some checks as they yield wrong results if run with the root account
+# "test_watcher" requires additional deps used by the "watcher" feature
 PYTEST_IGNORED_TESTS=	test_chmod \
 			test_input_file_not_readable \
-			test_malformed_docinfo # leads to an internal pytest error
+			test_watcher
 
 SHEBANG_FILES=	src/ocrmypdf/__main__.py \
 		src/ocrmypdf/pdfinfo/__init__.py
diff --git a/textproc/py-ocrmypdf/distinfo b/textproc/py-ocrmypdf/distinfo
index e20d42f98e01..582ec949cdca 100644
--- a/textproc/py-ocrmypdf/distinfo
+++ b/textproc/py-ocrmypdf/distinfo
@@ -1,3 +1,3 @@
-TIMESTAMP = 1757764047
-SHA256 (ocrmypdf-16.11.0.tar.gz) = d89077e503238dac35c6e565925edc8d98b71e5289853c02cacbc1d0901f1be7
-SIZE (ocrmypdf-16.11.0.tar.gz) = 7015068
+TIMESTAMP = 1763048154
+SHA256 (ocrmypdf-16.11.1.tar.gz) = 838ab69e0ee0f04feea0d5861a17badecab6d3beaed0e29a97058eadda58cbb1
+SIZE (ocrmypdf-16.11.1.tar.gz) = 7015278
diff --git a/textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py b/textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py
new file mode 100644
index 000000000000..34e6453d57df
--- /dev/null
+++ b/textproc/py-ocrmypdf/files/patch-src_ocrmypdf_optimize.py
@@ -0,0 +1,66 @@
+From: "James R. Barlow" <james@purplerock.ca>
+Date: Sun, 9 Nov 2025 15:43:36 -0800
+Subject: [PATCH] Work around Ghostscript 10.6.0 JPEG encoding issue by forcing
+ optimization.
+
+Not an ideal fix, but it improves an issue affecting numerous users.
+
+Fixes 1585.
+
+Obtained from:
+
+https://github.com/ocrmypdf/OCRmyPDF/commit/f4c6c8121ba8178ff3a1cb8f70037bbc3a31391b.patch
+
+--- src/ocrmypdf/optimize.py.orig	2020-02-02 00:00:00 UTC
++++ src/ocrmypdf/optimize.py
+@@ -17,6 +17,7 @@ import img2pdf
+ from zlib import compress
+ 
+ import img2pdf
++from packaging.version import Version
+ from pikepdf import (
+     Dictionary,
+     Name,
+@@ -32,7 +33,7 @@ from ocrmypdf._concurrent import Executor, SerialExecu
+ from PIL import Image
+ 
+ from ocrmypdf._concurrent import Executor, SerialExecutor
+-from ocrmypdf._exec import jbig2enc, pngquant
++from ocrmypdf._exec import ghostscript, jbig2enc, pngquant
+ from ocrmypdf._jobcontext import PdfContext
+ from ocrmypdf._progressbar import ProgressBar
+ from ocrmypdf.exceptions import OutputFileAccessError
+@@ -189,6 +190,16 @@ def extract_image_jbig2(
+     return None
+ 
+ 
++def _should_optimize_jpeg(options, filtdp):
++    if options.optimize >= 2:
++        return True
++    if options.optimize < 2 and ghostscript.version() >= Version('10.6.0'):
++        # Ghostscript 10.6.0+ introduced some sort of JPEG encoding issue.
++        # To resolve this, re-optimize the JPEG anyway.
++        return True
++    return False
++
++
+ def extract_image_generic(
+     *, pdf: Pdf, root: Path, image: Stream, xref: Xref, options
+ ) -> XrefExt | None:
+@@ -202,15 +213,7 @@ def extract_image_generic(
+     if pim.bits_per_component == 1:
+         return None
+ 
+-    if filtdp[0] == Name.DCTDecode and options.optimize >= 2:
+-        # This is a simple heuristic derived from some training data, that has
+-        # about a 70% chance of guessing whether the JPEG is high quality,
+-        # and possibly recompressible, or not. The number itself doesn't mean
+-        # anything.
+-        # bytes_per_pixel = int(raw_jpeg.Length) / (w * h)
+-        # jpeg_quality_estimate = 117.0 * (bytes_per_pixel ** 0.213)
+-        # if jpeg_quality_estimate < 65:
+-        #     return None
++    if filtdp[0] == Name.DCTDecode and _should_optimize_jpeg(options, filtdp):
+         try:
+             imgname = root / f'{xref:08d}'
+             with imgname.open('wb') as f:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202511170730.5AH7U3Wv076724>