Date: Fri, 3 Jan 2020 00:19:58 +0000 (UTC) From: Michael Gmelin <grembo@FreeBSD.org> To: ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org Subject: svn commit: r521891 - in head/deskutils/py-paperless: . files Message-ID: <202001030019.0030JwoG078027@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: grembo Date: Fri Jan 3 00:19:58 2020 New Revision: 521891 URL: https://svnweb.freebsd.org/changeset/ports/521891 Log: Port back filename transformation feature from pull request https://github.com/the-paperless-project/paperless/pull/542 Adapt man page to new default python version. Remove stale comment. Added: head/deskutils/py-paperless/files/patch-docs-guesswork.rst (contents, props changed) head/deskutils/py-paperless/files/patch-src-documents-models.py (contents, props changed) Modified: head/deskutils/py-paperless/Makefile head/deskutils/py-paperless/files/paperless.7.in head/deskutils/py-paperless/files/patch-paperless.conf.example head/deskutils/py-paperless/files/patch-src-paperless-settings.py Modified: head/deskutils/py-paperless/Makefile ============================================================================== --- head/deskutils/py-paperless/Makefile Thu Jan 2 23:58:27 2020 (r521890) +++ head/deskutils/py-paperless/Makefile Fri Jan 3 00:19:58 2020 (r521891) @@ -2,7 +2,7 @@ PORTNAME= paperless PORTVERSION= 2.7.0 -PORTREVISION= 2 +PORTREVISION= 3 CATEGORIES= deskutils python PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} @@ -108,7 +108,6 @@ do-install: ${STAGEDIR}/var/db/paperless/sqlite ${INSTALL_MAN} ${WRKDIR}/paperless.7 ${STAGEDIR}${MANPREFIX}/man/man7 -# Klammern aussenrum? post-install-DOCS-on: @${MKDIR} ${STAGEDIR}${DOCSDIR}/presentation @cd ${WRKSRC}/docs/_build/html && \ Modified: head/deskutils/py-paperless/files/paperless.7.in ============================================================================== --- head/deskutils/py-paperless/files/paperless.7.in Thu Jan 2 23:58:27 2020 (r521890) +++ head/deskutils/py-paperless/files/paperless.7.in Fri Jan 3 00:19:58 2020 (r521891) @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 30, 2019 +.Dd January 3, 2020 .Dt PAPERLESS 7 .Os .Sh NAME @@ -130,7 +130,7 @@ web server, e.g., nginx + uwsgi. 
.Pp Install and configure uwsgi: .Pp -.Dl "pkg install uwsgi-py36" +.Dl "pkg install uwsgi" .Dl "mkdir -p %%PREFIX%%/etc/uwsgi" .Dl "cp %%EXAMPLESDIR%%/uwsgi.ini \\" .Dl " %%PREFIX%%/etc/uwsgi/paperless.ini" Added: head/deskutils/py-paperless/files/patch-docs-guesswork.rst ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/deskutils/py-paperless/files/patch-docs-guesswork.rst Fri Jan 3 00:19:58 2020 (r521891) @@ -0,0 +1,37 @@ +--- docs/guesswork.rst.orig 2019-01-27 13:48:05 UTC ++++ docs/guesswork.rst +@@ -54,6 +54,34 @@ filename as described above. + + .. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings + ++Transforming filenames for parsing ++---------------------------------- ++Some devices can't produce filenames that can be parsed by the default ++parser. By configuring the option ``PAPERLESS_FILENAME_PARSE_TRANSFORMS`` in ++``paperless.conf`` one can add transformations that are applied to the filename ++before it's parsed. ++ ++The option contains a list of dictionaries of regular expressions (key: ++``pattern``) and replacements (key: ``repl``) in JSON format, which are ++applied in order by passing them to ``re.subn``. Transformation stops ++after the first match, so at most one transformation is applied. The general ++syntax is ++ ++.. code:: python ++ ++ [{"pattern":"pattern1", "repl":"repl1"}, {"pattern":"pattern2", "repl":"repl2"}, ..., {"pattern":"patternN", "repl":"replN"}] ++ ++The example below is for a Brother ADS-2400N, a scanner that allows assigning ++different names to different hardware buttons (useful for handling ++multiple entities in one instance), but insists on adding ``_<count>`` ++to the filename. ++ ++.. code:: python ++ ++ # Brother profile configuration, support "Name_Date_Count" (the default ++ # setting) and "Name_Count" (use "Name" as tag and "Count" as title). 
++ PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}] ++ + .. _guesswork-content: + + Reading the Document Contents Modified: head/deskutils/py-paperless/files/patch-paperless.conf.example ============================================================================== --- head/deskutils/py-paperless/files/patch-paperless.conf.example Thu Jan 2 23:58:27 2020 (r521890) +++ head/deskutils/py-paperless/files/patch-paperless.conf.example Fri Jan 3 00:19:58 2020 (r521891) @@ -36,3 +36,27 @@ # To host paperless under a subpath url like example.com/paperless you set # this value to /paperless. No trailing slash! +@@ -135,6 +135,23 @@ PAPERLESS_EMAIL_SECRET="" + # as normal. + #PAPERLESS_FILENAME_DATE_ORDER="YMD" + ++# Sometimes devices won't create filenames which can be parsed properly ++# by the filename parser (see ++# https://paperless.readthedocs.io/en/latest/guesswork.html). ++# ++# This setting allows specifying a list of transformations ++# in regular expression syntax, which are passed in order to re.subn. ++# Transformation stops after the first match, so at most one transformation ++# is applied. ++# ++# Syntax is a JSON array of dictionaries containing "pattern" and "repl" ++# as keys. ++# ++# The example below transforms filenames created by a Brother ADS-2400N ++# document scanner in its standard configuration `Name_Date_Count', so that ++# count is used as title, name as tag and date can be parsed by paperless. 
++#PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}] ++ + # + # The following values use sensible defaults for modern systems, but if you're + # running Paperless on a low-resource device (like a Raspberry Pi), modifying Added: head/deskutils/py-paperless/files/patch-src-documents-models.py ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/deskutils/py-paperless/files/patch-src-documents-models.py Fri Jan 3 00:19:58 2020 (r521891) @@ -0,0 +1,18 @@ +--- src/documents/models.py.orig 2019-01-27 13:48:05 UTC ++++ src/documents/models.py +@@ -483,8 +483,14 @@ class FileInfo: + "<title>.<suffix>" + """ + ++ filename = os.path.basename(path) ++ for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS: ++ (filename, count) = pattern.subn(repl, filename) ++ if count: ++ break ++ + for regex in cls.REGEXES.values(): +- m = regex.match(os.path.basename(path)) ++ m = regex.match(filename) + if m: + properties = m.groupdict() + cls._mangle_property(properties, "created") Modified: head/deskutils/py-paperless/files/patch-src-paperless-settings.py ============================================================================== --- head/deskutils/py-paperless/files/patch-src-paperless-settings.py Thu Jan 2 23:58:27 2020 (r521890) +++ head/deskutils/py-paperless/files/patch-src-paperless-settings.py Fri Jan 3 00:19:58 2020 (r521891) @@ -1,6 +1,16 @@ --- src/paperless/settings.py.orig 2019-01-27 13:48:05 UTC +++ src/paperless/settings.py -@@ -104,7 +104,7 @@ MIDDLEWARE = [ +@@ -10,7 +10,9 @@ For the full list of settings and their + https://docs.djangoproject.com/en/1.10/ref/settings/ + """ + ++import json + import os ++import re + + from dotenv import load_dotenv + +@@ -102,7 +104,7 @@ MIDDLEWARE = [ ] # We allow CORS from localhost:8080 @@ -9,3 +19,19 @@ # If auth is disabled, we just use our "bypass" 
authentication middleware if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")): +@@ -314,6 +316,15 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_ + DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY") + FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER") + ++# Transformations applied before filename parsing ++FILENAME_PARSE_TRANSFORMS = [] ++_filename_parse_transforms = os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS") ++if _filename_parse_transforms: ++ FILENAME_PARSE_TRANSFORMS = [( ++ re.compile(t["pattern"]), t["repl"]) ++ for t in json.loads(_filename_parse_transforms) ++ ] ++ + # Specify for how many years a correspondent is considered recent. Recent + # correspondents will be shown in a separate "Recent correspondents" filter as + # well. Set to 0 to disable this filter.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202001030019.0030JwoG078027>