Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 3 Jan 2020 00:19:58 +0000 (UTC)
From:      Michael Gmelin <grembo@FreeBSD.org>
To:        ports-committers@freebsd.org, svn-ports-all@freebsd.org, svn-ports-head@freebsd.org
Subject:   svn commit: r521891 - in head/deskutils/py-paperless: . files
Message-ID:  <202001030019.0030JwoG078027@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: grembo
Date: Fri Jan  3 00:19:58 2020
New Revision: 521891
URL: https://svnweb.freebsd.org/changeset/ports/521891

Log:
  Port back filename transformation feature from pull request
  https://github.com/the-paperless-project/paperless/pull/542
  
  Adapt man page to new default python version. Remove stale comment.

Added:
  head/deskutils/py-paperless/files/patch-docs-guesswork.rst   (contents, props changed)
  head/deskutils/py-paperless/files/patch-src-documents-models.py   (contents, props changed)
Modified:
  head/deskutils/py-paperless/Makefile
  head/deskutils/py-paperless/files/paperless.7.in
  head/deskutils/py-paperless/files/patch-paperless.conf.example
  head/deskutils/py-paperless/files/patch-src-paperless-settings.py

Modified: head/deskutils/py-paperless/Makefile
==============================================================================
--- head/deskutils/py-paperless/Makefile	Thu Jan  2 23:58:27 2020	(r521890)
+++ head/deskutils/py-paperless/Makefile	Fri Jan  3 00:19:58 2020	(r521891)
@@ -2,7 +2,7 @@
 
 PORTNAME=	paperless
 PORTVERSION=	2.7.0
-PORTREVISION=	2
+PORTREVISION=	3
 CATEGORIES=	deskutils python
 PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
 
@@ -108,7 +108,6 @@ do-install:
 		${STAGEDIR}/var/db/paperless/sqlite
 	${INSTALL_MAN} ${WRKDIR}/paperless.7 ${STAGEDIR}${MANPREFIX}/man/man7
 
-# Klammern aussenrum?
 post-install-DOCS-on:
 	@${MKDIR} ${STAGEDIR}${DOCSDIR}/presentation
 	@cd ${WRKSRC}/docs/_build/html && \

Modified: head/deskutils/py-paperless/files/paperless.7.in
==============================================================================
--- head/deskutils/py-paperless/files/paperless.7.in	Thu Jan  2 23:58:27 2020	(r521890)
+++ head/deskutils/py-paperless/files/paperless.7.in	Fri Jan  3 00:19:58 2020	(r521891)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 30, 2019
+.Dd January 3, 2020
 .Dt PAPERLESS 7
 .Os
 .Sh NAME
@@ -130,7 +130,7 @@ web server, e.g., nginx + uwsgi.
 .Pp
 Install and configure uwsgi:
 .Pp
-.Dl "pkg install uwsgi-py36"
+.Dl "pkg install uwsgi"
 .Dl "mkdir -p %%PREFIX%%/etc/uwsgi"
 .Dl "cp %%EXAMPLESDIR%%/uwsgi.ini \\"
 .Dl "    %%PREFIX%%/etc/uwsgi/paperless.ini"

Added: head/deskutils/py-paperless/files/patch-docs-guesswork.rst
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/deskutils/py-paperless/files/patch-docs-guesswork.rst	Fri Jan  3 00:19:58 2020	(r521891)
@@ -0,0 +1,37 @@
+--- docs/guesswork.rst.orig	2019-01-27 13:48:05 UTC
++++ docs/guesswork.rst
+@@ -54,6 +54,34 @@ filename as described above.
+ 
+ .. _dateparser: https://github.com/scrapinghub/dateparser/blob/v0.7.0/docs/usage.rst#settings
+ 
++Transforming filenames for parsing
++----------------------------------
++Some devices can't produce filenames that can be parsed by the default
++parser. By configuring the option ``PAPERLESS_FILENAME_PARSE_TRANSFORMS`` in
++``paperless.conf`` one can add transformations that are applied to the filename
++before it's parsed.
++
++The option contains a list of dictionaries of regular expressions (key:
++``pattern``) and replacements (key: ``repl``) in JSON format, which are
++applied in order by passing them to ``re.subn``. Transformation stops
++after the first match, so at most one transformation is applied. The general
++syntax is
++
++.. code:: python
++
++   [{"pattern":"pattern1", "repl":"repl1"}, {"pattern":"pattern2", "repl":"repl2"}, ..., {"pattern":"patternN", "repl":"replN"}]
++
++The example below is for a Brother ADS-2400N, a scanner that allows
++assigning different names to different hardware buttons (useful for handling
++multiple entities in one instance), but insists on adding ``_<count>``
++to the filename.
++
++.. code:: python
++
++   # Brother profile configuration, support "Name_Date_Count" (the default
++   # setting) and "Name_Count" (use "Name" as tag and "Count" as title).
++   PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}, {"pattern":"^([a-z]+)_([0-9]+)\\.", "repl":" - \\2 - \\1."}]
++
+ .. _guesswork-content:
+ 
+ Reading the Document Contents

Modified: head/deskutils/py-paperless/files/patch-paperless.conf.example
==============================================================================
--- head/deskutils/py-paperless/files/patch-paperless.conf.example	Thu Jan  2 23:58:27 2020	(r521890)
+++ head/deskutils/py-paperless/files/patch-paperless.conf.example	Fri Jan  3 00:19:58 2020	(r521891)
@@ -36,3 +36,27 @@
  
  # To host paperless under a subpath url like example.com/paperless you set
  # this value to /paperless. No trailing slash!
+@@ -135,6 +135,23 @@ PAPERLESS_EMAIL_SECRET=""
+ # as normal.
+ #PAPERLESS_FILENAME_DATE_ORDER="YMD"
+ 
++# Sometimes devices won't create filenames which can be parsed properly
++# by the filename parser (see
++# https://paperless.readthedocs.io/en/latest/guesswork.html).
++#
++# This setting allows specifying a list of transformations
++# in regular expression syntax, which are passed in order to re.subn.
++# Transformation stops after the first match, so at most one transformation
++# is applied.
++#
++# Syntax is a JSON array of dictionaries containing "pattern" and "repl"
++# as keys.
++#
++# The example below transforms filenames created by a Brother ADS-2400N
++# document scanner in its standard configuration `Name_Date_Count', so that
++# count is used as title, name as tag and date can be parsed by paperless.
++#PAPERLESS_FILENAME_PARSE_TRANSFORMS=[{"pattern":"^([a-z]+)_(\\d{8})_(\\d{6})_([0-9]+)\\.", "repl":"\\2\\3Z - \\4 - \\1."}]
++
+ #
+ # The following values use sensible defaults for modern systems, but if you're
+ # running Paperless on a low-resource device (like a Raspberry Pi), modifying

Added: head/deskutils/py-paperless/files/patch-src-documents-models.py
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/deskutils/py-paperless/files/patch-src-documents-models.py	Fri Jan  3 00:19:58 2020	(r521891)
@@ -0,0 +1,18 @@
+--- src/documents/models.py.orig	2019-01-27 13:48:05 UTC
++++ src/documents/models.py
+@@ -483,8 +483,14 @@ class FileInfo:
+           "<title>.<suffix>"
+         """
+ 
++        filename = os.path.basename(path)
++        for (pattern, repl) in settings.FILENAME_PARSE_TRANSFORMS:
++            (filename, count) = pattern.subn(repl, filename)
++            if count:
++                break
++
+         for regex in cls.REGEXES.values():
+-            m = regex.match(os.path.basename(path))
++            m = regex.match(filename)
+             if m:
+                 properties = m.groupdict()
+                 cls._mangle_property(properties, "created")

Modified: head/deskutils/py-paperless/files/patch-src-paperless-settings.py
==============================================================================
--- head/deskutils/py-paperless/files/patch-src-paperless-settings.py	Thu Jan  2 23:58:27 2020	(r521890)
+++ head/deskutils/py-paperless/files/patch-src-paperless-settings.py	Fri Jan  3 00:19:58 2020	(r521891)
@@ -1,6 +1,16 @@
 --- src/paperless/settings.py.orig	2019-01-27 13:48:05 UTC
 +++ src/paperless/settings.py
-@@ -104,7 +104,7 @@ MIDDLEWARE = [
+@@ -10,7 +10,9 @@ For the full list of settings and their 
+ https://docs.djangoproject.com/en/1.10/ref/settings/
+ """
+ 
++import json
+ import os
++import re
+ 
+ from dotenv import load_dotenv
+ 
+@@ -102,7 +104,7 @@ MIDDLEWARE = [
  ]
  
  # We allow CORS from localhost:8080
@@ -9,3 +19,19 @@
  
  # If auth is disabled, we just use our "bypass" authentication middleware
  if bool(os.getenv("PAPERLESS_DISABLE_LOGIN", "false").lower() in ("yes", "y", "1", "t", "true")):
+@@ -314,6 +316,15 @@ FY_END = os.getenv("PAPERLESS_FINANCIAL_
+ DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
+ FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
+ 
++# Transformations applied before filename parsing
++FILENAME_PARSE_TRANSFORMS = []
++_filename_parse_transforms = os.getenv("PAPERLESS_FILENAME_PARSE_TRANSFORMS")
++if _filename_parse_transforms:
++    FILENAME_PARSE_TRANSFORMS = [(
++            re.compile(t["pattern"]), t["repl"])
++        for t in json.loads(_filename_parse_transforms)
++    ]
++
+ # Specify for how many years a correspondent is considered recent. Recent
+ # correspondents will be shown in a separate "Recent correspondents" filter as
+ # well. Set to 0 to disable this filter.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202001030019.0030JwoG078027>