Date: Tue, 17 Mar 2026 07:52:58 +0000 From: Yuri Victorovich <yuri@FreeBSD.org> To: ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org Subject: git: 2b309f349f91 - main - misc/py-misaki: New port: Grapheme-to-phoneme engine with modular architecture Message-ID: <69b9085a.3cff6.7f6b47e3@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by yuri: URL: https://cgit.FreeBSD.org/ports/commit/?id=2b309f349f910c703ee891edfc799e87fc1e7390 commit 2b309f349f910c703ee891edfc799e87fc1e7390 Author: Yuri Victorovich <yuri@FreeBSD.org> AuthorDate: 2026-03-17 03:31:44 +0000 Commit: Yuri Victorovich <yuri@FreeBSD.org> CommitDate: 2026-03-17 07:52:50 +0000 misc/py-misaki: New port: Grapheme-to-phoneme engine with modular architecture --- misc/Makefile | 1 + misc/py-misaki/Makefile | 56 +++++++++++++++++++++++++++++++++ misc/py-misaki/distinfo | 3 ++ misc/py-misaki/files/patch-misaki_en.py | 17 ++++++++++ misc/py-misaki/files/usage-ja.py | 16 ++++++++++ misc/py-misaki/files/usage-ko.py | 16 ++++++++++ misc/py-misaki/pkg-descr | 8 +++++ 7 files changed, 117 insertions(+) diff --git a/misc/Makefile b/misc/Makefile index 1da8cf933961..38e5019e0512 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -508,6 +508,7 @@ SUBDIR += py-mcp SUBDIR += py-mem0ai SUBDIR += py-mffpy + SUBDIR += py-misaki SUBDIR += py-mixpanel SUBDIR += py-ml-collections SUBDIR += py-mmcv diff --git a/misc/py-misaki/Makefile b/misc/py-misaki/Makefile new file mode 100644 index 000000000000..a3035475951b --- /dev/null +++ b/misc/py-misaki/Makefile @@ -0,0 +1,56 @@ +PORTNAME= misaki +DISTVERSION= 0.9.4 +CATEGORIES= misc python # machine learning +MASTER_SITES= PYPI +PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX} + +MAINTAINER= ports@FreeBSD.org +COMMENT= Grapheme-to-phoneme engine with modular architecture +WWW= https://github.com/hexgrad/misaki + +LICENSE= MIT +LICENSE_FILE= ${WRKSRC}/LICENSE + +BUILD_DEPENDS= ${PYTHON_PKGNAMEPREFIX}hatchling>0:devel/py-hatchling@${PY_FLAVOR} +RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}addict>0:devel/py-addict@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR} + +USES= python +USE_PYTHON= pep517 concurrent autoplist + +NO_ARCH= yes + +#OPTIONS_DEFINE= EN JA KO ZH +OPTIONS_DEFINE= EN JA KO +OPTIONS_DEFAULT= EN +EN_DESC= English language support +JA_DESC= Japanese language 
support +KO_DESC= Korean language support +#ZH_DESC= Chinese language support + +EN_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}num2words>0:devel/py-num2words@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}spacy>0:textproc/py-spacy@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}en-core-web-sm>0:textproc/en-core-web-sm@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}phonemizer-fork>0:textproc/py-phonemizer-fork@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}espeakng_loader>0:audio/py-espeakng-loader@${PY_FLAVOR} +JA_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}fugashi>0:japanese/py-fugashi@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}jaconv>0:japanese/py-jaconv@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}mojimoji>0:japanese/py-mojimoji@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}unidic>0:japanese/py-unidic@${PY_FLAVOR} +JA_BROKEN= openjtalk and pyopenjtalk need to be ported +KO_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}jamo>0:korean/py-jamo@${PY_FLAVOR} \ + ${PYTHON_PKGNAMEPREFIX}nltk>0:textproc/py-nltk@${PY_FLAVOR} +KO_BROKEN= see https://github.com/hexgrad/misaki/issues/96 +# chinese/py-jieba requires paddlepaddle which is extremely convoluted and can't yet be ported: disable Chinese for now +#ZH_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}jieba>0:chinese/py-jieba@${PY_FLAVOR} \ +# ${PYTHON_PKGNAMEPREFIX}ordered-set>0:devel/py-ordered-set@${PY_FLAVOR} \ +# ${PYTHON_PKGNAMEPREFIX}pypinyin>0:chinese/py-pypinyin@${PY_FLAVOR} \ +# ${PYTHON_PKGNAMEPREFIX}pypinyin-dict>0:chinese/py-pypinyin-dict@${PY_FLAVOR} + +TEST_ENV= ${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR} + +do-test-EN-on: # run the example since there are no tests defined + @cd ${WRKSRC} && \ + ${SETENV} ${TEST_ENV} ${PYTHON_CMD} ${WRKSRC}/examples/usage.py + +.include <bsd.port.mk> diff --git a/misc/py-misaki/distinfo b/misc/py-misaki/distinfo new file mode 100644 index 000000000000..76d4acd99418 --- /dev/null +++ b/misc/py-misaki/distinfo @@ -0,0 +1,3 @@ +TIMESTAMP = 1773622826 +SHA256 (misaki-0.9.4.tar.gz) = 
3960fa3e6de179a90ee8e628446a4a4f6b8c730b6e3410999cf396189f4d9c40 +SIZE (misaki-0.9.4.tar.gz) = 3756765 diff --git a/misc/py-misaki/files/patch-misaki_en.py b/misc/py-misaki/files/patch-misaki_en.py new file mode 100644 index 000000000000..b393dd7efd60 --- /dev/null +++ b/misc/py-misaki/files/patch-misaki_en.py @@ -0,0 +1,17 @@ +-- prevent model download in favor of port dependency + +--- misaki/en.py.orig2025-01-14 00:00:00 UTC ++++ misaki/en.py +@@ -497,8 +497,10 @@ class G2P: + self.version = version + self.british = british + name = f"en_core_web_{'trf' if trf else 'sm'}" +- if not spacy.util.is_package(name): +- spacy.cli.download(name) ++ # Model is pre-installed by FreeBSD port, don't download at runtime ++ # if not spacy.util.is_package(name): ++ # spacy.cli.download(name) ++ # Fail early if model is not installed + components = ['transformer' if trf else 'tok2vec', 'tagger'] + self.nlp = spacy.load(name, enable=components) + self.lexicon = Lexicon(british) diff --git a/misc/py-misaki/files/usage-ja.py b/misc/py-misaki/files/usage-ja.py new file mode 100644 index 000000000000..fb8cd23a1268 --- /dev/null +++ b/misc/py-misaki/files/usage-ja.py @@ -0,0 +1,16 @@ +""" +To run: +uv venv --seed -p 3.11 +uv pip install ".[en]" +uv run examples/usage.py +""" + +from misaki import ja + +g2p = en.G2P(trf=False, fallback=None) # no transformer, American English + +text = '素敵な午後をお過ごしください。また後ほどお会いしましょう。' + +phonemes, tokens = g2p(text) + +print(phonemes) # misˈɑki ɪz ə ʤˈitəpˈi ˈɛnʤən dəzˈInd fɔɹ kˈOkəɹO mˈɑdᵊlz. diff --git a/misc/py-misaki/files/usage-ko.py b/misc/py-misaki/files/usage-ko.py new file mode 100644 index 000000000000..929ed97a5ced --- /dev/null +++ b/misc/py-misaki/files/usage-ko.py @@ -0,0 +1,16 @@ +""" +To run: +uv venv --seed -p 3.11 +uv pip install ".[en]" +uv run examples/usage.py +""" + +from misaki import ko + +g2p = ko.G2p() + +text = '즐거운 오후 보내시고 나중에 다시 뵙겠습니다.' 
+ +phonemes, tokens = g2p(text) + +print(phonemes) diff --git a/misc/py-misaki/pkg-descr b/misc/py-misaki/pkg-descr new file mode 100644 index 000000000000..245434122308 --- /dev/null +++ b/misc/py-misaki/pkg-descr @@ -0,0 +1,8 @@ +Misaki is a grapheme-to-phoneme (G2P) engine with a modular architecture +that separates its core functionality from language-specific features. + +It supports multiple languages including English, Japanese, Chinese, Korean, +Vietnamese, and Hebrew through optional language-specific modules. + +The core package provides basic G2P conversion functionality with enhanced +data structures (via addict) and improved regular expressions (via regex).
home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69b9085a.3cff6.7f6b47e3>
