Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 17 Mar 2026 07:52:58 +0000
From:      Yuri Victorovich <yuri@FreeBSD.org>
To:        ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-main@FreeBSD.org
Subject:   git: 2b309f349f91 - main - misc/py-misaki: New port: Grapheme-to-phoneme engine with modular architecture
Message-ID:  <69b9085a.3cff6.7f6b47e3@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by yuri:

URL: https://cgit.FreeBSD.org/ports/commit/?id=2b309f349f910c703ee891edfc799e87fc1e7390

commit 2b309f349f910c703ee891edfc799e87fc1e7390
Author:     Yuri Victorovich <yuri@FreeBSD.org>
AuthorDate: 2026-03-17 03:31:44 +0000
Commit:     Yuri Victorovich <yuri@FreeBSD.org>
CommitDate: 2026-03-17 07:52:50 +0000

    misc/py-misaki: New port: Grapheme-to-phoneme engine with modular architecture
---
 misc/Makefile                           |  1 +
 misc/py-misaki/Makefile                 | 56 +++++++++++++++++++++++++++++++++
 misc/py-misaki/distinfo                 |  3 ++
 misc/py-misaki/files/patch-misaki_en.py | 17 ++++++++++
 misc/py-misaki/files/usage-ja.py        | 16 ++++++++++
 misc/py-misaki/files/usage-ko.py        | 16 ++++++++++
 misc/py-misaki/pkg-descr                |  8 +++++
 7 files changed, 117 insertions(+)

diff --git a/misc/Makefile b/misc/Makefile
index 1da8cf933961..38e5019e0512 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -508,6 +508,7 @@
     SUBDIR += py-mcp
     SUBDIR += py-mem0ai
     SUBDIR += py-mffpy
+    SUBDIR += py-misaki
     SUBDIR += py-mixpanel
     SUBDIR += py-ml-collections
     SUBDIR += py-mmcv
diff --git a/misc/py-misaki/Makefile b/misc/py-misaki/Makefile
new file mode 100644
index 000000000000..a3035475951b
--- /dev/null
+++ b/misc/py-misaki/Makefile
@@ -0,0 +1,56 @@
+PORTNAME=	misaki
+DISTVERSION=	0.9.4
+CATEGORIES=	misc python # machine learning
+MASTER_SITES=	PYPI
+PKGNAMEPREFIX=	${PYTHON_PKGNAMEPREFIX}
+
+MAINTAINER=	ports@FreeBSD.org
+COMMENT=	Grapheme-to-phoneme engine with modular architecture
+WWW=		https://github.com/hexgrad/misaki
+
+LICENSE=	MIT
+LICENSE_FILE=	${WRKSRC}/LICENSE
+
+BUILD_DEPENDS=	${PYTHON_PKGNAMEPREFIX}hatchling>0:devel/py-hatchling@${PY_FLAVOR}
+RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}addict>0:devel/py-addict@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR}
+
+USES=		python
+USE_PYTHON=	pep517 concurrent autoplist
+
+NO_ARCH=	yes
+
+#OPTIONS_DEFINE=		EN JA KO ZH
+OPTIONS_DEFINE=		EN JA KO
+OPTIONS_DEFAULT=	EN
+EN_DESC=		English language support
+JA_DESC=		Japanese language support
+KO_DESC=		Korean language support
+#ZH_DESC=		Chinese language support
+
+EN_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}num2words>0:devel/py-num2words@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}spacy>0:textproc/py-spacy@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}en-core-web-sm>0:textproc/en-core-web-sm@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}phonemizer-fork>0:textproc/py-phonemizer-fork@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}espeakng_loader>0:audio/py-espeakng-loader@${PY_FLAVOR}
+JA_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}fugashi>0:japanese/py-fugashi@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}jaconv>0:japanese/py-jaconv@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}mojimoji>0:japanese/py-mojimoji@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}unidic>0:japanese/py-unidic@${PY_FLAVOR}
+JA_BROKEN=	openjtalk and pyopenjtalk need to be ported
+KO_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}jamo>0:korean/py-jamo@${PY_FLAVOR} \
+		${PYTHON_PKGNAMEPREFIX}nltk>0:textproc/py-nltk@${PY_FLAVOR}
+KO_BROKEN=	see https://github.com/hexgrad/misaki/issues/96
+# chinese/py-jieba requires paddlepaddle, which is very complex and cannot be ported yet, so Chinese support is disabled for now
+#ZH_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}jieba>0:chinese/py-jieba@${PY_FLAVOR} \
+#		${PYTHON_PKGNAMEPREFIX}ordered-set>0:devel/py-ordered-set@${PY_FLAVOR} \
+#		${PYTHON_PKGNAMEPREFIX}pypinyin>0:chinese/py-pypinyin@${PY_FLAVOR} \
+#		${PYTHON_PKGNAMEPREFIX}pypinyin-dict>0:chinese/py-pypinyin-dict@${PY_FLAVOR}
+
+TEST_ENV=	${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}
+
+do-test-EN-on: # run the example since there are no tests defined
+	@cd ${WRKSRC} && \
+		${SETENV} ${TEST_ENV} ${PYTHON_CMD} ${WRKSRC}/examples/usage.py
+
+.include <bsd.port.mk>
diff --git a/misc/py-misaki/distinfo b/misc/py-misaki/distinfo
new file mode 100644
index 000000000000..76d4acd99418
--- /dev/null
+++ b/misc/py-misaki/distinfo
@@ -0,0 +1,3 @@
+TIMESTAMP = 1773622826
+SHA256 (misaki-0.9.4.tar.gz) = 3960fa3e6de179a90ee8e628446a4a4f6b8c730b6e3410999cf396189f4d9c40
+SIZE (misaki-0.9.4.tar.gz) = 3756765
diff --git a/misc/py-misaki/files/patch-misaki_en.py b/misc/py-misaki/files/patch-misaki_en.py
new file mode 100644
index 000000000000..b393dd7efd60
--- /dev/null
+++ b/misc/py-misaki/files/patch-misaki_en.py
@@ -0,0 +1,17 @@
+-- prevent model download in favor of port dependency
+
+--- misaki/en.py.orig	2025-01-14 00:00:00 UTC
++++ misaki/en.py
+@@ -497,8 +497,10 @@ class G2P:
+         self.version = version
+         self.british = british
+         name = f"en_core_web_{'trf' if trf else 'sm'}"
+-        if not spacy.util.is_package(name):
+-            spacy.cli.download(name)
++        # Model is pre-installed by FreeBSD port, don't download at runtime
++        # if not spacy.util.is_package(name):
++        #     spacy.cli.download(name)
++        # Fail early if model is not installed
+         components = ['transformer' if trf else 'tok2vec', 'tagger']
+         self.nlp = spacy.load(name, enable=components)
+         self.lexicon = Lexicon(british)
diff --git a/misc/py-misaki/files/usage-ja.py b/misc/py-misaki/files/usage-ja.py
new file mode 100644
index 000000000000..fb8cd23a1268
--- /dev/null
+++ b/misc/py-misaki/files/usage-ja.py
@@ -0,0 +1,16 @@
+"""
+Japanese usage example. To run (assumes the Japanese extras are named "ja"):
+uv venv --seed -p 3.11
+uv pip install ".[ja]"
+uv run usage-ja.py
+"""
+
+from misaki import ja
+
+g2p = ja.JAG2P() # Japanese engine; the copied en.G2P(...) call raised NameError ('en' is not imported)
+
+text = '素敵な午後をお過ごしください。また後ほどお会いしましょう。'
+
+phonemes, tokens = g2p(text)
+
+print(phonemes) # phoneme string for the Japanese sentence above
diff --git a/misc/py-misaki/files/usage-ko.py b/misc/py-misaki/files/usage-ko.py
new file mode 100644
index 000000000000..929ed97a5ced
--- /dev/null
+++ b/misc/py-misaki/files/usage-ko.py
@@ -0,0 +1,16 @@
+"""
+Korean usage example. To run (assumes the Korean extras are named "ko"):
+uv venv --seed -p 3.11
+uv pip install ".[ko]"
+uv run usage-ko.py
+"""
+
+from misaki import ko
+
+g2p = ko.G2p() # NOTE(review): confirm the G2P class name exported by misaki.ko
+
+text = '즐거운 오후 보내시고 나중에 다시 뵙겠습니다.'
+
+phonemes, tokens = g2p(text)
+
+print(phonemes)
diff --git a/misc/py-misaki/pkg-descr b/misc/py-misaki/pkg-descr
new file mode 100644
index 000000000000..245434122308
--- /dev/null
+++ b/misc/py-misaki/pkg-descr
@@ -0,0 +1,8 @@
+Misaki is a grapheme-to-phoneme (G2P) engine with a modular architecture
+that separates its core functionality from language-specific features.
+
+It supports multiple languages including English, Japanese, Chinese, Korean,
+Vietnamese, and Hebrew through optional language-specific modules.
+
+The core package provides basic G2P conversion functionality with enhanced
+data structures (via addict) and improved regular expressions (via regex).


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69b9085a.3cff6.7f6b47e3>