Date: Mon, 2 Sep 2024 07:51:19 GMT From: Dimitry Andric <dim@FreeBSD.org> To: ports-committers@FreeBSD.org, dev-commits-ports-all@FreeBSD.org, dev-commits-ports-branches@FreeBSD.org Subject: git: ea00294d911a - 2024Q3 - devel/rapidfuzz-cpp: fix tests with libc++ 19 Message-ID: <202409020751.4827pJ1f004787@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch 2024Q3 has been updated by dim: URL: https://cgit.FreeBSD.org/ports/commit/?id=ea00294d911ad7e0f7a9221413b1edc5c63670f6 commit ea00294d911ad7e0f7a9221413b1edc5c63670f6 Author: Dimitry Andric <dim@FreeBSD.org> AuthorDate: 2024-09-01 15:03:00 +0000 Commit: Dimitry Andric <dim@FreeBSD.org> CommitDate: 2024-09-02 07:50:55 +0000 devel/rapidfuzz-cpp: fix tests with libc++ 19 As noted in the libc++ 19 release notes [1], std::char_traits<> is now only provided for char, char8_t, char16_t, char32_t and wchar_t, and any instantiation for other types will fail. This causes ports using devel/rapidfuzz-cpp to fail to compile with clang 19 and libc++ 19, resulting in errors similar to: /usr/include/c++/v1/string:820:42: error: implicit instantiation of undefined template 'std::char_traits<unsigned int>' 820 | static_assert(is_same<_CharT, typename traits_type::char_type>::value, | ^ /wrkdirs/usr/ports/devel/py-rapidfuzz/work-py311/rapidfuzz-3.9.6/src/rapidfuzz/cpp_common.hpp:711:25: note: in instantiation of template class 'std::basic_string<unsigned int>' requested here 711 | auto proc_str = rf::opcodes_apply<uint32_t>(ops, s1, s2); | ^ The devel/rapidfuzz-cpp port itself does "build" since it only gathers a bunch of headers and installs them into the stage area, but running 'make test' also fails similarly: /usr/include/c++/v1/string:820:42: error: implicit instantiation of undefined template 'std::char_traits<unsigned char>' 820 | static_assert(is_same<_CharT, typename traits_type::char_type>::value, | ^ /wrkdirs/usr/ports/devel/rapidfuzz-cpp/work/rapidfuzz-cpp-3.0.5/test/distance/examples/ocr.cpp:3:28: note: in instantiation of template class 'std::basic_string<unsigned char>' requested here 3 | std::basic_string<uint8_t> ocr_example1 = { | ^ /usr/include/c++/v1/__fwd/string.h:23:29: note: template is declared here 23 | struct _LIBCPP_TEMPLATE_VIS char_traits; | ^ Unfortunately rapidfuzz-cpp makes heavy use of the no-longer-existing `std::basic_string<uint8_t>`, 
so I had to do quite a lot of search and replace operations, replacing these with equivalent `std::vector` types. Note that as far as I can see, only devel/py-rapidfuzz is a consumer of this port, so applying these changes should not disrupt anything else. I have a follow-up patch for that port too. [1] https://libcxx.llvm.org/ReleaseNotes/19.html#deprecations-and-removals PR: 281193 Approved by: yuri (maintainer) MFH: 2024Q3 (cherry picked from commit 9830c5e79dca5b0a62ab7cbdd4c1e19739bbb84b) --- devel/rapidfuzz-cpp/Makefile | 1 + .../files/patch-bench_bench-jarowinkler.cpp | 14 ++++ .../rapidfuzz-cpp/files/patch-bench_bench-lcs.cpp | 14 ++++ .../files/patch-bench_bench-levenshtein.cpp | 14 ++++ .../files/patch-extras_rapidfuzz__amalgamated.hpp | 53 +++++++++++++ ...uzzing_fuzz__damerau__levenshtein__distance.cpp | 33 ++++++++ .../files/patch-fuzzing_fuzz__indel__distance.cpp | 20 +++++ .../files/patch-fuzzing_fuzz__indel__editops.cpp | 11 +++ .../files/patch-fuzzing_fuzz__jaro__similarity.cpp | 48 ++++++++++++ .../files/patch-fuzzing_fuzz__lcs__similarity.cpp | 27 +++++++ .../patch-fuzzing_fuzz__levenshtein__distance.cpp | 40 ++++++++++ .../patch-fuzzing_fuzz__levenshtein__editops.cpp | 20 +++++ .../files/patch-fuzzing_fuzz__osa__distance.cpp | 33 ++++++++ .../rapidfuzz-cpp/files/patch-fuzzing_fuzzing.hpp | 45 +++++++++++ .../files/patch-rapidfuzz_distance.hpp | 53 +++++++++++++ devel/rapidfuzz-cpp/files/patch-test_common.hpp | 23 ++++++ .../files/patch-test_distance_examples_ocr.cpp | 19 +++++ .../files/patch-test_distance_examples_ocr.hpp | 12 +++ ...t_distance_examples_pythonLevenshteinIssue9.cpp | 20 +++++ ...t_distance_examples_pythonLevenshteinIssue9.hpp | 14 ++++ .../files/patch-test_distance_tests-Hamming.cpp | 18 +++++ .../files/patch-test_distance_tests-Indel.cpp | 11 +++ .../patch-test_distance_tests-Levenshtein.cpp | 88 ++++++++++++++++++++++ 23 files changed, 631 insertions(+) diff --git a/devel/rapidfuzz-cpp/Makefile b/devel/rapidfuzz-cpp/Makefile 
index 2ae77cc591a5..2d6b4e8b9ea9 100644 --- a/devel/rapidfuzz-cpp/Makefile +++ b/devel/rapidfuzz-cpp/Makefile @@ -1,6 +1,7 @@ PORTNAME= rapidfuzz-cpp DISTVERSIONPREFIX= v DISTVERSION= 3.0.5 +PORTREVISION= 1 CATEGORIES= devel MAINTAINER= yuri@FreeBSD.org diff --git a/devel/rapidfuzz-cpp/files/patch-bench_bench-jarowinkler.cpp b/devel/rapidfuzz-cpp/files/patch-bench_bench-jarowinkler.cpp new file mode 100644 index 000000000000..40fb6f859f8a --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-bench_bench-jarowinkler.cpp @@ -0,0 +1,14 @@ +--- bench/bench-jarowinkler.cpp.orig 2024-07-02 14:50:14 UTC ++++ bench/bench-jarowinkler.cpp +@@ -19,9 +19,9 @@ template <typename T> + } + + template <typename T> +-std::basic_string<T> str_multiply(std::basic_string<T> a, unsigned int b) ++std::vector<T> str_multiply(std::vector<T> a, unsigned int b) + { +- std::basic_string<T> output; ++ std::vector<T> output; + while (b--) + output += a; + diff --git a/devel/rapidfuzz-cpp/files/patch-bench_bench-lcs.cpp b/devel/rapidfuzz-cpp/files/patch-bench_bench-lcs.cpp new file mode 100644 index 000000000000..b75a965d642a --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-bench_bench-lcs.cpp @@ -0,0 +1,14 @@ +--- bench/bench-lcs.cpp.orig 2024-07-02 14:50:14 UTC ++++ bench/bench-lcs.cpp +@@ -20,9 +20,9 @@ template <typename T> + } + + template <typename T> +-std::basic_string<T> str_multiply(std::basic_string<T> a, unsigned int b) ++std::vector<T> str_multiply(std::vector<T> a, unsigned int b) + { +- std::basic_string<T> output; ++ std::vector<T> output; + while (b--) + output += a; + diff --git a/devel/rapidfuzz-cpp/files/patch-bench_bench-levenshtein.cpp b/devel/rapidfuzz-cpp/files/patch-bench_bench-levenshtein.cpp new file mode 100644 index 000000000000..b5c9e6930b71 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-bench_bench-levenshtein.cpp @@ -0,0 +1,14 @@ +--- bench/bench-levenshtein.cpp.orig 2024-07-02 14:50:14 UTC ++++ bench/bench-levenshtein.cpp +@@ -19,9 +19,9 @@ template 
<typename T> + } + + template <typename T> +-std::basic_string<T> str_multiply(std::basic_string<T> a, unsigned int b) ++std::vector<T> str_multiply(std::vector<T> a, unsigned int b) + { +- std::basic_string<T> output; ++ std::vector<T> output; + while (b--) + output += a; + diff --git a/devel/rapidfuzz-cpp/files/patch-extras_rapidfuzz__amalgamated.hpp b/devel/rapidfuzz-cpp/files/patch-extras_rapidfuzz__amalgamated.hpp new file mode 100644 index 000000000000..f9a80b0e964f --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-extras_rapidfuzz__amalgamated.hpp @@ -0,0 +1,53 @@ +--- extras/rapidfuzz_amalgamated.hpp.orig 2024-07-02 14:50:14 UTC ++++ extras/rapidfuzz_amalgamated.hpp +@@ -9152,13 +9152,13 @@ template <typename CharT, typename InputIt1, typename + namespace rapidfuzz { + + template <typename CharT, typename InputIt1, typename InputIt2> +-std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, +- InputIt2 last2) ++std::vector<CharT> editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, ++ InputIt2 last2) + { + auto len1 = static_cast<size_t>(std::distance(first1, last1)); + auto len2 = static_cast<size_t>(std::distance(first2, last2)); + +- std::basic_string<CharT> res_str; ++ std::vector<CharT> res_str; + res_str.resize(len1 + len2); + size_t src_pos = 0; + size_t dest_pos = 0; +@@ -9198,20 +9198,20 @@ template <typename CharT, typename Sentence1, typename + } + + template <typename CharT, typename Sentence1, typename Sentence2> +-std::basic_string<CharT> editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2) ++std::vector<CharT> editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2) + { + return editops_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), + detail::to_end(s2)); + } + + template <typename CharT, typename InputIt1, typename InputIt2> +-std::basic_string<CharT> opcodes_apply(const Opcodes& 
ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, +- InputIt2 last2) ++std::vector<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, ++ InputIt2 last2) + { + auto len1 = static_cast<size_t>(std::distance(first1, last1)); + auto len2 = static_cast<size_t>(std::distance(first2, last2)); + +- std::basic_string<CharT> res_str; ++ std::vector<CharT> res_str; + res_str.resize(len1 + len2); + size_t dest_pos = 0; + +@@ -9237,7 +9237,7 @@ template <typename CharT, typename Sentence1, typename + } + + template <typename CharT, typename Sentence1, typename Sentence2> +-std::basic_string<CharT> opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) ++std::vector<CharT> opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) + { + return opcodes_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), + detail::to_end(s2)); diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__damerau__levenshtein__distance.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__damerau__levenshtein__distance.cpp new file mode 100644 index 000000000000..5b21c94399a4 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__damerau__levenshtein__distance.cpp @@ -0,0 +1,33 @@ +--- fuzzing/fuzz_damerau_levenshtein_distance.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_damerau_levenshtein_distance.cpp +@@ -8,8 +8,8 @@ + #include <stdexcept> + #include <string> + +-void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>& s1, +- const std::basic_string<uint8_t>& s2, size_t score_cutoff) ++void validate_distance(size_t reference_dist, const std::vector<uint8_t>& s1, ++ const std::vector<uint8_t>& s2, size_t score_cutoff) + { + if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1; + +@@ -26,7 +26,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- 
std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + size_t reference_dist = rapidfuzz_reference::damerau_levenshtein_distance(s1, s2); +@@ -40,8 +40,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + /* test long sequences */ + for (unsigned int i = 2; i < 9; ++i) { +- std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i)); +- std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i)); ++ std::vector<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i)); ++ std::vector<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i)); + + if (s1_.size() > 10000 || s2_.size() > 10000) break; + diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__indel__distance.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__indel__distance.cpp new file mode 100644 index 000000000000..f398a1eae0a3 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__indel__distance.cpp @@ -0,0 +1,20 @@ +--- fuzzing/fuzz_indel_distance.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_indel_distance.cpp +@@ -8,7 +8,7 @@ + #include <stdexcept> + #include <string> + +-void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2, ++void validate_distance(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2, + size_t score_cutoff) + { + auto dist = rapidfuzz::indel_distance(s1, s2, score_cutoff); +@@ -25,7 +25,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + validate_distance(s1, s2, 0); diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__indel__editops.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__indel__editops.cpp new file mode 100644 index 000000000000..5c9003765235 --- /dev/null +++ 
b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__indel__editops.cpp @@ -0,0 +1,11 @@ +--- fuzzing/fuzz_indel_editops.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_indel_editops.cpp +@@ -9,7 +9,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + size_t score = rapidfuzz_reference::indel_distance(s1, s2); diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__jaro__similarity.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__jaro__similarity.cpp new file mode 100644 index 000000000000..d3ceee29e568 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__jaro__similarity.cpp @@ -0,0 +1,48 @@ +--- fuzzing/fuzz_jaro_similarity.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_jaro_similarity.cpp +@@ -14,7 +14,7 @@ template <size_t MaxLen> + } + + template <size_t MaxLen> +-void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2) ++void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2) + { + #ifdef RAPIDFUZZ_SIMD + size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0); +@@ -22,7 +22,7 @@ void validate_simd(const std::basic_string<uint8_t>& s + + rapidfuzz::experimental::MultiJaro<MaxLen> scorer(count); + +- std::vector<std::basic_string<uint8_t>> strings; ++ std::vector<std::vector<uint8_t>> strings; + + for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) { + if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) { +@@ -59,7 +59,7 @@ void validate_simd(const std::basic_string<uint8_t>& s + #endif + } + +-void validate_distance(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2) ++void validate_distance(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2) + { + double reference_sim = 
rapidfuzz_reference::jaro_similarity(s1, s2); + double sim = rapidfuzz::jaro_similarity(s1, s2); +@@ -80,15 +80,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + validate_distance(s1, s2); + + /* test long sequences */ + for (unsigned int i = 2; i < 9; ++i) { +- std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i)); +- std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i)); ++ std::vector<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i)); ++ std::vector<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i)); + + if (s1_.size() > 10000 || s2_.size() > 10000) break; + diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__lcs__similarity.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__lcs__similarity.cpp new file mode 100644 index 000000000000..8ec292fd13d7 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__lcs__similarity.cpp @@ -0,0 +1,27 @@ +--- fuzzing/fuzz_lcs_similarity.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_lcs_similarity.cpp +@@ -9,13 +9,13 @@ template <size_t MaxLen> + #include <string> + + template <size_t MaxLen> +-void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2) ++void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2) + { + #ifdef RAPIDFUZZ_SIMD + size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0); + rapidfuzz::experimental::MultiLCSseq<MaxLen> scorer(count); + +- std::vector<std::basic_string<uint8_t>> strings; ++ std::vector<std::vector<uint8_t>> strings; + + for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) { + if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) { +@@ -51,7 +51,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int 
LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) { + return 0; + } diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__levenshtein__distance.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__levenshtein__distance.cpp new file mode 100644 index 000000000000..bcc00b61344e --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__levenshtein__distance.cpp @@ -0,0 +1,40 @@ +--- fuzzing/fuzz_levenshtein_distance.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_levenshtein_distance.cpp +@@ -9,7 +9,7 @@ template <size_t MaxLen> + #include <string> + + template <size_t MaxLen> +-void validate_simd(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2) ++void validate_simd(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2) + { + #ifdef RAPIDFUZZ_SIMD + size_t count = s1.size() / MaxLen + ((s1.size() % MaxLen) != 0); +@@ -17,7 +17,7 @@ void validate_simd(const std::basic_string<uint8_t>& s + + rapidfuzz::experimental::MultiLevenshtein<MaxLen> scorer(count); + +- std::vector<std::basic_string<uint8_t>> strings; ++ std::vector<std::vector<uint8_t>> strings; + + for (auto it1 = s1.begin(); it1 != s1.end(); it1 += MaxLen) { + if (std::distance(it1, s1.end()) < static_cast<ptrdiff_t>(MaxLen)) { +@@ -52,8 +52,8 @@ void validate_simd(const std::basic_string<uint8_t>& s + #endif + } + +-void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>& s1, +- const std::basic_string<uint8_t>& s2, size_t score_cutoff) ++void validate_distance(size_t reference_dist, const std::vector<uint8_t>& s1, ++ const std::vector<uint8_t>& s2, size_t score_cutoff) + { + if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1; + +@@ -75,7 +75,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- 
std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + size_t reference_dist = rapidfuzz_reference::levenshtein_distance(s1, s2); diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__levenshtein__editops.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__levenshtein__editops.cpp new file mode 100644 index 000000000000..74eb0be61c4c --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__levenshtein__editops.cpp @@ -0,0 +1,20 @@ +--- fuzzing/fuzz_levenshtein_editops.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_levenshtein_editops.cpp +@@ -7,7 +7,7 @@ + #include <stdexcept> + #include <string> + +-void validate_editops(const std::basic_string<uint8_t>& s1, const std::basic_string<uint8_t>& s2, ++void validate_editops(const std::vector<uint8_t>& s1, const std::vector<uint8_t>& s2, + size_t score, size_t score_hint = std::numeric_limits<size_t>::max()) + { + rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2, score_hint); +@@ -17,7 +17,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + /* hirschbergs algorithm is only used for very long sequences which are apparently not generated a lot by diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__osa__distance.cpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__osa__distance.cpp new file mode 100644 index 000000000000..b199831b77ed --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzz__osa__distance.cpp @@ -0,0 +1,33 @@ +--- fuzzing/fuzz_osa_distance.cpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzz_osa_distance.cpp +@@ -8,8 +8,8 @@ + #include <stdexcept> + #include <string> + +-void validate_distance(size_t reference_dist, const std::basic_string<uint8_t>& s1, +- const 
std::basic_string<uint8_t>& s2, size_t score_cutoff) ++void validate_distance(size_t reference_dist, const std::vector<uint8_t>& s1, ++ const std::vector<uint8_t>& s2, size_t score_cutoff) + { + if (reference_dist > score_cutoff) reference_dist = score_cutoff + 1; + +@@ -26,7 +26,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) + { +- std::basic_string<uint8_t> s1, s2; ++ std::vector<uint8_t> s1, s2; + if (!extract_strings(data, size, s1, s2)) return 0; + + size_t reference_dist = rapidfuzz_reference::osa_distance(s1, s2); +@@ -40,8 +40,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d + + /* test long sequences */ + for (unsigned int i = 2; i < 9; ++i) { +- std::basic_string<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i)); +- std::basic_string<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i)); ++ std::vector<uint8_t> s1_ = str_multiply(s1, pow<size_t>(2, i)); ++ std::vector<uint8_t> s2_ = str_multiply(s2, pow<size_t>(2, i)); + + if (s1_.size() > 10000 || s2_.size() > 10000) break; + diff --git a/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzzing.hpp b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzzing.hpp new file mode 100644 index 000000000000..1f22c173aca1 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-fuzzing_fuzzing.hpp @@ -0,0 +1,45 @@ +--- fuzzing/fuzzing.hpp.orig 2024-07-02 14:50:14 UTC ++++ fuzzing/fuzzing.hpp +@@ -3,8 +3,8 @@ + #include <rapidfuzz/distance/Levenshtein.hpp> + #include <string> + +-static inline bool extract_strings(const uint8_t* data, size_t size, std::basic_string<uint8_t>& s1, +- std::basic_string<uint8_t>& s2) ++static inline bool extract_strings(const uint8_t* data, size_t size, std::vector<uint8_t>& s1, ++ std::vector<uint8_t>& s2) + { + if (size <= sizeof(uint32_t)) { + return false; +@@ -17,8 +17,8 @@ static inline bool extract_strings(const uint8_t* data + + data += sizeof(len1); + size -= sizeof(len1); +- s1 = 
std::basic_string<uint8_t>(data, len1); +- s2 = std::basic_string<uint8_t>(data + len1, size - len1); ++ s1 = std::vector<uint8_t>(data, len1); ++ s2 = std::vector<uint8_t>(data + len1, size - len1); + return true; + } + +@@ -36,9 +36,9 @@ template <typename T> + } + + template <typename T> +-std::basic_string<T> str_multiply(std::basic_string<T> a, size_t b) ++std::vector<T> str_multiply(std::vector<T> a, size_t b) + { +- std::basic_string<T> output; ++ std::vector<T> output; + while (b--) + output += a; + +@@ -46,7 +46,7 @@ template <typename T> + } + + template <typename T> +-void print_seq(const std::string& name, const std::basic_string<T>& seq) ++void print_seq(const std::string& name, const std::vector<T>& seq) + { + std::cout << name << " len: " << seq.size() << " content: "; + for (const auto& ch : seq) diff --git a/devel/rapidfuzz-cpp/files/patch-rapidfuzz_distance.hpp b/devel/rapidfuzz-cpp/files/patch-rapidfuzz_distance.hpp new file mode 100644 index 000000000000..691c972a8753 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-rapidfuzz_distance.hpp @@ -0,0 +1,53 @@ +--- rapidfuzz/distance.hpp.orig 2024-07-02 14:50:14 UTC ++++ rapidfuzz/distance.hpp +@@ -16,13 +16,13 @@ template <typename CharT, typename InputIt1, typename + namespace rapidfuzz { + + template <typename CharT, typename InputIt1, typename InputIt2> +-std::basic_string<CharT> editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, +- InputIt2 last2) ++std::vector<CharT> editops_apply(const Editops& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, ++ InputIt2 last2) + { + auto len1 = static_cast<size_t>(std::distance(first1, last1)); + auto len2 = static_cast<size_t>(std::distance(first2, last2)); + +- std::basic_string<CharT> res_str; ++ std::vector<CharT> res_str; + res_str.resize(len1 + len2); + size_t src_pos = 0; + size_t dest_pos = 0; +@@ -62,20 +62,20 @@ template <typename CharT, typename Sentence1, typename + } + + template <typename CharT, typename 
Sentence1, typename Sentence2> +-std::basic_string<CharT> editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2) ++std::vector<CharT> editops_apply(const Editops& ops, const Sentence1& s1, const Sentence2& s2) + { + return editops_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), + detail::to_end(s2)); + } + + template <typename CharT, typename InputIt1, typename InputIt2> +-std::basic_string<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, +- InputIt2 last2) ++std::vector<CharT> opcodes_apply(const Opcodes& ops, InputIt1 first1, InputIt1 last1, InputIt2 first2, ++ InputIt2 last2) + { + auto len1 = static_cast<size_t>(std::distance(first1, last1)); + auto len2 = static_cast<size_t>(std::distance(first2, last2)); + +- std::basic_string<CharT> res_str; ++ std::vector<CharT> res_str; + res_str.resize(len1 + len2); + size_t dest_pos = 0; + +@@ -101,7 +101,7 @@ template <typename CharT, typename Sentence1, typename + } + + template <typename CharT, typename Sentence1, typename Sentence2> +-std::basic_string<CharT> opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) ++std::vector<CharT> opcodes_apply(const Opcodes& ops, const Sentence1& s1, const Sentence2& s2) + { + return opcodes_apply<CharT>(ops, detail::to_begin(s1), detail::to_end(s1), detail::to_begin(s2), + detail::to_end(s2)); diff --git a/devel/rapidfuzz-cpp/files/patch-test_common.hpp b/devel/rapidfuzz-cpp/files/patch-test_common.hpp new file mode 100644 index 000000000000..796389a0213d --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_common.hpp @@ -0,0 +1,23 @@ +--- test/common.hpp.orig 2024-07-02 14:50:14 UTC ++++ test/common.hpp +@@ -59,9 +59,18 @@ template <typename T> + }; + + template <typename T> +-std::basic_string<T> str_multiply(std::basic_string<T> a, size_t b) ++std::vector<T> str_multiply(std::vector<T> a, size_t b) + { +- std::basic_string<T> output; ++ std::vector<T> 
output; ++ while (b--) ++ output.insert(output.end(), a.begin(), a.end()); ++ ++ return output; ++} ++ ++std::string str_multiply(std::string a, size_t b) ++{ ++ std::string output; + while (b--) + output += a; + diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_examples_ocr.cpp b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_ocr.cpp new file mode 100644 index 000000000000..ef9f4ff5a6dd --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_ocr.cpp @@ -0,0 +1,19 @@ +--- test/distance/examples/ocr.cpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/examples/ocr.cpp +@@ -1,6 +1,6 @@ + #include "ocr.hpp" + +-std::basic_string<uint8_t> ocr_example1 = { ++std::vector<uint8_t> ocr_example1 = { + 22, 18, 27, 22, 8, 23, 23, 18, 29, 27, 8, 23, 28, 18, 29, 27, 8, 24, 18, 27, 31, + 8, 24, 18, 29, 22, 8, 24, 24, 18, 31, 24, 8, 23, 24, 18, 25, 25, 8, 24, 26, 18, + 30, 24, 8, 23, 26, 18, 25, 30, 8, 29, 11, 2, 22, 18, 27, 22, 8, 23, 23, 18, 29, +@@ -5075,7 +5075,7 @@ std::basic_string<uint8_t> ocr_example1 = { + 27, 8, 29, 7, 8, 39, 61, 80, 8, 27, 28, 22, 21, 8, 65, 79, 68, 61, 72, 81, 65, + 74, 2}; + +-std::basic_string<uint8_t> ocr_example2 = { ++std::vector<uint8_t> ocr_example2 = { + 22, 18, 27, 22, 8, 23, 23, 18, 29, 27, 8, 23, 28, 18, 29, 27, 8, 24, 18, 27, 31, + 8, 24, 18, 29, 22, 8, 24, 24, 18, 31, 24, 8, 23, 24, 18, 25, 25, 8, 24, 26, 18, + 30, 24, 8, 23, 26, 18, 25, 30, 11, 2, 22, 18, 27, 22, 8, 23, 23, 18, 29, 27, 8, diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_examples_ocr.hpp b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_ocr.hpp new file mode 100644 index 000000000000..4d17479472e0 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_ocr.hpp @@ -0,0 +1,12 @@ +--- test/distance/examples/ocr.hpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/examples/ocr.hpp +@@ -1,6 +1,6 @@ + #pragma once + #include <cstdint> +-#include <string> ++#include <vector> + +-extern 
std::basic_string<uint8_t> ocr_example1; +-extern std::basic_string<uint8_t> ocr_example2; ++extern std::vector<uint8_t> ocr_example1; ++extern std::vector<uint8_t> ocr_example2; diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_examples_pythonLevenshteinIssue9.cpp b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_pythonLevenshteinIssue9.cpp new file mode 100644 index 000000000000..8a84f4847734 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_pythonLevenshteinIssue9.cpp @@ -0,0 +1,20 @@ +--- test/distance/examples/pythonLevenshteinIssue9.cpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/examples/pythonLevenshteinIssue9.cpp +@@ -2,7 +2,7 @@ namespace pythonLevenshteinIssue9 { + + namespace pythonLevenshteinIssue9 { + +-std::basic_string<uint8_t> example1 = { ++std::vector<uint8_t> example1 = { + 8, 14, 4, 2, 3, 7, 15, 6, 4, 5, 8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 2, 10, 11, 12, 13, 8, + 2, 8, 14, 4, 2, 3, 7, 15, 6, 4, 5, 8, 6, 7, 16, 7, 13, 17, 2, 4, 16, 14, 7, 14, 18, 19, + 8, 20, 14, 4, 21, 13, 20, 22, 8, 2, 3, 4, 5, 6, 20, 8, 9, 10, 2, 10, 11, 12, 13, 8, 18, 14, +@@ -206,7 +206,7 @@ std::basic_string<uint8_t> example1 = { + 9, 8, 6, 7, 3, 7, 23, 4, 41, 7, 51, 8, 48, 69, 43, 6, 4, 9, 11, 20, 2, 13, 32, 5, 8, 18, + 16}; + +-std::basic_string<uint8_t> example2 = { ++std::vector<uint8_t> example2 = { + 3, 4, 5, 6, 7, 8, 9, 10, 2, 10, 11, 12, 13, 8, 2, 8, 41, 7, 9, 7, 13, 3, 18, 10, 5, 2, + 4, 16, 14, 7, 14, 18, 19, 8, 20, 14, 4, 21, 13, 20, 22, 8, 2, 3, 4, 5, 6, 20, 8, 9, 10, 2, + 10, 11, 12, 13, 8, 18, 14, 10, 7, 23, 17, 13, 4, 8, 11, 4, 14, 8, 15, 7, 12, 8, 14, 18, 16, 7, diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_examples_pythonLevenshteinIssue9.hpp b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_pythonLevenshteinIssue9.hpp new file mode 100644 index 000000000000..5c6c7a97e615 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_examples_pythonLevenshteinIssue9.hpp @@ -0,0 +1,14 @@ +--- 
test/distance/examples/pythonLevenshteinIssue9.hpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/examples/pythonLevenshteinIssue9.hpp +@@ -1,8 +1,8 @@ + #pragma once + #include <cstdint> +-#include <string> ++#include <vector> + + namespace pythonLevenshteinIssue9 { +-extern std::basic_string<uint8_t> example1; +-extern std::basic_string<uint8_t> example2; ++extern std::vector<uint8_t> example1; ++extern std::vector<uint8_t> example2; + } // namespace pythonLevenshteinIssue9 diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Hamming.cpp b/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Hamming.cpp new file mode 100644 index 000000000000..fc4ebde2d4ff --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Hamming.cpp @@ -0,0 +1,18 @@ +--- test/distance/tests-Hamming.cpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/tests-Hamming.cpp +@@ -110,13 +110,13 @@ TEST_CASE("Hamming_editops") + + { + rapidfuzz::Editops ops = rapidfuzz::hamming_editops(s, d); +- REQUIRE(d == rapidfuzz::editops_apply<char>(ops, s, d)); ++ REQUIRE(std::vector<char>{d.begin(), d.end()} == rapidfuzz::editops_apply<char>(ops, s, d)); + REQUIRE(ops.get_src_len() == s.size()); + REQUIRE(ops.get_dest_len() == d.size()); + } + { + rapidfuzz::Editops ops = rapidfuzz::hamming_editops(d, s); +- REQUIRE(s == rapidfuzz::editops_apply<char>(ops, d, s)); ++ REQUIRE(std::vector<char>{s.begin(), s.end()} == rapidfuzz::editops_apply<char>(ops, d, s)); + REQUIRE(ops.get_src_len() == d.size()); + REQUIRE(ops.get_dest_len() == s.size()); + } diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Indel.cpp b/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Indel.cpp new file mode 100644 index 000000000000..7be40d59f043 --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Indel.cpp @@ -0,0 +1,11 @@ +--- test/distance/tests-Indel.cpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/tests-Indel.cpp +@@ -277,7 +277,7 @@ TEST_CASE("Indel") + 
REQUIRE(indel_distance(s1, s2) == 231); + + rapidfuzz::Editops ops = rapidfuzz::indel_editops(s1, s2); +- REQUIRE(s2 == rapidfuzz::editops_apply<char>(ops, s1, s2)); ++ REQUIRE(std::vector<char>{s2.begin(), s2.end()} == rapidfuzz::editops_apply<char>(ops, s1, s2)); + } + } + } diff --git a/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Levenshtein.cpp b/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Levenshtein.cpp new file mode 100644 index 000000000000..513a15ffd91f --- /dev/null +++ b/devel/rapidfuzz-cpp/files/patch-test_distance_tests-Levenshtein.cpp @@ -0,0 +1,88 @@ +--- test/distance/tests-Levenshtein.cpp.orig 2024-07-02 14:50:14 UTC ++++ test/distance/tests-Levenshtein.cpp +@@ -63,9 +63,9 @@ template <typename T> + } + + template <typename T> +-std::basic_string<T> get_subsequence(const std::basic_string<T>& s, ptrdiff_t pos, ptrdiff_t len) ++std::vector<T> get_subsequence(const std::vector<T>& s, ptrdiff_t pos, ptrdiff_t len) + { +- return std::basic_string<T>(std::begin(s) + pos, std::begin(s) + pos + len); ++ return std::vector<T>(std::begin(s) + pos, std::begin(s) + pos + len); + } + + template <typename Sentence1, typename Sentence2> +@@ -233,7 +233,7 @@ TEST_CASE("Levenshtein_editops") + std::string d = "XYZLorem ABC iPsum"; + + rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s, d); +- REQUIRE(d == rapidfuzz::editops_apply<char>(ops, s, d)); ++ REQUIRE(std::vector<char>{d.begin(), d.end()} == rapidfuzz::editops_apply<char>(ops, s, d)); + REQUIRE(ops.get_src_len() == s.size()); + REQUIRE(ops.get_dest_len() == d.size()); + } +@@ -293,21 +293,21 @@ TEST_CASE("Levenshtein_editops[fuzzing_regressions]") + std::string s1 = "b"; + std::string s2 = "aaaaaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2); +- REQUIRE(s2 == rapidfuzz::editops_apply<char>(ops, s1, s2)); ++ REQUIRE(std::vector<char>{s2.begin(), s2.end()} == rapidfuzz::editops_apply<char>(ops, s1, s2)); 
+ } + + { + std::string s1 = "aa"; + std::string s2 = "abb"; + rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2); +- REQUIRE(s2 == rapidfuzz::editops_apply<char>(ops, s1, s2)); ++ REQUIRE(std::vector<char>{s2.begin(), s2.end()} == rapidfuzz::editops_apply<char>(ops, s1, s2)); + } + + { + std::string s1 = str_multiply(std::string("abb"), 8 * 64); + std::string s2 = str_multiply(std::string("ccccca"), 8 * 64); + rapidfuzz::Editops ops = rapidfuzz::levenshtein_editops(s1, s2); +- REQUIRE(s2 == rapidfuzz::editops_apply<char>(ops, s1, s2)); ++ REQUIRE(std::vector<char>{s2.begin(), s2.end()} == rapidfuzz::editops_apply<char>(ops, s1, s2)); + } + } + +@@ -352,7 +352,7 @@ TEST_CASE("Levenshtein small band") + rapidfuzz::Editops ops1; + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), + rapidfuzz::detail::Range(s2)); +- REQUIRE(s2 == rapidfuzz::editops_apply<char>(ops1, s1, s2)); ++ REQUIRE(std::vector<char>{s2.begin(), s2.end()} == rapidfuzz::editops_apply<char>(ops1, s1, s2)); + rapidfuzz::Editops ops2; + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), + ops1.size()); +@@ -400,7 +400,7 @@ TEST_CASE("Levenshtein small band") + rapidfuzz::Editops ops1; + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1), + rapidfuzz::detail::Range(s2)); +- REQUIRE(s2 == rapidfuzz::editops_apply<char>(ops1, s1, s2)); ++ REQUIRE(std::vector<char>{s2.begin(), s2.end()} == rapidfuzz::editops_apply<char>(ops1, s1, s2)); + rapidfuzz::Editops ops2; + rapidfuzz::detail::levenshtein_align(ops2, rapidfuzz::detail::Range(s1), rapidfuzz::detail::Range(s2), + ops1.size()); +@@ -416,8 +416,8 @@ TEST_CASE("Levenshtein large band (python-Levenshtein + REQUIRE(example2.size() == 5569); + + { +- std::basic_string<uint8_t> s1 = get_subsequence(example1, 3718, 1509); +- std::basic_string<uint8_t> s2 = get_subsequence(example2, 2784, 2785); ++ std::vector<uint8_t> s1 = get_subsequence(example1, 
3718, 1509); ++ std::vector<uint8_t> s2 = get_subsequence(example2, 2784, 2785); + + REQUIRE(rapidfuzz::levenshtein_distance(s1, s2) == 1587); + +@@ -440,8 +440,8 @@ TEST_CASE("Levenshtein large band (ocr example)") + REQUIRE(ocr_example2.size() == 107244); + + { +- std::basic_string<uint8_t> s1 = get_subsequence(ocr_example1, 51, 6541); +- std::basic_string<uint8_t> s2 = get_subsequence(ocr_example2, 51, 6516); ++ std::vector<uint8_t> s1 = get_subsequence(ocr_example1, 51, 6541); ++ std::vector<uint8_t> s2 = get_subsequence(ocr_example2, 51, 6516); + + rapidfuzz::Editops ops1; + rapidfuzz::detail::levenshtein_align(ops1, rapidfuzz::detail::Range(s1),
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202409020751.4827pJ1f004787>