diff options
Diffstat (limited to 'sci-ml/tokenizers/tokenizers-0.21.1.ebuild')
-rw-r--r-- | sci-ml/tokenizers/tokenizers-0.21.1.ebuild | 406 |
1 files changed, 406 insertions, 0 deletions
# Copyright 2023-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

# Autogenerated by pycargoebuild 0.13.3

EAPI=8

DISTUTILS_USE_PEP517=maturin
PYTHON_COMPAT=( python3_{10..13} )
DISTUTILS_EXT=1
DISTUTILS_SINGLE_IMPL=1

CRATES="
	addr2line@0.24.2
	adler2@2.0.0
	aho-corasick@1.1.3
	anes@0.1.4
	anstream@0.6.18
	anstyle@1.0.10
	anstyle-parse@0.2.6
	anstyle-query@1.1.2
	anstyle-wincon@3.0.6
	assert_approx_eq@1.1.0
	autocfg@1.4.0
	backtrace@0.3.74
	base64@0.13.1
	base64@0.21.7
	base64@0.22.1
	bitflags@1.3.2
	bitflags@2.4.0
	bitflags@2.6.0
	bit-set@0.8.0
	bit-vec@0.8.0
	bumpalo@3.16.0
	byteorder@1.5.0
	bytes@1.10.1
	cast@0.3.0
	cc@1.2.6
	cc@1.2.8
	cfg_aliases@0.2.1
	cfg-if@1.0.0
	ciborium@0.2.2
	ciborium-io@0.2.2
	ciborium-ll@0.2.2
	clap@4.5.35
	clap_builder@4.5.35
	clap_lex@0.7.4
	colorchoice@1.0.3
	console@0.15.10
	crc32fast@1.4.2
	criterion@0.5.1
	criterion-plot@0.5.0
	crossbeam-deque@0.8.6
	crossbeam-epoch@0.9.18
	crossbeam-utils@0.8.21
	crunchy@0.2.3
	darling@0.20.10
	darling_core@0.20.10
	darling_macro@0.20.10
	derive_builder@0.20.2
	derive_builder_core@0.20.2
	derive_builder_macro@0.20.2
	dirs@5.0.1
	dirs-sys@0.4.1
	displaydoc@0.2.5
	either@1.13.0
	encode_unicode@1.0.0
	env_filter@0.1.3
	env_logger@0.11.6
	errno@0.3.10
	esaxx-rs@0.1.10
	fancy-regex@0.14.0
	fastrand@2.3.0
	flate2@1.1.1
	fnv@1.0.7
	form_urlencoded@1.2.1
	futures-channel@0.3.31
	futures-core@0.3.31
	futures-io@0.3.31
	futures-macro@0.3.31
	futures-sink@0.3.31
	futures-task@0.3.31
	futures-util@0.3.31
	getrandom@0.2.15
	getrandom@0.3.0
	gimli@0.31.1
	half@2.5.0
	heck@0.5.0
	hermit-abi@0.5.0
	hf-hub@0.4.2
	http@1.3.1
	http-body@1.0.1
	http-body-util@0.1.3
	httparse@1.10.1
	humantime@2.1.0
	hyper@1.6.0
	hyper-rustls@0.27.5
	hyper-util@0.1.11
	icu_collections@1.5.0
	icu_locid@1.5.0
	icu_locid_transform@1.5.0
	icu_locid_transform_data@1.5.1
	icu_normalizer@1.5.0
	icu_normalizer_data@1.5.1
	icu_properties@1.5.1
	icu_properties_data@1.5.1
	icu_provider@1.5.0
	icu_provider_macros@1.5.0
	ident_case@1.0.1
	idna@1.0.3
	idna_adapter@1.2.0
	indicatif@0.17.9
	indoc@2.0.5
	ipnet@2.11.0
	is-terminal@0.4.16
	is_terminal_polyfill@1.70.1
	itertools@0.10.5
	itertools@0.11.0
	itertools@0.12.1
	itertools@0.13.0
	itoa@1.0.14
	js-sys@0.3.76
	js-sys@0.3.77
	lazy_static@1.5.0
	libc@0.2.169
	libc@0.2.171
	libredox@0.1.3
	linux-raw-sys@0.4.14
	litemap@0.7.5
	log@0.4.22
	macro_rules_attribute@0.2.0
	macro_rules_attribute-proc_macro@0.2.0
	matrixmultiply@0.3.9
	memchr@2.7.4
	memoffset@0.9.1
	mime@0.3.17
	minimal-lexical@0.2.1
	miniz_oxide@0.8.7
	mio@1.0.3
	monostate@0.1.13
	monostate-impl@0.1.13
	ndarray@0.16.1
	nom@7.1.3
	nu-ansi-term@0.46.0
	number_prefix@0.4.0
	num-complex@0.4.6
	num-integer@0.1.46
	numpy@0.23.0
	num-traits@0.2.19
	object@0.36.7
	once_cell@1.20.2
	onig@6.4.0
	onig_sys@69.8.1
	oorandom@11.1.5
	option-ext@0.2.0
	overload@0.1.1
	paste@1.0.15
	percent-encoding@2.3.1
	pin-project-lite@0.2.16
	pin-utils@0.1.0
	pkg-config@0.3.31
	plotters@0.3.7
	plotters-backend@0.3.7
	plotters-svg@0.3.7
	portable-atomic@1.10.0
	portable-atomic-util@0.2.4
	ppv-lite86@0.2.20
	proc-macro2@1.0.92
	pyo3@0.23.5
	pyo3-build-config@0.23.5
	pyo3-ffi@0.23.5
	pyo3-macros@0.23.5
	pyo3-macros-backend@0.23.5
	quinn@0.11.7
	quinn-proto@0.11.10
	quinn-udp@0.5.11
	quote@1.0.38
	rand@0.8.5
	rand@0.9.0
	rand_chacha@0.3.1
	rand_chacha@0.9.0
	rand_core@0.6.4
	rand_core@0.9.0
	rawpointer@0.2.1
	rayon@1.10.0
	rayon-cond@0.3.0
	rayon-core@1.12.1
	redox_users@0.4.6
	regex@1.11.1
	regex-automata@0.4.9
	regex-syntax@0.8.5
	reqwest@0.12.15
	ring@0.17.14
	rustc-demangle@0.1.24
	rustc-hash@2.1.0
	rustix@0.38.42
	rustls@0.21.12
	rustls@0.23.25
	rustls-pemfile@2.2.0
	rustls-pki-types@1.11.0
	rustls-webpki@0.101.7
	rustls-webpki@0.103.1
	rustversion@1.0.20
	ryu@1.0.18
	same-file@1.0.6
	sct@0.7.1
	serde@1.0.217
	serde_derive@1.0.217
	serde_json@1.0.134
	serde_urlencoded@0.7.1
	sharded-slab@0.1.7
	shlex@1.3.0
	slab@0.4.9
	smallvec@1.13.2
	socket2@0.5.9
	socks@0.3.4
	spm_precompiled@0.1.4
	stable_deref_trait@1.2.0
	strsim@0.11.1
	subtle@2.6.1
	syn@2.0.93
	synstructure@0.13.1
	sync_wrapper@1.0.2
	target-lexicon@0.12.16
	tempfile@3.14.0
	thiserror@1.0.69
	thiserror@2.0.9
	thiserror-impl@1.0.69
	thiserror-impl@2.0.9
	thread_local@1.1.8
	tinytemplate@1.2.1
	tinyvec@1.9.0
	tinyvec_macros@0.1.1
	tokio@1.44.1
	tokio-rustls@0.26.2
	tokio-util@0.7.14
	tower@0.5.2
	tower-layer@0.3.3
	tower-service@0.3.3
	tracing@0.1.41
	tracing-attributes@0.1.28
	tracing-core@0.1.33
	tracing-log@0.2.0
	tracing-subscriber@0.3.19
	try-lock@0.2.5
	tinystr@0.7.5
	unicode_categories@0.1.1
	unicode-ident@1.0.14
	unicode-normalization-alignments@0.1.12
	unicode-segmentation@1.12.0
	unicode-width@0.2.0
	unindent@0.2.3
	untrusted@0.9.0
	ureq@2.8.0
	url@2.5.4
	utf16_iter@1.0.5
	utf8_iter@1.0.4
	utf8parse@0.2.2
	valuable@0.1.1
	walkdir@2.5.0
	want@0.3.1
	wasi@0.11.0+wasi-snapshot-preview1
	wasi@0.13.3+wasi-0.2.2
	wasm-bindgen@0.2.99
	wasm-bindgen@0.2.100
	wasm-bindgen-backend@0.2.99
	wasm-bindgen-backend@0.2.100
	wasm-bindgen-futures@0.4.50
	wasm-bindgen-macro@0.2.99
	wasm-bindgen-macro@0.2.100
	wasm-bindgen-macro-support@0.2.99
	wasm-bindgen-macro-support@0.2.100
	wasm-bindgen-shared@0.2.99
	wasm-bindgen-shared@0.2.100
	wasm-streams@0.4.2
	webpki-roots@0.25.4
	webpki-roots@0.26.8
	web-sys@0.3.77
	web-time@1.1.0
	winapi@0.3.9
	winapi-i686-pc-windows-gnu@0.4.0
	winapi-util@0.1.9
	winapi-x86_64-pc-windows-gnu@0.4.0
	windows_aarch64_gnullvm@0.48.0
	windows_aarch64_gnullvm@0.52.6
	windows_aarch64_gnullvm@0.53.0
	windows_aarch64_msvc@0.48.0
	windows_aarch64_msvc@0.52.6
	windows_aarch64_msvc@0.53.0
	windows_i686_gnu@0.48.0
	windows_i686_gnu@0.52.6
	windows_i686_gnu@0.53.0
	windows_i686_gnullvm@0.52.6
	windows_i686_gnullvm@0.53.0
	windows_i686_msvc@0.48.0
	windows_i686_msvc@0.52.6
	windows_i686_msvc@0.53.0
	windows-link@0.1.1
	windows-registry@0.4.0
	windows-result@0.3.2
	windows-strings@0.3.0
	windows-sys@0.48.0
	windows-sys@0.52.0
	windows-sys@0.59.0
	windows-targets@0.48.0
	windows-targets@0.52.6
	windows-targets@0.53.0
	windows_x86_64_gnu@0.48.0
	windows_x86_64_gnu@0.52.6
	windows_x86_64_gnu@0.53.0
	windows_x86_64_gnullvm@0.48.0
	windows_x86_64_gnullvm@0.52.6
	windows_x86_64_gnullvm@0.53.0
	windows_x86_64_msvc@0.48.0
	windows_x86_64_msvc@0.52.6
	windows_x86_64_msvc@0.53.0
	wit-bindgen-rt@0.33.0
	write16@1.0.0
	writeable@0.5.5
	yoke@0.7.5
	yoke-derive@0.7.5
	zerocopy@0.7.35
	zerocopy@0.8.24
	zerocopy-derive@0.7.35
	zerocopy-derive@0.8.24
	zerofrom@0.1.6
	zerofrom-derive@0.1.6
	zeroize@1.8.1
	zerovec@0.10.2
	zerovec-derive@0.10.2
"

inherit cargo distutils-r1

DESCRIPTION="Implementation of today's most used tokenizers"
HOMEPAGE="https://github.com/huggingface/tokenizers"
SRC_URI="
	https://github.com/huggingface/${PN}/archive/refs/tags/v${PV}.tar.gz
	-> ${P}.gh.tar.gz
	${CARGO_CRATE_URIS}
"

LICENSE="Apache-2.0"
# Dependent crate licenses
LICENSE+="
	Apache-2.0 Apache-2.0-with-LLVM-exceptions BSD-2 BSD ISC MIT MPL-2.0
	Unicode-DFS-2016
"
SLOT="0"
KEYWORDS="~amd64"

BDEPEND="
	test? ( sci-ml/datasets[${PYTHON_SINGLE_USEDEP}] )
	$(python_gen_cond_dep '
		dev-python/setuptools-rust[${PYTHON_USEDEP}]
	')
"

distutils_enable_tests pytest

QA_FLAGS_IGNORED=".*/site-packages/tokenizers/.*so"

# The source tree is used in two places by the phases below:
#   tokenizers/       - handed to the cargo_* phase helpers
#   bindings/python/  - handed to the distutils-r1_* phase helpers
# Every directory change is guarded with "|| die": a failed cd does not
# abort the phase on its own, and the helper that follows would then run
# in the wrong directory.

# Unpack the release tarball and the crates via the cargo eclass.
src_unpack() {
	cargo_src_unpack
}

# Run the setup hooks of both the Python (single-impl) and Rust eclasses.
pkg_setup() {
	python-single-r1_pkg_setup
	rust_pkg_setup
}

# Apply user patches at the top level, then patch and prepare the
# Python bindings from inside bindings/python.
src_prepare() {
	default
	cd bindings/python || die
	eapply "${FILESDIR}"/${PN}-0.15.2-test.patch
	distutils-r1_src_prepare
}

# Configure cargo from tokenizers/, then the Python build from
# bindings/python/.
src_configure() {
	cd tokenizers || die
	cargo_src_configure
	cd ../bindings/python || die
	distutils-r1_src_configure
}

# Build with cargo from tokenizers/, then build the Python bindings.
src_compile() {
	cd tokenizers || die
	cargo_src_compile
	cd ../bindings/python || die
	distutils-r1_src_compile
}

# Run only the Python test suite; the cargo tests are disabled.
src_test() {
	# Tests do not work
	#cd tokenizers || die
	#cargo_src_test
	cd bindings/python || die
	# Plain local array: bash cannot export arrays, so -x had no effect.
	local EPYTEST_IGNORE=( benches/ )
	distutils-r1_src_test
}

# Install the Python bindings only; nothing is installed from tokenizers/.
src_install() {
	cd bindings/python || die
	distutils-r1_src_install
}