summary refs log tree commit diff
path: root/sci-ml/tokenizers/tokenizers-0.21.1.ebuild
diff options
context:
space:
mode:
Diffstat (limited to 'sci-ml/tokenizers/tokenizers-0.21.1.ebuild')
-rw-r--r-- sci-ml/tokenizers/tokenizers-0.21.1.ebuild 406
1 files changed, 406 insertions, 0 deletions
diff --git a/sci-ml/tokenizers/tokenizers-0.21.1.ebuild b/sci-ml/tokenizers/tokenizers-0.21.1.ebuild
new file mode 100644
index 000000000000..3861fbf55811
--- /dev/null
+++ b/sci-ml/tokenizers/tokenizers-0.21.1.ebuild
@@ -0,0 +1,406 @@
+# Copyright 2023-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+# Autogenerated by pycargoebuild 0.13.3
+
+# Package manager API level this ebuild is written against.
+EAPI=8
+
+# distutils-r1 settings; they are assigned ahead of the inherit line
+# further down in this ebuild.
+# Build through the maturin PEP 517 backend.
+DISTUTILS_USE_PEP517=maturin
+# Python implementations this package may be built for (3.10 through 3.13).
+PYTHON_COMPAT=( python3_{10..13} )
+# Declares that the package builds a compiled extension module.
+DISTUTILS_EXT=1
+# Build and install for exactly one selected Python implementation.
+DISTUTILS_SINGLE_IMPL=1
+
+CRATES="
+ addr2line@0.24.2
+ adler2@2.0.0
+ aho-corasick@1.1.3
+ anes@0.1.4
+ anstream@0.6.18
+ anstyle@1.0.10
+ anstyle-parse@0.2.6
+ anstyle-query@1.1.2
+ anstyle-wincon@3.0.6
+ assert_approx_eq@1.1.0
+ autocfg@1.4.0
+ backtrace@0.3.74
+ base64@0.13.1
+ base64@0.21.7
+ base64@0.22.1
+ bitflags@1.3.2
+ bitflags@2.4.0
+ bitflags@2.6.0
+ bit-set@0.8.0
+ bit-vec@0.8.0
+ bumpalo@3.16.0
+ byteorder@1.5.0
+ bytes@1.10.1
+ cast@0.3.0
+ cc@1.2.6
+ cc@1.2.8
+ cfg_aliases@0.2.1
+ cfg-if@1.0.0
+ ciborium@0.2.2
+ ciborium-io@0.2.2
+ ciborium-ll@0.2.2
+ clap@4.5.35
+ clap_builder@4.5.35
+ clap_lex@0.7.4
+ colorchoice@1.0.3
+ console@0.15.10
+ crc32fast@1.4.2
+ criterion@0.5.1
+ criterion-plot@0.5.0
+ crossbeam-deque@0.8.6
+ crossbeam-epoch@0.9.18
+ crossbeam-utils@0.8.21
+ crunchy@0.2.3
+ darling@0.20.10
+ darling_core@0.20.10
+ darling_macro@0.20.10
+ derive_builder@0.20.2
+ derive_builder_core@0.20.2
+ derive_builder_macro@0.20.2
+ dirs@5.0.1
+ dirs-sys@0.4.1
+ displaydoc@0.2.5
+ either@1.13.0
+ encode_unicode@1.0.0
+ env_filter@0.1.3
+ env_logger@0.11.6
+ errno@0.3.10
+ esaxx-rs@0.1.10
+ fancy-regex@0.14.0
+ fastrand@2.3.0
+ flate2@1.1.1
+ fnv@1.0.7
+ form_urlencoded@1.2.1
+ futures-channel@0.3.31
+ futures-core@0.3.31
+ futures-io@0.3.31
+ futures-macro@0.3.31
+ futures-sink@0.3.31
+ futures-task@0.3.31
+ futures-util@0.3.31
+ getrandom@0.2.15
+ getrandom@0.3.0
+ gimli@0.31.1
+ half@2.5.0
+ heck@0.5.0
+ hermit-abi@0.5.0
+ hf-hub@0.4.2
+ http@1.3.1
+ http-body@1.0.1
+ http-body-util@0.1.3
+ httparse@1.10.1
+ humantime@2.1.0
+ hyper@1.6.0
+ hyper-rustls@0.27.5
+ hyper-util@0.1.11
+ icu_collections@1.5.0
+ icu_locid@1.5.0
+ icu_locid_transform@1.5.0
+ icu_locid_transform_data@1.5.1
+ icu_normalizer@1.5.0
+ icu_normalizer_data@1.5.1
+ icu_properties@1.5.1
+ icu_properties_data@1.5.1
+ icu_provider@1.5.0
+ icu_provider_macros@1.5.0
+ ident_case@1.0.1
+ idna@1.0.3
+ idna_adapter@1.2.0
+ indicatif@0.17.9
+ indoc@2.0.5
+ ipnet@2.11.0
+ is-terminal@0.4.16
+ is_terminal_polyfill@1.70.1
+ itertools@0.10.5
+ itertools@0.11.0
+ itertools@0.12.1
+ itertools@0.13.0
+ itoa@1.0.14
+ js-sys@0.3.76
+ js-sys@0.3.77
+ lazy_static@1.5.0
+ libc@0.2.169
+ libc@0.2.171
+ libredox@0.1.3
+ linux-raw-sys@0.4.14
+ litemap@0.7.5
+ log@0.4.22
+ macro_rules_attribute@0.2.0
+ macro_rules_attribute-proc_macro@0.2.0
+ matrixmultiply@0.3.9
+ memchr@2.7.4
+ memoffset@0.9.1
+ mime@0.3.17
+ minimal-lexical@0.2.1
+ miniz_oxide@0.8.7
+ mio@1.0.3
+ monostate@0.1.13
+ monostate-impl@0.1.13
+ ndarray@0.16.1
+ nom@7.1.3
+ nu-ansi-term@0.46.0
+ number_prefix@0.4.0
+ num-complex@0.4.6
+ num-integer@0.1.46
+ numpy@0.23.0
+ num-traits@0.2.19
+ object@0.36.7
+ once_cell@1.20.2
+ onig@6.4.0
+ onig_sys@69.8.1
+ oorandom@11.1.5
+ option-ext@0.2.0
+ overload@0.1.1
+ paste@1.0.15
+ percent-encoding@2.3.1
+ pin-project-lite@0.2.16
+ pin-utils@0.1.0
+ pkg-config@0.3.31
+ plotters@0.3.7
+ plotters-backend@0.3.7
+ plotters-svg@0.3.7
+ portable-atomic@1.10.0
+ portable-atomic-util@0.2.4
+ ppv-lite86@0.2.20
+ proc-macro2@1.0.92
+ pyo3@0.23.5
+ pyo3-build-config@0.23.5
+ pyo3-ffi@0.23.5
+ pyo3-macros@0.23.5
+ pyo3-macros-backend@0.23.5
+ quinn@0.11.7
+ quinn-proto@0.11.10
+ quinn-udp@0.5.11
+ quote@1.0.38
+ rand@0.8.5
+ rand@0.9.0
+ rand_chacha@0.3.1
+ rand_chacha@0.9.0
+ rand_core@0.6.4
+ rand_core@0.9.0
+ rawpointer@0.2.1
+ rayon@1.10.0
+ rayon-cond@0.3.0
+ rayon-core@1.12.1
+ redox_users@0.4.6
+ regex@1.11.1
+ regex-automata@0.4.9
+ regex-syntax@0.8.5
+ reqwest@0.12.15
+ ring@0.17.14
+ rustc-demangle@0.1.24
+ rustc-hash@2.1.0
+ rustix@0.38.42
+ rustls@0.21.12
+ rustls@0.23.25
+ rustls-pemfile@2.2.0
+ rustls-pki-types@1.11.0
+ rustls-webpki@0.101.7
+ rustls-webpki@0.103.1
+ rustversion@1.0.20
+ ryu@1.0.18
+ same-file@1.0.6
+ sct@0.7.1
+ serde@1.0.217
+ serde_derive@1.0.217
+ serde_json@1.0.134
+ serde_urlencoded@0.7.1
+ sharded-slab@0.1.7
+ shlex@1.3.0
+ slab@0.4.9
+ smallvec@1.13.2
+ socket2@0.5.9
+ socks@0.3.4
+ spm_precompiled@0.1.4
+ stable_deref_trait@1.2.0
+ strsim@0.11.1
+ subtle@2.6.1
+ syn@2.0.93
+ synstructure@0.13.1
+ sync_wrapper@1.0.2
+ target-lexicon@0.12.16
+ tempfile@3.14.0
+ thiserror@1.0.69
+ thiserror@2.0.9
+ thiserror-impl@1.0.69
+ thiserror-impl@2.0.9
+ thread_local@1.1.8
+ tinytemplate@1.2.1
+ tinyvec@1.9.0
+ tinyvec_macros@0.1.1
+ tokio@1.44.1
+ tokio-rustls@0.26.2
+ tokio-util@0.7.14
+ tower@0.5.2
+ tower-layer@0.3.3
+ tower-service@0.3.3
+ tracing@0.1.41
+ tracing-attributes@0.1.28
+ tracing-core@0.1.33
+ tracing-log@0.2.0
+ tracing-subscriber@0.3.19
+ try-lock@0.2.5
+ tinystr@0.7.5
+ unicode_categories@0.1.1
+ unicode-ident@1.0.14
+ unicode-normalization-alignments@0.1.12
+ unicode-segmentation@1.12.0
+ unicode-width@0.2.0
+ unindent@0.2.3
+ untrusted@0.9.0
+ ureq@2.8.0
+ url@2.5.4
+ utf16_iter@1.0.5
+ utf8_iter@1.0.4
+ utf8parse@0.2.2
+ valuable@0.1.1
+ walkdir@2.5.0
+ want@0.3.1
+ wasi@0.11.0+wasi-snapshot-preview1
+ wasi@0.13.3+wasi-0.2.2
+ wasm-bindgen@0.2.99
+ wasm-bindgen@0.2.100
+ wasm-bindgen-backend@0.2.99
+ wasm-bindgen-backend@0.2.100
+ wasm-bindgen-futures@0.4.50
+ wasm-bindgen-macro@0.2.99
+ wasm-bindgen-macro@0.2.100
+ wasm-bindgen-macro-support@0.2.99
+ wasm-bindgen-macro-support@0.2.100
+ wasm-bindgen-shared@0.2.99
+ wasm-bindgen-shared@0.2.100
+ wasm-streams@0.4.2
+ webpki-roots@0.25.4
+ webpki-roots@0.26.8
+ web-sys@0.3.77
+ web-time@1.1.0
+ winapi@0.3.9
+ winapi-i686-pc-windows-gnu@0.4.0
+ winapi-util@0.1.9
+ winapi-x86_64-pc-windows-gnu@0.4.0
+ windows_aarch64_gnullvm@0.48.0
+ windows_aarch64_gnullvm@0.52.6
+ windows_aarch64_gnullvm@0.53.0
+ windows_aarch64_msvc@0.48.0
+ windows_aarch64_msvc@0.52.6
+ windows_aarch64_msvc@0.53.0
+ windows_i686_gnu@0.48.0
+ windows_i686_gnu@0.52.6
+ windows_i686_gnu@0.53.0
+ windows_i686_gnullvm@0.52.6
+ windows_i686_gnullvm@0.53.0
+ windows_i686_msvc@0.48.0
+ windows_i686_msvc@0.52.6
+ windows_i686_msvc@0.53.0
+ windows-link@0.1.1
+ windows-registry@0.4.0
+ windows-result@0.3.2
+ windows-strings@0.3.0
+ windows-sys@0.48.0
+ windows-sys@0.52.0
+ windows-sys@0.59.0
+ windows-targets@0.48.0
+ windows-targets@0.52.6
+ windows-targets@0.53.0
+ windows_x86_64_gnu@0.48.0
+ windows_x86_64_gnu@0.52.6
+ windows_x86_64_gnu@0.53.0
+ windows_x86_64_gnullvm@0.48.0
+ windows_x86_64_gnullvm@0.52.6
+ windows_x86_64_gnullvm@0.53.0
+ windows_x86_64_msvc@0.48.0
+ windows_x86_64_msvc@0.52.6
+ windows_x86_64_msvc@0.53.0
+ wit-bindgen-rt@0.33.0
+ write16@1.0.0
+ writeable@0.5.5
+ yoke@0.7.5
+ yoke-derive@0.7.5
+ zerocopy@0.7.35
+ zerocopy@0.8.24
+ zerocopy-derive@0.7.35
+ zerocopy-derive@0.8.24
+ zerofrom@0.1.6
+ zerofrom-derive@0.1.6
+ zeroize@1.8.1
+ zerovec@0.10.2
+ zerovec-derive@0.10.2
+"
+
+inherit cargo distutils-r1
+
+DESCRIPTION="Implementation of today's most used tokenizers"
+HOMEPAGE="https://github.com/huggingface/tokenizers"
+SRC_URI="
+ https://github.com/huggingface/${PN}/archive/refs/tags/v${PV}.tar.gz
+ -> ${P}.gh.tar.gz
+ ${CARGO_CRATE_URIS}
+"
+
+LICENSE="Apache-2.0"
+# Dependent crate licenses
+LICENSE+="
+ Apache-2.0 Apache-2.0-with-LLVM-exceptions BSD-2 BSD ISC MIT MPL-2.0
+ Unicode-DFS-2016
+"
+SLOT="0"
+KEYWORDS="~amd64"
+
+BDEPEND="
+ test? ( sci-ml/datasets[${PYTHON_SINGLE_USEDEP}] )
+ $(python_gen_cond_dep '
+ dev-python/setuptools-rust[${PYTHON_USEDEP}]
+ ')
+"
+
+distutils_enable_tests pytest
+
+QA_FLAGS_IGNORED=".*/site-packages/tokenizers/.*so"
+
+# Unpack phase: delegate entirely to the cargo eclass implementation.
+src_unpack() {
+ cargo_src_unpack
+}
+
+# Setup phase: run the Python single-implementation setup, then the Rust
+# setup.
+# NOTE(review): presumably spelled out because more than one inherited
+# eclass provides a pkg_setup and both must run -- confirm.
+pkg_setup() {
+ python-single-r1_pkg_setup
+ rust_pkg_setup
+}
+
+# Prepare phase: run the default preparation at the source root, then
+# patch and prepare the Python bindings from bindings/python.
+src_prepare() {
+ default
+ # Abort if the bindings directory is missing instead of applying the
+ # patch and running the distutils preparation in the wrong tree.
+ cd bindings/python || die
+ eapply "${FILESDIR}"/${PN}-0.15.2-test.patch
+ distutils-r1_src_prepare
+}
+
+# Configure phase: configure the Rust crate in tokenizers/ first, then the
+# Python bindings in bindings/python.
+src_configure() {
+ # A failed cd must stop the build; otherwise the following step would
+ # silently run in whatever directory happens to be current.
+ cd tokenizers || die
+ cargo_src_configure
+ cd ../bindings/python || die
+ distutils-r1_src_configure
+}
+
+# Compile phase: build the Rust crate in tokenizers/ first, then the Python
+# bindings in bindings/python.
+src_compile() {
+ # A failed cd must stop the build; otherwise the following step would
+ # silently run in whatever directory happens to be current.
+ cd tokenizers || die
+ cargo_src_compile
+ cd ../bindings/python || die
+ distutils-r1_src_compile
+}
+
+# Test phase: only the Python bindings' test suite is run.
+src_test() {
+ # The Rust crate's own tests do not work, so cargo_src_test (which
+ # would be run from the tokenizers/ directory) is intentionally not
+ # called here.
+ cd bindings/python || die
+ # Keep the benches/ directory out of the pytest run. A plain local is
+ # used because bash cannot export array variables, so -x had no effect.
+ local EPYTEST_IGNORE=( benches/ )
+ distutils-r1_src_test
+}
+
+# Install phase: install the Python bindings from bindings/python.
+src_install() {
+ # Go straight to the bindings directory (the previous detour through
+ # tokenizers/ did nothing) and abort if it is missing.
+ cd bindings/python || die
+ distutils-r1_src_install
+}