python311Packages.tokenizers: 0.15.0 -> 0.19.1

Diff: https://github.com/huggingface/tokenizers/compare/v0.15.0...v0.19.1
This commit is contained in:
Gaetan Lepage 2024-04-20 00:07:38 +02:00
parent 92d295f588
commit 5e766a1a10
2 changed files with 243 additions and 343 deletions

File diff suppressed because it is too large. (Load diff)

View file

@@ -63,16 +63,16 @@ let
in
buildPythonPackage rec {
pname = "tokenizers";
version = "0.15.0";
format = "pyproject";
version = "0.19.1";
pyproject = true;
disabled = pythonOlder "3.7";
src = fetchFromGitHub {
owner = "huggingface";
repo = pname;
rev = "v${version}";
hash = "sha256-+yfX12eKtgZV1OQvPOlMVTONbpFuigHcl4SjoCIZkSk=";
repo = "tokenizers";
rev = "refs/tags/v${version}";
hash = "sha256-sKEAt46cdme821tzz9WSKnQb3hPmFJ4zvHgBNRxjEuk=";
};
cargoDeps = rustPlatform.importCargoLock {
@@ -97,7 +97,13 @@ buildPythonPackage rec {
Security
];
propagatedBuildInputs = [
# Cargo.lock is outdated
# TODO: remove at next release
preConfigure = ''
cargo update --offline
'';
dependencies = [
numpy
];
@@ -123,6 +129,8 @@ buildPythonPackage rec {
disabledTests = [
# Downloads data using the datasets module
"test_encode_special_tokens"
"test_splitting"
"TestTrainFromIterators"
# Those tests require more data
"test_from_pretrained"