openai-whisper: 20230314 -> 20230918

This commit is contained in:
MayNiklas 2023-09-25 18:11:26 +02:00
parent 4f474548a1
commit d8b46e5e4b
2 changed files with 15 additions and 18 deletions

View file

@ -5,7 +5,7 @@
, cudaSupport ? false
# runtime
, ffmpeg
, ffmpeg-headless
# propagates
, numpy
@ -14,7 +14,6 @@
, tqdm
, more-itertools
, transformers
, ffmpeg-python
, numba
, openai-triton
, scipy
@ -26,20 +25,20 @@
buildPythonPackage rec {
pname = "whisper";
version = "20230314";
version = "20230918";
format = "setuptools";
src = fetchFromGitHub {
owner = "openai";
repo = pname;
rev = "refs/tags/v${version}";
hash = "sha256-qQCELjRFeRCT1k1CBc3netRtFvt+an/EbkrgnmiX/mc=";
hash = "sha256-wBAanFVEIIzTcoX40P9eI26UdEu0SC/xuife/zi2Xho=";
};
patches = [
(substituteAll {
src = ./ffmpeg-path.patch;
inherit ffmpeg;
ffmpeg = ffmpeg-headless;
})
];
@ -48,7 +47,6 @@ buildPythonPackage rec {
tqdm
more-itertools
transformers
ffmpeg-python
numba
scipy
tiktoken
@ -61,7 +59,7 @@ buildPythonPackage rec {
postPatch = ''
substituteInPlace requirements.txt \
--replace "tiktoken==0.3.1" "tiktoken>=0.3.1"
--replace "tiktoken==0.3.3" "tiktoken>=0.3.3"
''
# openai-triton is only needed for CUDA support.
# triton needs CUDA to be built.
@ -80,7 +78,6 @@ buildPythonPackage rec {
disabledTests = [
# requires network access to download models
"test_tokenizer"
"test_transcribe"
# requires NVIDIA drivers
"test_dtw_cuda_equivalence"

View file

@ -1,13 +1,13 @@
diff --git a/whisper/audio.py b/whisper/audio.py
index a6074e8..da18350 100644
index 4f5b6e0..bfe7924 100644
--- a/whisper/audio.py
+++ b/whisper/audio.py
@@ -41,7 +41,7 @@ def load_audio(file: str, sr: int = SAMPLE_RATE):
out, _ = (
ffmpeg.input(file, threads=0)
.output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
- .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
+ .run(cmd=["@ffmpeg@/bin/ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
except ffmpeg.Error as e:
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
@@ -44,7 +44,7 @@ def load_audio(file: str, sr: int = SAMPLE_RATE):
# and resampling as necessary. Requires the ffmpeg CLI in PATH.
# fmt: off
cmd = [
- "ffmpeg",
+ "@ffmpeg@/bin/ffmpeg",
"-nostdin",
"-threads", "0",
"-i", file,