openai-whisper: 20230314 -> 20230918

2024-09-21 04:49:01 +02:00 · 2023-09-25 18:11:26 +02:00 · 2023-09-25 18:11:26 +02:00 · d8b46e5e4b
parent 4f474548a1
commit d8b46e5e4b
2 changed files with 15 additions and 18 deletions
--- a/pkgs/development/python-modules/openai-whisper/default.nix
+++ b/pkgs/development/python-modules/openai-whisper/default.nix
@@ -5,7 +5,7 @@
 , cudaSupport ? false

 # runtime
-, ffmpeg
+, ffmpeg-headless

 # propagates
 , numpy
@@ -14,7 +14,6 @@
 , tqdm
 , more-itertools
 , transformers
-, ffmpeg-python
 , numba
 , openai-triton
 , scipy
@@ -26,20 +25,20 @@

 buildPythonPackage rec {
  pname = "whisper";
-  version = "20230314";
+  version = "20230918";
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "openai";
    repo = pname;
    rev = "refs/tags/v${version}";
-    hash = "sha256-qQCELjRFeRCT1k1CBc3netRtFvt+an/EbkrgnmiX/mc=";
+    hash = "sha256-wBAanFVEIIzTcoX40P9eI26UdEu0SC/xuife/zi2Xho=";
  };

  patches = [
    (substituteAll {
      src = ./ffmpeg-path.patch;
-      inherit ffmpeg;
+      ffmpeg = ffmpeg-headless;
    })
  ];

@@ -48,7 +47,6 @@ buildPythonPackage rec {
    tqdm
    more-itertools
    transformers
-    ffmpeg-python
    numba
    scipy
    tiktoken
@@ -61,7 +59,7 @@ buildPythonPackage rec {

  postPatch = ''
    substituteInPlace requirements.txt \
-      --replace "tiktoken==0.3.1" "tiktoken>=0.3.1"
+      --replace "tiktoken==0.3.3" "tiktoken>=0.3.3"
  ''
  # openai-triton is only needed for CUDA support.
  # triton needs CUDA to be build.
@@ -80,7 +78,6 @@ buildPythonPackage rec {

  disabledTests = [
    # requires network access to download models
-    "test_tokenizer"
    "test_transcribe"
    # requires NVIDIA drivers
    "test_dtw_cuda_equivalence"
--- a/pkgs/development/python-modules/openai-whisper/ffmpeg-path.patch
+++ b/pkgs/development/python-modules/openai-whisper/ffmpeg-path.patch
@@ -1,13 +1,13 @@
 diff --git a/whisper/audio.py b/whisper/audio.py
-index a6074e8..da18350 100644
+index 4f5b6e0..bfe7924 100644
 --- a/whisper/audio.py
 +++ b/whisper/audio.py
-@@ -41,7 +41,7 @@ def load_audio(file: str, sr: int = SAMPLE_RATE):
-         out, _ = (
-             ffmpeg.input(file, threads=0)
-             .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
--            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
-+            .run(cmd=["@ffmpeg@/bin/ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
-         )
-     except ffmpeg.Error as e:
-         raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
+@@ -44,7 +44,7 @@ def load_audio(file: str, sr: int = SAMPLE_RATE):
+     # and resampling as necessary.  Requires the ffmpeg CLI in PATH.
+     # fmt: off
+     cmd = [
+-        "ffmpeg",
++        "@ffmpeg@/bin/ffmpeg",
+         "-nostdin",
+         "-threads", "0",
+         "-i", file,