Add MPS support for macOS

This commit is contained in:
Damien A
2024-07-13 15:36:31 +02:00
parent 51d6ea8e18
commit 65e6483c9f
3 changed files with 23 additions and 8 deletions

View File

@@ -95,3 +95,12 @@ The summarized content is saved as a markdown file named `summary.md` in the cur
- [YouTube Video Summarizer with OpenAI Whisper and GPT](https://github.com/mirabdullahyaser/Summarizing-Youtube-Videos-with-OpenAI-Whisper-and-GPT-3/tree/master)
- [Mistral Python Client](https://github.com/mistralai/client-python)
- [Ollama : Installez LLama 2 et Code LLama en quelques secondes !](https://www.geeek.org/tutoriel-installation-llama-2-et-code-llama/)
## Known Issues
```python
ValueError: Soundfile is either not in the correct format or is malformed. Ensure that the soundfile has a valid audio file extension (e.g. wav, flac or mp3) and is not corrupted. If reading from a remote URL, ensure that the URL is the full address to **download** the audio file.
```
To fix it:
`ffmpeg -i my_file.mp4 -movflags faststart my_file_fixed.mp4`

View File

@@ -1,3 +1,7 @@
openai-whisper==20231117
pytube==15.0.0
ollama==0.1.8
torch==2.5.0.dev20240712
torchaudio==2.4.0.dev20240712
torchvision==0.20.0.dev20240712
transformers==4.42.4

View File

@@ -1,11 +1,10 @@
import whisper
import ollama
import argparse
import os
from pytube import YouTube
from pathlib import Path
import os
from transformers import pipeline
WHISPER_MODEL = "base"
OLLAMA_MODEL = "llama3"
# Function to download a video from YouTube
@@ -16,16 +15,19 @@ def download_from_youtube(url: str, path: str):
# Download the video to the specified path
stream.download(Path(path), filename="to_transcribe.mp4")
# Function to transcribe an audio file using the transformers pipeline
def transcribe_file(file_path: str, output_file: str, device: str = "mps") -> str:
    """Transcribe an audio file and save the text to ``output_file``.

    Args:
        file_path: Path to the audio file to transcribe.
        output_file: Path where the transcribed text is written.
        device: Torch device for the pipeline. Defaults to ``"mps"``
            (Apple Silicon); pass ``"cpu"`` or ``"cuda"`` elsewhere.

    Returns:
        The transcribed text.
    """
    # NOTE(review): the previous implementation also loaded the openai-whisper
    # model (whisper.load_model / model.transcribe) and then threw that result
    # away — dead code left over from the migration to the transformers
    # pipeline. Only the pipeline path is kept.
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
        device=device,
    )
    # Transcribe the audio file
    result = transcriber(file_path)
    # Save the transcribed text to the specified file; `with` guarantees the
    # handle is closed even if the write fails.
    with open(output_file, 'w') as tmp_file:
        tmp_file.write(result["text"])
    print(f"Transcription saved to file: {output_file}")
    # Return the transcribed text
    return result["text"]