Add MPS support for macOS

This commit is contained in:
Damien A
2024-07-13 15:36:31 +02:00
parent 51d6ea8e18
commit 65e6483c9f
3 changed files with 23 additions and 8 deletions

View File

@@ -95,3 +95,12 @@ The summarized content is saved as a markdown file named `summary.md` in the cur
- [YouTube Video Summarizer with OpenAI Whisper and GPT](https://github.com/mirabdullahyaser/Summarizing-Youtube-Videos-with-OpenAI-Whisper-and-GPT-3/tree/master)
- [Mistral Python Client](https://github.com/mistralai/client-python)
- [Ollama : Installez LLama 2 et Code LLama en quelques secondes !](https://www.geeek.org/tutoriel-installation-llama-2-et-code-llama/)
## Known Issues
```python
ValueError: Soundfile is either not in the correct format or is malformed. Ensure that the soundfile has a valid audio file extension (e.g. wav, flac or mp3) and is not corrupted. If reading from a remote URL, ensure that the URL is the full address to **download** the audio file.
```
To fix it:
`ffmpeg -i my_file.mp4 -movflags faststart my_file_fixed.mp4`

View File

@@ -1,3 +1,7 @@
openai-whisper==20231117
pytube==15.0.0
ollama==0.1.8
torch==2.5.0.dev20240712
torchaudio==2.4.0.dev20240712
torchvision==0.20.0.dev20240712
transformers==4.42.4

View File

@@ -1,11 +1,10 @@
import whisper
import ollama
import argparse
import os
from pytube import YouTube
from pathlib import Path
import os
from transformers import pipeline
WHISPER_MODEL = "base"
OLLAMA_MODEL = "llama3"
# Function to download a video from YouTube
@@ -16,16 +15,19 @@ def download_from_youtube(url: str, path: str):
# Download the video to the specified path
stream.download(Path(path), filename="to_transcribe.mp4")
# Function to transcribe an audio file using the transformers pipeline
def transcribe_file(file_path: str, output_file: str, device: str = "mps") -> str:
    """Transcribe an audio file and save the text to ``output_file``.

    Args:
        file_path: Path to the audio file to transcribe.
        output_file: Path where the transcribed text is written.
        device: Torch device for the pipeline. Defaults to ``"mps"``
            (Apple Silicon); pass ``"cpu"`` or ``"cuda"`` elsewhere.

    Returns:
        The transcribed text.
    """
    # NOTE(review): the previous implementation also loaded the openai-whisper
    # model (whisper.load_model / model.transcribe) and then threw that result
    # away — dead code left over from the migration to the transformers
    # pipeline. Only the pipeline path is kept.
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
        device=device,
    )
    # Transcribe the audio file
    result = transcriber(file_path)
    # Save the transcribed text to the specified file; `with` guarantees the
    # handle is closed even if the write fails.
    with open(output_file, 'w') as tmp_file:
        tmp_file.write(result["text"])
    print(f"Transcription saved to file: {output_file}")
    # Return the transcribed text
    return result["text"]