diff --git a/README.md b/README.md
index 2acdf67..882045f 100644
--- a/README.md
+++ b/README.md
@@ -95,3 +95,12 @@ The summarized content is saved as a markdown file named `summary.md` in the cur
 - [YouTube Video Summarizer with OpenAI Whisper and GPT](https://github.com/mirabdullahyaser/Summarizing-Youtube-Videos-with-OpenAI-Whisper-and-GPT-3/tree/master)
 - [Mistral Python Client](https://github.com/mistralai/client-python)
 - [Ollama: Install LLama 2 and Code LLama in seconds!](https://www.geeek.org/tutoriel-installation-llama-2-et-code-llama/)
+
+## Known Issues
+
+```python
+ValueError: Soundfile is either not in the correct format or is malformed. Ensure that the soundfile has a valid audio file extension (e.g. wav, flac or mp3) and is not corrupted. If reading from a remote URL, ensure that the URL is the full address to **download** the audio file.
+```
+
+To fix it, remux the file with ffmpeg:
+`ffmpeg -i my_file.mp4 -movflags faststart my_file_fixed.mp4`
diff --git a/src/requirements.txt b/src/requirements.txt
index 9822881..78e0941 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -1,3 +1,7 @@
 openai-whisper==20231117
 pytube==15.0.0
-ollama==0.1.8
\ No newline at end of file
+ollama==0.1.8
+torch==2.5.0.dev20240712
+torchaudio==2.4.0.dev20240712
+torchvision==0.20.0.dev20240712
+transformers==4.42.4
\ No newline at end of file
diff --git a/src/summary.py b/src/summary.py
index c2a90b6..9bc9213 100644
--- a/src/summary.py
+++ b/src/summary.py
@@ -1,11 +1,10 @@
-import whisper
 import ollama
 import argparse
+import os
 from pytube import YouTube
 from pathlib import Path
-import os
+from transformers import pipeline
 
-WHISPER_MODEL = "base"
 OLLAMA_MODEL = "llama3"
 
 # Function to download a video from YouTube
@@ -16,16 +15,19 @@ def download_from_youtube(url: str, path: str):
     # Download the video to the specified path
     stream.download(Path(path), filename="to_transcribe.mp4")
 
-# Function to transcribe an audio file using the Whisper model
+# Function to transcribe an audio file using the transformers pipeline
 def transcribe_file(file_path: str, output_file: str) -> str:
-    # Load the Whisper model
-    model = whisper.load_model(WHISPER_MODEL)
+    # Load the pipeline model for automatic speech recognition with MPS
+    transcriber_gpu = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3", device="mps")
+
     # Transcribe the audio file
-    transcribe = model.transcribe(file_path)
+    transcribe = transcriber_gpu(file_path)
+
     # Save the transcribed text to the specified temporary file
     with open(output_file, 'w') as tmp_file:
         tmp_file.write(transcribe["text"])
     print(f"Transcription saved to file: {output_file}")
+
     # Return the transcribed text
     return transcribe["text"]
 
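A note on the Known Issues fix above: `-movflags faststart` rewrites the MP4 so the `moov` metadata atom sits at the front of the file, which lets the audio decoder find the stream layout without scanning the whole file. If the remux should happen automatically before transcription, a minimal sketch follows, assuming `ffmpeg` is on the PATH; the helper name and file paths are illustrative, not part of this patch:

```python
import subprocess

def remux_for_transcription(src: str, dst: str) -> None:
    """Remux a downloaded MP4 so its moov atom is at the front (hypothetical helper)."""
    # -movflags faststart relocates the moov atom; -y overwrites dst if it exists
    subprocess.run(
        ["ffmpeg", "-y", "-i", src, "-movflags", "faststart", dst],
        check=True,
    )

remux_for_transcription("to_transcribe.mp4", "to_transcribe_fixed.mp4")
```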
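One caveat with the new `transcribe_file`: recent `transformers` releases treat Whisper inputs longer than 30 seconds as long-form audio and typically expect either `return_timestamps=True` or a `chunk_length_s` setting, and `device="mps"` assumes an Apple Silicon machine with a PyTorch build that includes the Metal backend. Below is a hedged sketch of a more defensive setup; the `chunk_length_s=30` value and the CPU fallback are assumptions, not something this patch specifies:

```python
import torch
from transformers import pipeline

# Fall back to CPU when the Metal (MPS) backend is unavailable
device = "mps" if torch.backends.mps.is_available() else "cpu"

transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device=device,
    chunk_length_s=30,  # assumed: chunked inference for audio longer than 30 s
)

result = transcriber("to_transcribe.mp4")
print(result["text"])
```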