From 5add45c71cae89542465e1df5f6a280acba18292 Mon Sep 17 00:00:00 2001
From: Reza Behzadan
Date: Sun, 10 Dec 2023 21:54:02 +0330
Subject: [PATCH] Add logging

Replace the progress print() calls in mp3_to_text() with logger.info() and
add initLogger() to configure console output when main.py runs as a script.
A module-level logger keeps mp3_to_text() usable when main.py is imported.
---
 main.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/main.py b/main.py
index 8410173..07b30a7 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,5 @@
+import logging
+
 import librosa
 import torch
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
@@ -6,34 +8,67 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 MODEL = "/home/reza/data/huggingface-models/04.wav2vec2-large-xlsr-persian"
 
 
+# Module-level logger so that mp3_to_text() can log when this module is
+# imported instead of being run as a script. It stays unconfigured until
+# initLogger() attaches a handler to it.
+logger = logging.getLogger('speech2text_fa')
+
+
+def initLogger(name=__name__, level=logging.DEBUG):
+    """Return the logger called ``name`` with a console handler attached.
+
+    Leading and trailing double underscores are stripped from ``name``
+    (``__main__`` becomes ``main``). Records go through a
+    ``logging.StreamHandler`` and ``level`` is set on the logger itself.
+    """
+    if name[:2] == '__' and name[-2:] == '__':
+        name = name[2:-2]
+    named_logger = logging.getLogger(name)
+
+    # Attach the handler only once: a repeated call for the same name would
+    # otherwise add a second handler and emit every record twice.
+    if not named_logger.handlers:
+        fmt = '%(asctime)s | %(levelname)-8s | %(name)s | %(message)s'
+        datefmt = '%Y-%m-%d %H:%M:%S'
+        ch = logging.StreamHandler()
+        ch.setLevel(logging.DEBUG)
+        ch.setFormatter(logging.Formatter(fmt, datefmt))
+        named_logger.addHandler(ch)
+
+    named_logger.setLevel(level)
+    return named_logger
+
+
 def mp3_to_text(mp3_file_path):
+    """Transcribe an audio file with the wav2vec2 checkpoint at MODEL.
+
+    The file is loaded at 16 kHz, run through the model, and the first
+    decoded transcription is returned. Progress is logged at INFO level.
+    """
     # Load the MP3 file and resample to 16kHz
     audio, sample_rate = librosa.load(mp3_file_path, sr=16000)
-    print()
-    print("Resampling is Done!")
+    logger.info("Resampling is Done!")
 
     # Load tokenizer and model from Hugging Face
     tokenizer = Wav2Vec2Processor.from_pretrained(MODEL)
     model = Wav2Vec2ForCTC.from_pretrained(MODEL)
-    print()
-    print("Loading model is Done!")
+    logger.info("Loading model is Done!")
 
     # Preprocess the audio
     input_values = tokenizer(audio, sampling_rate=16000, return_tensors="pt", padding="longest").input_values
     logits = model(input_values).logits
-    print()
-    print("Processing the audio is Done!")
+    logger.info("Processing the audio is Done!")
 
     # Decode the predicted IDs
     predicted_ids = torch.argmax(logits, dim=-1)
     transcription = tokenizer.batch_decode(predicted_ids)
-    print()
-    print("Decoding the prodicted IDs is Done!")
+    logger.info("Decoding the predicted IDs is Done!")
 
     return transcription[0]
 
 
-# text = mp3_to_text("samples/captcha.mp3")
-text = mp3_to_text("samples/sample1.wav")
-print()
-print(text)
+if __name__ == "__main__":
+    logger = initLogger('speech2text_fa', level=logging.INFO)
+    text = mp3_to_text("samples/sample1.wav")
+    print()
+    print(text)