transcription/conversion_simple_fr.py

37 lines
1.1 KiB
Python
Executable File

#!/usr/bin/python3
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave
# Number of audio frames handed to the recognizer on each iteration.
FRAMES_PER_CHUNK = 4000


def _model_dir():
    """Return the path of the ``models/fr`` directory next to this script.

    The script path is made absolute first: when the script is launched as
    ``python3 conversion_simple_fr.py`` the directory part of ``sys.argv[0]``
    is empty, and plain string concatenation would produce the unrelated
    root path ``/models/fr``.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    return os.path.join(script_dir, "models", "fr")


def _require_supported_format(wf):
    """Return the frame rate of *wf*, or exit with status 1 if unsupported.

    The file is accepted only when its sample rate is at least 16000 Hz and
    it has 1 channel, a 2-byte sample width and compression type ``NONE``.
    """
    framerate = wf.getframerate()
    nchannels = wf.getnchannels()
    sampwidth = wf.getsampwidth()
    comptype = wf.getcomptype()
    if framerate < 16000 or nchannels != 1 or sampwidth != 2 or comptype != "NONE":
        print ("Audio file has : {} Hz sample rate, {} channels, {} byte sample width and {} compression type".format(framerate, nchannels, sampwidth, comptype))
        print ("when 16000 Hz sample rate, 1 channel, 2 byte sample width and NONE compression type are required")
        sys.exit(1)
    return framerate


def main():
    """Transcribe the WAV file named by ``sys.argv[1]`` and print the results.

    Each result returned by the recognizer is printed to standard output as
    soon as a chunk of audio is accepted, followed by the final result once
    the whole file has been read.

    Exits with status 1 when the audio file argument is missing, when the
    model directory does not exist, or when the audio format is unsupported.
    """
    if len(sys.argv) < 2:
        # Passing a string to sys.exit() writes it to stderr and exits with 1.
        sys.exit("Usage: {} <audio.wav>".format(sys.argv[0]))

    model_dir = _model_dir()
    if not os.path.exists(model_dir):
        print ("Please download the model from https://alphacephei.com/vosk/models and unpack as '{}'.".format(model_dir))
        sys.exit(1)

    # The context manager closes the audio file on every exit path.
    with wave.open(sys.argv[1], "rb") as wf:
        framerate = _require_supported_format(wf)

        # Configure Vosk logging before any Vosk object is created.
        SetLogLevel(0)
        model = Model(model_dir)
        rec = KaldiRecognizer(model, framerate)

        while True:
            data = wf.readframes(FRAMES_PER_CHUNK)
            if not data:
                break
            if rec.AcceptWaveform(data):
                print(rec.Result())
        print(rec.FinalResult())


if __name__ == "__main__":
    main()