forked from tykayn/transcription
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
733 B
30 lines
733 B
#!/usr/bin/env python3
"""Transcribe a WAV file (mono, 16-bit PCM) with a Vosk speech model.

Usage:
    script.py AUDIO.wav

The model is loaded from ``models/en`` relative to the current working
directory.  Every result the recognizer hands back while the audio is being
fed is printed to standard output, followed by the recognizer's final result.
The process exits with status 1 when the model directory is missing, no audio
path was given, or the audio is not in the expected format.
"""

import os
import sys
import wave

from vosk import KaldiRecognizer, Model, SetLogLevel

# Directory the Vosk model is expected to be unpacked into.
MODEL_DIR = "models/en"

# Number of audio frames read from the WAV file per recognizer call.
FRAMES_PER_CHUNK = 4000


def main(argv):
    """Run the transcription and return the process exit status.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the path of the WAV
            file to transcribe.

    Returns:
        0 on success, 1 when a precondition (model present, audio path given,
        mono 16-bit uncompressed audio) is not met.
    """
    SetLogLevel(0)

    if not os.path.exists(MODEL_DIR):
        print("Please download the model from https://alphacephei.com/vosk/models and unpack as 'models' in the current folder.")
        return 1

    # Without this guard a missing argument surfaces as a bare IndexError.
    if len(argv) < 2:
        print("Usage: {} AUDIO.wav".format(argv[0] if argv else "transcribe"))
        return 1

    # The context manager guarantees the file handle is closed on every path,
    # including the early return below and any exception raised by Vosk.
    with wave.open(argv[1], "rb") as wf:
        is_mono_pcm16 = (
            wf.getnchannels() == 1
            and wf.getsampwidth() == 2
            and wf.getcomptype() == "NONE"
        )
        if not is_mono_pcm16:
            print("Audio file must be WAV format mono PCM.")
            return 1

        model = Model(MODEL_DIR)
        rec = KaldiRecognizer(model, wf.getframerate())

        while True:
            data = wf.readframes(FRAMES_PER_CHUNK)
            if not data:
                # readframes() returns empty bytes once the audio is exhausted.
                break
            # A truthy return means the recognizer has a result ready to read.
            if rec.AcceptWaveform(data):
                print(rec.Result())

        # Emit whatever the recognizer still holds after the last chunk.
        print(rec.FinalResult())

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))