forked from tykayn/transcription
up
This commit is contained in:
parent
f5d4a76db9
commit
89374d2418
8
.idea/.gitignore
vendored
8
.idea/.gitignore
vendored
@ -1,8 +0,0 @@
|
|||||||
# Default ignored files
|
|
||||||
/shelf/
|
|
||||||
/workspace.xml
|
|
||||||
# Datasource local storage ignored files
|
|
||||||
/dataSources/
|
|
||||||
/dataSources.local.xml
|
|
||||||
# Editor-based HTTP Client requests
|
|
||||||
/httpRequests/
|
|
@ -1,8 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="ProjectModuleManager">
|
|
||||||
<modules>
|
|
||||||
<module fileurl="file://$PROJECT_DIR$/.idea/transcription-vosk.iml" filepath="$PROJECT_DIR$/.idea/transcription-vosk.iml" />
|
|
||||||
</modules>
|
|
||||||
</component>
|
|
||||||
</project>
|
|
@ -1,11 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<module type="WEB_MODULE" version="4">
|
|
||||||
<component name="NewModuleRootManager">
|
|
||||||
<content url="file://$MODULE_DIR$">
|
|
||||||
<excludeFolder url="file://$MODULE_DIR$/models" />
|
|
||||||
<excludeFolder url="file://$MODULE_DIR$/input/converted_to_wav" />
|
|
||||||
</content>
|
|
||||||
<orderEntry type="inheritedJdk" />
|
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
|
||||||
</component>
|
|
||||||
</module>
|
|
@ -1,6 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<project version="4">
|
|
||||||
<component name="VcsDirectoryMappings">
|
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
|
||||||
</component>
|
|
||||||
</project>
|
|
2
Makefile
2
Makefile
@ -5,5 +5,7 @@ wav:
|
|||||||
bash inputs_to_wav.sh
|
bash inputs_to_wav.sh
|
||||||
convert:
|
convert:
|
||||||
bash transcript.sh $(args)
|
bash transcript.sh $(args)
|
||||||
|
convert_en:
|
||||||
|
bash english_transcript.sh $(args)
|
||||||
srt:
|
srt:
|
||||||
perl clean.sh $(args) > output/clean.srt
|
perl clean.sh $(args) > output/clean.srt
|
||||||
|
74
base_transcript.sh
Normal file
74
base_transcript.sh
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# utilisation: bash transcript.sh MONFICHIER.wav
|
||||||
|
# auteur du script: tykayn contact@cipherbliss.com
|
||||||
|
file="input/already_converted/demo.wav"
|
||||||
|
echo "########### $(date) : conversion de fichier audio .WAV mono piste uniquement,
|
||||||
|
avec Vosk installé par pip3, et un modèle de textes en français."
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : fichier : $file"
|
||||||
|
|
||||||
|
FOLDER_MODEL="fr"
|
||||||
|
|
||||||
|
# existence du modèle demandé
|
||||||
|
if [ -d "models/$FOLDER_MODEL" ]; then
|
||||||
|
echo "models/$FOLDER_MODEL le modèle est bien présent."
|
||||||
|
else
|
||||||
|
pwd
|
||||||
|
ls -l models
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : [ERREUR] le modèle de données dans models/$FOLDER_MODEL n'existe pas, vérifiez son installation :C peut être avez vous oublié de faire une commande 'make'"
|
||||||
|
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# existence du fichier demandé
|
||||||
|
if [ -f "$file" ]; then
|
||||||
|
echo "$file exists."
|
||||||
|
else
|
||||||
|
echo "########### $(date) : [ERREUR] voici les fichiers disponibles dans input/converted_to_wav: "
|
||||||
|
ls -l input/converted_to_wav
|
||||||
|
echo "########### $(date) : [ERREUR] le fichier $file n'existe PAS :C "
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " "
|
||||||
|
FILE_NAME=$(basename $file .wav)
|
||||||
|
OUT_DIR=$( echo "output/$FILE_NAME")
|
||||||
|
mkdir output/$FILE_NAME
|
||||||
|
|
||||||
|
python3 ./extract_srt.py "$file" > $OUT_DIR/0_output.json
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : nettoyer la sortie "
|
||||||
|
jq .text $OUT_DIR/0_output.json > $OUT_DIR/1_converted.txt
|
||||||
|
jq .text $OUT_DIR/0_output_$FILE_NAME.json > $OUT_DIR/1_converted_$FILE_NAME.txt
|
||||||
|
|
||||||
|
sed 's/null//g' $OUT_DIR/1_converted.txt > $OUT_DIR/2_without_nulls.txt
|
||||||
|
sed 's/^ *//; s/ *$//; /^$/d' $OUT_DIR/2_without_nulls.txt > $OUT_DIR/3_without_nulls.txt
|
||||||
|
sed 's/\"//g' $OUT_DIR/3_without_nulls.txt > $OUT_DIR/4_phrases.txt
|
||||||
|
sed 's/null//g' $OUT_DIR/1_converted_$FILE_NAME.txt > $OUT_DIR/2_without_nulls_$FILE_NAME.txt
|
||||||
|
sed 's/^ *//; s/ *$//; /^$/d' $OUT_DIR/2_without_nulls_$FILE_NAME.txt > $OUT_DIR/3_without_nulls_$FILE_NAME.txt
|
||||||
|
sed 's/\"//g' $OUT_DIR/3_without_nulls_$FILE_NAME.txt > $OUT_DIR/4_phrases_$FILE_NAME.txt
|
||||||
|
echo "########### $(date) : OK "
|
||||||
|
echo " "
|
||||||
|
COUNT_LINES=$(cat $OUT_DIR/phrases.txt |wc -l)
|
||||||
|
cat $OUT_DIR/4_phrases.txt
|
||||||
|
COUNT_LINES=$(cat $OUT_DIR/phrases_$FILE_NAME.txt |wc -l)
|
||||||
|
cat $OUT_DIR/4_phrases_$FILE_NAME.txt
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : lignes transcriptes $COUNT_LINES "
|
||||||
|
echo "########### $(date) : conversion faite dans output/converted_out_without_nulls.txt"
|
||||||
|
echo "########### $(date) : conversion de la sortie en pseudo fichier de sous titres"
|
||||||
|
perl clean.sh $OUT_DIR/0_output.json > $OUT_DIR/5_phrases_min_sec.txt
|
||||||
|
perl clean.sh $OUT_DIR/0_output_$FILE_NAME.json > $OUT_DIR/5_phrases_min_sec_$FILE_NAME.txt
|
||||||
|
cat $OUT_DIR/5_phrases_min_sec.srt
|
||||||
|
|
||||||
|
echo "########### $(date) : conversion de la sortie en fichier de sous titres "
|
||||||
|
python3 ./extract_srt.py "$file" > $OUT_DIR/5_output.srt
|
||||||
|
python3 ./extract_srt.py "$file" > $OUT_DIR/5_output_$FILE_NAME.srt
|
||||||
|
|
||||||
|
ls -l $OUT_DIR
|
||||||
|
|
||||||
|
echo "########### $(date) : conversion faite "
|
||||||
|
exit 0
|
||||||
|
|
74
en_test_base_transcript.sh
Normal file
74
en_test_base_transcript.sh
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# utilisation: bash transcript.sh MONFICHIER.wav
|
||||||
|
# auteur du script: tykayn contact@cipherbliss.com
|
||||||
|
file="input/already_converted/audio_drive_thru.wav"
|
||||||
|
echo "########### $(date) : conversion de fichier audio .WAV mono piste uniquement,
|
||||||
|
avec Vosk installé par pip3, et un modèle de textes en français."
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : fichier : $file"
|
||||||
|
|
||||||
|
FOLDER_MODEL="en"
|
||||||
|
|
||||||
|
# existence du modèle demandé
|
||||||
|
if [ -d "models/$FOLDER_MODEL" ]; then
|
||||||
|
echo "models/$FOLDER_MODEL le modèle est bien présent."
|
||||||
|
else
|
||||||
|
pwd
|
||||||
|
ls -l models
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : [ERREUR] le modèle de données dans models/$FOLDER_MODEL n'existe pas, vérifiez son installation :C peut être avez vous oublié de faire une commande 'make'"
|
||||||
|
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# existence du fichier demandé
|
||||||
|
if [ -f "$file" ]; then
|
||||||
|
echo "$file exists."
|
||||||
|
else
|
||||||
|
echo "########### $(date) : [ERREUR] voici les fichiers disponibles dans input/converted_to_wav: "
|
||||||
|
ls -l input/converted_to_wav
|
||||||
|
echo "########### $(date) : [ERREUR] le fichier $file n'existe PAS :C "
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " "
|
||||||
|
FILE_NAME=$(basename $file .wav)
|
||||||
|
OUT_DIR=$( echo "output/$FILE_NAME")
|
||||||
|
mkdir -p output/$FILE_NAME
|
||||||
|
|
||||||
|
python3 ./extract_srt_en.py "$file" > $OUT_DIR/0_output.json
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : nettoyer la sortie "
|
||||||
|
jq .text $OUT_DIR/0_output.json > $OUT_DIR/1_converted.txt
|
||||||
|
jq .text $OUT_DIR/0_output_$FILE_NAME.json > $OUT_DIR/1_converted_$FILE_NAME.txt
|
||||||
|
|
||||||
|
sed 's/null//g' $OUT_DIR/1_converted.txt > $OUT_DIR/2_without_nulls.txt
|
||||||
|
sed 's/^ *//; s/ *$//; /^$/d' $OUT_DIR/2_without_nulls.txt > $OUT_DIR/3_without_nulls.txt
|
||||||
|
sed 's/\"//g' $OUT_DIR/3_without_nulls.txt > $OUT_DIR/4_phrases.txt
|
||||||
|
sed 's/null//g' $OUT_DIR/1_converted_$FILE_NAME.txt > $OUT_DIR/2_without_nulls_$FILE_NAME.txt
|
||||||
|
sed 's/^ *//; s/ *$//; /^$/d' $OUT_DIR/2_without_nulls_$FILE_NAME.txt > $OUT_DIR/3_without_nulls_$FILE_NAME.txt
|
||||||
|
sed 's/\"//g' $OUT_DIR/3_without_nulls_$FILE_NAME.txt > $OUT_DIR/4_phrases_$FILE_NAME.txt
|
||||||
|
echo "########### $(date) : OK "
|
||||||
|
echo " "
|
||||||
|
COUNT_LINES=$(cat $OUT_DIR/phrases.txt |wc -l)
|
||||||
|
cat $OUT_DIR/4_phrases.txt
|
||||||
|
COUNT_LINES=$(cat $OUT_DIR/phrases_$FILE_NAME.txt |wc -l)
|
||||||
|
cat $OUT_DIR/4_phrases_$FILE_NAME.txt
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : lignes transcriptes $COUNT_LINES "
|
||||||
|
echo "########### $(date) : conversion faite dans output/converted_out_without_nulls.txt"
|
||||||
|
echo "########### $(date) : conversion de la sortie en pseudo fichier de sous titres"
|
||||||
|
perl clean.sh $OUT_DIR/0_output.json > $OUT_DIR/5_phrases_min_sec.txt
|
||||||
|
perl clean.sh $OUT_DIR/0_output_$FILE_NAME.json > $OUT_DIR/5_phrases_min_sec_$FILE_NAME.txt
|
||||||
|
cat $OUT_DIR/5_phrases_min_sec.srt
|
||||||
|
|
||||||
|
echo "########### $(date) : conversion de la sortie en fichier de sous titres "
|
||||||
|
python3 ./extract_srt.py "$file" > $OUT_DIR/5_output.srt
|
||||||
|
python3 ./extract_srt.py "$file" > $OUT_DIR/5_output_$FILE_NAME.srt
|
||||||
|
|
||||||
|
ls -l $OUT_DIR
|
||||||
|
|
||||||
|
echo "########### $(date) : conversion faite "
|
||||||
|
exit 0
|
||||||
|
|
121
english_transcript.sh
Executable file
121
english_transcript.sh
Executable file
@ -0,0 +1,121 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# utilisation: bash transcript.sh MONFICHIER.wav
|
||||||
|
# auteur du script: tykayn contact@cipherbliss.com
|
||||||
|
# ```bash
|
||||||
|
# bash transcript.sh myfile fr 1
|
||||||
|
# ```
|
||||||
|
echo " Transcript of a file - [file relative path \"input/aside/demo.wav\"] [lang en or fr] [enable srt conversion 1 or 0]"
|
||||||
|
# ----------------- Default parameters -----------------
|
||||||
|
#ENABLE_SRT=false
|
||||||
|
ENABLE_SRT=true
|
||||||
|
# disponibles: "fr" ou "en", trouvez d'autres modèles prédéfinis https://alphacephei.com/vosk/models
|
||||||
|
#FOLDER_MODEL="fr"
|
||||||
|
FOLDER_MODEL="en"
|
||||||
|
DEFAULT_FILE_TO_TRANSCRIPT="input/aside/demo.wav"
|
||||||
|
STARTTIME=$(date +%s)
|
||||||
|
|
||||||
|
# ----------------- prise en compte des arguments rentrés par l'utilisateur
|
||||||
|
echo "=====> langue: $FOLDER_MODEL"
|
||||||
|
echo "=====> fichier à convertir: $1"
|
||||||
|
|
||||||
|
if [ $1 ]; then
|
||||||
|
file=$1
|
||||||
|
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "utilisation du fichier de démo"
|
||||||
|
file=$DEFAULT_FILE_TO_TRANSCRIPT
|
||||||
|
fi
|
||||||
|
if [ $2 ]; then
|
||||||
|
lang_to_search=$2
|
||||||
|
else
|
||||||
|
lang_to_search=$FOLDER_MODEL
|
||||||
|
fi
|
||||||
|
if [ $3 ]; then
|
||||||
|
ENABLE_SRT=$3
|
||||||
|
else
|
||||||
|
ENABLE_SRT=$ENABLE_SRT
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo " [file name]: $file, [lang]: $lang_to_search, [enable srt conversion]: $ENABLE_SRT."
|
||||||
|
|
||||||
|
FILE_NAME=$(basename $file .wav)
|
||||||
|
|
||||||
|
output_dir="output"
|
||||||
|
if [ $2 ]; then
|
||||||
|
output_dir=$2
|
||||||
|
fi
|
||||||
|
OUT_DIR=$( echo "$output_dir/$FILE_NAME")
|
||||||
|
|
||||||
|
echo "########### $(date) : conversion de fichier audio .WAV mono piste uniquement,
|
||||||
|
avec Vosk installé par pip3, et un modèle de textes en Anglais."
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : fichier : $file : $1"
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------- recherche de l'existence du modèle de langue demandé -----------------
|
||||||
|
if [ -d "models/$lang_to_search" ]; then
|
||||||
|
echo "models/$lang_to_search le modèle est bien présent."
|
||||||
|
else
|
||||||
|
pwd
|
||||||
|
ls -l models
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : [ERREUR] le modèle de données dans models/$lang_to_search n'existe pas, vérifiez son installation :C peut être avez vous oublié de faire une commande 'make'"
|
||||||
|
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# ----------------- existence du fichier demandé -----------------
|
||||||
|
if [ -f "$file" ]; then
|
||||||
|
echo "$file exists."
|
||||||
|
else
|
||||||
|
echo "########### $(date) : [ERREUR] fichier introuvable: $file"
|
||||||
|
echo "########### $(date) : [ERREUR] voici les fichiers disponibles dans input/converted_to_wav: "
|
||||||
|
echo " "
|
||||||
|
ls -l input/converted_to_wav
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : [ERREUR] le fichier $file n'existe PAS :C "
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo " "
|
||||||
|
|
||||||
|
mkdir output/$FILE_NAME -p
|
||||||
|
echo " convertir en sous titre ? $ENABLE_SRT"
|
||||||
|
if ($ENABLE_SRT) ; then
|
||||||
|
echo "########### $(date) : conversion de $file ,sortie en fichier de sous titres .srt"
|
||||||
|
echo ""
|
||||||
|
echo "## (cela prend plusieurs minutes généralement 1 / 10ème du temps du fichier audio)"
|
||||||
|
echo "..."
|
||||||
|
python3 ./extract_srt.py "$file" > $OUT_DIR/6_output_$FILE_NAME.srt
|
||||||
|
cat $OUT_DIR/6_output_$FILE_NAME.srt
|
||||||
|
COUNT_LINES=$(cat $OUT_DIR/6_output_$FILE_NAME.srt |wc -l)
|
||||||
|
echo " "
|
||||||
|
echo "-------------- DONE ------------"
|
||||||
|
echo " $COUNT_LINES lines in $OUT_DIR/6_phrases_min_sec.srt"
|
||||||
|
else
|
||||||
|
echo "########### $(date) : conversion de la sortie en divers fichiers marquant les temps et sans marquage"
|
||||||
|
python3 ./conversion_simple_en.py "$file" > $OUT_DIR/0_output_$FILE_NAME.json
|
||||||
|
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : nettoyer la sortie "
|
||||||
|
jq .text $OUT_DIR/0_output_$FILE_NAME.json > $OUT_DIR/1_converted_$FILE_NAME.txt
|
||||||
|
|
||||||
|
sed 's/null//g' $OUT_DIR/1_converted_$FILE_NAME.txt > $OUT_DIR/2_without_nulls_$FILE_NAME.txt
|
||||||
|
sed 's/^ *//; s/ *$//; /^$/d' $OUT_DIR/2_without_nulls_$FILE_NAME.txt > $OUT_DIR/3_without_nulls_$FILE_NAME.txt
|
||||||
|
sed 's/\"//g' $OUT_DIR/3_without_nulls_$FILE_NAME.txt > $OUT_DIR/4_phrases_$FILE_NAME.txt
|
||||||
|
echo "########### $(date) : OK "
|
||||||
|
echo " "
|
||||||
|
COUNT_LINES=$(cat $OUT_DIR/4_phrases_$FILE_NAME.txt |wc -l)
|
||||||
|
cat $OUT_DIR/4_phrases_$FILE_NAME.txt
|
||||||
|
echo " $COUNT_LINES lines in $OUT_DIR/4_phrases_$FILE_NAME.txt"
|
||||||
|
echo " "
|
||||||
|
echo "########### $(date) : lignes transcriptes $COUNT_LINES "
|
||||||
|
echo "########### $(date) : conversion faite dans output/converted_out_without_nulls.txt"
|
||||||
|
echo "########### $(date) : conversion de la sortie en pseudo fichier de sous titres"
|
||||||
|
perl clean.sh $OUT_DIR/0_output_$FILE_NAME.json > $OUT_DIR/5_phrases_min_sec_$FILE_NAME.txt
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "########### $(date) : conversion faite "
|
||||||
|
echo "########### en $SECONDS seconds"
|
||||||
|
exit 0
|
28
install.sh
28
install.sh
@ -26,24 +26,35 @@ echo -e " ${reset}"
|
|||||||
pip3 -v install vosk
|
pip3 -v install vosk
|
||||||
echo "${green}########### récupération du modèle de reconnaissance en Français sous licence aGPL, taille: 1.6Go. Choix des modèles disponibles: https://alphacephei.com/vosk/models ${reset}"
|
echo "${green}########### récupération du modèle de reconnaissance en Français sous licence aGPL, taille: 1.6Go. Choix des modèles disponibles: https://alphacephei.com/vosk/models ${reset}"
|
||||||
echo " "
|
echo " "
|
||||||
mkdir -p models
|
mkdir -p models/fr
|
||||||
echo -e "${green}########### Procéder au téléchargement du modèle Français (1.6go) pour transcrire les textes ?${reset} (écrivez o pour oui et faites entrée pour valider) ${reset}"
|
echo -e "${green}########### Procéder au téléchargement du modèle Français (1.6go) pour transcrire les textes ?${reset} (écrivez o pour oui et faites entrée pour valider) ${reset}"
|
||||||
read proceed
|
read proceed
|
||||||
|
|
||||||
# les autres modèles sont ici https://alphacephei.com/vosk/models
|
# les autres modèles sont ici https://alphacephei.com/vosk/models
|
||||||
if [[ $proceed == o* ]]; then
|
if [[ $proceed == o* ]]; then
|
||||||
echo "C'est parti."
|
echo "C'est parti."
|
||||||
f="vosk-model-fr-0.6-linto-2.2.0.zip"
|
if [ ! -f "vosk-model-fr-0.6-linto-2.2.0.zip" ]; then
|
||||||
if [ "x`find $f -printf %s`" != x1582884866 ]; then
|
echo "Récupération du modèle en Français vosk-model-fr-0.6-linto-2.2.0.zip"
|
||||||
wget https://alphacephei.com/vosk/models/$f
|
wget https://alphacephei.com/vosk/models/vosk-model-fr-0.6-linto-2.2.0.zip
|
||||||
echo -e "${green}########### téléchargement du modèle OK ${reset}"
|
echo -e "${green}########### téléchargement du modèle OK ${reset}"
|
||||||
else
|
unzip vosk-model-fr-0.6-linto-2.2.0.zip
|
||||||
echo "fichier zip $f déjà présent"
|
|
||||||
fi
|
|
||||||
unzip $f
|
|
||||||
mv vosk-model-fr-0.6-linto-2.2.0 models/fr
|
mv vosk-model-fr-0.6-linto-2.2.0 models/fr
|
||||||
echo -e "${green}########### décompression du modèle en Français OK ${reset}"
|
echo -e "${green}########### décompression du modèle en Français OK ${reset}"
|
||||||
ls -l models/fr
|
ls -l models/fr
|
||||||
|
else
|
||||||
|
echo "fichier zip vosk-model-fr-0.6-linto-2.2.0.zip déjà présent"
|
||||||
|
fi
|
||||||
|
if [ ! -f "vosk-model-en-us-0.42-gigaspeech.zip" ]; then
|
||||||
|
echo "Récupération du modèle en Anglais vosk-model-fr-0.6-linto-2.2.0.zip"
|
||||||
|
wget https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip
|
||||||
|
echo -e "${green}########### téléchargement du modèle OK ${reset}"
|
||||||
|
unzip vosk-model-en-us-0.42-gigaspeech.zip
|
||||||
|
mv vosk-model-en-us-0.42-gigaspeech.zip models/en
|
||||||
|
echo -e "${green}########### décompression du modèle en Français OK ${reset}"
|
||||||
|
ls -l models/en
|
||||||
|
else
|
||||||
|
echo "fichier zip vosk-model-fr-0.6-linto-2.2.0.zip déjà présent"
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
echo -e "${green}########### vous n'avez pas souhaité télécharger le modèle ${reset}"
|
echo -e "${green}########### vous n'avez pas souhaité télécharger le modèle ${reset}"
|
||||||
echo -e "${green}########### fin de l'installation sans télécharger de modèle de langue ${reset}"
|
echo -e "${green}########### fin de l'installation sans télécharger de modèle de langue ${reset}"
|
||||||
@ -53,3 +64,4 @@ fi
|
|||||||
|
|
||||||
|
|
||||||
echo -e "${green}########### installation ok ${reset}"
|
echo -e "${green}########### installation ok ${reset}"
|
||||||
|
exit 0
|
80
microphone.py
Normal file
80
microphone.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import queue
|
||||||
|
import sys
|
||||||
|
import sounddevice as sd
|
||||||
|
|
||||||
|
from vosk import Model, KaldiRecognizer
|
||||||
|
|
||||||
|
q = queue.Queue()
|
||||||
|
|
||||||
|
def int_or_str(text):
|
||||||
|
"""Helper function for argument parsing."""
|
||||||
|
try:
|
||||||
|
return int(text)
|
||||||
|
except ValueError:
|
||||||
|
return text
|
||||||
|
|
||||||
|
def callback(indata, frames, time, status):
|
||||||
|
"""This is called (from a separate thread) for each audio block."""
|
||||||
|
if status:
|
||||||
|
print(status, file=sys.stderr)
|
||||||
|
q.put(bytes(indata))
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(add_help=False)
|
||||||
|
parser.add_argument(
|
||||||
|
"-l", "--list-devices", action="store_true",
|
||||||
|
help="show list of audio devices and exit")
|
||||||
|
args, remaining = parser.parse_known_args()
|
||||||
|
if args.list_devices:
|
||||||
|
print(sd.query_devices())
|
||||||
|
parser.exit(0)
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
description=__doc__,
|
||||||
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
|
parents=[parser])
|
||||||
|
parser.add_argument(
|
||||||
|
"-f", "--filename", type=str, metavar="FILENAME",
|
||||||
|
help="audio file to store recording to")
|
||||||
|
parser.add_argument(
|
||||||
|
"-d", "--device", type=int_or_str,
|
||||||
|
help="input device (numeric ID or substring)")
|
||||||
|
parser.add_argument(
|
||||||
|
"-r", "--samplerate", type=int, help="sampling rate")
|
||||||
|
args = parser.parse_args(remaining)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if args.samplerate is None:
|
||||||
|
device_info = sd.query_devices(args.device, "input")
|
||||||
|
# soundfile expects an int, sounddevice provides a float:
|
||||||
|
args.samplerate = int(device_info["default_samplerate"])
|
||||||
|
|
||||||
|
model = Model("models/fr")
|
||||||
|
|
||||||
|
if args.filename:
|
||||||
|
dump_fn = open(args.filename, "wb")
|
||||||
|
else:
|
||||||
|
dump_fn = None
|
||||||
|
|
||||||
|
with sd.RawInputStream(samplerate=args.samplerate, blocksize = 8000, device=args.device,
|
||||||
|
dtype="int16", channels=1, callback=callback):
|
||||||
|
print("#" * 80)
|
||||||
|
print("Press Ctrl+C to stop the recording")
|
||||||
|
print("#" * 80)
|
||||||
|
|
||||||
|
rec = KaldiRecognizer(model, args.samplerate)
|
||||||
|
while True:
|
||||||
|
data = q.get()
|
||||||
|
if rec.AcceptWaveform(data):
|
||||||
|
print(rec.Result())
|
||||||
|
# else:
|
||||||
|
# print(rec.PartialResult())
|
||||||
|
if dump_fn is not None:
|
||||||
|
dump_fn.write(data)
|
||||||
|
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
print("\nDone")
|
||||||
|
parser.exit(0)
|
||||||
|
except Exception as e:
|
||||||
|
parser.exit(type(e).__name__ + ": " + str(e))
|
76
setup.py
Normal file
76
setup.py
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
import os
|
||||||
|
import setuptools
|
||||||
|
import shutil
|
||||||
|
import glob
|
||||||
|
import platform
|
||||||
|
|
||||||
|
# Figure out environment for cross-compile
|
||||||
|
vosk_source = os.getenv("VOSK_SOURCE", os.path.abspath(os.path.join(os.path.dirname(__file__),
|
||||||
|
"..")))
|
||||||
|
system = os.environ.get('VOSK_PLATFORM', platform.system())
|
||||||
|
architecture = os.environ.get('VOSK_ARCHITECTURE', platform.architecture()[0])
|
||||||
|
|
||||||
|
# Copy precompmilled libraries
|
||||||
|
for lib in glob.glob(os.path.join(vosk_source, "src/lib*.*")):
|
||||||
|
print ("Adding library", lib)
|
||||||
|
shutil.copy(lib, "vosk")
|
||||||
|
|
||||||
|
# Create OS-dependent, but Python-independent wheels.
|
||||||
|
try:
|
||||||
|
from wheel.bdist_wheel import bdist_wheel
|
||||||
|
except ImportError:
|
||||||
|
cmdclass = {}
|
||||||
|
else:
|
||||||
|
class bdist_wheel_tag_name(bdist_wheel):
|
||||||
|
def get_tag(self):
|
||||||
|
abi = 'none'
|
||||||
|
if system == 'Darwin':
|
||||||
|
oses = 'macosx_10_6_universal2'
|
||||||
|
elif system == 'Windows' and architecture == '32bit':
|
||||||
|
oses = 'win32'
|
||||||
|
elif system == 'Windows' and architecture == '64bit':
|
||||||
|
oses = 'win_amd64'
|
||||||
|
elif system == 'Linux' and architecture == '64bit':
|
||||||
|
oses = 'linux_x86_64'
|
||||||
|
elif system == 'Linux' and architecture == 'aarch64':
|
||||||
|
oses = 'manylinux2014_aarch64'
|
||||||
|
elif system == 'Linux':
|
||||||
|
oses = 'linux_' + architecture
|
||||||
|
else:
|
||||||
|
raise TypeError("Unknown build environment")
|
||||||
|
return 'py3', abi, oses
|
||||||
|
cmdclass = {'bdist_wheel': bdist_wheel_tag_name}
|
||||||
|
|
||||||
|
with open("README.md", "rb") as fh:
|
||||||
|
long_description = fh.read().decode("utf-8")
|
||||||
|
|
||||||
|
setuptools.setup(
|
||||||
|
name="vosk",
|
||||||
|
version="0.3.43",
|
||||||
|
author="Alpha Cephei Inc",
|
||||||
|
author_email="contact@alphacephei.com",
|
||||||
|
description="Offline open source speech recognition API based on Kaldi and Vosk",
|
||||||
|
long_description=long_description,
|
||||||
|
long_description_content_type="text/markdown",
|
||||||
|
url="https://github.com/alphacep/vosk-api",
|
||||||
|
packages=setuptools.find_packages(),
|
||||||
|
package_data = {'vosk': ['*.so', '*.dll', '*.dyld']},
|
||||||
|
entry_points = {
|
||||||
|
'console_scripts': ['vosk-transcriber=vosk.transcriber.cli:main'],
|
||||||
|
},
|
||||||
|
include_package_data=True,
|
||||||
|
classifiers=[
|
||||||
|
'Programming Language :: Python :: 3',
|
||||||
|
'License :: OSI Approved :: Apache Software License',
|
||||||
|
'Operating System :: Microsoft :: Windows',
|
||||||
|
'Operating System :: POSIX :: Linux',
|
||||||
|
'Operating System :: MacOS :: MacOS X',
|
||||||
|
'Topic :: Software Development :: Libraries :: Python Modules'
|
||||||
|
],
|
||||||
|
cmdclass=cmdclass,
|
||||||
|
python_requires='>=3',
|
||||||
|
zip_safe=False, # Since we load so file from the filesystem, we can not run from zip file
|
||||||
|
setup_requires=['cffi>=1.0', 'requests', 'tqdm', 'srt', 'websockets'],
|
||||||
|
install_requires=['cffi>=1.0', 'requests', 'tqdm', 'srt', 'websockets'],
|
||||||
|
cffi_modules=['vosk_builder.py:ffibuilder'],
|
||||||
|
)
|
58
transcribe_2.py
Normal file
58
transcribe_2.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
from vosk import Model, KaldiRecognizer, SetLogLevel
|
||||||
|
from tqdm.notebook import tqdm
|
||||||
|
import wave
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
def transcript_file(input_file, model_path):
|
||||||
|
|
||||||
|
# Check if file exists
|
||||||
|
if not os.path.isfile(input_file):
|
||||||
|
raise FileNotFoundError(os.path.basename(input_file) + " not found")
|
||||||
|
|
||||||
|
# Check if model path exists
|
||||||
|
if not os.path.exists(model_path):
|
||||||
|
raise FileNotFoundError(os.path.basename(model_path) + " not found")
|
||||||
|
|
||||||
|
# open audio file
|
||||||
|
wf = wave.open(input_file, "rb")
|
||||||
|
|
||||||
|
# check if wave file has the right properties
|
||||||
|
if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
|
||||||
|
raise TypeError("Audio file must be WAV format mono PCM.")
|
||||||
|
|
||||||
|
# Initialize model
|
||||||
|
model = Model(model_path)
|
||||||
|
rec = KaldiRecognizer(model, wf.getframerate())
|
||||||
|
|
||||||
|
# Get file size (to calculate progress bar)
|
||||||
|
file_size = os.path.getsize(input_file)
|
||||||
|
|
||||||
|
# Run transcription
|
||||||
|
pbar = tqdm(total=file_size)
|
||||||
|
|
||||||
|
# To store our results
|
||||||
|
transcription = []
|
||||||
|
|
||||||
|
while True:
|
||||||
|
data = wf.readframes(4000) # use buffer of 4000
|
||||||
|
pbar.update(len(data))
|
||||||
|
if len(data) == 0:
|
||||||
|
pbar.set_description("Transcription finished")
|
||||||
|
break
|
||||||
|
if rec.AcceptWaveform(data):
|
||||||
|
# Convert json output to dict
|
||||||
|
result_dict = json.loads(rec.Result())
|
||||||
|
# Extract text values and append them to transcription list
|
||||||
|
transcription.append(result_dict.get("text", ""))
|
||||||
|
|
||||||
|
# Get final bits of audio and flush the pipeline
|
||||||
|
final_result = json.loads(rec.FinalResult())
|
||||||
|
transcription.append(final_result.get("text", ""))
|
||||||
|
|
||||||
|
transcription_text = ' '.join(transcription)
|
||||||
|
|
||||||
|
return transcription_text
|
||||||
|
|
||||||
|
wave_file = '/input/already_converted/drive_thu.wav'
|
||||||
|
transcription = transcript_file(wave_file, 'models/en')
|
@ -6,8 +6,8 @@
|
|||||||
# ```
|
# ```
|
||||||
echo " Transcript of a file - [file relative path \"input/aside/demo.wav\"] [lang en or fr] [enable srt conversion 1 or 0]"
|
echo " Transcript of a file - [file relative path \"input/aside/demo.wav\"] [lang en or fr] [enable srt conversion 1 or 0]"
|
||||||
# ----------------- Default parameters -----------------
|
# ----------------- Default parameters -----------------
|
||||||
ENABLE_SRT=false
|
#ENABLE_SRT=false
|
||||||
#ENABLE_SRT=true
|
ENABLE_SRT=true
|
||||||
# disponibles: "fr" ou "en", trouvez d'autres modèles prédéfinis https://alphacephei.com/vosk/models
|
# disponibles: "fr" ou "en", trouvez d'autres modèles prédéfinis https://alphacephei.com/vosk/models
|
||||||
FOLDER_MODEL="fr"
|
FOLDER_MODEL="fr"
|
||||||
#FOLDER_MODEL="en"
|
#FOLDER_MODEL="en"
|
||||||
@ -15,12 +15,11 @@ DEFAULT_FILE_TO_TRANSCRIPT="input/aside/demo.wav"
|
|||||||
STARTTIME=$(date +%s)
|
STARTTIME=$(date +%s)
|
||||||
|
|
||||||
# ----------------- prise en compte des arguments rentrés par l'utilisateur
|
# ----------------- prise en compte des arguments rentrés par l'utilisateur
|
||||||
echo "fichier à convertir: $1"
|
echo "=====> langue: $FOLDER_MODEL"
|
||||||
|
echo "=====> fichier à convertir: $1"
|
||||||
|
|
||||||
if [ $1 ]; then
|
if [ $1 ]; then
|
||||||
file=$1
|
file=$1
|
||||||
|
|
||||||
|
|
||||||
else
|
else
|
||||||
echo "utilisation du fichier de démo"
|
echo "utilisation du fichier de démo"
|
||||||
file=$DEFAULT_FILE_TO_TRANSCRIPT
|
file=$DEFAULT_FILE_TO_TRANSCRIPT
|
||||||
@ -82,7 +81,7 @@ echo " convertir en sous titre ? $ENABLE_SRT"
|
|||||||
if ($ENABLE_SRT) ; then
|
if ($ENABLE_SRT) ; then
|
||||||
echo "########### $(date) : conversion de $file ,sortie en fichier de sous titres .srt"
|
echo "########### $(date) : conversion de $file ,sortie en fichier de sous titres .srt"
|
||||||
echo ""
|
echo ""
|
||||||
echo "## (cela prend plusieurs minutes généralement 1 / 10ème du temps du fichier audio)"
|
echo "## (cela prend plusieurs minutes généralement 1 / 10ème du temps du fichier audio)"
|
||||||
echo "..."
|
echo "..."
|
||||||
python3 ./extract_srt.py "$file" > $OUT_DIR/6_output_$FILE_NAME.srt
|
python3 ./extract_srt.py "$file" > $OUT_DIR/6_output_$FILE_NAME.srt
|
||||||
cat $OUT_DIR/6_output_$FILE_NAME.srt
|
cat $OUT_DIR/6_output_$FILE_NAME.srt
|
||||||
@ -116,6 +115,6 @@ else
|
|||||||
fi
|
fi
|
||||||
#ls -l $OUT_DIR
|
#ls -l $OUT_DIR
|
||||||
|
|
||||||
echo "########### $(STARTTIME) -- $(date) : conversion faite "
|
echo "########### $($STARTTIME) -- $(date) : conversion faite "
|
||||||
echo "########### en $SECONDS seconds"
|
echo "########### en $SECONDS seconds"
|
||||||
exit 0
|
exit 0
|
||||||
|
14
vosk_builder.py
Normal file
14
vosk_builder.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
from cffi import FFI
|
||||||
|
|
||||||
|
vosk_root=os.environ.get("VOSK_SOURCE", "..")
|
||||||
|
cpp_command = "cpp " + vosk_root + "/src/vosk_api.h"
|
||||||
|
|
||||||
|
ffibuilder = FFI()
|
||||||
|
ffibuilder.set_source("vosk.vosk_cffi", None)
|
||||||
|
ffibuilder.cdef(os.popen(cpp_command).read())
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
ffibuilder.compile(verbose=True)
|
Loading…
Reference in New Issue
Block a user