transcription/website/youtube-dl.sh

22 lines
858 B
Bash
Raw Normal View History

2021-03-23 12:13:23 +01:00
#!/bin/bash
2021-03-23 12:54:17 +01:00
# Make sure the download directory exists, then clear whatever an earlier run
# with the same id left behind (the .mp3 file and the per-id directory).
# UNIQID is only assigned further down the script, so the id is read from $1
# here. With an empty id the second rm would expand to ../input/ydl/ and wipe
# every download; ${1:?...} aborts the script instead of letting that happen.
mkdir -p ../input/ydl
rm -rf -- "../input/ydl/${1:?first argument (id) is required}.mp3"
rm -rf -- "../input/ydl/${1:?first argument (id) is required}"
2021-03-23 12:54:17 +01:00
2021-03-23 12:13:23 +01:00
# Positional arguments:
#   $1 - id used to name the files and directory under ../input/ydl
#   $2 - URL handed to youtube-dl
UNIQID="${1}"
URL="${2}"
2021-03-23 12:54:17 +01:00
# Path passed to youtube-dl --output; named after the id.
OUTPUT="../input/ydl/${UNIQID}.mp3"
2021-03-23 12:13:23 +01:00
2021-03-23 12:54:17 +01:00
# Fetch URL with youtube-dl, extracting the audio as MP3 into OUTPUT.
# Both expansions are quoted so characters such as '?', '*' or spaces in the
# URL are not globbed or word-split by the shell, and "--" ends option parsing
# so a URL beginning with '-' cannot be interpreted as a youtube-dl option.
youtube-dl --extract-audio --audio-format mp3 --audio-quality 0 \
  --output "$OUTPUT" -- "$URL"
2021-03-23 12:13:23 +01:00
# Create the per-id working directory that receives the WAV and the
# transcription files. Quoted so an id with spaces or glob characters stays a
# single path; -p keeps the step from failing when the directory (or its
# parent) is already there or still missing.
mkdir -p -- "../input/ydl/$UNIQID"
2021-03-23 12:54:17 +01:00
# Convert the downloaded audio to a single-channel (-ac 1) WAV inside the
# per-id directory.
ffmpeg -i "../input/ydl/$UNIQID.mp3" -ac 1 "../input/ydl/$UNIQID/$UNIQID.wav"

# Every path below is relative to the parent directory. Stop if we cannot
# enter it, otherwise the outputs would be written relative to the wrong place.
cd .. || exit 1
OUT_DIR="input/ydl/$UNIQID"

# NOTE(review): FILE_NAME is not assigned anywhere in the visible script, so
# unless the caller's environment provides it the files below get an empty
# suffix (e.g. 0_output_.json). Names are kept as-is to avoid breaking
# whatever reads them; confirm the intended value.

# Step 0: run the transcription script on the WAV and capture its stdout.
python3 ./conversion_simple_fr.py "$OUT_DIR/$UNIQID.wav" \
  > "$OUT_DIR/0_output_${FILE_NAME}.json"

# Step 1: keep only the .text field of each JSON value.
jq .text "$OUT_DIR/0_output_${FILE_NAME}.json" \
  > "$OUT_DIR/1_converted_${FILE_NAME}.txt"

# Step 2: delete every occurrence of the string "null".
sed 's/null//g' "$OUT_DIR/1_converted_${FILE_NAME}.txt" \
  > "$OUT_DIR/2_without_nulls_${FILE_NAME}.txt"

# Step 3: trim leading/trailing spaces and drop lines that end up empty.
sed 's/^ *//; s/ *$//; /^$/d' "$OUT_DIR/2_without_nulls_${FILE_NAME}.txt" \
  > "$OUT_DIR/3_without_nulls_${FILE_NAME}.txt"

# Step 4: strip all double quotes, leaving the bare phrases.
sed 's/\"//g' "$OUT_DIR/3_without_nulls_${FILE_NAME}.txt" \
  > "$OUT_DIR/4_phrases_${FILE_NAME}.txt"