Web-LangTag/registries/copy-and-convert.sh
Stephane Bortzmeyer 9bc5b62ca2 * More elements in XML schema
* A way to force conversion without downloading
2023-09-30 17:32:48 +02:00

95 lines
3.2 KiB
Bash
Executable File

#!/bin/sh
# Downloads the language subtag registry from IANA when it has changed,
# checks it, and then runs the conversion programs on it.
# Setting LANGTAG_FORCE_CONVERT to a non-empty value runs the conversions
# even when nothing was downloaded.

MYURL=https://www.langtag.net/
LTR_URL=https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
LTR_LOCAL=language-subtag-registry
PROGRAMS_DIR=../../GaBuZoMeu
TEST_PROGRAM=${PROGRAMS_DIR}/check-registry

# Scratch files: OUTPUT collects the output of curl and of the checker
# (it becomes the body of the error mails), TMPDIFF holds the diff between
# the old and the new registry.
OS="$(uname)"
if [ "$OS" = "FreeBSD" ]; then
  # FreeBSD's mktemp has *no* default template, so an explicit one
  # is required.
  tmp_template="/tmp/$(basename "$0").tmp.XXXXXX"
  OUTPUT=$(mktemp "$tmp_template") || exit 1
  TMPDIFF=$(mktemp "$tmp_template") || { rm -f "$OUTPUT"; exit 1; }
else
  OUTPUT=$(mktemp) || exit 1
  TMPDIFF=$(mktemp) || { rm -f "$OUTPUT"; exit 1; }
fi

MAINTAINER=bortzmeyer@langtag.net
ME=$(hostname -f)

# Conversions
CONVERT_XML_BORTZMEYER=${PROGRAMS_DIR}/registry2xml
CONVERT_XML_ELLERMANN="awk -f ltru2xml.awk "
CONVERT_POSTGRESQL=${PROGRAMS_DIR}/registry2postgresql
CONVERT_SQLITE=${PROGRAMS_DIR}/registry2sqlite
CONVERT_TXT=${PROGRAMS_DIR}/registry2txt
CONVERT_HTML=${PROGRAMS_DIR}/registry2mulhtml
FILL_DATABASE=./fill-in-database.sh
# --force is to avoid spurious warnings about "Ambiguous output"
#CRLF_TO_LOCAL="recode --force /CR-LF..US-ASCII "

# Remove the scratch files on HUP/INT/QUIT/TERM (1 2 3 15) and on any exit.
# Single quotes defer expansion to trap time; the inner double quotes keep
# each filename a single argument.
trap 'rm -f "$OUTPUT" "$TMPDIFF"; exit 1' 1 2 3 15
trap 'rm -f "$OUTPUT" "$TMPDIFF"' EXIT
# Work out the reference date handed to curl's --time-cond.
if [ ! -e "${LTR_LOCAL}" ]; then
  # Trick to force a download: no local copy yet, so use the epoch.
  current_date="19700101"
  #current_date=`date --utc +"%Y%m%d"`
else
  # The date is the second space-separated field of the registry's first line.
  ltr_date=$(head -n 1 "${LTR_LOCAL}" | cut -d" " -f2)
  # Allow time to elapse. The date of the file at IANA is often the day after
  # the date written in the LSR. Heuristically, we add one day and a few hours.
  current_date=$(date --date="${ltr_date} +1 day +4 hour" +"%Y%m%d %H:%M:%S")
fi
# Conditional download of the registry into ${LTR_LOCAL}.TMP, using
# ${current_date} as the --time-cond reference date.
# --show-error keeps curl's error message despite --silent, and the
# redirection order "> file 2>&1" makes stderr follow stdout into ${OUTPUT},
# so that the mail sent on failure actually contains the error.
if ! curl --silent --show-error --output "${LTR_LOCAL}.TMP" \
  --compressed \
  --referer "${MYURL}" \
  --proxy "" \
  --time-cond "${current_date}" \
  --header "From: ${MAINTAINER}" \
  "${LTR_URL}" > "${OUTPUT}" 2>&1; then
  mutt -s "Network error getting ${LTR_URL}" "${MAINTAINER}" < "${OUTPUT}"
  exit 1
fi
# Proceed when a new registry was downloaded, or when the caller forces the
# conversions through a non-empty LANGTAG_FORCE_CONVERT.
# Command variables (TEST_PROGRAM, CONVERT_*, FILL_DATABASE) are left
# unquoted on purpose so that they may carry arguments.
if [ -e "${LTR_LOCAL}.TMP" ] || [ -n "${LANGTAG_FORCE_CONVERT}" ]; then
  if [ -e "${LTR_LOCAL}.TMP" ]; then
    # Check the downloaded file before it replaces the current one.
    # ">> file 2>&1" appends both stdout and stderr to ${OUTPUT}.
    if ${TEST_PROGRAM} "${LTR_LOCAL}.TMP" >> "${OUTPUT}" 2>&1; then
      if [ -e "${LTR_LOCAL}" ]; then
        diff -u "${LTR_LOCAL}" "${LTR_LOCAL}.TMP" > "$TMPDIFF"
        # -s tests the file's content: mail only when there is a difference.
        if [ -s "$TMPDIFF" ]; then
          mutt -s "New LTR registry at ${MYURL} seen on ${ME}" "${MAINTAINER}" < "$TMPDIFF"
        fi
      fi
      mv "${LTR_LOCAL}.TMP" "${LTR_LOCAL}"
    else
      mutt -s "Invalid registry ${LTR_URL} seen on ${ME}" "${MAINTAINER}" < "${OUTPUT}"
      exit 1
    fi
  fi
  # Now, the various conversions
  ${CONVERT_XML_BORTZMEYER}
  # trang is in Java and therefore fails frequently
  # trang -Irnc -Orng ltru.rnc ltru.rng
  xmllint --noout --relaxng ltru.rng "${LTR_LOCAL}.xml"
  ${CONVERT_TXT}
  #${CONVERT_XML_ELLERMANN} < ${LTR_LOCAL} > ${LTR_LOCAL}2.xml
  #xmllint --noout --valid ${LTR_LOCAL}2.xml
  ${CONVERT_POSTGRESQL} > lsr-postgres.sql
  ${CONVERT_SQLITE} > lsr-sqlite.sql
  # TODO: UTF-8 support on SQLite was never tested
  ./utf82ncr.py lsr-sqlite.sql
  # Keep the UTF-8 dump under its own name; lsr-sqlite-ncr.sql
  # (presumably written by utf82ncr.py - TODO confirm) becomes the default.
  mv lsr-sqlite.sql lsr-sqlite-utf8.sql
  mv lsr-sqlite-ncr.sql lsr-sqlite.sql
  ${CONVERT_HTML}
  ${FILL_DATABASE}
  ./lsr2atom.py > lsr.atom
  # Record the second field of the registry's first line as the version.
  version=$(head -n 1 "${LTR_LOCAL}" | awk '{print $2}')
  echo "${version}" > "${LTR_LOCAL}-version"
  exit 0
else # File not downloaded, probably because there was nothing new.
  exit 0
fi