2023-06-09 10:02:30 +02:00
|
|
|
#!/bin/sh
#
# Fetch the IANA language subtag registry when it has changed, validate it
# with check-registry, and regenerate the derived files (XML, text, SQL,
# HTML, Atom). Failures and registry changes are mailed to ${MAINTAINER}
# through mutt.

# Sent as the HTTP Referer and quoted in the notification mails.
MYURL=https://www.langtag.net/
# Upstream registry and the name of its local copy.
LTR_URL=https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
LTR_LOCAL=language-subtag-registry
# Directory holding the checker and converter programs.
PROGRAMS_DIR=../../GaBuZoMeu
TEST_PROGRAM=${PROGRAMS_DIR}/check-registry

# Scratch files: OUTPUT collects the output of curl and of the checker,
# TMPDIFF receives the diff between the old and the new registry.
OS="$(uname)"
if [ "$OS" = "FreeBSD" ]; then
    # FreeBSD's mktemp is stupid enough to have *no*
    # default template :-(
    template="/tmp/$(basename "$0").tmp.XXXXXX"
    OUTPUT=$(mktemp "$template") || exit 1
    TMPDIFF=$(mktemp "$template") || { rm -f -- "$OUTPUT"; exit 1; }
else
    OUTPUT=$(mktemp) || exit 1
    TMPDIFF=$(mktemp) || { rm -f -- "$OUTPUT"; exit 1; }
fi

# Recipient of the notification mails; also sent in the HTTP From: header.
MAINTAINER=bortzmeyer@langtag.net

# Conversions
CONVERT_XML_BORTZMEYER=${PROGRAMS_DIR}/registry2xml
CONVERT_XML_ELLERMANN="awk -f ltru2xml.awk "
CONVERT_POSTGRESQL=${PROGRAMS_DIR}/registry2postgresql
CONVERT_SQLITE=${PROGRAMS_DIR}/registry2sqlite
CONVERT_TXT=${PROGRAMS_DIR}/registry2txt
CONVERT_HTML=${PROGRAMS_DIR}/registry2mulhtml
FILL_DATABASE=./fill-in-database.sh
# --force is to avoid spurious warnings about "Ambiguous output"
#CRLF_TO_LOCAL="recode --force /CR-LF..US-ASCII "

# Remove the scratch files on every exit path. The signal trap exits with
# status 1, which in turn fires the EXIT trap; the second rm is harmless.
cleanup() {
    rm -f -- "$OUTPUT" "$TMPDIFF"
}
trap 'cleanup; exit 1' HUP INT QUIT TERM
trap cleanup EXIT
|
|
|
|
|
|
|
|
# Work out the date handed to curl's --time-cond, so that the registry is
# only downloaded when the copy at IANA is newer than that date.
if [ -e "${LTR_LOCAL}" ]; then
    # Second space-separated field of the first line of the local copy.
    ltr_date=$(head -n 1 "${LTR_LOCAL}" | cut -d" " -f2)
    # Allow time to elapse. The date of the file at IANA is often the day after
    # the date written in the LSR. Heuristically, we add one day and a few hours.
    # NOTE(review): --date is a GNU date option; confirm this works on the
    # FreeBSD hosts the mktemp workaround elsewhere in this script caters for.
    current_date=$(date +"%Y%m%d %H:%M:%S" --date="${ltr_date} +1 day +4 hour")
else
    # Trick to force a downloading
    current_date="19700101"
    #current_date=`date --utc +"%Y%m%d"`
fi

# The existence of the .TMP file is later taken to mean "something new was
# downloaded", so make sure a leftover from an earlier run cannot be
# mistaken for a fresh download.
rm -f -- "${LTR_LOCAL}.TMP"

# --show-error keeps curl's error message despite --silent, so that the mail
# below has something to say. stdout is redirected first and stderr second:
# in the opposite order stderr would not end up in ${OUTPUT}.
if ! curl --silent --show-error --output "${LTR_LOCAL}.TMP" \
        --compressed \
        --referer "${MYURL}" \
        --proxy "" \
        --time-cond "${current_date}" \
        --header "From: ${MAINTAINER}" \
        "${LTR_URL}" > "${OUTPUT}" 2>&1; then
    mutt -s "Network error getting ${LTR_URL}" "${MAINTAINER}" < "${OUTPUT}"
    exit 1
fi
|
|
|
|
# Validate the freshly downloaded registry, report what changed, install it
# as the local copy and regenerate every derived file.

# File not downloaded, probably because there was nothing new.
if [ ! -e "${LTR_LOCAL}.TMP" ]; then
    exit 0
fi

#$CRLF_TO_LOCAL ${LTR_LOCAL}.TMP
# The command variables are deliberately left unquoted so that a value made
# of a program plus arguments keeps working. stdout is redirected before
# stderr so that both are appended to ${OUTPUT}.
if ! ${TEST_PROGRAM} "${LTR_LOCAL}.TMP" >> "${OUTPUT}" 2>&1; then
    mutt -s "Invalid registry ${LTR_URL}" "${MAINTAINER}" < "${OUTPUT}"
    exit 1
fi

if [ -e "${LTR_LOCAL}" ]; then
    diff -u "${LTR_LOCAL}" "${LTR_LOCAL}.TMP" > "$TMPDIFF"
    # -s tests that the diff file is not empty; testing the file *name*
    # with -z is always true and mails even when nothing changed.
    if [ -s "$TMPDIFF" ]; then
        mutt -s "New LTR registry at ${MYURL}" "${MAINTAINER}" < "$TMPDIFF"
    fi
fi
mv "${LTR_LOCAL}.TMP" "${LTR_LOCAL}"

# Now, the various conversions
${CONVERT_XML_BORTZMEYER}
# trang is in Java and therefore fails frequently
# trang -Irnc -Orng ltru.rnc ltru.rng
xmllint --noout --relaxng ltru.rng "${LTR_LOCAL}.xml"
${CONVERT_TXT}
#${CONVERT_XML_ELLERMANN} < ${LTR_LOCAL} > ${LTR_LOCAL}2.xml
#xmllint --noout --valid ${LTR_LOCAL}2.xml
${CONVERT_POSTGRESQL} > lsr-postgres.sql
${CONVERT_SQLITE} > lsr-sqlite.sql
# TODO: UTF-8 support on SQLite was never tested
# Keep the UTF-8 dump under another name and publish the file named
# lsr-sqlite-ncr.sql (presumably written by utf82ncr.py) as lsr-sqlite.sql.
./utf82ncr.py lsr-sqlite.sql
mv lsr-sqlite.sql lsr-sqlite-utf8.sql
mv lsr-sqlite-ncr.sql lsr-sqlite.sql
${CONVERT_HTML}
${FILL_DATABASE}
./lsr2atom.py > lsr.atom

# Record the second field of the registry's first line as its version.
version=$(head -n 1 "${LTR_LOCAL}" | awk '{print $2}')
echo "$version" > "${LTR_LOCAL}-version"
exit 0
|