#!/bin/sh
#
# Fetch the IANA language subtag registry if it is newer than the local
# copy, validate it with check-registry, mail the maintainer a diff
# against the previous copy, then regenerate the derived files (XML,
# text, SQL, HTML, database, version stamp).
#
# Exit status: 0 when the registry was updated or nothing new was
# available; 1 on a download error or when the downloaded registry does
# not pass validation.

MYURL=https://www.langtag.net/
LTR_URL=https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
LTR_LOCAL=language-subtag-registry
PROGRAMS_DIR=../../GaBuZoMeu
TEST_PROGRAM=${PROGRAMS_DIR}/check-registry
MAINTAINER=stephane+langtag@bortzmeyer.org

# Conversions
CONVERT_XML_BORTZMEYER=${PROGRAMS_DIR}/registry2xml
CONVERT_XML_ELLERMANN="awk -f ltru2xml.awk "
CONVERT_POSTGRESQL=${PROGRAMS_DIR}/registry2postgresql
CONVERT_SQLITE=${PROGRAMS_DIR}/registry2sqlite
CONVERT_TXT=${PROGRAMS_DIR}/registry2txt
CONVERT_HTML=${PROGRAMS_DIR}/registry2mulhtml
FILL_DATABASE=./fill-in-database.sh
# --force is to avoid spurious warnings about "Ambiguous output"
#CRLF_TO_LOCAL="recode --force /CR-LF..US-ASCII "

# Scratch files: OUTPUT collects the diagnostics that are mailed on
# failure, TMPDIFF holds the diff between the old and new registry.
OS="$(uname)"
if [ "$OS" = "FreeBSD" ]; then
  # FreeBSD's mktemp is stupid enough to have *no*
  # default template :-(
  tmp_template="/tmp/$(basename "$0").tmp.XXXXXX"
  OUTPUT=$(mktemp "$tmp_template") || exit 1
  TMPDIFF=$(mktemp "$tmp_template") || exit 1
else
  OUTPUT=$(mktemp) || exit 1
  TMPDIFF=$(mktemp) || exit 1
fi

# Remove the scratch files on every exit path.
cleanup() {
  rm -f "$OUTPUT" "$TMPDIFF"
}
trap 'cleanup; exit 1' HUP INT QUIT TERM
trap cleanup EXIT

if [ -e "${LTR_LOCAL}" ]; then
  ltr_date=$(head -n 1 "${LTR_LOCAL}" | cut -d" " -f2)
  # Allow time to elapse. The date of the file at IANA is often the day after
  # the date written in the LSR. Heuristically, we add one day and a few hours.
  # NOTE(review): --date is a GNU date option; confirm what is wanted on
  # FreeBSD, which this script otherwise special-cases.
  current_date=$(date +"%Y%m%d %H:%M:%S" --date="${ltr_date} +1 day +4 hour")
else
  # Trick to force a downloading
  current_date="19700101"
  #current_date=`date --utc +"%Y%m%d"`
fi

# Conditional download. --show-error keeps curl's error message despite
# --silent, and stdout is redirected *before* 2>&1 so that stderr really
# lands in ${OUTPUT} and the failure mail has a body.
if ! curl --silent --show-error --output "${LTR_LOCAL}.TMP" \
  --compressed \
  --referer "${MYURL}" \
  --proxy "" \
  --time-cond "${current_date}" \
  --header "From: ${MAINTAINER}" \
  "${LTR_URL}" > "${OUTPUT}" 2>&1; then
  mutt -s "Network error getting ${LTR_URL}" "${MAINTAINER}" < "${OUTPUT}"
  exit 1
fi

if [ ! -e "${LTR_LOCAL}.TMP" ]; then
  # File not downloaded, probably because there was nothing new.
  exit 0
fi

#$CRLF_TO_LOCAL ${LTR_LOCAL}.TMP
if ! "${TEST_PROGRAM}" "${LTR_LOCAL}.TMP" >> "${OUTPUT}" 2>&1; then
  mutt -s "Invalid registry ${LTR_URL}" "${MAINTAINER}" < "${OUTPUT}"
  exit 1
fi

if [ -e "${LTR_LOCAL}" ]; then
  diff -u "${LTR_LOCAL}" "${LTR_LOCAL}.TMP" > "$TMPDIFF"
  # -s: only send the mail when the diff actually has content.
  if [ -s "$TMPDIFF" ]; then
    mutt -s "New LTR registry at ${MYURL}" "${MAINTAINER}" < "$TMPDIFF"
  fi
fi
mv "${LTR_LOCAL}.TMP" "${LTR_LOCAL}"

# Now, the various conversions
"${CONVERT_XML_BORTZMEYER}"
# trang is in Java and therefore fails frequently
# trang -Irnc -Orng ltru.rnc ltru.rng
xmllint --noout --relaxng ltru.rng "${LTR_LOCAL}.xml"
"${CONVERT_TXT}"
#${CONVERT_XML_ELLERMANN} < ${LTR_LOCAL} > ${LTR_LOCAL}2.xml
#xmllint --noout --valid ${LTR_LOCAL}2.xml
"${CONVERT_POSTGRESQL}" > lsr-postgres.sql
"${CONVERT_SQLITE}" > lsr-sqlite.sql
# TODO: UTF-8 support on SQLite was never tested
./utf82ncr.py lsr-sqlite.sql
mv lsr-sqlite.sql lsr-sqlite-utf8.sql
mv lsr-sqlite-ncr.sql lsr-sqlite.sql
"${CONVERT_HTML}"
"${FILL_DATABASE}"
# Needs to be ported away from.DateTime
#./lsr2atom.py > lsr.atom

# Record the second field of the registry's first line as the version.
version=$(head -n 1 "${LTR_LOCAL}" | awk '{print $2}')
printf '%s\n' "$version" > "${LTR_LOCAL}-version"
exit 0