diff --git a/bin/http_xsltproc.sh b/bin/http_xsltproc.sh index dd730be..414e18a 100755 --- a/bin/http_xsltproc.sh +++ b/bin/http_xsltproc.sh @@ -23,33 +23,67 @@ display_help () { /bin/cat <<- HELP_MSG This script is a wrapper around the xsltproc command-line tool. + Like xsltproc, this script applies an XSL stylesheet to an XML + document and outputs the resulting transformation on stdout. Unlike xsltproc, this script fetches the target XML document - from an HTTP server. + from an HTTP server. - Usage: ${0##*/} [-hv] [-o OUT_FILE] STYLESHEET URL + Usage: ${0##*/} [-hv] [-F | -C] [-o OUT_FILE] STYLESHEET URL This script takes two command-line arguments. The STYLESHEET argument is the absolute path to an XSL document in the local file system. The URL argument is the absolute URL of an XML document. The script fetches the XML document at the given - URL using the curl command-line tool and then applies the XSL - stylesheet to the XML document using the xsltproc tool. + URL using HTTP Conditional GET [RFC 7232]. If the server + responds with 200, the document in the response body is used. + If the server responds with 304, the document in the cache is + used instead. - This script requires environment variable LIB_DIR, which - specifies a directory containing various XSL stylesheets and - helper scripts including: - - $LIB_FILENAMES + This script requires two environment variables. CACHE_DIR is + the absolute path to the cache directory (which may or may not + exist) whereas LIB_DIR specifies a directory containing various + helper scripts. Options: - -h Display this message + -h Display this help message -v Write verbose messages to stdout + -F Enables "Force Output Mode" + -C Enables "Cache Only Mode" -o Output the transformed document to OUT_FILE Option -h is mutually exclusive of all other options. + The default behavior of the script may be modified by using + option -F or -C, which are mutually exclusive. 
Force Output + Mode (option -F) forces the return of a fresh resource. The + resource is output on stdout if and only if the server + responds with 200. If the response is 304, an error is thrown. + + Cache Only Mode (option -C) bypasses the GET request altogether + and goes directly to cache. If the resource resides in cache, + it is output on stdout, otherwise an error is thrown. + + Option -o specifies an output file in the local file system. If option -o is omitted, the transformed document is written to stdout. + + LIBRARY + + Environment variable LIB_DIR specifies a directory containing at + least the following library files, which act as helper scripts for + ${0##*/}: + + $LIB_FILENAMES + + EXAMPLES + + stylesheet=/path/to/stylesheets/style.xsl + url=http://md.incommon.org/InCommon/InCommon-metadata.xml + out_file=/tmp/output.txt + ${0##*/} \$stylesheet \$url + ${0##*/} -o \$out_file \$stylesheet \$url + ${0##*/} -F \$stylesheet \$url + ${0##*/} -C \$stylesheet \$url HELP_MSG } @@ -61,20 +95,33 @@ script_name=${0##*/} # equivalent to basename $0 # library filenames (always list command_paths first) LIB_FILENAMES="command_paths.sh -compatible_mktemp.sh" +compatible_mktemp.sh +http_tools.sh" ####################################################################### # Process command-line options and arguments ####################################################################### -help_mode=false; verbose_mode=false -while getopts ":hvo:" opt; do +help_mode=false; verbose_mode=false; local_opts= +force_get_mode=false; cache_only_mode=false +while getopts ":hvFCo:" opt; do case $opt in h) help_mode=true ;; v) verbose_mode=true + local_opts="$local_opts -$opt" + ;; + F) + force_get_mode=true + cache_only_mode=false + local_opts="$local_opts -$opt" + ;; + C) + cache_only_mode=true + force_get_mode=false + local_opts="$local_opts -$opt" ;; o) out_file="$OPTARG" @@ -125,6 +172,22 @@ $verbose_mode && printf "$script_name using XML file at location: %s\n" "$xml_lo # 
Initialization ####################################################################### +# determine the cache directory +if [ -z "$CACHE_DIR" ]; then + echo "ERROR: $script_name requires env var CACHE_DIR" >&2 + exit 2 +fi +if [ ! -d "$CACHE_DIR" ]; then + # think carefully about this... + /bin/mkdir "$CACHE_DIR" + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "ERROR: $script_name failed to create dir: $CACHE_DIR" >&2 + exit $exit_code + fi +fi +$verbose_mode && printf "$script_name using cache directory: %s\n" "$CACHE_DIR" + # determine the source lib directory if [ -z "$LIB_DIR" ]; then echo "ERROR: $script_name requires env var LIB_DIR" >&2 @@ -136,7 +199,7 @@ if [ ! -d "$LIB_DIR" ]; then fi $verbose_mode && printf "$script_name using source lib directory: %s\n" "$LIB_DIR" -# source lib files +# source lib files (always source command_paths first) for lib_filename in $LIB_FILENAMES; do lib_file="$LIB_DIR/$lib_filename" if [ ! -f "$lib_file" ]; then @@ -152,18 +215,10 @@ for lib_filename in $LIB_FILENAMES; do fi done -# create a temporary directory -tmp_dir=$( make_temp_file -d ) -if [ ! -d "$tmp_dir" ] ; then - printf "ERROR: $script_name unable to create temporary dir\n" >&2 - exit 2 -fi -$verbose_mode && printf "$script_name creating temp dir: %s\n" "$tmp_dir" - # determine temporary directory if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then # use system temporary directory (remove trailing slash) - TMP_DIR="${TMPDIR%%/}/md-transforms" + TMP_DIR="${TMPDIR%%/}/${script_name%%.*}" $verbose_mode && printf "$script_name using temp dir: %s\n" "$TMP_DIR" else # create temporary directory @@ -173,7 +228,7 @@ else exit 2 fi # use temporary directory (remove trailing slash) - TMP_DIR="${tmp_dir%%/}/md-transforms" + TMP_DIR="${tmp_dir%%/}/${script_name%%.*}" $verbose_mode && printf "$script_name creating temp dir: %s\n" "$TMP_DIR" fi if [ ! -d "$TMP_DIR" ]; then @@ -186,7 +241,7 @@ if [ ! 
-d "$TMP_DIR" ]; then fi # temporary files -xml_file="${TMP_DIR}/http_response_$$.xml" +xml_file="${TMP_DIR}/http_resource_$$.xml" xsltproc_out_file="${TMP_DIR}/xsltproc_output_$$" ####################################################################### @@ -194,23 +249,19 @@ xsltproc_out_file="${TMP_DIR}/xsltproc_output_$$" ####################################################################### # GET the XML document -$verbose_mode && printf "$script_name writing curl output to file: %s\n" "$xml_file" -curl_output=$( /usr/bin/curl --silent \ - --output "$xml_file" \ - --write-out 'redirects:%{num_redirects};response:%{http_code};dns:%{time_namelookup};tcp:%{time_connect};ssl:%{time_appconnect};total:%{time_total}' \ - "$xml_location" -) +$verbose_mode && printf "$script_name requesting resource: %s\n" "$xml_location" +conditional_get $local_opts -d "$CACHE_DIR" -t "$TMP_DIR" "$xml_location" > "$xml_file" exit_code=$? if [ $exit_code -ne 0 ]; then - echo "ERROR: ${script_name}: curl failed with status code: $exit_code" >&2 - echo "$curl_output" >&2 + echo "ERROR: $script_name failed to get resource: $xml_location" >&2 + printf "See output log: %s\n" "$TMP_DIR/$conditional_get_log" >&2 exit $exit_code fi -$verbose_mode && printf "$script_name successfully executed curl: %s\n" "$curl_output" +$verbose_mode && printf "$script_name successfully obtained XML file: %s\n" "$xml_file" # transform the XML document $verbose_mode && printf "$script_name writing xsltproc output to file: %s\n" "$xsltproc_out_file" -/usr/bin/xsltproc --output "$xsltproc_out_file" "$xsl_file" "$xml_file" >&2 +/usr/bin/xsltproc --output "$xsltproc_out_file" "$xsl_file" "$xml_file" exit_code=$? if [ $exit_code -ne 0 ]; then echo "ERROR: ${script_name}: xsltproc failed with status code: $exit_code" >&2