#!/bin/bash

#######################################################################
# Copyright 2016 Tom Scavo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#######################################################################

#######################################################################
# cget.sh - retrieve an HTTP resource through an on-disk cache.
#
# Usage: cget.sh [-hv] [-F | -C] URL
#
# Environment:
#   CACHE_DIR  absolute path of the cache directory (created if missing)
#   LIB_DIR    directory holding the library files in LIB_FILENAMES
#
# Exit status: 0 on success, 2 on usage/environment errors, otherwise
# the status of the step that failed (mkdir, source, conditional_get).
#######################################################################

#######################################################################
# Help message
#######################################################################

# Print the help text on stdout. LIB_FILENAMES is expanded when the
# function is called, so it only has to be set before the first call.
# The here-document is written flush-left (no "<<-") so that the text
# does not depend on tab characters surviving in this file.
display_help () {
/bin/cat <<HELP_MSG
This script retrieves and caches HTTP resources on disk.
A previously cached resource is retrieved via HTTP Conditional
GET [RFC 7232]. If the web server responds with HTTP 200 OK,
the resource is cached and written to stdout. OTOH, if the web
server responds with 304 Not Modified, the cache is refreshed
but no output.

Usage: ${0##*/} [-hv] [-F | -C] URL

This script takes a single command-line argument. The URL
argument is the absolute URL of an HTTP resource. By default,
the script requests the resource at the given URL using the
curl command-line tool.

This script requires two environment variables. CACHE_DIR is
the absolute path to the cache directory (which may or may not
exist) whereas LIB_DIR specifies a directory containing various
helper scripts.

Options:
   -h      Display this message
   -v      Write verbose messages to stdout
   -F      Enables "Force Output Mode"
   -C      Enables "Cache Only Mode"

Option -h is mutually exclusive of all other options.

The default behavior of the script may be modified by using
option -F or -C, which are mutually exclusive. Force Output Mode
outputs the response body even if the server response is 304.
Cache Only Mode bypasses the GET request altogether and goes
directly to cache. If the resource resides in cache, it is
output on stdout, otherwise an error is thrown.

LIBRARY

Environment variable LIB_DIR specifies a directory containing at
least the following library files, which act as helper scripts for
${0##*/}:

$LIB_FILENAMES

EXAMPLES

  ${0##*/} URL      # Retrieve the resource using HTTP conditional GET
  ${0##*/} -F URL   # Enable Force Output Mode
  ${0##*/} -C URL   # Enable Cache Only Mode
HELP_MSG
}

#######################################################################
# Bootstrap
#######################################################################

script_name=${0##*/}  # equivalent to basename $0

# library filenames (always list command_paths first)
# NOTE: this is a whitespace-separated list that is deliberately
# word-split by the sourcing loop below.
LIB_FILENAMES="command_paths.sh
compatible_mktemp.sh
http_tools.sh
conditional_get.sh"

#######################################################################
# Process command-line options and arguments
#######################################################################

help_mode=false; verbose_mode=false; local_opts=
force_get_mode=false; cache_only_mode=false
while getopts ":hvFC" opt; do
  case $opt in
    h)
      help_mode=true
      ;;
    v)
      verbose_mode=true
      local_opts="$local_opts -$opt"
      ;;
    F)
      # -F and -C are mutually exclusive: the last one given wins
      force_get_mode=true
      cache_only_mode=false
      local_opts="$local_opts -$opt"
      ;;
    C)
      cache_only_mode=true
      force_get_mode=false
      local_opts="$local_opts -$opt"
      ;;
    \?)
      echo "ERROR: $script_name: Unrecognized option: -$OPTARG" >&2
      exit 2
      ;;
    :)
      echo "ERROR: $script_name: Option -$OPTARG requires an argument" >&2
      exit 2
      ;;
  esac
done

if $help_mode; then
  display_help
  exit 0
fi

# determine the location of the web resource
shift $(( OPTIND - 1 ))
if [ $# -ne 1 ]; then
  echo "ERROR: $script_name: wrong number of arguments: $# (1 required)" >&2
  exit 2
fi
location="$1"

# The script name is passed as an argument, never as part of the format
# string, so a "%" or "\" in the file name cannot corrupt the output.
$verbose_mode && printf '%s using location URL: %s\n' "$script_name" "$location"

#######################################################################
# Initialization
#######################################################################

# determine the cache directory
if [ -z "$CACHE_DIR" ]; then
  echo "ERROR: $script_name requires env var CACHE_DIR" >&2
  exit 2
fi
if [ ! -d "$CACHE_DIR" ]; then
  # think carefully about this...
  /bin/mkdir "$CACHE_DIR"
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $script_name failed to create dir: $CACHE_DIR" >&2
    exit $exit_code
  fi
fi
$verbose_mode && printf '%s using cache directory: %s\n' "$script_name" "$CACHE_DIR"

# determine the source lib directory
if [ -z "$LIB_DIR" ]; then
  echo "ERROR: $script_name requires env var LIB_DIR" >&2
  exit 2
fi
if [ ! -d "$LIB_DIR" ]; then
  echo "ERROR: $script_name: directory does not exist: $LIB_DIR" >&2
  exit 2
fi
$verbose_mode && printf '%s using source lib directory: %s\n' "$script_name" "$LIB_DIR"

# source lib files (always source command_paths first)
for lib_filename in $LIB_FILENAMES; do
  lib_file="$LIB_DIR/$lib_filename"
  if [ ! -f "$lib_file" ]; then
    echo "ERROR: $script_name: file does not exist: $lib_file" >&2
    exit 2
  fi
  $verbose_mode && printf '%s sourcing lib file: %s\n' "$script_name" "$lib_file"
  # anything a library prints while loading is diverted to stderr so it
  # cannot be mistaken for the resource written to stdout
  source "$lib_file" >&2
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $script_name failed to source script $lib_file" >&2
    exit $exit_code
  fi
done

# create a temporary directory
tmp_dir=$( make_temp_file -d )
if [ ! -d "$tmp_dir" ] ; then
  printf 'ERROR: %s unable to create temporary dir\n' "$script_name" >&2
  exit 2
fi
$verbose_mode && printf '%s creating temp dir: %s\n' "$script_name" "$tmp_dir"

# Remove the temporary directory on every exit path. In verbose mode the
# directory is kept on purpose: its path and the paths of the curl output
# files are reported to the user, who may want to inspect them afterwards.
cleanup_tmp_dir () {
  if ! $verbose_mode && [ -n "$tmp_dir" ] && [ -d "$tmp_dir" ]; then
    /bin/rm -rf -- "$tmp_dir"
  fi
}
trap cleanup_tmp_dir EXIT

#######################################################################
# Main processing
#######################################################################

# invoke the function
# ($local_opts is intentionally unquoted: it holds zero or more flags)
response_body=$(
  conditional_get $local_opts -d "$CACHE_DIR" -t "$tmp_dir" "$location"
)
exit_code=$?
if [ $exit_code -ne 0 ]; then
  echo "ERROR: ${script_name} failed to get resource: $location" >&2
  exit $exit_code
fi

if $verbose_mode; then
  # verbose mode reports the log written by conditional_get
  # instead of the response body
  /bin/cat "$tmp_dir/conditional_get_log"
else
  if [ -n "$response_body" ]; then
    printf '%s\n' "$response_body"
  fi
fi

exit 0
+####################################################################### + +################################################################ +# +# Usage: install.sh BIN_DIR LIB_DIR +# +# Example: Install in /tmp +# +# $ export BIN_DIR=/tmp/bin +# $ export LIB_DIR=/tmp/lib +# $ install.sh $BIN_DIR $LIB_DIR +# +# Example: Install in $HOME +# +# $ export BIN_DIR=$HOME/bin +# $ export LIB_DIR=$HOME/lib +# $ install.sh $BIN_DIR $LIB_DIR +# +################################################################ + +script_bin=${0%/*} # equivalent to dirname $0 +script_name=${0##*/} # equivalent to basename $0 + +# generalize +verbose_mode=true + +# get command-line args +if [ $# -ne 2 ]; then + echo "ERROR: $script_name: wrong number of arguments: $# (2 required)" >&2 + exit 2 +fi +bin_dir=$1 +lib_dir=$2 + +# check bin dir +if [ -z "$bin_dir" ]; then + echo "ERROR: $script_name requires bin directory (BIN_DIR)" >&2 + exit 2 +fi +if [ -d "$bin_dir" ]; then + $verbose_mode && echo "$script_name using bin dir: $bin_dir" +else + $verbose_mode && echo "$script_name creating bin dir: $bin_dir" + /bin/mkdir "$bin_dir" + exit_status=$? + if [ $exit_status -ne 0 ]; then + echo "ERROR: $script_name failed to create bin dir: $bin_dir" >&2 + exit $exit_status + fi +fi + +# check lib dir +if [ -z "$lib_dir" ]; then + echo "ERROR: $script_name requires lib directory (LIB_DIR)" >&2 + exit 2 +fi +if [ -d "$lib_dir" ]; then + $verbose_mode && echo "$script_name using lib dir: $lib_dir" +else + $verbose_mode && echo "$script_name creating lib dir: $lib_dir" + /bin/mkdir "$lib_dir" + exit_status=$? + if [ $exit_status -ne 0 ]; then + echo "ERROR: $script_name failed to create lib dir: $lib_dir" >&2 + exit $exit_status + fi +fi + +# initialize bin dir +while read script_file; do + $verbose_mode && echo "$script_name copying executable file: $script_file" + /bin/cp $script_file $bin_dir + exit_status=$? 
#######################################################################
# A compatibility wrapper around the date command.
#
# This script refers to the "canonical dateTime string format" given by:
#
#   YYYY-MM-DDThh:mm:ssZ
#
# where "T" and "Z" are literals. Such a date is implicitly a UTC
# dateTime string.
#
# Every function writes its result to stdout and diagnostics to stderr.
# On failure the exit status of date(1) is returned (or 1 for a missing
# argument or an unsupported OS).
#
# This script is compatible with Mac OS and GNU/Linux.
#######################################################################

# today's date (UTC) in canonical string format (YYYY-MM-DD)
date_today () {
  local dateStr
  local exit_status

  dateStr=$( /bin/date -u +%Y-%m-%d )
  exit_status=$?
  if [ $exit_status -ne 0 ]; then
    echo "ERROR: ${0##*/}:date_today failed to produce date string" >&2
    return $exit_status
  fi

  printf '%s\n' "$dateStr"
  return 0
}

# NOW in locale-specific string format
dateTime_now_locale () {
  local dateStr
  local exit_status

  dateStr=$( /bin/date )
  exit_status=$?
  if [ $exit_status -ne 0 ]; then
    echo "ERROR: ${0##*/}:dateTime_now_locale failed to produce date string" >&2
    return $exit_status
  fi

  # quoted, so that space-padded fields (e.g. "Jan  5") are not collapsed
  printf '%s\n' "$dateStr"
  return 0
}

# NOW in canonical dateTime string format
dateTime_now_canonical () {
  local dateStr
  local exit_status

  dateStr=$( /bin/date -u +%Y-%m-%dT%TZ )
  exit_status=$?
  if [ $exit_status -ne 0 ]; then
    echo "ERROR: ${0##*/}:dateTime_now_canonical failed to produce date string" >&2
    return $exit_status
  fi

  printf '%s\n' "$dateStr"
  return 0
}

# on a 32-bit system, the maximum representable dateTime in canonical string format
# (2^31 - 1 seconds past the epoch)
dateTime_max32_canonical () {
  echo 2038-01-19T03:14:07Z
  return 0
}

# convert openssl dateTime string to canonical dateTime string
#   $1 - dateTime string as printed by openssl (REQUIRED)
dateTime_openssl2canonical () {
  local in_date="$1"
  if [ -z "${in_date}" ]; then
    echo "ERROR: ${0##*/}:dateTime_openssl2canonical requires command-line arg" >&2
    return 1
  fi

  local dateStr
  local exit_status
  if [[ ${OSTYPE} = darwin* ]]; then
    dateStr=$( /bin/date -ju -f "%b %e %T %Y GMT" "${in_date}" +%Y-%m-%dT%TZ )
    exit_status=$?
  elif [[ ${OSTYPE} = linux* ]]; then
    # GNU date(1) understands openssl implicitly
    dateStr=$( /bin/date -u -d "${in_date}" +%Y-%m-%dT%TZ )
    exit_status=$?
  else
    echo "Error: OS not supported: ${OSTYPE}" >&2
    return 1
  fi

  if [ $exit_status -ne 0 ]; then
    echo "ERROR: ${0##*/}:dateTime_openssl2canonical failed to convert date string ${in_date}" >&2
    return $exit_status
  fi

  printf '%s\n' "$dateStr"
  return 0
}

# convert apache dateTime string to canonical dateTime string
#   $1 - dateTime string of the form "Thu, 01 Jan 1970 00:00:00 GMT" (REQUIRED)
dateTime_apache2canonical () {
  local in_date="$1"
  if [ -z "${in_date}" ]; then
    echo "ERROR: ${0##*/}:dateTime_apache2canonical requires command-line arg" >&2
    return 1
  fi

  local dateStr
  local exit_status
  if [[ ${OSTYPE} = darwin* ]]; then
    dateStr=$( /bin/date -ju -f "%a, %e %b %Y %T GMT" "${in_date}" +%Y-%m-%dT%TZ )
    exit_status=$?
  elif [[ ${OSTYPE} = linux* ]]; then
    # GNU date(1) understands apache implicitly UNTESTED
    dateStr=$( /bin/date -u -d "${in_date}" +%Y-%m-%dT%TZ )
    exit_status=$?
  else
    echo "Error: OS not supported: ${OSTYPE}" >&2
    return 1
  fi

  if [ $exit_status -ne 0 ]; then
    echo "ERROR: ${0##*/}:dateTime_apache2canonical failed to convert date string ${in_date}" >&2
    return $exit_status
  fi

  printf '%s\n' "$dateStr"
  return 0
}

# convert canonical dateTime string to seconds past the epoch
#   $1 - canonical dateTime string (REQUIRED)
dateTime_canonical2secs () {
  local in_date="$1"
  if [ -z "${in_date}" ]; then
    echo "ERROR: ${0##*/}:dateTime_canonical2secs requires command-line arg" >&2
    return 1
  fi

  local secs
  local exit_status
  local gnu_date
  local canonical_re='^([^T]*)T([^Z]*)Z$'
  if [[ ${OSTYPE} = darwin* ]]; then
    secs=$( /bin/date -ju -f %Y-%m-%dT%TZ "${in_date}" +%s )
    exit_status=$?
  elif [[ ${OSTYPE} = linux* ]]; then
    # The GNU date(1) command will not parse a "canonical dateTime
    # string" so we convert the input string to a string that the
    # GNU date(1) command will understand: 'YYYY-MM-DD hh:mm:ss UTC'
    # (input that is not in canonical form is passed through unchanged)
    gnu_date="${in_date}"
    if [[ ${in_date} =~ $canonical_re ]]; then
      gnu_date="${BASH_REMATCH[1]} ${BASH_REMATCH[2]} UTC"
    fi
    secs=$( /bin/date -u -d "${gnu_date}" +%s )
    exit_status=$?
  else
    echo "Error: OS not supported: ${OSTYPE}" >&2
    return 1
  fi

  if [ $exit_status -ne 0 ]; then
    # report the caller's original string, not the converted one
    echo "ERROR: ${0##*/}:dateTime_canonical2secs failed to convert date string ${in_date}" >&2
    return $exit_status
  fi

  printf '%s\n' "$secs"
  return 0
}

# convert seconds past the epoch to canonical dateTime string
#   $1 - seconds past the epoch (REQUIRED)
dateTime_secs2canonical () {
  local in_secs="$1"
  if [ -z "${in_secs}" ]; then
    echo "ERROR: ${0##*/}:dateTime_secs2canonical requires command-line arg" >&2
    return 1
  fi

  local dateStr
  local exit_status
  if [[ ${OSTYPE} = darwin* ]]; then
    dateStr=$( /bin/date -ju -r "${in_secs}" +%Y-%m-%dT%TZ )
    exit_status=$?
  elif [[ ${OSTYPE} = linux* ]]; then
    dateStr=$( /bin/date -u -d "1970-01-01 ${in_secs} seconds" +%Y-%m-%dT%TZ )
    exit_status=$?
  else
    echo "Error: OS not supported: ${OSTYPE}" >&2
    return 1
  fi

  if [ $exit_status -ne 0 ]; then
    echo "ERROR: ${0##*/}:dateTime_secs2canonical failed to convert seconds ${in_secs}" >&2
    return $exit_status
  fi

  printf '%s\n' "$dateStr"
  return 0
}
# has this file already been sourced?
if [ "$(type -t make_temp_file)" = function ]; then
  return 0
fi

#######################################################################
# A simple compatibility wrapper around the mktemp command.
#
# Usage:
#   $ make_temp_file [-d] [PREFIX]
#
# By default, creates a temporary file (use the -d option to
# create a directory). Takes an optional prefix argument that
# is used to construct the temporary file (or directory) name
# (defaults to "temp" if the argument is omitted or empty).
#
# Outputs: the path of the new file (or directory) on stdout
# Returns: 0 on success; 1 on a usage error, an unsupported OS, or
#          a missing mktemp command; otherwise the status of mktemp
#
# This script is compatible with Mac OS and GNU/Linux.
#######################################################################

make_temp_file () {
  local prefix
  local _path_mktemp
  local mktemp_arg
  local temp_file
  local return_code

  # process command-line options (if any)
  # ("opt" is local so the caller's own getopts loop is not disturbed;
  # the leading ":" selects silent mode so OPTARG names the bad option)
  local opt
  local OPTARG
  local OPTIND=1
  local local_opts=
  while getopts ":d" opt; do
    case $opt in
      d)
        local_opts=-d
        ;;
      \?)
        echo "ERROR: $FUNCNAME: Unrecognized option: -$OPTARG" >&2
        return 1
        ;;
    esac
  done

  # determine the prefix
  shift $(( OPTIND - 1 ))
  prefix="${1:-temp}"

  if [[ ${OSTYPE} = darwin* ]]; then
    _path_mktemp=/usr/bin/mktemp
    # on Mac OS, mktemp takes a prefix
    mktemp_arg="${prefix}"
  elif [[ ${OSTYPE} = linux* ]]; then
    _path_mktemp=/bin/mktemp
    # on Linux, mktemp takes a template
    mktemp_arg="${prefix}.XXXXXXXX"
  else
    echo "ERROR: OS not supported: ${OSTYPE}" >&2
    return 1
  fi

  # the well-known location is preferred, but not every distribution
  # installs mktemp there, so fall back to a PATH lookup
  if [ ! -x "${_path_mktemp}" ]; then
    _path_mktemp=$( command -v mktemp )
    if [ -z "${_path_mktemp}" ]; then
      echo "ERROR: $FUNCNAME: mktemp command not found" >&2
      return 1
    fi
  fi

  # create temporary file
  # (${local_opts} is intentionally unquoted: it is either empty or "-d")
  temp_file=$( "${_path_mktemp}" ${local_opts} -t "${mktemp_arg}" )
  return_code=$?
  if [ $return_code -ne 0 ]; then
    echo "ERROR: $FUNCNAME: failed to make temp file" >&2
    return $return_code
  fi

  printf '%s\n' "$temp_file"
  return 0
}
#######################################################################
#
# Request a resource using HTTP Conditional GET [RFC 7232] if possible.
# If the server responds with 200, cache the resource and return the
# response body. If the server responds with 304, leave the cache as
# it is and return nothing (unless option -F is given).
#
# Usage: conditional_get [-v] [-F | -C] -d CACHE_DIR -t TMP_DIR HTTP_LOCATION
#
# This function requires two option arguments (CACHE_DIR and TMP_DIR)
# and a command-line argument (HTTP_LOCATION). The rest of the command
# line is optional.
#
# Options:
#   -v  verbose mode (messages are appended to $TMP_DIR/conditional_get_log)
#   -F  force output the HTTP response body even if 304 response
#   -C  check the cache only (do not request the resource)
#   -d  the cache directory (REQUIRED)
#   -t  a temporary directory (REQUIRED)
#
# Outputs: the resource content on stdout (see above)
# Returns: 0 on success; 2 on a usage error or missing library;
#          1 on a cache miss in -C mode, a failed sanity check, or an
#          unexpected HTTP response code; otherwise the exit status of
#          the command that failed (e.g. curl).
#
# The output of the curl command-line tool is stored in the following
# temporary files:
#
#   $TMP_DIR/conditional_get_curl_headers
#   $TMP_DIR/conditional_get_curl_content
#   $TMP_DIR/conditional_get_curl_stderr
#
# This function requires the following library files:
#
#   command_paths.sh
#   http_tools.sh
#
# These library files must be sourced BEFORE calling this function.
#
# TODO:
#   - follow redirects?
#
# This script is compatible with Mac OS and GNU/Linux.
#######################################################################

conditional_get () {

  # ${COMMAND_PATHS:-false}: if the variable were unset, a bare
  # $COMMAND_PATHS would expand to an empty command, which succeeds,
  # and the missing library would go unnoticed
  if ! ${COMMAND_PATHS:-false}; then
    echo "ERROR: global command paths not found" >&2
    return 2
  fi

  local script_version="0.5"
  local user_agent_string="HTTP Conditional GET client $script_version"

  local hash
  local exit_code
  local cached_header_file
  local cached_content_file
  local conditional_get_mode
  local tmp_header_file
  local tmp_content_file
  local tmp_stderr_file
  local -a curl_args
  local do_conditional_get
  local header_value
  local response_code
  local declared_content_length
  local actual_content_length

  local verbose_mode=false
  local force_output_mode=false
  local cache_only_mode=false
  local cache_dir
  local tmp_dir
  local tmp_log_file
  local location

  local opt
  local OPTARG
  local OPTIND=1
  while getopts ":vFCd:t:" opt; do
    case $opt in
      v)
        verbose_mode=true
        ;;
      F)
        # -F and -C are mutually exclusive: the last one given wins
        force_output_mode=true
        cache_only_mode=false
        ;;
      C)
        cache_only_mode=true
        force_output_mode=false
        ;;
      d)
        cache_dir="$OPTARG"
        ;;
      t)
        tmp_dir="$OPTARG"
        ;;
      \?)
        echo "ERROR: $FUNCNAME: Unrecognized option: -$OPTARG" >&2
        return 2
        ;;
      :)
        echo "ERROR: $FUNCNAME: Option -$OPTARG requires an argument" >&2
        return 2
        ;;
    esac
  done

  # a temporary directory is required
  if [ -z "$tmp_dir" ]; then
    echo "ERROR: $FUNCNAME: no temporary directory specified" >&2
    return 2
  fi
  if [ ! -d "$tmp_dir" ]; then
    echo "ERROR: $FUNCNAME: directory does not exist: $tmp_dir" >&2
    return 2
  fi
  tmp_log_file="$tmp_dir/${FUNCNAME}_log"
  $verbose_mode && echo "$FUNCNAME using temporary directory $tmp_dir" >> "$tmp_log_file"

  # a cache directory is required
  if [ -z "$cache_dir" ]; then
    echo "ERROR: $FUNCNAME: no cache directory specified" >&2
    return 2
  fi
  if [ ! -d "$cache_dir" ]; then
    echo "ERROR: $FUNCNAME: directory does not exist: $cache_dir" >&2
    return 2
  fi
  $verbose_mode && echo "$FUNCNAME using cache directory $cache_dir" >> "$tmp_log_file"

  # determine the URL location
  shift $(( OPTIND - 1 ))
  if [ $# -ne 1 ]; then
    echo "ERROR: $FUNCNAME: wrong number of arguments: $# (1 required)" >&2
    return 2
  fi
  location="$1"
  if [ -z "$location" ]; then
    echo "ERROR: $FUNCNAME: empty URL argument" >&2
    return 2
  fi
  $verbose_mode && echo "$FUNCNAME using location $location" >> "$tmp_log_file"

  #######################################################################
  #
  # Determine the cache files (which may or may not exist at this point)
  #
  # This cache implementation uses separate files for the header and
  # body content. Does it make sense to cache a single file instead?
  # Also, should we use SHA-1 instead of MD5?
  #
  #######################################################################

  hash=$( printf '%s' "$location" \
    | /usr/bin/openssl dgst -md5 -hex \
    | $_CUT -d' ' -f2
  )
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $FUNCNAME failed to hash the location URL" >&2
    return $exit_code
  fi
  # $? above reflects only the last stage of the pipeline, so a failed
  # openssl is detected by its (empty) output instead; an empty hash
  # would make every URL share the same pair of cache files
  if [ -z "$hash" ]; then
    echo "ERROR: $FUNCNAME failed to hash the location URL" >&2
    return 1
  fi

  cached_header_file="$cache_dir/${hash}_headers"
  cached_content_file="$cache_dir/${hash}_content"

  if $verbose_mode; then
    echo "$FUNCNAME using cached header file: ${cached_header_file}" >> "$tmp_log_file"
    echo "$FUNCNAME using cached content file: ${cached_content_file}" >> "$tmp_log_file"
  fi

  # check if the resource is cached
  if [ -f "$cached_header_file" ] && [ -f "$cached_content_file" ]; then
    if $cache_only_mode; then
      /bin/cat "$cached_content_file"
      return 0
    fi
    conditional_get_mode=true
  else
    # ensure cache integrity (never keep one file without the other)
    /bin/rm -f "$cached_header_file" "$cached_content_file" >&2
    if $cache_only_mode; then
      echo "ERROR: $FUNCNAME failed to find cached resource: $location" >&2
      return 1
    fi
    conditional_get_mode=false
  fi

  #######################################################################
  #
  # Initialization
  #
  #######################################################################

  tmp_header_file="$tmp_dir/${FUNCNAME}_curl_headers"
  tmp_content_file="$tmp_dir/${FUNCNAME}_curl_content"
  tmp_stderr_file="$tmp_dir/${FUNCNAME}_curl_stderr"

  if $verbose_mode; then
    echo "$FUNCNAME using temp header file: ${tmp_header_file}" >> "$tmp_log_file"
    echo "$FUNCNAME using temp content file: ${tmp_content_file}" >> "$tmp_log_file"
    echo "$FUNCNAME using temp stderr file: ${tmp_stderr_file}" >> "$tmp_log_file"
  fi

  # discard leftovers from an earlier request that used the same
  # temporary directory, so they cannot be mistaken for this response
  /bin/rm -f "$tmp_header_file" "$tmp_content_file" "$tmp_stderr_file" >&2

  #######################################################################
  #
  # GET the web resource
  #
  # This implementation issues an HTTP Conditional GET request iff
  # the resource is cached.
  #
  #######################################################################

  # init curl command-line options
  # (the command line is built as an array and executed directly; it is
  # never passed through eval, so characters such as "&", ";" or quotes
  # in the URL or in a cached header value reach curl unchanged)
  curl_args=()
  if $verbose_mode; then
    curl_args+=( --verbose --progress-bar )
  else
    curl_args+=( --silent --show-error )
  fi
  curl_args+=( --user-agent "$user_agent_string" )
  curl_args+=( --dump-header "$tmp_header_file" )
  curl_args+=( --output "$tmp_content_file" )
  curl_args+=( --stderr "$tmp_stderr_file" )

  # If the resource is cached, add a conditional GET header.
  # Since "A recipient MUST ignore If-Modified-Since if the
  # request contains an If-None-Match header field," the
  # latter takes precedence in the following code block.
  do_conditional_get=false
  if $conditional_get_mode; then
    header_value=$( get_header_value "$cached_header_file" 'ETag' )
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      echo "ERROR: $FUNCNAME: get_header_value failed to initialize" >&2
      return $exit_code
    fi
    if [ -n "$header_value" ]; then
      do_conditional_get=true
      curl_args+=( --header "If-None-Match: $header_value" )
    else
      header_value=$( get_header_value "$cached_header_file" 'Last-Modified' )
      exit_code=$?
      if [ $exit_code -ne 0 ]; then
        echo "ERROR: $FUNCNAME: get_header_value failed to initialize" >&2
        return $exit_code
      fi
      if [ -n "$header_value" ]; then
        do_conditional_get=true
        curl_args+=( --header "If-Modified-Since: $header_value" )
      fi
    fi
  fi

  # --url keeps curl from treating a location that starts with "-"
  # as an option
  curl_args+=( --url "$location" )

  # invoke curl
  if $verbose_mode; then
    {
      printf '%s issuing curl command: /usr/bin/curl' "$FUNCNAME"
      printf ' %q' "${curl_args[@]}"
      printf '\n'
    } >> "$tmp_log_file"
  fi
  /usr/bin/curl "${curl_args[@]}"
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $FUNCNAME: curl failed ($exit_code)" >&2
    return $exit_code
  fi

  #######################################################################
  #
  # Process the response
  #
  # This cache implementation always tries to cache a 200 response.
  # What if the response contains a "no-store" cache directive?
  #
  #######################################################################

  # sanity check
  if [ ! -f "$tmp_header_file" ]; then
    echo "ERROR: $FUNCNAME unable to find header file $tmp_header_file" >&2
    return 1
  fi

  response_code=$( get_response_code "$tmp_header_file" )
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $FUNCNAME: get_response_code failed to initialize" >&2
    return $exit_code
  fi
  # %s, not %d: the value comes from the network and may not be a number
  $verbose_mode && printf '%s received response code: %s\n' "$FUNCNAME" "$response_code" >> "$tmp_log_file"

  if [ "$response_code" = "200" ]; then

    # curl may not create the output file for an empty response body
    if [ ! -f "$tmp_content_file" ]; then
      : > "$tmp_content_file"
    fi

    # sanity check
    declared_content_length=$( get_header_value "$tmp_header_file" 'Content-Length' )
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      echo "ERROR: $FUNCNAME: get_header_value failed to initialize" >&2
      return $exit_code
    fi
    actual_content_length=$( /usr/bin/wc -c < "$tmp_content_file" )
    # some wc implementations pad the count with blanks
    actual_content_length=${actual_content_length//[[:space:]]/}
    if [ -n "$declared_content_length" ]; then
      if [ "$declared_content_length" != "$actual_content_length" ]; then
        echo "ERROR: $FUNCNAME failed content length check" >&2
        return 1
      fi
    else
      echo "WARNING: Content-Length response header missing" >&2
    fi

    if $verbose_mode; then
      echo "$FUNCNAME downloaded ${actual_content_length} bytes" >> "$tmp_log_file"
      if $do_conditional_get; then
        echo "$FUNCNAME refreshing cache files" >> "$tmp_log_file"
      else
        echo "$FUNCNAME initializing cache files" >> "$tmp_log_file"
      fi
    fi

    # update the cache; maintain cache integrity at all times
    # (if either copy fails, both cache files are removed)
    /bin/cp -f "$tmp_header_file" "$cached_header_file" >&2
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      /bin/rm -f "$cached_header_file" "$cached_content_file" >&2
      echo "ERROR: $FUNCNAME failed copy to file $cached_header_file" >&2
      return $exit_code
    fi
    /bin/cp -f "$tmp_content_file" "$cached_content_file" >&2
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      /bin/rm -f "$cached_header_file" "$cached_content_file" >&2
      echo "ERROR: $FUNCNAME failed copy to file $cached_content_file" >&2
      return $exit_code
    fi

  elif [ "$response_code" = "304" ]; then
    $verbose_mode && echo "$FUNCNAME downloaded 0 bytes (cache is up-to-date)" >> "$tmp_log_file"
  else
    echo "ERROR: $FUNCNAME failed with HTTP response code $response_code" >&2
    return 1
  fi

  #######################################################################
  #
  # output the resource content
  #
  #######################################################################

  if [ "$response_code" = "200" ] || $force_output_mode; then
    /bin/cat "${cached_content_file}"
  fi

  return 0
}
# has this file already been sourced?
if [ "$(type -t get_response_code)" = function ]; then
  return 0
fi

#######################################################################
#
# This function takes a file containing an HTTP response header and
# returns the HTTP response code.
#
# Usage: get_response_code FILE
#
# Outputs: the status code taken from the first line of FILE
# Returns: 0 on success, 2 on a usage error or missing library
#
# This function requires the following library files:
#
#   command_paths.sh
#
# These library files must be sourced BEFORE calling this function.
#
#######################################################################

get_response_code () {

  # ${COMMAND_PATHS:-false}: a bare $COMMAND_PATHS expands to an empty
  # command when the variable is unset, and an empty command succeeds
  if ! ${COMMAND_PATHS:-false}; then
    echo "ERROR: global command paths not found" >&2
    return 2
  fi

  # check the number of arguments
  if [ $# -ne 1 ]; then
    echo "ERROR: incorrect number of arguments: $# (1 required)" >&2
    return 2
  fi

  # make sure the file exists
  if [ ! -f "$1" ]; then
    echo "ERROR: file does not exist: $1" >&2
    return 2
  fi

  # extract the response code from the status line; only the digits
  # after the first space are kept, so a status line without a reason
  # phrase ("HTTP/1.1 304") and the trailing CR are handled as well
  /usr/bin/head -n 1 "$1" \
    | $_SED -e 's/^[^ ]* \([0-9][0-9]*\).*$/\1/'

  return 0
}

#######################################################################
#
# This function takes a file containing an HTTP response header and
# a header name, and then returns the header value (if any).
#
# Usage: get_header_value FILE HEADER_NAME
#
# Outputs: the value of every header line named HEADER_NAME, one per
#          line, without surrounding whitespace (nothing if absent)
# Returns: 0 on success, 2 on a usage error or missing library
#
# This function requires the following library files:
#
#   command_paths.sh
#
# These library files must be sourced BEFORE calling this function.
#
#######################################################################

get_header_value () {

  if ! ${COMMAND_PATHS:-false}; then
    echo "ERROR: global command paths not found" >&2
    return 2
  fi

  # check the number of arguments
  if [ $# -ne 2 ]; then
    echo "ERROR: incorrect number of arguments: $# (2 required)" >&2
    return 2
  fi

  # make sure the file exists
  if [ ! -f "$1" ]; then
    echo "ERROR: file does not exist: $1" >&2
    return 2
  fi

  # extract the desired value from the header
  #  - header field names are case-insensitive (HTTP/2 responses are
  #    dumped by curl in lower case), hence grep -i
  #  - whitespace after the colon is optional
  #  - [[:space:]] also matches the CR that ends each header line
  # ($_SEDEXT is intentionally unquoted: it may hold a command + option)
  $_GREP -i -- "^$2:" "$1" \
    | $_SEDEXT -e 's/^[^:]+:[[:space:]]*//' -e 's/[[:space:]]*$//'

  return 0
}

# URL encoder/decoder
# see: https://gist.github.com/cdown/1163649

# Percent-encode the string $1 (space becomes "+") and print the result
# without a trailing newline.
url_encode () {
  # byte-wise processing: a multibyte character is encoded as the
  # sequence of its bytes, and the bracket expression below matches
  # ASCII letters and digits only
  local LC_ALL=C
  local c
  local i
  local l=${#1}

  for (( i = 0 ; i < l ; i++ )); do
    c=${1:i:1}
    case "$c" in
      [a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
      ' ') printf '+' ;;
      # %02X: a percent-escape always has exactly two hex digits
      *) printf '%%%02X' "'$c" ;;
    esac
  done

  return 0
}

# Decode a percent-encoded string $1 ("+" becomes a space) and print
# the result without a trailing newline.
url_decode () {
  local data=${1//+/ }
  # turn every "%XX" into "\xXX" and let printf %b expand the escapes
  printf '%b' "${data//%/\\x}"
}
#
# Usage: getEntityFromFile -f MD_PATH ID
#
# MD_PATH is fed to xsltproc together with $LIB_DIR/extract_entity.xsl
# and the entityID parameter set to ID; whatever the stylesheet emits
# is written to stdout.
#
# Returns 0 on success, 1 if the transform produced no output for ID,
# and 2 on a usage or environment error (missing option, missing file,
# wrong argument count, LIB_DIR unset or not a directory).
#
# A return code > 1 is a serious error worthy of early termination
#######################################################################
getEntityFromFile () {

	# Fail closed: an unset or empty COMMAND_PATHS would otherwise expand
	# to an empty command, which succeeds and silently skips this check.
	if ! ${COMMAND_PATHS:-false}; then
		echo "ERROR: global command paths not found" >&2
		return 2
	fi

	local md_path
	local entityID
	local entityDescriptor

	# getopts state is kept local so the caller's parsing is not disturbed
	local opt
	local OPTARG
	local OPTIND
	while getopts ":f:" opt; do
		case $opt in
			f)
				md_path="$OPTARG"
				;;
			\?)
				echo "ERROR: $FUNCNAME: Unrecognized option: -$OPTARG" >&2
				return 2
				;;
			:)
				echo "ERROR: $FUNCNAME: Option -$OPTARG requires an argument" >&2
				return 2
				;;
		esac
	done

	# a metadata file is REQUIRED
	if [ -z "$md_path" ]; then
		echo "ERROR: $FUNCNAME: MD_PATH (option -f) does not exist" >&2
		return 2
	fi
	if [ ! -f "$md_path" ]; then
		echo "ERROR: $FUNCNAME: file does not exist: $md_path" >&2
		return 2
	fi

	# make sure there is one (and only one) command-line argument
	shift $(( OPTIND - 1 ))
	if [ $# -ne 1 ]; then
		echo "ERROR: $FUNCNAME: incorrect number of arguments: $# (1 required)" >&2
		return 2
	fi
	entityID=$1

	# determine the source lib directory
	if [ -z "$LIB_DIR" ]; then
		echo "ERROR: $FUNCNAME requires env var LIB_DIR" >&2
		return 2
	fi
	if [ ! -d "$LIB_DIR" ]; then
		echo "ERROR: $FUNCNAME: directory does not exist: $LIB_DIR" >&2
		return 2
	fi

	# get the entity descriptor from the metadata file using parameterized
	# xslt; everything is quoted because paths may contain spaces and an
	# entityID may contain glob characters such as "?" or "*"
	entityDescriptor=$( /usr/bin/xsltproc \
		--stringparam entityID "$entityID" \
		"$LIB_DIR/extract_entity.xsl" - < "$md_path"
	)

	if [ -z "$entityDescriptor" ]; then
		echo "ERROR: $FUNCNAME: no entity descriptor for entityID: $entityID" >&2
		return 1
	fi

	printf '%s\n' "$entityDescriptor"
}

#######################################################################
# Get entity metadata from a Metadata Query Server.
#
# Usage: getEntityFromServer -d TMP_DIR -u MDQ_BASE_URL ID
#
# A temporary file containing the HTTP response is created in TMP_DIR.
# One line of curl timings per request is appended to a log file in
# TMP_DIR as well. On HTTP 200 the response body is written to stdout.
#
# Returns 0 on success, 1 if the server answered with anything other
# than 200, 2 on a usage error, and 3 if URL encoding, URL construction
# or the curl transfer itself failed.
#
# This function calls urlencode and construct_mdq_url and uses $_SED.
#
# A return code > 1 is a serious error worthy of early termination
#######################################################################
getEntityFromServer () {

	# Fail closed when COMMAND_PATHS is unset or empty (an empty command
	# would succeed and skip the check).
	if ! ${COMMAND_PATHS:-false}; then
		echo "ERROR: global command paths not found" >&2
		return 2
	fi

	local tmp_dir
	local mdq_base_url
	local tmp_response_file
	local entityID
	local encoded_id
	local return_status
	local mdq_request_url
	local output
	local exit_code
	local response_code

	# getopts state is kept local so the caller's parsing is not disturbed
	local opt
	local OPTARG
	local OPTIND
	while getopts ":d:u:" opt; do
		case $opt in
			d)
				tmp_dir="$OPTARG"
				;;
			u)
				mdq_base_url="$OPTARG"
				;;
			\?)
				echo "ERROR: $FUNCNAME: Unrecognized option: -$OPTARG" >&2
				return 2
				;;
			:)
				echo "ERROR: $FUNCNAME: Option -$OPTARG requires an argument" >&2
				return 2
				;;
		esac
	done

	if [ -z "$mdq_base_url" ]; then
		echo "ERROR: $FUNCNAME: MDQ_BASE_URL (option -u) does not exist" >&2
		return 2
	fi

	if [ -z "$tmp_dir" ]; then
		echo "ERROR: $FUNCNAME: TMP_DIR (option -d) does not exist" >&2
		return 2
	fi
	if [ ! -d "$tmp_dir" ]; then
		echo "ERROR: $FUNCNAME: temp directory does not exist: $tmp_dir" >&2
		return 2
	fi
	tmp_response_file="$tmp_dir/mdq_response_$$.txt"
	# NOTE(review): tmp_log_file is not declared local, so it is visible to
	# the caller after this function returns; left as-is in case callers
	# depend on it -- confirm before localizing.
	tmp_log_file="$tmp_dir/mdq_log_$$.txt"

	# make sure there is one (and only one) command-line argument
	shift $(( OPTIND - 1 ))
	if [ $# -ne 1 ]; then
		echo "ERROR: $FUNCNAME: incorrect number of arguments: $# (1 required)" >&2
		return 2
	fi
	entityID=$1

	# URL-encode the identifier
	encoded_id=$( urlencode "$entityID" )
	return_status=$?
	if [ "$return_status" -ne 0 ]; then
		echo "ERROR: $FUNCNAME: failed to URL-encode the entityID: $entityID" >&2
		return 3
	fi

	# compute the MDQ protocol request URL
	mdq_request_url=$( construct_mdq_url "$mdq_base_url" "$encoded_id" )
	return_status=$?
	if [ "$return_status" -ne 0 ]; then
		echo "ERROR: $FUNCNAME: failed to construct the request URL from the encoded entityID: $encoded_id" >&2
		return 3
	fi

	# get a single entity descriptor via the MDQ protocol; the body goes to
	# the temp file while the --write-out summary is captured in $output
	output=$( /usr/bin/curl --silent \
		--output "$tmp_response_file" \
		--write-out 'response:%{http_code};dns:%{time_namelookup};tcp:%{time_connect};pre-start:%{time_pretransfer};start:%{time_starttransfer};total:%{time_total};size:%{size_download}' \
		"$mdq_request_url"
	)
	exit_code=$?
	if [ "$exit_code" -ne 0 ]; then
		echo "ERROR: $FUNCNAME: curl failed (exit code $exit_code $output): $mdq_request_url" >&2
		return 3
	fi

	# log timings
	echo "$exit_code $output $entityID" >> "$tmp_log_file"

	# check the HTTP response code (first field of the --write-out summary)
	response_code=$( echo "$output" | $_SED -e 's/^response:\([^;]*\).*$/\1/' )
	if [[ "$response_code" != 200 ]]; then
		echo "ERROR: $FUNCNAME: query failed (response code $response_code): $mdq_request_url" >&2
		return 1
	fi

	/bin/cat "$tmp_response_file"
}

#######################################################################
# Construct a request URL per the MDQ Protocol specification.
# See: https://github.com/iay/md-query
#
# Usage: construct_mdq_url BASE_URL [ENCODED_ID]
#
# Writes "BASE_URL/entities/ENCODED_ID" to stdout, with any trailing
# slashes removed from BASE_URL first. ENCODED_ID is appended verbatim,
# so it must already be URL-encoded.
#
# To construct a reference to ALL entities served by the
# metadata query server, simply omit the second argument
#
# Returns 2 if the number of arguments is not 1 or 2.
#######################################################################
construct_mdq_url () {

	# make sure there are one or two command-line arguments
	if [ $# -lt 1 ] || [ $# -gt 2 ]; then
		echo "ERROR: $FUNCNAME: incorrect number of arguments: $# (1 or 2 required)" >&2
		return 2
	fi
	local base_url=$1

	# strip ALL trailing slashes from the base URL so that the result
	# never contains "//entities"
	while [[ "$base_url" == */ ]]; do
		base_url=${base_url%/}
	done

	# append the identifier if there is one
	if [ $# -eq 2 ]; then
		printf '%s\n' "$base_url/entities/$2"
	else
		printf '%s\n' "$base_url/entities"
	fi
}

#######################################################################
# URL-encode an arbitrary string.
# See: https://gist.github.com/cdown/1163649
#
# Usage: urlencode STRING
#
# Writes STRING to stdout (without a trailing newline) with every
# character outside [a-zA-Z0-9.~_-] replaced by "%" followed by two
# uppercase hex digits.
#
# Returns 2 if the number of arguments is not exactly 1.
#
# NOTE(review): the hex value comes from printf "'c", which presumably
# yields a code point rather than UTF-8 bytes for non-ASCII input in a
# multibyte locale -- confirm if non-ASCII identifiers must be supported.
#######################################################################
urlencode () {

	# make sure there is one (and only one) command-line argument
	if [ $# -ne 1 ]; then
		echo "ERROR: $FUNCNAME: incorrect number of arguments: $# (1 required)" >&2
		return 2
	fi

	local length="${#1}"
	local c
	local i        # keep the loop index out of the caller's scope
	for (( i = 0; i < length; i++ )); do
		c="${1:i:1}"
		case "$c" in
			[a-zA-Z0-9.~_-]) printf '%s' "$c" ;;
			*) printf '%%%02X' "'$c" ;;
		esac
	done
}