#!/bin/bash

#######################################################################
# Copyright 2016 Tom Scavo
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#######################################################################

# has this file already been sourced?
if [ "$(type -t conditional_get)" = function ]; then
  return 0
fi

#######################################################################
#
# Given a web resource and a cache, if the resource is cached, request
# the resource using HTTP Conditional GET [RFC 7232], otherwise issue
# an ordinary GET request. In either case, if the server responds with
# 200, cache the resource and return the response body. If the server
# responds with 304, return the cached resource instead.
#
# Usage: conditional_get [-v] [-F | -C] -d CACHE_DIR -t TMP_DIR HTTP_LOCATION
#
# This function requires two option arguments (CACHE_DIR and TMP_DIR)
# and a command-line argument (HTTP_LOCATION). The rest of the command
# line is optional.
#
# Options:
#   -v  verbose mode
#   -F  force the return of a fresh resource
#   -C  check the cache only
#   -d  the cache directory (REQUIRED)
#   -t  a temporary directory (REQUIRED)
#
# Use option -F or -C to alter the default behavior of the function.
#
# Option -F forces the return of a fresh resource, that is, if the
# server responds with 304, an error occurs and the function returns
# with a nonzero return code.
#
# Option -C causes the function to go directly to cache. No GET request
# is issued. This option is useful in offline mode.
#
# If both -F and -C are given, the one that appears last wins.
#
# The output of the curl command-line tool is stored in the following
# temporary files:
#
#   $TMP_DIR/conditional_get_curl_headers
#   $TMP_DIR/conditional_get_curl_content
#   $TMP_DIR/conditional_get_curl_stderr
#
# In verbose mode (-v), progress messages are appended to:
#
#   $TMP_DIR/conditional_get_log
#
# Globals (read):
#   COMMAND_PATHS  must be set to a command that succeeds (e.g. "true")
#
# Outputs:
#   The resource content on stdout; diagnostics on stderr.
#
# Returns:
#   0  success (resource content written to stdout)
#   1  resource not cached (-C), unexpected HTTP response code,
#      Content-Length mismatch, missing curl output, or a 304
#      response in -F mode
#   2  usage error or missing COMMAND_PATHS
#   Otherwise the nonzero exit code of the failing helper (openssl,
#   curl, cp, get_header_value, get_response_code) is passed through.
#
# This function requires the following library files:
#
#   command_paths.sh
#   http_tools.sh
#
# These library files must be sourced BEFORE calling this function.
# (get_header_value and get_response_code are called below and are
# presumably provided by http_tools.sh.)
#
# TODO:
#   - follow redirects?
#
# This script is compatible with both Mac OS and GNU/Linux.
#######################################################################

conditional_get () {

  # An unset or empty COMMAND_PATHS would expand to an empty command,
  # which "succeeds", so test for that case explicitly before running it.
  if [ -z "${COMMAND_PATHS:-}" ] || ! $COMMAND_PATHS; then
    echo "ERROR: global command paths not found" >&2
    return 2
  fi

  local script_version="0.6"
  local user_agent_string="HTTP Conditional GET client $script_version"

  local hash
  local digest
  local exit_code
  local cached_header_file
  local cached_content_file
  local conditional_get_mode
  local tmp_header_file
  local tmp_content_file
  local tmp_stderr_file
  local -a curl_args
  local do_conditional_get
  local header_value
  local cmd
  local response_code
  local declared_content_length
  local actual_content_length

  local verbose_mode=false
  local force_output_mode=false
  local cache_only_mode=false
  local cache_dir
  local tmp_dir
  local tmp_log_file
  local location

  local opt
  local OPTARG
  local OPTIND=1
  while getopts ":vFCd:t:" opt; do
    case "$opt" in
      v)
        verbose_mode=true
        ;;
      F)
        force_output_mode=true
        cache_only_mode=false
        ;;
      C)
        cache_only_mode=true
        force_output_mode=false
        ;;
      d)
        cache_dir="$OPTARG"
        ;;
      t)
        tmp_dir="$OPTARG"
        ;;
      \?)
        echo "ERROR: $FUNCNAME: Unrecognized option: -$OPTARG" >&2
        return 2
        ;;
      :)
        echo "ERROR: $FUNCNAME: Option -$OPTARG requires an argument" >&2
        return 2
        ;;
    esac
  done

  # a temporary directory is required
  if [ -z "$tmp_dir" ]; then
    echo "ERROR: $FUNCNAME: no temporary directory specified" >&2
    return 2
  fi
  if [ ! -d "$tmp_dir" ]; then
    echo "ERROR: $FUNCNAME: directory does not exist: $tmp_dir" >&2
    return 2
  fi
  tmp_log_file="$tmp_dir/${FUNCNAME}_log"
  $verbose_mode && echo "$FUNCNAME using temporary directory $tmp_dir" >> "$tmp_log_file"

  # a cache directory is required
  if [ -z "$cache_dir" ]; then
    echo "ERROR: $FUNCNAME: no cache directory specified" >&2
    return 2
  fi
  if [ ! -d "$cache_dir" ]; then
    echo "ERROR: $FUNCNAME: directory does not exist: $cache_dir" >&2
    return 2
  fi
  $verbose_mode && echo "$FUNCNAME using cache directory $cache_dir" >> "$tmp_log_file"

  # determine the URL location
  shift $(( OPTIND - 1 ))
  if [ $# -ne 1 ]; then
    echo "ERROR: $FUNCNAME: wrong number of arguments: $# (1 required)" >&2
    return 2
  fi
  location="$1"
  if [ -z "$location" ] ; then
    echo "ERROR: $FUNCNAME: empty URL argument" >&2
    return 2
  fi
  $verbose_mode && echo "$FUNCNAME using location $location" >> "$tmp_log_file"

  #######################################################################
  #
  # Determine the cache files (which may or may not exist at this point)
  #
  # This cache implementation uses separate files for the header and
  # body content. Does it make sense to cache a single file instead?
  # Also, should we use SHA-1 instead of MD5?
  #
  #######################################################################

  # The exit status captured here is that of openssl itself (printf is
  # a builtin and openssl is the last command of the pipeline).
  digest=$( printf '%s' "$location" | /usr/bin/openssl dgst -md5 -hex )
  exit_code=$?
  # openssl prints "(stdin)= <hex>" or "MD5(stdin)= <hex>" (or the bare
  # hex string); the digest is the last space-separated field.
  hash="${digest##* }"
  if [ $exit_code -ne 0 ] || [ -z "$hash" ]; then
    echo "ERROR: $FUNCNAME failed to hash the location URL" >&2
    # never report success when the hash came back empty
    [ $exit_code -ne 0 ] || exit_code=1
    return $exit_code
  fi

  cached_header_file="$cache_dir/${hash}_headers"
  cached_content_file="$cache_dir/${hash}_content"

  if $verbose_mode; then
    echo "$FUNCNAME using cached header file: ${cached_header_file}" >> "$tmp_log_file"
    echo "$FUNCNAME using cached content file: ${cached_content_file}" >> "$tmp_log_file"
  fi

  # check if the resource is cached
  if [ -f "$cached_header_file" ] && [ -f "$cached_content_file" ]; then
    if $cache_only_mode; then
      /bin/cat "$cached_content_file"
      return 0
    fi
    conditional_get_mode=true
  else
    # ensure cache integrity: a header file without a content file
    # (or vice versa) is useless, so remove whichever half exists
    /bin/rm -f "$cached_header_file" "$cached_content_file" >&2
    if $cache_only_mode; then
      echo "ERROR: $FUNCNAME failed to find cached resource: $location" >&2
      return 1
    fi
    conditional_get_mode=false
  fi

  #######################################################################
  #
  # Initialization
  #
  #######################################################################

  tmp_header_file="$tmp_dir/${FUNCNAME}_curl_headers"
  tmp_content_file="$tmp_dir/${FUNCNAME}_curl_content"
  tmp_stderr_file="$tmp_dir/${FUNCNAME}_curl_stderr"

  if $verbose_mode; then
    echo "$FUNCNAME using temp header file: ${tmp_header_file}" >> "$tmp_log_file"
    echo "$FUNCNAME using temp content file: ${tmp_content_file}" >> "$tmp_log_file"
    echo "$FUNCNAME using temp stderr file: ${tmp_stderr_file}" >> "$tmp_log_file"
  fi

  #######################################################################
  #
  # GET the web resource
  #
  # This implementation issues an HTTP Conditional GET request iff
  # the resource is cached.
  #
  #######################################################################

  # init curl command-line options
  #
  # The arguments are collected in an array (not a string passed to
  # eval) so that URLs and header values containing shell
  # metacharacters (&, ;, ?, *, quotes, spaces) reach curl verbatim.
  if $verbose_mode; then
    curl_args=( --verbose --progress-bar )
  else
    curl_args=( --silent --show-error )
  fi
  curl_args+=( --user-agent "$user_agent_string" )
  curl_args+=( --dump-header "$tmp_header_file" )
  curl_args+=( --output "$tmp_content_file" )
  curl_args+=( --stderr "$tmp_stderr_file" )

  # If the resource is cached, add a conditional GET header.
  # Since "A recipient MUST ignore If-Modified-Since if the
  # request contains an If-None-Match header field," the
  # latter takes precedence in the following code block.
  do_conditional_get=false
  if $conditional_get_mode; then
    header_value=$( get_header_value "$cached_header_file" 'ETag' )
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      echo "ERROR: $FUNCNAME: get_header_value failed to initialize" >&2
      return $exit_code
    fi
    if [ -n "$header_value" ]; then
      do_conditional_get=true
      curl_args+=( --header "If-None-Match: $header_value" )
    else
      header_value=$( get_header_value "$cached_header_file" 'Last-Modified' )
      exit_code=$?
      if [ $exit_code -ne 0 ]; then
        echo "ERROR: $FUNCNAME: get_header_value failed to initialize" >&2
        return $exit_code
      fi
      if [ -n "$header_value" ]; then
        do_conditional_get=true
        curl_args+=( --header "If-Modified-Since: $header_value" )
      fi
    fi
  fi

  curl_args+=( "$location" )

  # invoke curl
  if $verbose_mode; then
    # %q renders each argument in a form that can be pasted into a shell
    cmd=$( printf '%q ' /usr/bin/curl "${curl_args[@]}" )
    printf '%s issuing curl command: %s\n' "$FUNCNAME" "${cmd% }" >> "$tmp_log_file"
  fi
  /usr/bin/curl "${curl_args[@]}"
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $FUNCNAME: curl failed ($exit_code)" >&2
    return $exit_code
  fi

  #######################################################################
  #
  # Process the response
  #
  # This cache implementation always tries to cache a 200 response.
  # What if the response contains a "no-store" cache directive?
  #
  #######################################################################

  # sanity check
  if [ ! -f "$tmp_header_file" ]; then
    echo "ERROR: $FUNCNAME unable to find header file $tmp_header_file" >&2
    return 1
  fi

  response_code=$( get_response_code "$tmp_header_file" )
  exit_code=$?
  if [ $exit_code -ne 0 ]; then
    echo "ERROR: $FUNCNAME: get_response_code failed to initialize" >&2
    return $exit_code
  fi
  # %s (not %d) so an unexpected non-numeric value is logged, not rejected
  $verbose_mode && printf '%s received response code: %s\n' "$FUNCNAME" "$response_code" >> "$tmp_log_file"

  if [ "$response_code" = "200" ]; then

    # sanity check
    declared_content_length=$( get_header_value "$tmp_header_file" 'Content-Length' )
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      echo "ERROR: $FUNCNAME: get_header_value failed to initialize" >&2
      return $exit_code
    fi
    if ! actual_content_length=$( /usr/bin/wc -c < "$tmp_content_file" ); then
      echo "ERROR: $FUNCNAME unable to read content file $tmp_content_file" >&2
      return 1
    fi
    # BSD wc pads its output with blanks; strip all whitespace
    actual_content_length="${actual_content_length//[[:space:]]/}"
    if [ -n "$declared_content_length" ]; then
      if [ "$declared_content_length" != "$actual_content_length" ]; then
        echo "ERROR: $FUNCNAME failed content length check" >&2
        return 1
      fi
    else
      echo "WARNING: Content-Length response header missing" >&2
    fi

    if $verbose_mode; then
      echo "$FUNCNAME downloaded ${actual_content_length} bytes" >> "$tmp_log_file"
      if $do_conditional_get; then
        echo "$FUNCNAME refreshing cache files" >> "$tmp_log_file"
      else
        echo "$FUNCNAME initializing cache files" >> "$tmp_log_file"
      fi
    fi

    # update the cache; maintain cache integrity at all times
    /bin/cp -f "$tmp_header_file" "$cached_header_file" >&2
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      /bin/rm -f "$cached_header_file" "$cached_content_file" >&2
      echo "ERROR: $FUNCNAME failed copy to file $cached_header_file" >&2
      return $exit_code
    fi
    /bin/cp -f "$tmp_content_file" "$cached_content_file" >&2
    exit_code=$?
    if [ $exit_code -ne 0 ]; then
      /bin/rm -f "$cached_header_file" "$cached_content_file" >&2
      echo "ERROR: $FUNCNAME failed copy to file $cached_content_file" >&2
      return $exit_code
    fi

  elif [ "$response_code" = "304" ]; then
    $verbose_mode && echo "$FUNCNAME downloaded 0 bytes (cache is up-to-date)" >> "$tmp_log_file"
  else
    echo "ERROR: $FUNCNAME failed with HTTP response code $response_code" >&2
    return 1
  fi

  #######################################################################
  #
  # output the resource content
  #
  #######################################################################

  if $force_output_mode && [ "$response_code" = "304" ]; then
    echo "ERROR: $FUNCNAME failed to get fresh resource: $location" >&2
    return 1
  fi

  /bin/cat "${cached_content_file}"
  return 0
}