From f3bd1dc8316b5b67f735a9da44003ab45cc80521 Mon Sep 17 00:00:00 2001 From: Tom Scavo <trscavo@internet2.edu> Date: Wed, 28 Jun 2017 16:57:24 -0400 Subject: [PATCH] Add new -c option and other improvements to command-line options --- lib/http_tools.sh | 226 +++++++++++++++++++++++++++++++--------------- 1 file changed, 152 insertions(+), 74 deletions(-) diff --git a/lib/http_tools.sh b/lib/http_tools.sh index 480d351..215a212 100755 --- a/lib/http_tools.sh +++ b/lib/http_tools.sh @@ -25,7 +25,7 @@ # response body. If the server responds with 304, return the cached # response body instead. # -# Usage: conditional_get [-vFCIx] -d CACHE_DIR -T TMP_DIR HTTP_LOCATION +# Usage: conditional_get [-vFCcx] -d CACHE_DIR -T TMP_DIR HTTP_LOCATION # # This function requires two option arguments (CACHE_DIR and TMP_DIR) # and a command-line argument (HTTP_LOCATION). The rest of the command @@ -33,49 +33,74 @@ # # Options: # -v verbose mode -# -F force the return of fresh content -# -C check the cache only -# -I get and return headers only +# -F force cache update +# -C check cache freshness +# -c cache-only mode (no network) # -x enable HTTP Compression # -d the cache directory (REQUIRED) # -T a temporary directory (REQUIRED) # -# Use option -F, -C, or -I to alter the default behavior of the -# function. Options -F and -C are mutually exclusive of each other. -# Option -I may be used with option -C (but not with option -F). +# Options -F, -C, and -c are mutually exclusive of each other. +# Options -v and -x may be used with any other option. # -# Option -F forces the return of fresh content; that is, if option -F -# is enabled, and the server responds with 304, the function quietly -# returns with a nonzero return code. +# Option -F forces the output of fresh content, that is, if option -F +# is enabled and the server responds with 200, the function returns +# normally. In that case, a cache write will occur. 
On the other hand, +# if option -F is enabled and the server responds with 304, the function +# quietly returns with a nonzero return code. See Quiet Failure Mode +# below. # -# Option -C causes the function to go directly to cache; that is, no -# HTTP request is issued. (This option is useful in offline mode.) +# Option -C outputs cached content but only if the cache is up-to-date. +# An HTTP request is issued to determine if the cache content is stale. +# If the resource is not cached or the cache is not up-to-date, the +# function quietly returns with a nonzero return code (i.e., Quiet +# Failure Mode). +# +# Option -c outputs cached content whether or not the cache is up-to-date. +# (Since no HTTP request is issued, this option is useful in offline mode.) # If the resource is not cached, the function quietly returns with a -# nonzero return code. +# nonzero return code (i.e., Quiet Failure Mode). +# +# QUIET FAILURE MODE +# +# Options -F, -C, and -c exhibit Quiet Failure Mode. If one of these +# mutually exclusive options is enabled, and a special error condition +# is detected, the function quietly returns error code 1 without emitting +# an error message of any kind. +# +# The error conditions that trigger Quiet Failure Mode are based on the +# following requirements: +# +# Option -F: the HTTP response MUST be 200 +# Option -C: the HTTP response MUST be 304 +# Option -c: the resource MUST be cached +# +# If one of the above requirements is NOT met, the function quietly +# returns error code 1. # -# Option -I issues a HEAD request instead of a GET request, in which case, -# only the response headers are returned in the output. Note that nothing -# is written to cache when option -I is used. +# Quiet Failure Mode guarantees the following: # -# If options -I and -C are used together, the cached headers are returned. -# As with option -C alone, if the resource is not cached, the function -# quietly returns with a nonzero return code. 
+# Option -F: the cache has been updated (i.e., a cache write occurred) +# Option -C: the resource is cached and the cache is up-to-date +# Option -c: the resource is cached +# +# Note that options -C and -c do not write to cache in any case. # # HTTP COMPRESSION # # Option -x adds an Accept-Encoding header to the request; that is, if # option -x is enabled, the client merely indicates its support for HTTP # Compression in the request. The server may or may not compress the -# response. This implementation does not check to see if the response -# was in fact compressed. The HTTP response header will indicate if this -# is so. +# response, and in fact, this implementation does not check to see if +# the response was compressed by the server. The HTTP response header will +# indicate if this is so. # # Important! This implementation treats compressed and uncompressed # requests for the same resource as two distinct resources. For example, # consider the following pair of function calls: # -# conditional_get $url -# conditional_get -x $url +# conditional_get ... $url +# conditional_get -x ... $url # # The above requests result in two distinct cached resources, the content # of which are identical. 
Assuming the server actually compressed the @@ -104,9 +129,10 @@ # RETURN CODES # # 0: success -# 1: quiet failure mode: -# option -F but no fresh resource available OR -# option -C but no cached resource available +# 1: Quiet Failure Mode: +# option -F but no fresh resource available +# option -C but no up-to-date cached resource available +# option -c but no cached resource available # 2: initialization failure # 3: unspecified failure # 4: hash operation failed @@ -153,32 +179,70 @@ conditional_get () { local actual_content_length local verbose_mode=false - local force_output_mode=false + local force_refresh_mode=false + local check_cache_mode=false local cache_only_mode=false - local headers_only_mode=false local compressed_mode=false local cache_dir local tmp_dir local location + # an undocumented feature + # 'conditional_get -I' === 'conditional_head' + # (the -I notation was borrowed from curl) + local conditional_head_mode=false + local opt local OPTARG local OPTIND - while getopts ":vFCIxd:T:" opt; do + while getopts ":vIFCcxd:T:" opt; do case $opt in v) verbose_mode=true ;; + I) + if $force_refresh_mode; then + echo "ERROR: $FUNCNAME: options -I and -F may not be used together" >&2 + return 2 + fi + conditional_head_mode=true + ;; F) - force_output_mode=true - cache_only_mode=false + if $conditional_head_mode; then + echo "ERROR: $FUNCNAME: options -F and -I may not be used together" >&2 + return 2 + fi + if $check_cache_mode; then + echo "ERROR: $FUNCNAME: options -F and -C may not be used together" >&2 + return 2 + fi + if $cache_only_mode; then + echo "ERROR: $FUNCNAME: options -F and -c may not be used together" >&2 + return 2 + fi + force_refresh_mode=true ;; C) - cache_only_mode=true - force_output_mode=false + if $force_refresh_mode; then + echo "ERROR: $FUNCNAME: options -C and -F may not be used together" >&2 + return 2 + fi + if $cache_only_mode; then + echo "ERROR: $FUNCNAME: options -C and -c may not be used together" >&2 + return 2 + fi + 
check_cache_mode=true ;; - I) - headers_only_mode=true + c) + if $force_refresh_mode; then + echo "ERROR: $FUNCNAME: options -c and -F may not be used together" >&2 + return 2 + fi + if $check_cache_mode; then + echo "ERROR: $FUNCNAME: options -c and -C may not be used together" >&2 + return 2 + fi + cache_only_mode=true ;; x) compressed_mode=true @@ -200,12 +264,6 @@ conditional_get () { esac done - # - if $force_output_mode && $headers_only_mode; then - echo "ERROR: $FUNCNAME: options -F and -C may not be used together" >&2 - return 2 - fi - # a temporary directory is required if [ -z "$tmp_dir" ]; then echo "ERROR: $FUNCNAME: no temporary directory specified" >&2 @@ -269,9 +327,11 @@ conditional_get () { if $compressed_mode; then cached_header_file="$cache_dir/${hash}_headers_compressed" cached_content_file="$cache_dir/${hash}_content_compressed" + adjective="compressed " else cached_header_file="$cache_dir/${hash}_headers" cached_content_file="$cache_dir/${hash}_content" + adjective= fi print_log_message -D "$FUNCNAME using cached header file: $cached_header_file" @@ -280,8 +340,9 @@ conditional_get () { # check if the resource is cached if [ -f "$cached_header_file" ] && [ -f "$cached_content_file" ]; then + # read from cache without checking resource freshness if $cache_only_mode; then - if $headers_only_mode; then + if $conditional_head_mode; then print_log_message -I "$FUNCNAME reading cached header file: $cached_header_file" /bin/cat "$cached_header_file" else @@ -302,8 +363,8 @@ conditional_get () { /bin/rm -f "$cached_header_file" "$cached_content_file" >&2 # quiet failure mode - if $cache_only_mode; then - print_log_message -W "$FUNCNAME: resource not cached: $location" + if $cache_only_mode || $check_cache_mode; then + print_log_message -W "$FUNCNAME: ${adjective}resource not cached: $location" return 1 fi @@ -326,11 +387,11 @@ conditional_get () { ####################################################################### # - # GET the web resource + # 
Issue a GET request for the web resource # If option -I was used, issue HEAD request instead # - # This implementation issues an HTTP Conditional GET request iff - # the resource is cached. + # This implementation issues a conditional request + # (GET or HEAD) iff the resource is cached. # ####################################################################### @@ -343,15 +404,13 @@ conditional_get () { curl_opts="${curl_opts} --user-agent '${user_agent_string}'" # set curl --compressed option if necessary - if $compressed_mode; then - adjective="compressed " - curl_opts="${curl_opts} --compressed" - fi + $compressed_mode && curl_opts="${curl_opts} --compressed" # always capture the header in a file - # capture the output iff the script issues a GET request curl_opts="${curl_opts} --dump-header '${tmp_header_file}'" - if $headers_only_mode; then + + # capture the output iff the client issues a GET request + if $conditional_head_mode; then print_log_message -I "$FUNCNAME issuing HEAD request for ${adjective}resource: $location" curl_opts="${curl_opts} --head" curl_opts="${curl_opts} --output '/dev/null'" @@ -359,9 +418,11 @@ conditional_get () { print_log_message -I "$FUNCNAME issuing GET request for ${adjective}resource: $location" curl_opts="${curl_opts} --output '${tmp_content_file}'" fi + + # always capture stderr in a file curl_opts="${curl_opts} --stderr '${tmp_stderr_file}'" - # If the resource is cached, add a conditional GET header. + # If the resource is cached, issue a conditional request. # Since "A recipient MUST ignore If-Modified-Since if the # request contains an If-None-Match header field," the # latter takes precedence in the following code block. @@ -402,10 +463,7 @@ conditional_get () { ####################################################################### # - # Process the response - # - # This cache implementation always tries to cache a 200 response. - # What if the response contains a "no-store" cache directive? 
+ # Response processing # ####################################################################### @@ -423,24 +481,37 @@ conditional_get () { fi print_log_message -I "$FUNCNAME received response code: $response_code" - if $headers_only_mode; then - /bin/cat "$tmp_header_file" - exit_code=$? - if [ $exit_code -ne 0 ]; then - print_log_message -E "$FUNCNAME unable to cat output ($exit_code)" - return 3 - fi - return 0 - fi + # output the header received from the server + if $conditional_head_mode && ! $check_cache_mode; then + /bin/cat "$tmp_header_file" + exit_code=$? + if [ $exit_code -ne 0 ]; then + print_log_message -E "$FUNCNAME unable to cat output ($exit_code)" + return 3 + fi + return 0 + fi ####################################################################### # # Update the cache # + # Open questions: + # What if the response contains a "no-store" cache directive? + # If Check Cache Mode is enabled but the response is 200, + # should the cache be refreshed as a side effect? + # (for now the answer is no) + # ####################################################################### if [ "$response_code" = "200" ]; then + # quiet failure mode + if $check_cache_mode; then + print_log_message -W "$FUNCNAME: ${adjective}resource is not up-to-date: $location" + return 1 + fi + # compute the length of the downloaded content actual_content_length=$( /bin/cat "$tmp_content_file" \ | /usr/bin/wc -c \ @@ -495,9 +566,10 @@ conditional_get () { fi elif [ "$response_code" = "304" ]; then + # quiet failure mode - if $force_output_mode; then - print_log_message -W "$FUNCNAME: resource not modified: $location" + if $force_refresh_mode; then + print_log_message -W "$FUNCNAME: fresh resource not available: $location" return 1 fi @@ -509,12 +581,18 @@ conditional_get () { ####################################################################### # - # Return the cached content + # Return the cached resource + # (since the cache is now up-to-date) # 
####################################################################### - - print_log_message -I "$FUNCNAME reading cached content file: ${cached_content_file}" - /bin/cat "$cached_content_file" + + if $conditional_head_mode; then + print_log_message -I "$FUNCNAME reading cached header file: $cached_header_file" + /bin/cat "$cached_header_file" + else + print_log_message -I "$FUNCNAME reading cached content file: $cached_content_file" + /bin/cat "$cached_content_file" + fi exit_code=$? if [ $exit_code -ne 0 ]; then print_log_message -E "$FUNCNAME unable to cat output ($exit_code)"