From f3bd1dc8316b5b67f735a9da44003ab45cc80521 Mon Sep 17 00:00:00 2001
From: Tom Scavo <trscavo@internet2.edu>
Date: Wed, 28 Jun 2017 16:57:24 -0400
Subject: [PATCH] Add new -c option and other improvements to command-line
 options

---
 lib/http_tools.sh | 226 +++++++++++++++++++++++++++++++---------------
 1 file changed, 152 insertions(+), 74 deletions(-)

diff --git a/lib/http_tools.sh b/lib/http_tools.sh
index 480d351..215a212 100755
--- a/lib/http_tools.sh
+++ b/lib/http_tools.sh
@@ -25,7 +25,7 @@
 # response body. If the server responds with 304, return the cached 
 # response body instead.
 #
-# Usage: conditional_get [-vFCIx] -d CACHE_DIR -T TMP_DIR HTTP_LOCATION
+# Usage: conditional_get [-vFCcx] -d CACHE_DIR -T TMP_DIR HTTP_LOCATION
 #
 # This function requires two option arguments (CACHE_DIR and TMP_DIR)
 # and a command-line argument (HTTP_LOCATION). The rest of the command
@@ -33,49 +33,74 @@
 #
 # Options:
 #   -v   verbose mode
-#   -F   force the return of fresh content
-#   -C   check the cache only
-#   -I   get and return headers only
+#   -F   force cache update
+#   -C   check cache freshness
+#   -c   cache-only mode (no network)
 #   -x   enable HTTP Compression
 #   -d   the cache directory (REQUIRED)
 #   -T   a temporary directory (REQUIRED)
 #
-# Use option -F, -C, or -I to alter the default behavior of the 
-# function. Options -F and -C are mutually exclusive of each other. 
-# Option -I may be used with option -C (but not with option -F).
+# Options -F, -C, and -c are mutually exclusive of each other. 
+# Options -v and -x may be used with any other option.
 #
-# Option -F forces the return of fresh content; that is, if option -F
-# is enabled, and the server responds with 304, the function quietly 
-# returns with a nonzero return code.
+# Option -F forces the output of fresh content; that is, if option -F
+# is enabled and the server responds with 200, the function returns 
+# normally. In that case, a cache write will occur. On the other hand,
+# if option -F is enabled and the server responds with 304, the function
+# quietly returns with a nonzero return code. See Quiet Failure Mode
+# below.
 #
-# Option -C causes the function to go directly to cache; that is, no 
-# HTTP request is issued. (This option is useful in offline mode.) 
+# Option -C outputs cached content but only if the cache is up-to-date.
+# An HTTP request is issued to determine if the cache content is stale.
+# If the resource is not cached or the cache is not up-to-date, the 
+# function quietly returns with a nonzero return code (i.e., Quiet
+# Failure Mode).
+#
+# Option -c outputs cached content whether or not the cache is up-to-date.
+# (Since no HTTP request is issued, this option is useful in offline mode.)
 # If the resource is not cached, the function quietly returns with a 
-# nonzero return code.
+# nonzero return code (i.e., Quiet Failure Mode).
+#
+# QUIET FAILURE MODE
+#
+# Options -F, -C, and -c exhibit Quiet Failure Mode. If one of these 
+# mutually exclusive options is enabled, and a special error condition
+# is detected, the function quietly returns error code 1 without emitting
+# an error message of any kind.
+#
+# The error conditions that trigger Quiet Failure Mode are based on the
+# following requirements:
+#
+#   Option -F: the HTTP response MUST be 200
+#   Option -C: the HTTP response MUST be 304
+#   Option -c: the resource MUST be cached
+#
+# If one of the above requirements is NOT met, the function quietly
+# returns error code 1.
 #
-# Option -I issues a HEAD request instead of a GET request, in which case,
-# only the response headers are returned in the output. Note that nothing 
-# is written to cache when option -I is used.
+# On a successful return, Quiet Failure Mode guarantees the following:
 #
-# If options -I and -C are used together, the cached headers are returned.
-# As with option -C alone, if the resource is not cached, the function 
-# quietly returns with a nonzero return code.
+#   Option -F: the cache has been updated (i.e., a cache write occurred)
+#   Option -C: the resource is cached and the cache is up-to-date
+#   Option -c: the resource is cached
+#
+# Note that options -C and -c do not write to cache in any case.
 #
 # HTTP COMPRESSION
 #
 # Option -x adds an Accept-Encoding header to the request; that is, if
 # option -x is enabled, the client merely indicates its support for HTTP 
 # Compression in the request. The server may or may not compress the 
-# response. This implementation does not check to see if the response 
-# was in fact compressed. The HTTP response header will indicate if this 
-# is so.
+# response, and in fact, this implementation does not check to see if
+# the response was compressed by the server. The HTTP response header
+# will indicate if this is so.
 #
 # Important! This implementation treats compressed and uncompressed 
 # requests for the same resource as two distinct resources. For example, 
 # consider the following pair of function calls:
 #
-#   conditional_get $url
-#   conditional_get -x $url
+#   conditional_get ... $url
+#   conditional_get -x ... $url
 #
 # The above requests result in two distinct cached resources, the content
 # of which are identical. Assuming the server actually compressed the
@@ -104,9 +129,10 @@
 # RETURN CODES
 #
 #    0: success
-#    1: quiet failure mode:
-#       option -F but no fresh resource available OR
-#       option -C but no cached resource available
+#    1: Quiet Failure Mode:
+#       option -F but no fresh resource available
+#       option -C but no up-to-date cached resource available
+#       option -c but no cached resource available
 #    2: initialization failure
 #    3: unspecified failure
 #    4: hash operation failed
@@ -153,32 +179,70 @@ conditional_get () {
 	local actual_content_length
 
 	local verbose_mode=false
-	local force_output_mode=false
+	local force_refresh_mode=false
+	local check_cache_mode=false
 	local cache_only_mode=false
-	local headers_only_mode=false
 	local compressed_mode=false
 	local cache_dir
 	local tmp_dir
 	local location
 	
+	# an undocumented feature
+	# 'conditional_get -I' === 'conditional_head'
+	# (the -I notation was borrowed from curl)
+	local conditional_head_mode=false
+
 	local opt
 	local OPTARG
 	local OPTIND
-	while getopts ":vFCIxd:T:" opt; do
+	while getopts ":vIFCcxd:T:" opt; do
 		case $opt in
 			v)
 				verbose_mode=true
 				;;
+			I)
+				if $force_refresh_mode; then
+					echo "ERROR: $FUNCNAME: options -I and -F may not be used together" >&2
+					return 2
+				fi
+				conditional_head_mode=true
+				;;
 			F)
-				force_output_mode=true
-				cache_only_mode=false
+				if $conditional_head_mode; then
+					echo "ERROR: $FUNCNAME: options -F and -I may not be used together" >&2
+					return 2
+				fi
+				if $check_cache_mode; then
+					echo "ERROR: $FUNCNAME: options -F and -C may not be used together" >&2
+					return 2
+				fi
+				if $cache_only_mode; then
+					echo "ERROR: $FUNCNAME: options -F and -c may not be used together" >&2
+					return 2
+				fi
+				force_refresh_mode=true
 				;;
 			C)
-				cache_only_mode=true
-				force_output_mode=false
+				if $force_refresh_mode; then
+					echo "ERROR: $FUNCNAME: options -C and -F may not be used together" >&2
+					return 2
+				fi
+				if $cache_only_mode; then
+					echo "ERROR: $FUNCNAME: options -C and -c may not be used together" >&2
+					return 2
+				fi
+				check_cache_mode=true
 				;;
-			I)
-				headers_only_mode=true
+			c)
+				if $force_refresh_mode; then
+					echo "ERROR: $FUNCNAME: options -c and -F may not be used together" >&2
+					return 2
+				fi
+				if $check_cache_mode; then
+					echo "ERROR: $FUNCNAME: options -c and -C may not be used together" >&2
+					return 2
+				fi
+				cache_only_mode=true
 				;;
 			x)
 				compressed_mode=true
@@ -200,12 +264,6 @@ conditional_get () {
 		esac
 	done
 	
-	#
-	if $force_output_mode && $headers_only_mode; then
-		echo "ERROR: $FUNCNAME: options -F and -C may not be used together" >&2
-		return 2
-	fi
-	
 	# a temporary directory is required
 	if [ -z "$tmp_dir" ]; then
 		echo "ERROR: $FUNCNAME: no temporary directory specified" >&2
@@ -269,9 +327,11 @@ conditional_get () {
 	if $compressed_mode; then
 		cached_header_file="$cache_dir/${hash}_headers_compressed"
 		cached_content_file="$cache_dir/${hash}_content_compressed"
+		adjective="compressed "
 	else
 		cached_header_file="$cache_dir/${hash}_headers"
 		cached_content_file="$cache_dir/${hash}_content"
+		adjective=
 	fi
 
 	print_log_message -D "$FUNCNAME using cached header file: $cached_header_file"
@@ -280,8 +340,9 @@ conditional_get () {
 	# check if the resource is cached
 	if [ -f "$cached_header_file" ] && [ -f "$cached_content_file" ]; then
 	
+		# read from cache without checking resource freshness
 		if $cache_only_mode; then
-			if $headers_only_mode; then
+			if $conditional_head_mode; then
 				print_log_message -I "$FUNCNAME reading cached header file: $cached_header_file"
 				/bin/cat "$cached_header_file"
 			else
@@ -302,8 +363,8 @@ conditional_get () {
 		/bin/rm -f "$cached_header_file" "$cached_content_file" >&2
 		
 		# quiet failure mode
-		if $cache_only_mode; then
-			print_log_message -W "$FUNCNAME: resource not cached: $location"
+		if $cache_only_mode || $check_cache_mode; then
+			print_log_message -W "$FUNCNAME: ${adjective}resource not cached: $location"
 			return 1
 		fi
 		
@@ -326,11 +387,11 @@ conditional_get () {
 
 	#######################################################################
 	#
-	# GET the web resource
+	# Issue a GET request for the web resource
 	# If option -I was used, issue HEAD request instead
 	#
-	# This implementation issues an HTTP Conditional GET request iff
-	# the resource is cached.
+	# This implementation issues a conditional request
+	# (GET or HEAD) iff the resource is cached.
 	#
 	#######################################################################
 
@@ -343,15 +404,13 @@ conditional_get () {
 	curl_opts="${curl_opts} --user-agent '${user_agent_string}'"
 	
 	# set curl --compressed option if necessary
-	if $compressed_mode; then
-		adjective="compressed "
-		curl_opts="${curl_opts} --compressed"
-	fi
+	$compressed_mode && curl_opts="${curl_opts} --compressed"
 
 	# always capture the header in a file
-	# capture the output iff the script issues a GET request
 	curl_opts="${curl_opts} --dump-header '${tmp_header_file}'"
-	if $headers_only_mode; then
+	
+	# capture the output iff the client issues a GET request
+	if $conditional_head_mode; then
 		print_log_message -I "$FUNCNAME issuing HEAD request for ${adjective}resource: $location"
 		curl_opts="${curl_opts} --head"
 		curl_opts="${curl_opts} --output '/dev/null'"
@@ -359,9 +418,11 @@ conditional_get () {
 		print_log_message -I "$FUNCNAME issuing GET request for ${adjective}resource: $location"
 		curl_opts="${curl_opts} --output '${tmp_content_file}'"
 	fi
+
+	# always capture stderr in a file
 	curl_opts="${curl_opts} --stderr '${tmp_stderr_file}'"
 
-	# If the resource is cached, add a conditional GET header.
+	# If the resource is cached, issue a conditional request.
 	# Since "A recipient MUST ignore If-Modified-Since if the 
 	# request contains an If-None-Match header field," the
 	# latter takes precedence in the following code block.
@@ -402,10 +463,7 @@ conditional_get () {
 
 	#######################################################################
 	#
-	# Process the response
-	#
-	# This cache implementation always tries to cache a 200 response. 
-	# What if the response contains a "no-store" cache directive?
+	# Response processing
 	#
 	#######################################################################
 
@@ -423,24 +481,37 @@ conditional_get () {
 	fi
 	print_log_message -I "$FUNCNAME received response code: $response_code"
 
-	if $headers_only_mode; then
-		/bin/cat "$tmp_header_file"
-		exit_code=$?
-		if [ $exit_code -ne 0 ]; then
-			print_log_message -E "$FUNCNAME unable to cat output ($exit_code)"
-			return 3
-		fi
-		return 0
-	fi
+	# output the header received from the server
+ 	if $conditional_head_mode && ! $check_cache_mode; then
+ 		/bin/cat "$tmp_header_file"
+ 		exit_code=$?
+ 		if [ $exit_code -ne 0 ]; then
+ 			print_log_message -E "$FUNCNAME unable to cat output ($exit_code)"
+ 			return 3
+ 		fi
+ 		return 0
+ 	fi
 
 	#######################################################################
 	#
 	# Update the cache
 	#
+	# Open questions:
+	#   What if the response contains a "no-store" cache directive?
+	#   If Check Cache Mode is enabled but the response is 200,
+	#   should the cache be refreshed as a side effect?
+	#   (for now the answer is no)
+	#
 	#######################################################################
 
 	if [ "$response_code" = "200" ]; then
 
+		# quiet failure mode
+		if $check_cache_mode; then
+			print_log_message -W "$FUNCNAME: ${adjective}resource is not up-to-date: $location"
+			return 1
+		fi
+		
 		# compute the length of the downloaded content
 		actual_content_length=$( /bin/cat "$tmp_content_file" \
 			| /usr/bin/wc -c \
@@ -495,9 +566,10 @@ conditional_get () {
 		fi
 
 	elif [ "$response_code" = "304" ]; then
+	
 		# quiet failure mode
-		if $force_output_mode; then
-			print_log_message -W "$FUNCNAME: resource not modified: $location"
+		if $force_refresh_mode; then
+			print_log_message -W "$FUNCNAME: fresh resource not available: $location"
 			return 1
 		fi
 		
@@ -509,12 +581,18 @@ conditional_get () {
 
 	#######################################################################
 	#
-	# Return the cached content
+	# Return the cached resource
+	# (since the cache is now up-to-date)
 	#
 	#######################################################################
-	
-	print_log_message -I "$FUNCNAME reading cached content file: ${cached_content_file}"
-	/bin/cat "$cached_content_file"
+
+	if $conditional_head_mode; then
+		print_log_message -I "$FUNCNAME reading cached header file: $cached_header_file"
+		/bin/cat "$cached_header_file"
+	else
+		print_log_message -I "$FUNCNAME reading cached content file: $cached_content_file"
+		/bin/cat "$cached_content_file"
+	fi
 	exit_code=$?
 	if [ $exit_code -ne 0 ]; then
 		print_log_message -E "$FUNCNAME unable to cat output ($exit_code)"