Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Implement support for HTTP Compression
Tom Scavo committed Jun 17, 2017
1 parent 01be04b commit 3857af9
Showing 1 changed file with 100 additions and 40 deletions.
140 changes: 100 additions & 40 deletions lib/http_tools.sh
@@ -23,41 +23,68 @@
# issue an ordinary GET request for the resource. In either case, if
# the server responds with 200, cache the resource and return the
# response body. If the server responds with 304, return the cached
# resource instead.
# response body instead.
#
# Usage: conditional_get [-v] [-F | -C] [-I] -d CACHE_DIR -T TMP_DIR HTTP_LOCATION
# Usage: conditional_get [-vFCIx] -d CACHE_DIR -T TMP_DIR HTTP_LOCATION
#
# This function requires two option arguments (CACHE_DIR and TMP_DIR)
# and a command-line argument (HTTP_LOCATION). The rest of the command
# line is optional.
#
# Options:
# -v verbose mode
# -F force the return of a fresh resource
# -F force the return of fresh content
# -C check the cache only
# -I return headers only
# -I get and return headers only
# -x enable HTTP Compression
# -d the cache directory (REQUIRED)
# -T a temporary directory (REQUIRED)
#
# Use option -F, -C, or -I to alter the default behavior of the function.
# Options -F and -C are mutually exclusive of each another. Option -I
# may be used with option -C (but not with option -F).
# Use option -F, -C, or -I to alter the default behavior of the
# function. Options -F and -C are mutually exclusive of each other.
# Option -I may be used with option -C (but not with option -F).
#
# Option -F forces the return of a fresh resource, that is, if the
# server responds with 304, the function quietly returns with a nonzero
# return code.
# Option -F forces the return of fresh content; that is, if option -F
# is enabled, and the server responds with 304, the function quietly
# returns with a nonzero return code.
#
# Option -C causes the function to go directly to cache. No HTTP request
# is issued. (This option is useful in offline mode.) If the resource
# is not cached, the function quietly returns with a nonzero return code.
# Option -C causes the function to go directly to cache; that is, no
# HTTP request is issued. (This option is useful in offline mode.)
# If the resource is not cached, the function quietly returns with a
# nonzero return code.
#
# Option -I issues a HEAD request instead of a GET request. In that case,
# only the response headers are returned in the output. Nothing is written
# when option -I is used.
# Option -I issues a HEAD request instead of a GET request, in which case
# only the response headers are returned in the output. Note that nothing
# is written to cache when option -I is used.
#
# If options -I and -C are used together, the cached headers are returned
# instead. As with option -C alone, if the resource is not cached, the
# function quietly returns with a nonzero return code.
# If options -I and -C are used together, the cached headers are returned.
# As with option -C alone, if the resource is not cached, the function
# quietly returns with a nonzero return code.
#
# HTTP COMPRESSION
#
# Option -x adds an Accept-Encoding header to the request; that is, if
# option -x is enabled, the client merely indicates its support for HTTP
# Compression in the request. The server may or may not compress the
# response. This implementation does not check to see if the response
# was in fact compressed. The HTTP response header will indicate if this
# is so.
#
# Important! This implementation treats compressed and uncompressed
# requests for the same resource as two distinct resources. For example,
# consider the following pair of function calls:
#
# conditional_get $url
# conditional_get -x $url
#
# The above requests result in two distinct cached resources, the contents
# of which are identical. Assuming the server actually compressed the
# response of the latter, the headers will be different, however. In
# particular, the Content-Length values will be different in each case.
# Most importantly, the compressed response header will include a
# Content-Encoding header (whose value is typically "gzip").
#
# OUTPUT
#
# The output of the curl command-line tool is stored in the following
# temporary files:
@@ -66,6 +93,8 @@
# $TMP_DIR/conditional_get_curl_content
# $TMP_DIR/conditional_get_curl_stderr
#
# DEPENDENCIES
#
# This function requires the following library file:
#
# core_lib.sh
@@ -115,6 +144,7 @@ conditional_get () {
local tmp_content_file
local tmp_stderr_file
local curl_opts
local adjective
local do_conditional_get
local header_value
local cmd
@@ -126,14 +156,15 @@ conditional_get () {
local force_output_mode=false
local cache_only_mode=false
local headers_only_mode=false
local compressed_mode=false
local cache_dir
local tmp_dir
local location

local opt
local OPTARG
local OPTIND
while getopts ":vFCId:T:" opt; do
while getopts ":vFCIxd:T:" opt; do
case $opt in
v)
verbose_mode=true
@@ -149,6 +180,9 @@ conditional_get () {
I)
headers_only_mode=true
;;
x)
compressed_mode=true
;;
d)
cache_dir="$OPTARG"
;;
@@ -212,8 +246,12 @@ conditional_get () {
# Determine the cache files (which may or may not exist at this point)
#
# This cache implementation uses separate files for the header and
# body content. Does it make sense to cache a single file instead?
# Also, should we use SHA-1 instead of MD5?
# body content. It also uses a separate pair of files if option -x
# (i.e., HTTP Compression) is specified on the command line.
#
# Open Questions
# Does it make sense to cache a single file instead?
# Should we use SHA-1 instead of MD5?
#
#######################################################################

@@ -227,8 +265,14 @@ conditional_get () {
return 4
fi

cached_header_file="$cache_dir/${hash}_headers"
cached_content_file="$cache_dir/${hash}_content"
# use distinct cache filenames for compressed mode
if $compressed_mode; then
cached_header_file="$cache_dir/${hash}_headers_compressed"
cached_content_file="$cache_dir/${hash}_content_compressed"
else
cached_header_file="$cache_dir/${hash}_headers"
cached_content_file="$cache_dir/${hash}_content"
fi

print_log_message -D "$FUNCNAME using cached header file: $cached_header_file"
print_log_message -D "$FUNCNAME using cached content file: $cached_content_file"
@@ -283,6 +327,7 @@ conditional_get () {
#######################################################################
#
# GET the web resource
# If option -I was used, issue a HEAD request instead
#
# This implementation issues an HTTP Conditional GET request iff
# the resource is cached.
@@ -297,15 +342,21 @@ conditional_get () {
fi
curl_opts="${curl_opts} --user-agent '${user_agent_string}'"

# set curl --compressed option if necessary
if $compressed_mode; then
adjective="compressed "
curl_opts="${curl_opts} --compressed"
fi

# always capture the header in a file
# capture the output iff the script issues a GET request
curl_opts="${curl_opts} --dump-header '${tmp_header_file}'"
if $headers_only_mode; then
print_log_message -I "$FUNCNAME issuing HEAD request for resource: $location"
print_log_message -I "$FUNCNAME issuing HEAD request for ${adjective}resource: $location"
curl_opts="${curl_opts} --head"
curl_opts="${curl_opts} --output '/dev/null'"
else
print_log_message -I "$FUNCNAME issuing GET request for resource: $location"
print_log_message -I "$FUNCNAME issuing GET request for ${adjective}resource: $location"
curl_opts="${curl_opts} --output '${tmp_content_file}'"
fi
curl_opts="${curl_opts} --stderr '${tmp_stderr_file}'"
@@ -390,27 +441,36 @@ conditional_get () {

if [ "$response_code" = "200" ]; then

# sanity check
declared_content_length=$( get_header_value "$tmp_header_file" 'Content-Length' )
return_code=$?
if [ $return_code -ne 0 ]; then
print_log_message -E "$FUNCNAME: get_header_value failed (return code: $return_code)"
return 6
fi
# compute the length of the downloaded content
actual_content_length=$( /bin/cat "$tmp_content_file" \
| /usr/bin/wc -c \
| $_SED -e 's/^[ ]*//' -e 's/[ ]*$//'
)
if [ -n "$declared_content_length" ]; then
if [ "$declared_content_length" != "$actual_content_length" ]; then
print_log_message -E "$FUNCNAME failed content length check"
return 3
return_code=$?
if [ $return_code -ne 0 ]; then
print_log_message -E "$FUNCNAME: length calculation failed (return code: $return_code)"
return 3
fi
print_log_message -D "$FUNCNAME downloaded ${actual_content_length} bytes"

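# this sanity check is applied only if option -x was NOT used, since
# curl --compressed decodes the body while Content-Length (if present)
# gives the encoded length, so the two need not agree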
if ! $compressed_mode; then
declared_content_length=$( get_header_value "$tmp_header_file" 'Content-Length' )
return_code=$?
if [ $return_code -ne 0 ]; then
print_log_message -E "$FUNCNAME: get_header_value failed (return code: $return_code)"
return 6
fi
if [ -n "$declared_content_length" ]; then
if [ "$declared_content_length" != "$actual_content_length" ]; then
print_log_message -E "$FUNCNAME failed content length check"
return 3
fi
else
print_log_message -W "$FUNCNAME: Content-Length response header missing"
fi
else
print_log_message -W "$FUNCNAME: Content-Length response header missing"
fi

print_log_message -D "$FUNCNAME downloaded ${actual_content_length} bytes"
if $do_conditional_get; then
print_log_message -D "$FUNCNAME refreshing cache files"
else

0 comments on commit 3857af9

Please sign in to comment.