From 92b071326a8d4504856f56dfe2cd54d0c4e80ffc Mon Sep 17 00:00:00 2001
From: Tom Scavo <trscavo@internet2.edu>
Date: Sat, 10 Jun 2017 12:02:31 -0400
Subject: [PATCH] Add script to test compressed resources

---
 bin/test_compressed_resources.sh | 478 +++++++++++++++++++++++++++++++
 install.sh                       |   1 +
 2 files changed, 479 insertions(+)
 create mode 100755 bin/test_compressed_resources.sh

diff --git a/bin/test_compressed_resources.sh b/bin/test_compressed_resources.sh
new file mode 100755
index 0000000..b225107
--- /dev/null
+++ b/bin/test_compressed_resources.sh
@@ -0,0 +1,478 @@
+#!/bin/bash
+
+#######################################################################
+# Copyright 2017 Internet2
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#######################################################################
+
+#######################################################################
+# Help message (display_help writes the usage text below to stdout)
+#######################################################################
+
+display_help () {
+/bin/cat <<- HELP_MSG
+	This script tests one or more resources for HTTP Compression
+	by requesting a compressed response just-in-time. The compressed 
+	response is compared to a cached response, which is assumed to 
+	be uncompressed.
+	
+	Usage: ${0##*/} [-hv] -d OUT_DIR LOCATION ...
+	
+	The script takes one or more HTTP locations on the command line. 
+	Each location is tested for HTTP Compression. The script produces 
+	a JSON array, with one array element for each location. The 
+	resulting JSON file is moved to the output directory specified 
+	on the command line.
+	
+	Options:
+	   -h      Display this help message
+	   -v      Enable DEBUG mode
+	   -d      Specify the output directory
+
+	Option -h is mutually exclusive of all other options.
+	
+	Option -d specifies the ultimate output directory, which is
+	usually a web directory. This option is REQUIRED.
+	
+	ENVIRONMENT
+	
+	This script leverages a handful of environment variables:
+	
+	  LIB_DIR    A source library directory
+	  CACHE_DIR  A persistent HTTP cache
+	  TMPDIR     A temporary directory
+	  LOG_FILE   A persistent log file
+	  LOG_LEVEL  The global log level [0..5]
+	
+	All of the above environment variables are REQUIRED
+	except LOG_LEVEL, which defaults to LOG_LEVEL=3 (i.e. INFO).
+	
+	The following environment variables are REQUIRED:
+	
+	$( printf "  %s\n" "${env_vars[@]}" )
+	
+	The following directories will be used:
+	
+	$( printf "  %s\n" "${dir_paths[@]}" )
+	
+	The following log file will be used:
+	
+	$( printf "  %s\n" "$LOG_FILE" )
+	
+	INSTALLATION
+	
+	At least the following source library files MUST be installed 
+	in LIB_DIR:
+	
+	$( printf "  %s\n" "${lib_filenames[@]}" )
+	
+	OUTPUT
+	
+	The script outputs a JSON file to OUT_DIR:
+	
+	  $out_filename
+	  
+	The JSON file contains a single array. Each array element is 
+	a JavaScript object with the following fields:
+	
+	  successFlag       boolean    success or failure?
+	  message           string     message string
+	  location          string     HTTP location
+	  ResponseCode      string     HTTP response code
+	  Date              string     HTTP response header
+	  LastModified      string     HTTP response header
+	  ETag              string     HTTP response header
+	  ContentLength     string     HTTP response header
+	  ContentType       string     HTTP response header
+	  ContentEncoding   string     HTTP response header
+	  
+	For example:
+	
+	  {
+	    "successFlag": true,
+	    "message": "Integrity of compressed metadata confirmed",
+	    "location": "http://md.incommon.org/InCommon/InCommon-metadata.xml",
+	    "ResponseCode": "200",
+	    "Date": "Fri, 09 Jun 2017 20:04:12 GMT",
+	    "LastModified": "Fri, 09 Jun 2017 19:05:16 GMT",
+	    "ETag": "\"80bbff-5518ba6585320\"",
+	    "ContentLength": "8436735",
+	    "ContentType": "application/samlmetadata+xml",
+	    "ContentEncoding": "gzip"
+	  }
+	
+	EXAMPLES
+	
+	  \$ ${0##*/} -h
+	  \$ locations="http://md.incommon.org/InCommon/InCommon-metadata.xml
+	  > http://md.incommon.org/InCommon/InCommon-metadata-export.xml"
+	  \$ out_dir=/home/htdocs/www.incommonfederation.org/federation/metadata/
+	  \$ ${0##*/} -d \$out_dir \$locations
+HELP_MSG
+}
+
+#######################################################################
+# Bootstrap
+#######################################################################
+
+script_name=${0##*/}  # equivalent to basename $0
+
+# names of the environment variables that must be set and non-empty
+env_vars[1]="LIB_DIR"
+env_vars[2]="CACHE_DIR"
+env_vars[3]="TMPDIR"
+env_vars[4]="LOG_FILE"
+
+# check environment variables (exit 2 on the first one that is empty or unset)
+for env_var in "${env_vars[@]}"; do
+	env_var_val=${!env_var}  # indirect expansion reads the variable by name; no eval needed
+	if [ -z "$env_var_val" ]; then
+		echo "ERROR: $script_name requires env var $env_var" >&2
+		exit 2
+	fi
+done
+
+# required directories
+dir_paths[1]="$LIB_DIR"
+dir_paths[2]="$CACHE_DIR"
+dir_paths[3]="$TMPDIR"
+
+# check required directories (quoted, so a path containing whitespace is tested intact)
+for dir_path in "${dir_paths[@]}"; do
+	if [ ! -d "$dir_path" ]; then
+		echo "ERROR: $script_name: directory does not exist: $dir_path" >&2
+		exit 2
+	fi
+done
+
+# check the log file: it must be an existing regular file, except that
+# devices such as /dev/tty and /dev/null are allowed
+if [ ! -f "$LOG_FILE" ] && [[ $LOG_FILE != /dev/* ]]; then
+	echo "ERROR: $script_name: file does not exist: $LOG_FILE" >&2
+	exit 2
+fi
+
+# default to INFO logging
+if [ -z "$LOG_LEVEL" ]; then
+	LOG_LEVEL=3
+fi
+
+# library filenames (resolved relative to LIB_DIR)
+lib_filenames[1]="core_lib.sh"
+lib_filenames[2]="http_tools.sh"
+
+# check lib files (existence only; they are sourced later in the script)
+for lib_filename in "${lib_filenames[@]}"; do
+	lib_file="$LIB_DIR/$lib_filename"
+	if [ ! -f "$lib_file" ]; then
+		echo "ERROR: $script_name: file does not exist: $lib_file" >&2
+		exit 2
+	fi
+done
+
+# output filename (basename of the JSON file this script produces)
+out_filename="compressed_response_headers.json"
+
+#######################################################################
+# Process command-line options and arguments
+#######################################################################
+
+# option defaults
+help_mode=false
+local_opts=
+curl_opts="--silent"
+
+# the leading colon in the optstring selects silent mode, so bad options reach the last two arms
+while getopts ":hvd:" opt; do
+	case "$opt" in
+		h) help_mode=true ;;
+		d) out_dir="$OPTARG" ;;
+		v)
+			LOG_LEVEL=4
+			local_opts="$local_opts -v"
+			curl_opts="--verbose --progress-bar"
+			;;
+		:)
+			echo "ERROR: $script_name: Option -$OPTARG requires an argument" >&2
+			exit 2
+			;;
+		\?)
+			echo "ERROR: $script_name: Unrecognized option: -$OPTARG" >&2
+			exit 2
+			;;
+	esac
+done
+
+# a request for help is honored before any further validation
+if [[ $help_mode == true ]]; then
+	display_help
+	exit 0
+fi
+
+# the output directory must be specified and must already exist
+if [[ -z "$out_dir" ]]; then
+	echo "ERROR: $script_name: no output directory specified (option -d)" >&2
+	exit 2
+elif [[ ! -d "$out_dir" ]]; then
+	echo "ERROR: $script_name: directory does not exist: $out_dir" >&2
+	exit 2
+fi
+
+# drop the parsed options; at least one metadata location must remain
+shift "$(( OPTIND - 1 ))"
+if (( $# < 1 )); then
+	echo "ERROR: $script_name: wrong number of arguments: $# (at least 1 required)" >&2
+	exit 2
+fi
+	
+#######################################################################
+# Initialization
+#######################################################################
+
+# load each library into the current shell, stopping at the first failure
+for lib_filename in "${lib_filenames[@]}"; do
+	lib_file="$LIB_DIR/$lib_filename"
+	. "$lib_file"
+	status_code=$?
+	if (( status_code != 0 )); then
+		echo "ERROR: $script_name failed ($status_code) to source lib file $lib_file" >&2
+		exit 2
+	fi
+done
+
+# per-process scratch directory: TMPDIR/<script name up to its first dot>_<PID>
+tmp_dir="${TMPDIR%%/}/${script_name%%.*}_$$"
+/bin/mkdir "$tmp_dir"
+status_code=$?
+if (( status_code != 0 )); then
+	echo "ERROR: $script_name failed ($status_code) to create tmp dir $tmp_dir" >&2
+	exit 2
+fi
+
+# temporary files, all located inside the scratch directory
+header_file="${tmp_dir}/resource-header.txt"
+compressed_content="${tmp_dir}/compressed-resource-content.xml"
+uncompressed_content="${tmp_dir}/uncompressed-resource-content.xml"
+out_file="${tmp_dir}/$out_filename"
+
+#######################################################################
+# Functions
+#######################################################################
+
+escape_special_json_chars () {
+	# $1: raw string; the escaped string is written to stdout
+	local str="$1"
+	# backslash (\) and double quote (") are special; printf, unlike echo, emits "-n" or "-e" verbatim
+	printf '%s\n' "$str" | $_SED -e 's/\\/\\\\/g' -e 's/"/\\"/g'
+}
+
+append_json_object () {
+	# Print one JSON object to stdout, built from the globals read below (each value is escaped first).
+	local json_message=$( escape_special_json_chars "$message" )
+	local json_location=$( escape_special_json_chars "$location" )
+	local json_code=$( escape_special_json_chars "$response_code" )
+	local json_date=$( escape_special_json_chars "$response_date" )
+	local json_modified=$( escape_special_json_chars "$last_modified" )
+	local json_etag=$( escape_special_json_chars "$e_tag" )
+	local json_length=$( escape_special_json_chars "$content_length" )
+	local json_type=$( escape_special_json_chars "$content_type" )
+	local json_encoding=$( escape_special_json_chars "$content_encoding" )
+
+	local success_literal="true"
+	$success || success_literal="false"
+	/bin/cat <<- JSON_OBJECT
+	  {
+	    "successFlag": $success_literal,
+	    "message": "$json_message",
+	    "location": "$json_location",
+	    "ResponseCode": "$json_code",
+	    "Date": "$json_date",
+	    "LastModified": "$json_modified",
+	    "ETag": "$json_etag",
+	    "ContentLength": "$json_length",
+	    "ContentType": "$json_type",
+	    "ContentEncoding": "$json_encoding"
+	  }
+JSON_OBJECT
+}
+
+get_compressed_response () {
+	# $1: HTTP location; sets global location, writes headers to $header_file and the body to $compressed_content
+	local status_code
+
+	location="$1"
+
+	# get the compressed resource ($curl_opts stays unquoted on purpose: it may hold several options)
+	print_log_message -I "$script_name requesting (compressed) resource: $location"
+	/usr/bin/curl $curl_opts --compressed --dump-header "$header_file" "$location" > "$compressed_content"
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: curl failed ($status_code) on resource: $location"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+}
+
+parse_compressed_response () {
+	# Reads $header_file and sets the globals response_code, response_date, last_modified, e_tag, content_*.
+	local header_name
+	local status_code
+
+	# get the HTTP response code
+	response_code=$( get_response_code "$header_file" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_response_code failed ($status_code) to parse response code"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# get the Date response header
+	header_name=Date
+	response_date=$( get_header_value "$header_file" "$header_name" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_header_value failed ($status_code) to parse response header: $header_name"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# get the Last-Modified response header
+	header_name=Last-Modified
+	last_modified=$( get_header_value "$header_file" "$header_name" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_header_value failed ($status_code) to parse response header: $header_name"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# get the ETag response header
+	header_name=ETag
+	e_tag=$( get_header_value "$header_file" "$header_name" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_header_value failed ($status_code) to parse response header: $header_name"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# get the Content-Length response header
+	header_name=Content-Length
+	content_length=$( get_header_value "$header_file" "$header_name" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_header_value failed ($status_code) to parse response header: $header_name"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# get the Content-Type response header
+	header_name=Content-Type
+	content_type=$( get_header_value "$header_file" "$header_name" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_header_value failed ($status_code) to parse response header: $header_name"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# get the Content-Encoding response header
+	header_name=Content-Encoding
+	content_encoding=$( get_header_value "$header_file" "$header_name" )
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -E "$script_name: get_header_value failed ($status_code) to parse response header: $header_name"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+}
+
+test_compressed_response () {
+	# Sets the globals success and message; success becomes true only after every check below has passed.
+	local status_code
+
+	# was the response actually compressed? (assume failure until the content has been verified)
+	success=false
+	if [ "$response_code" -ne 200 ]; then
+		print_log_message -E "$script_name: unexpected response ($response_code) on resource: $location"
+		message="Unexpected response code: $response_code"
+		return
+	elif [ -z "$content_encoding" ]; then
+		print_log_message -E "$script_name: Content-Encoding response header not found"
+		message="Content-Encoding response header not found"
+		return
+	fi
+
+	# TODO: Check the cached resource for Content-Encoding response header
+	# get the cached resource
+	conditional_get $local_opts -C -d "$CACHE_DIR" -T "$tmp_dir" "$location" > "$uncompressed_content"
+	status_code=$?
+	if [ $status_code -eq 1 ]; then
+		# metadata must be cached
+		print_log_message -E "$script_name: metadata file not cached: $location"
+		clean_up_and_exit -d "$tmp_dir" 1
+	fi
+	if [ $status_code -gt 1 ]; then
+		print_log_message -E "$script_name: conditional_get failed ($status_code) on location: $location"
+		clean_up_and_exit -d "$tmp_dir" $status_code
+	fi
+
+	# compare the compressed and uncompressed content
+	print_log_message -D "$script_name comparing compressed and uncompressed content"
+	/usr/bin/diff -q "$compressed_content" "$uncompressed_content"
+	status_code=$?
+	if [ $status_code -ne 0 ]; then
+		print_log_message -C "$script_name: diff failed ($status_code) on resource: $location"
+		clean_up_and_exit -d "$tmp_dir" 3
+	fi
+	message="Integrity of compressed metadata confirmed"
+	success=true
+}
+
+print_output_file () {
+	# Write a JSON array to stdout, with one object per location argument.
+	printf '%s\n' "["
+
+	# $1 is always the location being processed; shift advances to the next one
+	while :; do
+		get_compressed_response "$1"
+		parse_compressed_response
+		test_compressed_response
+		append_json_object
+
+		# stop after the last location; otherwise separate the objects with a comma
+		shift
+		if [ $# -eq 0 ]; then
+			break
+		fi
+		printf '%s\n' "  ,"
+	done
+
+	printf '%s\n' "]"
+}
+
+#######################################################################
+# Main processing
+#######################################################################
+
+print_log_message -I "$script_name BEGIN"
+
+# create the JSON output in the temporary directory (one array element per location argument)
+print_output_file "$@" > "$out_file"
+print_log_message -I "$script_name writing output file: $out_filename"
+
+# move the output file to the web directory (quoted, so a directory containing whitespace works)
+print_log_message -I "$script_name moving output file to dir: $out_dir"
+/bin/mv "$out_file" "$out_dir"
+status_code=$?
+if [ $status_code -ne 0 ]; then
+	print_log_message -E "$script_name: mv failed ($status_code) to dir: $out_dir"
+	clean_up_and_exit -d "$tmp_dir" $status_code
+fi
+
+print_log_message -I "$script_name END"
+clean_up_and_exit -d "$tmp_dir" 0
diff --git a/install.sh b/install.sh
index 387a274..990cb5b 100755
--- a/install.sh
+++ b/install.sh
@@ -95,6 +95,7 @@ done <<SCRIPTS
 $script_bin/bin/cget.sh
 $script_bin/bin/compute_vital_statistics.sh
 $script_bin/bin/request_http_resource.sh
+$script_bin/bin/test_compressed_resources.sh
 SCRIPTS
 
 # initialize lib dir