diff --git a/build.xml b/build.xml
index f908df7b..eb21fe25 100644
--- a/build.xml
+++ b/build.xml
@@ -144,9 +144,12 @@
+
+
+
-
-
+
+
+
+
+
+
+
+
+
-
+
+
-
+
-
@@ -271,6 +284,7 @@
+
@@ -291,11 +305,6 @@
-
-
-
Stage 3.1 Success: Aggregates send to keymaster for processing.
@@ -497,76 +508,83 @@
-
- Stage 3.3 Success: Signed aggregates and stats file comitted to data repository, pushed to origin.
+
+ Stage 4 Success: MDQ cache created; all files comitted to data repository.
-
+
-
- Stage 4 Success: MDQ cache created (not yet implemented!); all files comitted to data repository.
+
+ Stage 4.2 Success: Signed aggregates and stats file comitted to data repository.
- Stage 5 Success: Master branch pushed to origin, new tag created and pushed, message sent to start publication.
+ Stage 5 Success: Master branch pushed to origin, new tag created and pushed, mdq cache sent to repo, message sent to start publication.
-
+
@@ -776,7 +794,7 @@
-
+
@@ -794,7 +812,7 @@
-
+
@@ -810,9 +828,8 @@
-
-
+
@@ -830,7 +847,7 @@
-
+
@@ -848,7 +865,7 @@
-
+
@@ -1054,7 +1071,7 @@
Commit unsigned files to local products repository
-->
- Commiting all changes in products repository.
+ Pushing the latest tooling repository to keymaster.
@@ -1087,6 +1104,59 @@
+
+
+
+
+ Updating local stats cache
+
+
+
+
+
+
+
+ Triggering daily stats job
+
+
+
+
+
+
+
+
+
+ Triggering monthly stats job
+
+
+
+
+
+
+
+
+ Triggering yearly stats job
+
+
+
+
+
+
@@ -1120,30 +1186,21 @@
-
+
-
+
Verifying @{i}...
-
-
-
-
+
-
+
@@ -1184,11 +1237,6 @@
Checksum of file matches expected value
-
-
-
@@ -1222,56 +1270,67 @@
property="mdaggr.export.preview.signed.checksum"/>
Verifying metadata held at ${md.dist.host1.name}
-
-
-
-
-
-
-
Verifying metadata held at ${md.dist.host2.name}
-
-
-
-
-
-
-
Verifying metadata held at ${md.dist.host3.name}
-
-
-
-
-
-
-
Verification completed.
+
+
+ Verifying MDQ held at ${mdq.dist.name}
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+ Testing signing the unsigned main aggregate.
-
-
+ Test signing UKfed prod metadata.
+
-
-
+
Verifying signed UK metadata.
-
+
Verifying signed UK WAYF metadata.
-
+
Verifying signed UK CDS full metadata.
-
+
Verifying signed UK test metadata.
-
+
Verifying signed UK export metadata.
-
+
Verifying signed UK export preview metadata.
-
+
Verifying signed UK fallback metadata.
-
+
Verification completed.
+
+
+ Testing verifying the signed main aggregate.
+
+
+ Test verification completed.
+
+
+
+
+
+
+ Generating per-entity metadata in ${mda.mdq.output}
+ from production aggregate in ${mda.mdq.input}
+
+
+
+ Generation complete.
+ Generating all.xml in ${mda.mdq.output}
+
+ Generation complete.
+
+
+
+
+
+
+ Generating per-entity metadata in ${mda.mdq.output}
+ from test metadata in ${mda.mdq.input}
+
+
+
+ Generation complete.
+
+
-
- Creating MDQ cache.
- -> Not yet implemented. This is not a failure, other than a moral one
+
+ MDQ cache created.
@@ -1961,6 +2057,31 @@
+
+
+ Pushing UK Federation mdq cache to MD dist.
+ -> MD1
+
+
+
+
+
+ -> MD2
+
+
+
+
+
+ -> MD3
+
+
+
+
+
+
+
-
-
-
- Please select the channel to use (e.g., us_incommon):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
+
+
@@ -33,100 +35,100 @@
Total entities
-
+
Identity providers
-
+
Service providers
-
+
-
+
:
-
+
-
+
mdui:UIInfo
-
+
mdui:Logo
-
+
mdui:Description
-
+
mdui:DisplayName
-
+
mdui:Keywords
-
+
mdui:InformationURL
-
+
mdui:PrivacyStatementURL
-
+
mdui:DiscoHints
-
+
mdui:IPHint
-
+
mdui:DomainHint
-
+
mdui:GeolocationHint
-
+
-
+
@@ -142,5 +144,5 @@
)
-
+
diff --git a/macosx.properties b/macosx.properties
index 1b5744a2..0b062e2e 100644
--- a/macosx.properties
+++ b/macosx.properties
@@ -5,3 +5,13 @@
# those defined in default.properties, but may be overridden by
# properties defined in a machine-specific build.properties file.
#
+
+#
+# Location of the PKCS#11 configuration file for OpenSC on Mac OS X.
+#
+sign.uk.pkcs11Config = ${basedir}/mdx/uk/opensc-mac.cfg
+
+#
+# Signing key alias within the keystore.
+#
+sign.uk.keyAlias = key10
diff --git a/mdx/_rules/check_sirtfi.xsl b/mdx/_rules/check_sirtfi.xsl
new file mode 100644
index 00000000..78240c01
--- /dev/null
+++ b/mdx/_rules/check_sirtfi.xsl
@@ -0,0 +1,73 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ SIRTFI requires a REFEDS security contact
+
+
+
+
+
+
+
+ SIRTFI requires a REFEDS security contact with a GivenName
+
+
+
+
+ SIRTFI requires a REFEDS security contact with an EmailAddress
+
+
+
+
+
+
diff --git a/mdx/common-beans.xml b/mdx/common-beans.xml
index b7ea95f6..ee5bc181 100644
--- a/mdx/common-beans.xml
+++ b/mdx/common-beans.xml
@@ -56,14 +56,6 @@
-
-
-
-
-
-
@@ -197,27 +181,6 @@
-
-
-
-
-
-
-
-
+
@@ -254,22 +221,22 @@
+
+
-
-
-
+
-
+
-
+
@@ -286,7 +253,7 @@
-
+
@@ -302,6 +269,49 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mdx/identity.xsl b/mdx/identity.xsl
new file mode 100644
index 00000000..dc2ad8b1
--- /dev/null
+++ b/mdx/identity.xsl
@@ -0,0 +1,30 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mdx/int_edugain/beans.xml b/mdx/int_edugain/beans.xml
index 7bf73818..05b7c9b3 100644
--- a/mdx/int_edugain/beans.xml
+++ b/mdx/int_edugain/beans.xml
@@ -15,7 +15,8 @@
-
+
+
-
diff --git a/mdx/schema/uk-fed-label.xsd b/mdx/schema/uk-fed-label.xsd
index 83213fb8..8c1656ae 100644
--- a/mdx/schema/uk-fed-label.xsd
+++ b/mdx/schema/uk-fed-label.xsd
@@ -2,7 +2,7 @@
@@ -43,13 +43,46 @@
-
+
+
+
+
+ An organization ID is a string constrained to be the string "ukforg"
+ followed by a numeric code.
+
+ These codes are defined as IDs in the members.xml database, but
+ are simple strings in entity metadata.
+
+
+
+
+
+
+
Indicates an entity whose owner is a member in good standing
of the UK federation.
+
+
+
+
+
+
+ The orgID attribute is described as "optional" in this schema
+ so that it can be omitted in published metadata. However, it
+ is mandatory in entity fragment files.
+
+ orgID can become a mandatory attribute if we ever stop
+ publishing UKFederationMember labels.
+
+
+
+
+
+
diff --git a/mdx/uk/beans.xml b/mdx/uk/beans.xml
index 1acb9162..eb185e63 100644
--- a/mdx/uk/beans.xml
+++ b/mdx/uk/beans.xml
@@ -125,38 +125,39 @@
-
-
+
+
+
+
+
-
-
+
-
+
-
-
-
-
-
-
-
+
+
+
+
+ unknown assurance certification URI
+
+
+
+
+
diff --git a/mdx/uk/generate.xml b/mdx/uk/generate.xml
index b259957e..9285cab2 100644
--- a/mdx/uk/generate.xml
+++ b/mdx/uk/generate.xml
@@ -119,7 +119,71 @@
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -242,6 +310,11 @@
+
+
+
@@ -299,6 +372,11 @@
+
+
+
@@ -477,6 +555,11 @@
+
+
+
@@ -587,6 +670,11 @@
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- *** ENTITY COMMENT GOES HERE ***
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- *** VERIFY OR REMOVE THE FOLLOWING ELEMENT ***
-
- *** VERIFY OR REMOVE THE FOLLOWING ELEMENT ***
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *** ENTITY COMMENT GOES HERE ***
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *** FILL IN APPROPRIATE orgID VALUE BELOW ***
+
+
+
+
+ *** VERIFY OR REMOVE THE FOLLOWING ELEMENT ***
+
+ *** VERIFY OR REMOVE THE FOLLOWING ELEMENT ***
+
+
+
+
+
+
+
+
+
+
+
+
-
- http://ukfederation.org.uk
-
-
-
-
- en
- http://ukfederation.org.uk/doc/mdrps-20130902
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- *** FILL IN ***
- *** FILL IN ***
- http://*** FILL IN ***/
-
-
-
-
-
-
-
-
-
-
-
- *** FILL IN ***
- *** FILL IN ***
- mailto:*** FILL IN ***
-
-
-
-
-
-
-
-
-
-
-
- *** FILL IN ***
- *** FILL IN ***
- mailto:*** FILL IN ***
-
-
-
-
-
-
-
-
-
-
-
- *** FILL IN ***
- *** FILL IN ***
- mailto:*** FILL IN ***
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- urn:oasis:names:tc:SAML:profiles:SSO:idp-discovery-protocol
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+ *** FILL IN ***
+ *** FILL IN ***
+ http://*** FILL IN ***/
+
+
+
+
+
+
+
+
+
+
+
+ *** FILL IN ***
+ *** FILL IN ***
+ mailto:*** FILL IN ***
+
+
+
+
+
+
+
+
+
+
+
+ *** FILL IN ***
+ *** FILL IN ***
+ mailto:*** FILL IN ***
+
+
+
+
+
+
+
+
+
+
+
+ *** FILL IN ***
+ *** FILL IN ***
+ mailto:*** FILL IN ***
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ urn:oasis:names:tc:SAML:profiles:SSO:idp-discovery-protocol
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mdx/uk/mdq-multisign.xml b/mdx/uk/mdq-multisign.xml
new file mode 100644
index 00000000..443f09a8
--- /dev/null
+++ b/mdx/uk/mdq-multisign.xml
@@ -0,0 +1,97 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mdx/uk/ns_norm_test.xsl b/mdx/uk/ns_norm_test.xsl
index 23b6bdac..b05bf888 100644
--- a/mdx/uk/ns_norm_test.xsl
+++ b/mdx/uk/ns_norm_test.xsl
@@ -33,6 +33,7 @@
xmlns:mdattr="urn:oasis:names:tc:SAML:metadata:attribute"
xmlns:mdrpi="urn:oasis:names:tc:SAML:metadata:rpi"
xmlns:mdui="urn:oasis:names:tc:SAML:metadata:ui"
+ xmlns:remd="http://refeds.org/metadata"
xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
xmlns:shibmd="urn:mace:shibboleth:metadata:1.0"
xmlns:ukfedlabel="http://ukfederation.org.uk/2006/11/label"
diff --git a/mdx/uk/opensc-mac.cfg b/mdx/uk/opensc-mac.cfg
new file mode 100644
index 00000000..afad28fc
--- /dev/null
+++ b/mdx/uk/opensc-mac.cfg
@@ -0,0 +1,3 @@
+# PKCS#11 provider configuration for for OpenSC running under Mac OS X
+name = OpenSC
+library = /Library/OpenSC/lib/pkcs11/opensc-pkcs11.so
diff --git a/mdx/uk/statistics.xsl b/mdx/uk/statistics.xsl
index ef64c117..c31d1721 100644
--- a/mdx/uk/statistics.xsl
+++ b/mdx/uk/statistics.xsl
@@ -124,7 +124,6 @@
Entity Statistics
Entities by Owner
Identity Provider Accountability
- Members by Primary Scope
Members Lacking Deployment
Shibboleth 1.3 Remnants
Export Aggregate: Entities Opted Out
@@ -151,14 +150,6 @@
to an aggregate IdP.
Other IdP outsourcing, and any SP outsourcing, is not recorded in the table.
-
- The final column in the table, Primary Scope, records a scope (or security domain)
- owned by the member and designated as its main (or only) scope.
- ('Primary Scope' is a useful concept, but is not precisely defined.
- It is only recorded if the member in question owns an IdP or outsources its IdP provision -
- and perhaps not even then, as it it sometimes unclear which of the scopes
- it owns should be designated as 'primary'.)
-
Member
@@ -166,7 +157,6 @@
IdPs
SPs
OSrc
- Primary Scope
@@ -973,33 +963,6 @@
-
-
- Primary Scope is a useful concept, but is not precisely defined.
- It is only recorded if the member in question owns an IdP or outsources its IdP provision -
- and perhaps not even then, as it it sometimes unclear which of the scopes
- it owns should be designated as 'primary'.
-
-
- Primary Scope
- Member
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -1342,7 +1292,6 @@
:
- [not-M]
[IdP]
+
:
diff --git a/mdx/uk/strip_extensions.xsl b/mdx/uk/strip_extensions.xsl
index edf0ca14..335a0236 100644
--- a/mdx/uk/strip_extensions.xsl
+++ b/mdx/uk/strip_extensions.xsl
@@ -22,7 +22,15 @@
-->
-
+
+
diff --git a/mdx/uk/strip_sirtfi_contacts.xsl b/mdx/uk/strip_sirtfi_contacts.xsl
new file mode 100644
index 00000000..9975d051
--- /dev/null
+++ b/mdx/uk/strip_sirtfi_contacts.xsl
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/mdx/uk/ukfederation-2014.jks b/mdx/uk/ukfederation-2014.jks
deleted file mode 100644
index b06e2eb2..00000000
Binary files a/mdx/uk/ukfederation-2014.jks and /dev/null differ
diff --git a/mdx/uk/ukfederation-mdq.pem b/mdx/uk/ukfederation-mdq.pem
new file mode 100644
index 00000000..2b402a3a
--- /dev/null
+++ b/mdx/uk/ukfederation-mdq.pem
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFTTCCAzWgAwIBAgIEXGA32DANBgkqhkiG9w0BAQsFADBQMSEwHwYDVQQDExhV
+SyBmZWRlcmF0aW9uIE1EUSBTaWduZXIxHjAcBgNVBAoTFUppc2MgU2VydmljZXMg
+TGltaXRlZDELMAkGA1UEBhMCR0IwHhcNMTYxMTIzMTgxNjU2WhcNMzcxMjMxMTgx
+NjU2WjBQMSEwHwYDVQQDExhVSyBmZWRlcmF0aW9uIE1EUSBTaWduZXIxHjAcBgNV
+BAoTFUppc2MgU2VydmljZXMgTGltaXRlZDELMAkGA1UEBhMCR0IwggIiMA0GCSqG
+SIb3DQEBAQUAA4ICDwAwggIKAoICAQCI5H5i6x+PJrKQyfI8ALGEisMiHwQLUbzs
+h2Sx8ssRkldAohR5CHp5qeMMpBDb1Pv9bBGppe+10oh2URYcPE+gBuajZT1dL8pg
+jE7F3UUOJa+MXh9jBeDmoiCmXO8V8T4DWtQAA2ObbYPKynCZ6FaGsGV8N7GYUsMK
+SXT3dfkbAzk6J7l4Top4gg4yZd6ELQwarLG5M5h0xnIIaoNSIspxTLTkIMDgJRo8
+4VObLUriJwiLPzfHXAJxJdq+0AzHzhlDrg1hTtB82dOMGGyXZd4R6E6Aar8OrKa6
+uz8OYWj8oeLzHGmzdw7dr+7WesO+4ofNksPh3lyGoRlvhWTKgBIyzXTiPRWRl2k7
+b2EWEFBoBk4+GgVhi8hjA5yriTEe99RcigFq2Y1SemKYtz3ur2wmrBag+NsWm2rm
+OHBehrYEDjlkHqzhvgqygoj2JFogP7L0ZvLh1VdU4waLAkLBLi5EJmlNjfN0b124
+UrJHXN7z/zFAl2r+Or1KZbZnWKBRD5IKZBAo/iRT4ULGqxImF+/yURXpuI12wz4P
+JQXXmU9NNzJrWLaDH5mesCeVLWg64/RoqbIVIbMCd9FTxhJTH6rr/hLkldGtHjiy
+EuvUE7lZ+2Xu2QAnW68tKmsSqk0/C3gt9l/3xhnUBaguhUo8OWrnZ1pxr+GSdnJ6
+NRm+f46RAQIDAQABoy8wLTAMBgNVHRMEBTADAQH/MB0GA1UdDgQWBBSbDGYuV4tc
+bEWVEpPE3MjgF0c+UjANBgkqhkiG9w0BAQsFAAOCAgEAeBgy2CgA31Sriyw1tBnY
+kzb6Vlemnv/UwZjivoOftqdp1TS8AeMs9qGgTBBeZkCV/6G8abq5gYBU8BETifR5
+FWxuIicU1oCNO4JwYoCpUNxwZfTbvuTKRcLia5o2OYvJo5friL5a8fWdhUy43tSh
+ubOTRqeIPSDOYQif9D0Kq6A8+oURHEBA+wwDthkhRanvJYdHp6Z6YKiwTUXp1MCH
+qe0q+LnoQ2ZRXRmSZ0y2t9ghPCFY9pD4OKnyyAxjQZdn1qFyMtYlkY9acT/ZdLDq
+3LcmaGAJEqgH0dAbl3xRkwqotP//JJ/4ffTaJHF+D3yN9y2hJ1xYukfd8caRTB+W
+O6yiQwcR7707irmF5HdW5hxIQlGgR1w/akz188KuGRP3MSWVIGEdwjCVz41XxI7V
+0MC7tZs/gujXpb58BcWIog5fceTY2dux9g4MzYKifVAPORgVWXDyXtiyddWbVorI
+He6vvbpRs5UaTyiLbUJkEs8ApJYHApZwJ2Ewz4Uea02qqP0nCVgcr+fnyugyVx4T
+KWBrvb9T2A2Z2HuQlTWksTAdapluRUj3pvvzZ+tCTXYbW0YdYSMKKH+QEwzEe90+
+gy4dJqx8m9bQ3hOu60GqyYHT7ng+dx3SxZ8zA97iXEqJnqJksaIRhzLB/kku2obf
+YC3UXJnkRumoAW1o2AjWQGg=
+-----END CERTIFICATE-----
diff --git a/mdx/validation-beans.xml b/mdx/validation-beans.xml
index eb0b2633..3ded8bdf 100644
--- a/mdx/validation-beans.xml
+++ b/mdx/validation-beans.xml
@@ -311,6 +311,21 @@
p:XSLResource="classpath:_rules/check_shibboleth.xsl"/>
+
+
+
+
+
+
orgid='$orgid'\n";
+ $name_to_orgid{$name} = $orgid;
+}
+close(F);
+
+open(F, "ukid-to-name.txt") || die "could not open ukid-to-name map";
+while () {
+ my ($ukid, $name) = split /[\t\n]/;
+ # print "ukid='$ukid' --> name='$name'\n";
+ if (defined $name_to_orgid{$name}) {
+ # print " --> orgid='$name_to_orgid{$name}'\n"
+ my $orgid = $name_to_orgid{$name};
+ $command = "perl -i patch.pl $orgid entities/$ukid.xml";
+ print "$ukid --> $orgid $command\n";
+ system($command);
+ } else {
+ die "'$name' unmapped";
+ # print " --> undefined\n";
+ }
+}
+close(F);
diff --git a/utilities/2016-09-16/gen-id-to-name.xsl b/utilities/2016-09-16/gen-id-to-name.xsl
new file mode 100644
index 00000000..a2c477cc
--- /dev/null
+++ b/utilities/2016-09-16/gen-id-to-name.xsl
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/utilities/2016-09-16/gen-ukid-to-name.xsl b/utilities/2016-09-16/gen-ukid-to-name.xsl
new file mode 100644
index 00000000..3df2bd3b
--- /dev/null
+++ b/utilities/2016-09-16/gen-ukid-to-name.xsl
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/utilities/2016-09-16/patch.pl b/utilities/2016-09-16/patch.pl
new file mode 100755
index 00000000..c47766c3
--- /dev/null
+++ b/utilities/2016-09-16/patch.pl
@@ -0,0 +1,10 @@
+#!/usr/bin/env perl -W
+
+my $orgID = shift @ARGV;
+
+while (<>) {
+ if (/UKFederationMember/ && !/orgID/) {
+ s/UKFederationMember/UKFederationMember orgID="$orgID"/;
+ }
+ print $_;
+}
diff --git a/utilities/2016-10-06/README.md b/utilities/2016-10-06/README.md
new file mode 100644
index 00000000..40080799
--- /dev/null
+++ b/utilities/2016-10-06/README.md
@@ -0,0 +1,25 @@
+# `utilities/2016-10-06`
+
+These transforms and scripts were used to add an `orgID` attribute to the
+`Grant` and `GrantAll` elements on all participants in the `members.xml` file.
+
+## Step 1
+
+Generate `id-to-name.txt` as follows:
+
+ xsltproc --output id-to-name.txt gen-id-to-name.xsl members/members.xml
+
+This file contains a mapping between organization IDs and canonical
+organization names, like this:
+
+ukforg4590 Ian A. Young
+
+The first field is separated from the second by a single tab character.
+
+## Step 2
+
+Apply the `patch.pl` script to generate a new version of `members.xml`.
+
+ ./patch.pl members/members.xml >members/members-new.xml
+
+Compare the two versions of the file before replacing the old one.
diff --git a/utilities/2016-10-06/gen-id-to-name.xsl b/utilities/2016-10-06/gen-id-to-name.xsl
new file mode 100644
index 00000000..a2c477cc
--- /dev/null
+++ b/utilities/2016-10-06/gen-id-to-name.xsl
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/utilities/2016-10-06/patch.pl b/utilities/2016-10-06/patch.pl
new file mode 100755
index 00000000..638f5d7d
--- /dev/null
+++ b/utilities/2016-10-06/patch.pl
@@ -0,0 +1,32 @@
+#!/usr/bin/env perl -W
+
+open(F, "id-to-name.txt") || die "could not open id-to-name map";
+while () {
+ my ($orgid, $name) = split /[\t\n]/;
+ #print "name='$name' --> orgid='$orgid'\n";
+ $name_to_orgid{$name} = $orgid;
+}
+close(F);
+
+while (<>) {
+ # 12 2 3 3 1 4 4 5 5
+ if (/^((.*);
-
+
#
# Make an array of components, each something like "DNS:example.co.uk"
#
@@ -322,17 +303,17 @@ sub comment {
my @altNames = split /\s*,\s*/, $next;
# my $altSet = "{" . join(", ", @altNames) . "}";
# print "Alt set: $altSet\n";
-
+
#
# Each "DNS" component is an additional name for this certificate.
#
while (@altNames) {
my ($type, $altName) = split(":", pop @altNames);
- $names{lc $altName}++ if $type eq 'DNS';
+ $names{lc $altName}++ if $type eq 'DNS';
}
next;
}
-
+
}
close SSL;
#print " text lines: $#lines\n";
@@ -376,27 +357,13 @@ sub comment {
#
close $fh;
- #
- # Add a warning for certain issuers.
- #
- if (defined $issuerMark{$issuerCN}) {
- my $mark = $issuerMark{$issuerCN};
- if ($mark eq '?') {
- warning("issuer '$issuerCN' suspect; verify");
- }
- }
-
#
# Count issuers.
#
if ($issuer eq $subject) {
$issuers{'(self-signed certificate)'}++;
} else {
- if ($verboseIssuers) {
- $issuers{$issuer}++;
- } else {
- $issuers{$issuerCN}++;
- }
+ $issuers{'Other'}++;
}
#
diff --git a/utilities/diff-between-publications.sh b/utilities/diff-between-publications.sh
index 3b1dc26d..6928a14c 100755
--- a/utilities/diff-between-publications.sh
+++ b/utilities/diff-between-publications.sh
@@ -60,7 +60,7 @@ aggregatesizediffpc=$(echo "scale=5;$aggregatesizediff/$previousaggregatesize" |
# -> Finally, get a git log between those two dates (epoch) in data repo
currenttagdate=$(git --work-tree=$sharedwsdir/$repoproducts --git-dir=$sharedwsdir/$repoproducts/.git log -1 $currenttag --format=%ct)
previoustagdate=$(git --work-tree=$sharedwsdir/$repoproducts --git-dir=$sharedwsdir/$repoproducts/.git log -1 $previoustag --format=%ct)
-gitlog=$(git --work-tree=$sharedwsdir/$repodata --git-dir=$sharedwsdir/$repodata/.git log --format="" --after=$previoustagdate --before=$currenttagdate)
+gitlog=$(git --work-tree=$sharedwsdir/$repodata --git-dir=$sharedwsdir/$repodata/.git log --format="" --after=$previoustagdate --before=$currenttagdate | sed "s/'//g")
gitlognumentries=$(git --work-tree=$sharedwsdir/$repodata --git-dir=$sharedwsdir/$repodata/.git log --format="%h" --after=$previoustagdate --before=$currenttagdate | wc -l | awk '{print $1}')
# =====
diff --git a/utilities/expiry_whitelist.txt b/utilities/expiry_whitelist.txt
index 9bedfb6d..e3fd21fa 100644
--- a/utilities/expiry_whitelist.txt
+++ b/utilities/expiry_whitelist.txt
@@ -41,9 +41,6 @@ F9:04:F9:4A:4B:D4:7D:30:42:88:64:1B:C8:51:EF:CC:43:D9:30:10 \
90:A3:BB:7B:C3:8E:EB:57:8D:DA:4E:42:01:64:3B:11:D9:B4:F5:75 \
Certificate expires 8 January 2015 - merger - Stourbridge College. Entity uk001743. Call 11565.
-10:6D:8F:2D:14:31:B6:56:18:D8:CB:E9:BD:AB:96:DC:9D:ED:30:35 \
- Certificate expired 8 March 2015 - Shibboleth IdP for Southport College. Entity uk000308. Call 11905.
-
64:44:D0:DF:86:52:F3:CD:3D:D6:75:8F:8E:84:82:92:7F:4E:93:C3 \
Certificate expired 10 April 2015 - Dawson Shibboleth SP for Semantico Limited. Entity uk002112. Call 12202.
@@ -70,4 +67,7 @@ F3:63:1C:35:CC:BC:FD:E7:A4:B7:3B:C8:54:FF:AF:0F:0F:A2:66:04 \
13:C7:EB:D0:42:30:4A:41:40:1C:6F:F8:08:AA:EB:89:B2:31:05:2B \
Expired certificate (09/05/2016) for a Shibboleth IdP for King George V College. Entity uk001322. Call 15465.
+
+F6:FF:A2:1A:61:1A:05:C0:2A:A3:5B:30:63:95:32:7D:16:39:D7:05 \
+ Expired certificate in IdP for Henley College Coventry (uk001729) which is being replaced - SR00113049
# END
diff --git a/build/normalise_all_fragments b/utilities/normalise_all_fragments
similarity index 55%
rename from build/normalise_all_fragments
rename to utilities/normalise_all_fragments
index 371cb0be..f19c6496 100755
--- a/build/normalise_all_fragments
+++ b/utilities/normalise_all_fragments
@@ -1,4 +1,4 @@
#!/bin/bash
-find ../entities -name uk\*.xml \
+find ../../ukf-data/entities -name uk\*.xml \
-exec python normalise_fragment \{\} \;
diff --git a/build/normalise_fragment b/utilities/normalise_fragment
similarity index 78%
rename from build/normalise_fragment
rename to utilities/normalise_fragment
index 06f58582..a5cc93ac 100755
--- a/build/normalise_fragment
+++ b/utilities/normalise_fragment
@@ -51,25 +51,27 @@ ED_TEMPLATE = Template('''
xmlns:mdattr="urn:oasis:names:tc:SAML:metadata:attribute"
xmlns:mdrpi="urn:oasis:names:tc:SAML:metadata:rpi"
xmlns:mdui="urn:oasis:names:tc:SAML:metadata:ui"
+ xmlns:remd="http://refeds.org/metadata"
xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
xmlns:shibmd="urn:mace:shibboleth:metadata:1.0"
xmlns:ukfedlabel="http://ukfederation.org.uk/2006/11/label"
xmlns:wayf="http://sdss.ac.uk/2006/06/WAYF"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="urn:oasis:names:tc:SAML:2.0:metadata ../xml/saml-schema-metadata-2.0.xsd
- urn:oasis:names:tc:SAML:metadata:algsupport ../xml/sstc-saml-metadata-algsupport-v1.0.xsd
- urn:oasis:names:tc:SAML:metadata:attribute ../xml/sstc-metadata-attr.xsd
- urn:oasis:names:tc:SAML:metadata:rpi ../xml/saml-metadata-rpi-v1.0.xsd
- urn:oasis:names:tc:SAML:metadata:ui ../xml/sstc-saml-metadata-ui-v1.0.xsd
- urn:oasis:names:tc:SAML:profiles:SSO:idp-discovery-protocol ../xml/sstc-saml-idp-discovery.xsd
- urn:oasis:names:tc:SAML:profiles:SSO:request-init ../xml/sstc-request-initiation.xsd
- urn:oasis:names:tc:SAML:2.0:assertion ../xml/saml-schema-assertion-2.0.xsd
- urn:mace:shibboleth:metadata:1.0 ../xml/shibboleth-metadata-1.0.xsd
- http://ukfederation.org.uk/2006/11/label ../xml/uk-fed-label.xsd
- http://sdss.ac.uk/2006/06/WAYF ../xml/uk-wayf.xsd
- http://www.w3.org/2001/04/xmlenc# ../xml/xenc-schema.xsd
- http://www.w3.org/2009/xmlenc11# ../xml/xenc-schema-11.xsd
- http://www.w3.org/2000/09/xmldsig# ../xml/xmldsig-core-schema.xsd"
+ xsi:schemaLocation="urn:oasis:names:tc:SAML:2.0:metadata saml-schema-metadata-2.0.xsd
+ urn:oasis:names:tc:SAML:metadata:algsupport sstc-saml-metadata-algsupport-v1.0.xsd
+ urn:oasis:names:tc:SAML:metadata:attribute sstc-metadata-attr.xsd
+ urn:oasis:names:tc:SAML:metadata:rpi saml-metadata-rpi-v1.0.xsd
+ urn:oasis:names:tc:SAML:metadata:ui sstc-saml-metadata-ui-v1.0.xsd
+ urn:oasis:names:tc:SAML:profiles:SSO:idp-discovery-protocol sstc-saml-idp-discovery.xsd
+ urn:oasis:names:tc:SAML:profiles:SSO:request-init sstc-request-initiation.xsd
+ urn:oasis:names:tc:SAML:2.0:assertion saml-schema-assertion-2.0.xsd
+ urn:mace:shibboleth:metadata:1.0 shibboleth-metadata-1.0.xsd
+ http://ukfederation.org.uk/2006/11/label uk-fed-label.xsd
+ http://refeds.org/metadata refeds-metadata.xsd
+ http://sdss.ac.uk/2006/06/WAYF uk-wayf.xsd
+ http://www.w3.org/2001/04/xmlenc# xenc-schema.xsd
+ http://www.w3.org/2009/xmlenc11# xenc-schema-11.xsd
+ http://www.w3.org/2000/09/xmldsig# xmldsig-core-schema.xsd"
ID="${ID}" entityID="${entityID}">
''')
diff --git a/utilities/stats-generate.sh b/utilities/stats-generate.sh
new file mode 100755
index 00000000..3e694661
--- /dev/null
+++ b/utilities/stats-generate.sh
@@ -0,0 +1,766 @@
+#!/bin/bash
+
+# This script will calculate stats
+#
+# Expects the following to be provided as arguments:
+# * Time period - day/month/year
+# * Time - YYYY-MM-DD/YYYY-MM/YYYY
+
+# Assumes you've just run stats-sync.sh to make sure the source
+# log files are up to date
+
+
+
+
+# =====
+# = Some common functions
+# =====
+
bytestohr()
{
    # Convert a raw byte count ($1) to a human-readable string on stdout,
    # e.g. 500 -> "500 B", 2048 -> "2.0 KB".  Divides by 1024 until the
    # integer part fits, tracking the matching suffix letter.
    #
    # Fixes over the original: variables are now local (the originals
    # leaked value/valueint/i/suffix into the caller's scope), expansions
    # are quoted, and the bytes case prints "N B" explicitly instead of
    # relying on word-splitting to drop the leading space in $suffix.
    local value=$1
    local valueint=$1
    local i=0
    local suffix=" KMGTPEZY"
    while [ "$valueint" -gt 1024 ]; do
        i=$((i+1))
        valueint=$((valueint/1024))
        # bc keeps one decimal place on the scaled value
        value=$(echo "scale=1;$value/1024" | bc)
    done
    if [ "$i" -eq 0 ]; then
        echo "$value B"
    else
        echo "$value ${suffix:$i:1}B"
    fi
}
+
+
+
+
# =====
# = Set some common options
# =====

# Root directory under which stats-sync.sh deposits the synced log files
logslocation="/var/stats"
# Printed on missing/malformed arguments.  Fixed: the original named a
# non-existent script ("generate-stats.sh") and showed empty "[]"
# placeholders; this names the actual script and its argument shapes.
usageerrormsg="usage: stats-generate.sh day|month|year [YYYY-MM-DD|YYYY-MM|YYYY]"
+
+
+
+
# =====
# = Preamble
# =====

#
# Fail if required input isn't provided.
# ($1 is the time period; a missing $2 is defaulted below.)
#
if [[ -z $1 ]]; then
    echo $usageerrormsg
    exit 1
fi


#
# Get the input
#
timeperiod=$1
# Note: $date is a plain variable and does not shadow the date(1) command.
date=$2


#
# Fail if time period provided isn't day/month/year
#
if ! { [[ "$timeperiod" == "day" ]] || [[ "$timeperiod" == "month" ]] || [[ "$timeperiod" == "year" ]]; }; then
    echo $usageerrormsg
    exit 1
fi

#
# If no date provided, then use the following:
# * Day - Previous day
# * Month - Previous month
# * Year - Previous year
#
if [[ -z $2 ]]; then
    if [[ "$timeperiod" == "day" ]]; then
        # "yesterday 12:00" keeps well away from midnight/DST edges
        date=$(date -d "yesterday 12:00" '+%Y-%m-%d')
    elif [[ "$timeperiod" == "month" ]]; then
        date=$(date -d "last month" '+%Y-%m')
    else
        date=$(date -d "last year" '+%Y')
    fi
fi

#
# Fail if date format provided doesn't match time period
# (shape check only; calendar validity is checked in the next section)
#
if [[ "$timeperiod" == "day" ]]; then
    if [[ ! $date =~ ^[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}$ ]]; then
        echo "Wrong type of input date for $1, must be YYYY-MM-DD"
        exit 1
    fi
elif [[ "$timeperiod" == "month" ]]; then
    if [[ ! $date =~ ^[[:digit:]]{4}-[[:digit:]]{2}$ ]]; then
        echo "Wrong type of input date for $1, must be YYYY-MM"
        exit 1
    fi
elif [[ "$timeperiod" == "year" ]]; then
    if [[ ! $date =~ ^[[:digit:]]{4}$ ]]; then
        echo "Wrong type of input date for $1, must be YYYY"
        exit 1
    fi
else
    # Unreachable: the time period was validated above; kept as a
    # defensive fallback.
    echo $usageerrormsg
    exit 1
fi

#
# Fail if date provided isn't valid for time period
# (months/years are validated by pinning to the first day; date(1)
# rejects impossible dates such as 2023-02-30)
#
if [[ "$timeperiod" == "day" ]]; then
    if [[ ! $(date -d ${date} 2> /dev/null) ]]; then
        echo "YYYY-MM-DD provided, but not a valid date."
        exit 1
    fi
elif [[ "$timeperiod" == "month" ]]; then
    if [[ ! $(date -d ${date}-01 2> /dev/null) ]]; then
        echo "YYYY-MM provided, but not a valid date."
        exit 1
    fi
elif [[ "$timeperiod" == "year" ]]; then
    if [[ ! $(date -d ${date}-01-01 2> /dev/null) ]]; then
        echo "YYYY provided, but not a valid date."
        exit 1
    fi
else
    # Unreachable: defensive fallback, as above.
    echo $usageerrormsg
    exit 1
fi
+
+
+
+
+# =====
+# = Calculate the correct date things to search for in the log files
+# =====
+
+
# Build the substrings used to select log lines for the requested period:
# Apache access logs carry "DD/Mon/YYYY:HH..." timestamps, the Java audit
# logs carry "YYYYMMDDThh..." timestamps.  Month/year inputs are pinned to
# the first day so date(1) can parse them.
case "$timeperiod" in
    day)
        apachesearchterm="$(date -d "$date" '+%d/%b/%Y'):"
        javasearchterm="$(date -d "$date" '+%Y%m%d')T"
        ;;
    month)
        apachesearchterm="/$(date -d "$date-01" '+%b/%Y'):"
        javasearchterm="$(date -d "$date-01" '+%Y%m')"
        ;;
    *)
        apachesearchterm="/$(date -d "$date-01-01" '+%Y'):"
        javasearchterm="$(date -d "$date-01-01" '+%Y')"
        ;;
esac
+
+
+
+
+# =====
+# = Generate stats sets
+# =====
+
+
# =====
# MD stats
# =====

# Get the filesize of the latest uncompressed main aggregate.
# Since this is just used for estimation, we'll just take the biggest
# unique filesize for the relevant periods
# (field 10 of a combined-format log line is the response size in bytes).
# NOTE(review): "sort -r" is lexicographic, not numeric ("999" sorts above
# "100000") — probably harmless for same-magnitude sizes, but confirm.
aggrfilesizebytes=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-metadata.xml" | grep "\" 200" | grep "GET" | grep -v "GZIP" | cut -f 10 -d " " | sort -r | uniq | head -1)

#
# Download counts
#

# Aggregate requests. Everything for .xml (HEAD/GET, 200 and 304)
# (".xml" is an unanchored pattern; the dot matches any character)
mdaggrcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | wc -l)
# awk's %'d format adds locale thousands separators for the report
mdaggrcountfriendly=$(echo $mdaggrcount | awk '{ printf ("%'"'"'d\n", $0) }')

# Main Aggregate requests. Everything for ukfederation-metadata.xml (HEAD/GET, 200 and 304)
mdaggrmaincount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-metadata.xml" | wc -l)
mdaggrmaincountfriendly=$(echo $mdaggrmaincount | awk '{ printf ("%'"'"'d\n", $0) }')
# Percentage of all aggregate requests that were for the main aggregate
# (main requests are a subset of all requests, so a non-zero numerator
# implies a non-zero divisor)
if [[ "$mdaggrmaincount" -ne "0" ]]; then
    mdaggrmainpc=$(echo "scale=4;($mdaggrmaincount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
else
    mdaggrmainpc="0.0"
fi
+
# Other aggregate requests (don't calculate these if doing daily stats)
# Each sub-aggregate gets a raw count, a thousands-separated "friendly"
# count, and its share of all aggregate requests.
if [[ "$timeperiod" != "day" ]]; then
    mdaggrbackcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-back.xml" | wc -l)
    mdaggrbackcountfriendly=$(echo $mdaggrbackcount | awk '{ printf ("%'"'"'d\n", $0) }')
    if [[ "$mdaggrbackcount" -ne "0" ]]; then
        mdaggrbackpc=$(echo "scale=4;($mdaggrbackcount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    else
        mdaggrbackpc="0.0"
    fi
    mdaggrcdsallcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-cdsall.xml" | wc -l)
    mdaggrcdsallcountfriendly=$(echo $mdaggrcdsallcount | awk '{ printf ("%'"'"'d\n", $0) }')
    if [[ "$mdaggrcdsallcount" -ne "0" ]]; then
        mdaggrcdsallpc=$(echo "scale=4;($mdaggrcdsallcount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    else
        mdaggrcdsallpc="0.0"
    fi
+ mdaggrexportpreviewcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-export-preview.xml" | wc -l)
+ mdaggrexportpreviewcountfriendly=$(echo $mdaggrexportpreviewcount | awk '{ printf ("%'"'"'d\n", $0) }')
+ if [[ "$mdaggrexportpreviewkcount" -ne "0" ]]; then
+ mdaggrexportpreviewpc=$(echo "scale=4;($mdaggrexportpreviewcount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
+ else
+ mdaggrexportpreviewpc="0.0"
+ fi
    # Export aggregate requests
    mdaggrexportcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-export.xml" | wc -l)
    mdaggrexportcountfriendly=$(echo $mdaggrexportcount | awk '{ printf ("%'"'"'d\n", $0) }')
    if [[ "$mdaggrexportcount" -ne "0" ]]; then
        mdaggrexportpc=$(echo "scale=4;($mdaggrexportcount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    else
        mdaggrexportpc="0.0"
    fi
    # Test aggregate requests
    mdaggrtestcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-test.xml" | wc -l)
    mdaggrtestcountfriendly=$(echo $mdaggrtestcount | awk '{ printf ("%'"'"'d\n", $0) }')
    if [[ "$mdaggrtestcount" -ne "0" ]]; then
        mdaggrtestpc=$(echo "scale=4;($mdaggrtestcount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    else
        mdaggrtestpc="0.0"
    fi
    # WAYF aggregate requests
    mdaggrwayfcount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-wayf.xml" | wc -l)
    mdaggrwayfcountfriendly=$(echo $mdaggrwayfcount | awk '{ printf ("%'"'"'d\n", $0) }')
    if [[ "$mdaggrwayfcount" -ne "0" ]]; then
        mdaggrwayfpc=$(echo "scale=4;($mdaggrwayfcount/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    else
        mdaggrwayfpc="0.0"
    fi
fi
+
# Aggregate downloads (i.e. GETs with HTTP 200 responses only)
mdaggrcountfull=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404| grep "\" 200" | grep "GET" | wc -l)
mdaggrcountfullfriendly=$(echo $mdaggrcountfull | awk '{ printf ("%'"'"'d\n", $0) }')

# Main Aggregate downloads (i.e. GETs with HTTP 200 responses only)
mdaggrmaincountfull=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-metadata.xml" | grep "\" 200" | grep "GET" | wc -l)
mdaggrmaincountfullfriendly=$(echo $mdaggrmaincountfull | awk '{ printf ("%'"'"'d\n", $0) }')

# Percentage of GETs with HTTP 200 responses compared to total requests
if [[ "$mdaggrcount" -ne "0" ]]; then
    mdaggrfullpc=$(echo "scale=2;($mdaggrcountfull/$mdaggrcount)*100" | bc | awk '{printf "%.0f\n", $0}')
else
    mdaggrfullpc="N/A"
fi

# Compressed downloads for all
# ("GZIP" is matched in the log line's trailer field — presumably an
# mod_deflate annotation in this log format; confirm against the vhost
# LogFormat)
mdaggrcountfullcompr=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | grep "\"GZIP\"" | wc -l)
mdaggrcountfullcomprfriendly=$(echo $mdaggrcountfullcompr | awk '{ printf ("%'"'"'d\n", $0) }')

# Compressed downloads for main aggregate
mdaggrmaincountfullcompr=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-metadata.xml" | grep "\" 200" | grep "GET" | grep "\"GZIP\"" | wc -l)

# Percentage of GZIPPED HTTP 200 responses compared to total full downloads
if [[ "$mdaggrcountfull" -ne "0" ]]; then
    mdaggrfullcomprpc=$(echo "scale=2;($mdaggrcountfullcompr/$mdaggrcountfull)*100" | bc | awk '{printf "%.0f\n", $0}')
else
    mdaggrfullcomprpc="N/A"
fi

# Unique IP addresses requesting aggregates
# (cut -f 2 -d ":" strips grep's "filename:" prefix; the first
# space-separated token of the remainder is the client IP)
mdaggruniqueip=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq | wc -l)
mdaggruniqueipfriendly=$(echo $mdaggruniqueip | awk '{ printf ("%'"'"'d\n", $0) }')

# Unique IP addresses requesting aggregates, full D/Ls only
mdaggruniqueipfull=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq | wc -l)
+
#
# Data shipped
#

# Total data shipped, all .xml files
# (awk sums the size field; the sum is empty — not 0 — when no lines
# match, and "[[ '' -gt 0 ]]" evaluates false, so the else branch covers
# that case)
mdaggrtotalbytes=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | cut -f 10 -d " " | awk '{sum+=$1} END {print sum}')
if [[ "$mdaggrtotalbytes" -gt "0" ]]; then
    mdaggrtotalhr=$(bytestohr $mdaggrtotalbytes)
else
    mdaggrtotalhr="0 B"
fi
+
# Total data shipped, ukfederation-metadata.xml file.
# Fix: the guard previously tested $mdaggrtotalbytes (the all-files total,
# a copy-paste from the block above), so an empty main-aggregate sum could
# be handed to bytestohr while the overall total was non-zero.
mdaggrmaintotalbytes=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "ukfederation-metadata.xml" | grep "\" 200" | grep "GET" | cut -f 10 -d " " | awk '{sum+=$1} END {print sum}')
if [[ "$mdaggrmaintotalbytes" -gt "0" ]]; then
    mdaggrmaintotalhr=$(bytestohr $mdaggrmaintotalbytes)
else
    mdaggrmaintotalhr="0 B"
fi
+
# Estimate total data shipped without compression
# (full downloads x uncompressed aggregate size).  $aggrfilesizebytes can
# be empty when no uncompressed 200 GET was seen in the period; default it
# to 0 so the arithmetic expansion stays valid instead of raising
# "operand expected".
mdaggrmaintotalestnocompressbytes=$(( mdaggrmaincountfull * ${aggrfilesizebytes:-0} ))
if [[ "$mdaggrmaintotalestnocompressbytes" -gt "0" ]]; then
    mdaggrmaintotalestnocompresshr=$(bytestohr $mdaggrmaintotalestnocompressbytes)
else
    mdaggrmaintotalestnocompresshr="0 B"
fi

# Estimate total data shipped without compression & conditional get
# (also fixes the stray indentation on the following "if")
mdaggrmaintotalestnocompressnocgetbytes=$(( mdaggrmaincount * ${aggrfilesizebytes:-0} ))
if [[ "$mdaggrmaintotalestnocompressnocgetbytes" -gt "0" ]]; then
    mdaggrmaintotalestnocompressnocgethr=$(bytestohr $mdaggrmaintotalestnocompressnocgetbytes)
else
    mdaggrmaintotalestnocompressnocgethr="0 B"
fi
+
+#
+# Other things
+#
+
# IPv4 vs IPv6 traffic (don't calculate these if doing daily stats)
# Note, while all v6 traffic passes through v6v4proxy1/2, we're counting accesses from the IPv4 addresses of those servers vs all others.
# When we add "real" v6 support to the servers, this needs changing to count IPv4 addresses vs IPv6 addresses.
if [[ "$timeperiod" != "day" ]]; then
    mdaggrv4count=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep -v 193.63.72.83 | grep -v 194.83.7.211 | wc -l)
    mdaggrv4pc=$(echo "scale=4;($mdaggrv4count/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    mdaggrv6count=$(( mdaggrcount - mdaggrv4count ))
    mdaggrv6pc=$(echo "scale=4;($mdaggrv6count/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')

    # Per-server request count
    # (field 5 of the "/"-separated grep output is the md1/md2/md3 log
    # directory name, i.e. which server served the request).
    # Fix: md2/md3 percentages were previously computed from
    # $mdaggrmd1count — a copy-paste error.
    mdaggrmd1count=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | cut -f 5 -d "/" | grep md1 | wc -l)
    mdaggrmd1pc=$(echo "scale=4;($mdaggrmd1count/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    mdaggrmd2count=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | cut -f 5 -d "/" | grep md2 | wc -l)
    mdaggrmd2pc=$(echo "scale=4;($mdaggrmd2count/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    mdaggrmd3count=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | cut -f 5 -d "/" | grep md3 | wc -l)
    mdaggrmd3pc=$(echo "scale=4;($mdaggrmd3count/$mdaggrcount)*100" | bc | awk '{printf "%.1f\n", $0}')
fi
+
+
# Min queries per IP (the least-active client's request count).
# Fix: the fallback branch previously assigned the misspelled variable
# "mdqaggrinqueriesperip", leaving mdaggrminqueriesperip unset when there
# were no requests.
if [[ $mdaggrcount -gt "0" ]]; then
    mdaggrminqueriesperip=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | tail -1 | awk '{print $1}' | awk '{ printf ("%'"'"'d\n", $0) }')
else
    mdaggrminqueriesperip="0"
fi
+
# Avg queries per IP (mean requests per unique client address)
if [[ "$mdaggruniqueip" -ne "0" ]]; then
    mdaggravgqueriesperip=$(echo "scale=2;($mdaggrcount/$mdaggruniqueip)" | bc | awk '{printf "%.0f\n", $0}')
else
    mdaggravgqueriesperip="0"
fi

# Max queries per IP (the busiest client's request count; head -1 of the
# descending uniq -c listing)
if [[ $mdaggrcount -gt "0" ]]; then
    mdaggrmaxqueriesperip=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | head -1 | awk '{print $1}' | awk '{ printf ("%'"'"'d\n", $0) }')
else
    mdaggrmaxqueriesperip="0"
fi
+
# Min queries per IP, full D/L only.
# Fix: as above, the fallback previously assigned the misspelled variable
# "mdqaggrinqueriesperipfull", leaving the real variable unset.
if [[ $mdaggrcountfull -gt "0" ]]; then
    mdaggrminqueriesperipfull=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | tail -1 | awk '{print $1}' | awk '{ printf ("%'"'"'d\n", $0) }')
else
    mdaggrminqueriesperipfull="0"
fi
+
# Avg queries per IP, full D/L only
if [[ "$mdaggruniqueipfull" -ne "0" ]]; then
    mdaggravgqueriesperipfull=$(echo "scale=2;($mdaggrcountfull/$mdaggruniqueipfull)" | bc | awk '{printf "%.0f\n", $0}')
else
    mdaggravgqueriesperipfull="0"
fi

# Max queries per IP, full D/L only
if [[ $mdaggrcountfull -gt "0" ]]; then
    mdaggrmaxqueriesperipfull=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | head -1 | awk '{print $1}' | awk '{ printf ("%'"'"'d\n", $0) }')
else
    mdaggrmaxqueriesperipfull="0"
fi
+
# Don't count these when doing daily stats
if [[ "$timeperiod" != "day" ]]; then

    # Top 10 downloaders and how many downloads / total data shipped (full downloads only)
    # ("count IP" pairs from uniq -c, busiest first; the two JANET proxy
    # addresses are excluded)
    # NOTE(review): this inner timeperiod test is redundant — we are
    # already inside an identical guard.
    if [[ "$timeperiod" != "day" ]]; then
        mdaggrtoptenipsbycount=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | grep -v 193.63.72.83 | grep -v 194.83.7.211 | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | head -10)
    fi

    #
    # Manipulate results of the top 10
    #

    # Blank the output we're going to set
    mdaggrtoptenipsbycountdetailed=""

    i=1
    # Split $mdaggrtoptenipsbycount on newlines only.
    # NOTE(review): IFS is not restored after the loop; the rest of the
    # script happens to tolerate a newline IFS, but confirm before relying
    # on word-splitting further down.
    IFS=$'\n'
    for line in $mdaggrtoptenipsbycount
    do
        # Parse the line
        count=$(echo $line | awk '{print $1}')
        # (the closing quote is oddly placed but awk still receives the
        # program {print $2})
        ipaddr=$(echo $line | awk '{print $2'})

        # Make count friendly
        countfriendly=$(echo $count | awk '{ printf ("%'"'"'d\n", $0) }')

        # Figure out total traffic shipped to this IP
        # ('grep -v -' drops entries whose size field is "-")
        totaldataforthisip=$(grep $apachesearchterm $logslocation/md/md1/metadata.uou-access_log* $logslocation/md/md2/metadata.uou-access_log* $logslocation/md/md3/metadata.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep ".xml" | grep -v 404 | grep "\" 200" | grep "GET" | grep $ipaddr | cut -f 10 -d " " | grep -v - | awk '{sum+=$1} END {print sum}')
        if [[ "$totaldataforthisip" -gt "0" ]]; then
            totaldataforthisiphr=$(bytestohr $totaldataforthisip)
        else
            totaldataforthisiphr="0 B"
        fi

        # Get Reverse DNS for this IP address
        rdnsforthisip=$(dig +noall +answer -x $ipaddr | awk '{print $5}')
        if [[ -z $rdnsforthisip ]]; then
            rdnsforthisip="No RDNS available"
        fi

        # Add to the output
        mdaggrtoptenipsbycountdetailed+="$i: $countfriendly D/Ls ($totaldataforthisiphr) from $ipaddr ($rdnsforthisip)\n"

        # Increment the count, and blank the rdns response
        i=$((i+1))
        rdnsforthisip=""
    done
fi
+
+
# =====
# MDQ stats
# =====

# MDQ requests: anything under /entities except the whole-federation
# "/entities " endpoint (trailing space, i.e. no entity name in the URL)
mdqcount=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep -v 404 | grep "/entities" | grep -v "/entities " | wc -l)
mdqcountfriendly=$(echo $mdqcount | awk '{ printf ("%'"'"'d\n", $0) }')

# MDQ downloads (i.e. HTTP 200 responses only)
mdqcountfull=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v "/entities " | grep -v 404 | grep "\" 200" | grep "GET" | wc -l)
mdqcountfullfriendly=$(echo $mdqcountfull | awk '{ printf ("%'"'"'d\n", $0) }')

# Percentage of HTTP 200 responses compared to total requests
if [[ "$mdqcount" -ne "0" ]]; then
    mdqfullpc=$(echo "scale=2;($mdqcountfull/$mdqcount)*100" | bc | awk '{printf "%.0f\n", $0}')
else
    mdqfullpc="N/A"
fi

# Compressed downloads
mdqfullcomprcount=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v "/entities " | grep -v 404 | grep "\" 200" | grep "GET" | grep "\"GZIP\"" | wc -l)
mdqfullcomprcountfriendly=$(echo $mdqfullcomprcount | awk '{ printf ("%'"'"'d\n", $0) }')

# Percentage of GZIPPED HTTP 200 responses compared to total full downloads
if [[ "$mdqcountfull" -ne "0" ]]; then
    mdqfullcomprpc=$(echo "scale=2;($mdqfullcomprcount/$mdqcountfull)*100" | bc | awk '{printf "%.0f\n", $0}')
else
    mdqfullcomprpc="N/A"
fi
+
+
# IPv4 vs IPv6 traffic (don't calculate this for daily stats)

if [[ "$timeperiod" != "day" ]]; then
    # Note, while all v6 traffic passes through v6v4proxy1/2, we're counting accesses from the IPv4 addresses of those servers vs all others.
    # When we add "real" v6 support to the servers, this needs changing to count IPv4 addresses vs IPv6 addresses.
    if [[ "$mdqcount" -ne "0" ]]; then
        mdqv4count=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v "/entities " | grep -v 404 | grep -v 193.63.72.83 | grep -v 194.83.7.211 | wc -l)
        mdqv4pc=$(echo "scale=4;($mdqv4count/$mdqcount)*100" | bc | awk '{printf "%.1f\n", $0}')
        mdqv6count=$(( mdqcount - mdqv4count ))
        mdqv6pc=$(echo "scale=4;($mdqv6count/$mdqcount)*100" | bc | awk '{printf "%.1f\n", $0}')
    else
        mdqv4pc="N/A"
        mdqv6pc="N/A"
    fi
fi

# MDQ requests for entityId based names
# (entityIDs are URLs, so "/entities/http" identifies them)
mdqcountentityid=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v "/entities " | grep -v 404 | grep "/entities/http" | wc -l)
if [[ "$mdqcount" -ne "0" ]]; then
    mdqcountentityidpc=$(echo "scale=3;($mdqcountentityid/$mdqcount)*100" | bc | awk '{printf "%.1f\n", $0}')
else
    mdqcountentityidpc="N/A"
fi
mdqcountentityidfriendly=$(echo $mdqcountentityid | awk '{ printf ("%'"'"'d\n", $0) }')

# MDQ requests for hash based names ({sha1}... transformed identifiers)
mdqcountsha1=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v "/entities " | grep -v 404 | grep sha1 | wc -l)
if [[ "$mdqcount" -ne "0" ]]; then
    mdqcountsha1pc=$(echo "scale=3;($mdqcountsha1/$mdqcount)*100" | bc | awk '{printf "%.1f\n", $0}')
else
    mdqcountsha1pc="N/A"
fi
mdqcountsha1friendly=$(echo $mdqcountsha1 | awk '{ printf ("%'"'"'d\n", $0) }')
+
+
# MDQ requests for all entities (the bare "/entities " endpoint, i.e. the
# whole-federation download via MDQ)
mdqcountallentities=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities " | grep -v 404 | wc -l)

# Unique IP addresses requesting MDQ
# (cut -f 2 -d ":" strips grep's "filename:" prefix; first token is the IP)
mdquniqueip=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities/" | grep -v 404 | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq | wc -l)
mdquniqueipfriendly=$(echo $mdquniqueip | awk '{ printf ("%'"'"'d\n", $0) }')

# Total data shipped ('grep -v -' drops "-" size fields; an empty awk sum
# is handled by the else branch)
mdqtotalbytes=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities/" | grep -v 404 | grep "\" 200" | cut -f 10 -d " " | grep -v - | awk '{sum+=$1} END {print sum}')
if [[ "$mdqtotalbytes" -gt "0" ]]; then
    mdqtotalhr=$(bytestohr $mdqtotalbytes)
else
    mdqtotalhr="0 B"
fi
+
# Min queries per IP.
# Fix: use the same request population as mdqcount by excluding the
# whole-federation "/entities " endpoint; the previous filter excluded
# "/entities/ " (trailing slash), which matches a different, unused form
# and so skewed min/max against the avg calculation.
if [[ $mdqcount -gt "0" ]]; then
    mdqminqueriesperip=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v 404 | grep -v "/entities " | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | tail -1 | awk '{print $1}' | awk '{ printf ("%'"'"'d\n", $0) }')
else
    mdqminqueriesperip="0"
fi
+
# Avg queries per IP (mean MDQ requests per unique client address)
if [[ "$mdquniqueip" -ne "0" ]]; then
    mdqavgqueriesperip=$(echo "scale=2;($mdqcount/$mdquniqueip)" | bc | awk '{printf "%.0f\n", $0}')
else
    mdqavgqueriesperip="0"
fi
+
# Max queries per IP.
# Fix: as with the min calculation, exclude the "/entities " endpoint to
# match mdqcount's population (the previous "/entities/ " filter matched
# nothing useful).
if [[ $mdqcount -gt "0" ]]; then
    mdqmaxqueriesperip=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities" | grep -v 404 | grep -v "/entities " | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | head -1 | awk '{print $1}' | awk '{ printf ("%'"'"'d\n", $0) }')
else
    mdqmaxqueriesperip="0"
fi
+
if [[ "$timeperiod" != "day" ]]; then
    # Top 10 downloaders and how many downloads / total data shipped
    # (JANET proxy addresses excluded).
    # NOTE(review): the '/entities/ ' exclusion differs from mdqcount's
    # '/entities ' filter — confirm which population is intended.
    mdqtoptenipsbycount=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep -v 193.63.72.83 | grep -v 194.83.7.211 | grep "/entities" | grep -v 404 | grep -v "/entities/ " | cut -f 2 -d ":" | cut -f 1 -d " " | sort | uniq -c | sort -nr | head -10)

    #
    # Manipulate results of the top 10
    #

    # Blank the output we're going to set
    mdqtoptenipsbycountdetailed=""

    i=1
    # Split on newlines only (NOTE: IFS is not restored afterwards)
    IFS=$'\n'
    for line in $mdqtoptenipsbycount
    do
        # Parse the line ("count IP" pairs from uniq -c)
        count=$(echo $line | awk '{print $1}')
        # (oddly placed closing quote — awk still receives {print $2})
        ipaddr=$(echo $line | awk '{print $2'})

        # Make count friendly
        countfriendly=$(echo $count | awk '{ printf ("%'"'"'d\n", $0) }')

        # Figure out total traffic shipped to this IP
        totaldataforthisip=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep "/entities/" | grep -v 404 | grep "\" 200" | grep $ipaddr | cut -f 10 -d " " | grep -v - | awk '{sum+=$1} END {print sum}')
        if [[ "$totaldataforthisip" -gt "0" ]]; then
            totaldataforthisiphr=$(bytestohr $totaldataforthisip)
        else
            totaldataforthisiphr="0 B"
        fi

        # Get Reverse DNS for this IP address
        rdnsforthisip=$(dig +noall +answer -x $ipaddr | awk '{print $5}')
        if [[ -z $rdnsforthisip ]]; then
            rdnsforthisip="No RDNS available"
        fi

        # Add to the output
        mdqtoptenipsbycountdetailed+="$i: $countfriendly D/Ls ($totaldataforthisiphr) from $ipaddr ($rdnsforthisip)\n"

        # Increment the count, and blank the rdns response
        i=$((i+1))
        rdnsforthisip=""
    done


    # Top 10 queries and how many downloads / total data shipped
    # (awk $7 is the request path; the sed turns '+' into spaces and '%'
    # into '\x' escapes so that 'xargs -0 printf %b' URL-decodes the
    # entity name before counting)
    mdqtoptenqueriesbycount=$(grep $apachesearchterm $logslocation/md/md1/mdq.uou-access_log* $logslocation/md/md2/mdq.uou-access_log* $logslocation/md/md3/mdq.uou-access_log* | grep -Ev "(Sensu-HTTP-Check|dummy|check_http|Balancer)" | grep -v 193.63.72.83 | grep -v 194.83.7.211 | grep /entities/ | grep -v 404 | grep -v "/entities/ " | awk '{print $7}' | cut -f 3 -d "/" | sed "s@+@ @g;s@%@\\\\x@g" | xargs -0 printf "%b" | sort | uniq -c | sort -nr | head -10)
fi
+
# =====
# CDS stats
# =====

# How many accesses to .ds.
# (in basic grep syntax '?' is literal, so ".ds?" matches requests to a
# *.ds endpoint followed by a query string)
cdscount=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | wc -l)
cdscountfriendly=$(echo $cdscount | awk '{ printf ("%'"'"'d\n", $0) }')

# IPv4 vs IPv6 traffic (don't count these for daily stats)
if [[ "$timeperiod" != "day" ]]; then
    # Note, while all v6 traffic passes through v6v4proxy1/2, we're counting accesses from the IPv4 addresses of those servers vs all others.
    # When we add "real" v6 support to the servers, this needs changing to count IPv4 addresses vs IPv6 addresses.
    cdsv4count=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | grep -v 193.63.72.83 | grep -v 194.83.7.211 | wc -l)
    cdsv4pc=$(echo "scale=4;($cdsv4count/$cdscount)*100" | bc | awk '{printf "%.1f\n", $0}')
    cdsv6count=$(( cdscount - cdsv4count ))
    cdsv6pc=$(echo "scale=4;($cdsv6count/$cdscount)*100" | bc | awk '{printf "%.1f\n", $0}')

    # Per-server request count (matches the shib-cdsN log directory name
    # in grep's "filename:" prefix)
    cds1count=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | grep shib-cds1 | wc -l)
    cds1pc=$(echo "scale=4;($cds1count/$cdscount)*100" | bc | awk '{printf "%.1f\n", $0}')
    cds2count=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | grep shib-cds2 | wc -l)
    cds2pc=$(echo "scale=4;($cds2count/$cdscount)*100" | bc | awk '{printf "%.1f\n", $0}')
    cds3count=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | grep shib-cds3 | wc -l)
    cds3pc=$(echo "scale=4;($cds3count/$cdscount)*100" | bc | awk '{printf "%.1f\n", $0}')
fi

# How many of these were to the DS (has entityId in the parameters)
cdsdscount=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | grep entityID | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')

# How many of these were to the WAYF (has shire in the parameters)
cdswayfcount=$(grep $apachesearchterm $logslocation/cds/shib-cds1/ssl_access_log* $logslocation/cds/shib-cds2/ssl_access_log* $logslocation/cds/shib-cds3/ssl_access_log* | grep .ds? | grep shire | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+
+# =====
+# Wugen stats
+# =====
+
+# Total WAYFless URLs generated
+wugencount=$(grep $date $logslocation/wugen/urlgenerator-audit.* | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+# New subscribers to WAYFless URLs
+wugennewsubs=$(grep $date $logslocation/wugen/urlgenerator-process.* | grep "Subscribing user and service provider" | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+
+# =====
+# Test IdP stats
+# =====
+
+# How many logins did the IdP process?  zgrep also reads rotated (gzipped)
+# audit logs; "sso/browser" restricts the count to browser SSO events.
+# NOTE(review): $logslocation is unquoted here (SC2086) -- harmless for the
+# fixed /var/stats path but worth quoting for consistency.
+testidplogincount=$(zgrep "^$javasearchterm" $logslocation/test-idp/idp-audit* | grep "sso/browser" | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+# And to how many unique SPs?  Field 4 of the pipe-delimited audit line is
+# presumably the relying party (SP) entityID -- TODO(review): confirm against
+# an actual idp-audit log sample.
+testidpspcount=$(zgrep "^$javasearchterm" $logslocation/test-idp/idp-audit* | grep "sso/browser" | cut -f 4 -d "|" | sort | uniq | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+# Don't count these for daily stats
+if [[ "$timeperiod" != "day" ]]; then
+ # Top 10 SPs the IdP has logged into, ranked by login count.
+ testidptoptenspsbycount=$(zgrep "^$javasearchterm" $logslocation/test-idp/idp-audit* | grep "sso/browser" | cut -d "|" -f 4 | sort | uniq -c | sort -nr | head -10)
+
+ # Which Test IdP accounts are being used, and how much?  Field 9 looks like
+ # the principal/user name -- TODO(review): confirm.  'uniq -ic' counts
+ # case-insensitively so "Fred" and "fred" collapse to one user.
+ testidplogincountbyuser=$(zgrep "^$javasearchterm" $logslocation/test-idp/idp-audit* | grep "sso/browser" | cut -d "|" -f 9 | sort | uniq -ic)
+fi
+
+
+# =====
+# Test SP stats
+# =====
+
+# How many logins were there to the SP?
+testsplogincount=$(grep $date $logslocation/test-sp/shibd.log* | grep "new session created" | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+# And from how many unique IdPs?
+testspidpcount=$(grep $date $logslocation/test-sp/shibd.log* | grep "new session created" | cut -f 12 -d " " | sort | uniq | wc -l | awk '{ printf ("%'"'"'d\n", $0) }')
+
+# Don't count these for daily stats
+if [[ "$timeperiod" != "day" ]]; then
+ # Top 10 IdPs used to log into the Test SP
+ testsptoptenidpsbycount=$(grep $date $logslocation/test-sp/shibd.log* | grep "new session created" | awk '{print $12}' | cut -d "(" -f 2 | cut -d ")" -f 1 | sort | uniq -c | sort -nr | head -10)
+fi
+
+
+# =====
+# = Now we're ready to build the message. Different message for daily vs month/year
+# =====
+
+# $msg accumulates the whole report; the literal "\n" escapes are expanded
+# only when the message is finally emitted with 'echo -e' below.
+if [[ "$timeperiod" == "day" ]]; then
+ #
+ # Daily message, usually output via slack
+ # (Slack mrkdwn: a leading ">" renders a blockquote, "*...*" renders bold,
+ # and the trailing "*" after "requests" is a footnote marker explained on
+ # the following line.)
+ # NOTE(review): 'date -d' is GNU-only; this script is not portable to
+ # BSD/macOS date.
+ #
+ msg="Daily stats for $(date -d $date '+%a %d %b %Y'):\n"
+ msg+=">*MD dist:* $mdaggrcountfriendly requests* from $mdaggruniqueipfriendly IPs, $mdaggrtotalhr shipped.\n"
+ msg+=">-> * $mdaggrcountfullfriendly ($mdaggrfullpc%) were full D/Ls, of which $mdaggrcountfullcomprfriendly ($mdaggrfullcomprpc%) were compressed.\n"
+ msg+=">-> ukf-md.xml: $mdaggrmaintotalhr actual; est. $mdaggrmaintotalestnocompresshr w/no compr, $mdaggrmaintotalestnocompressnocgethr also w/no c/get.\n"
+ msg+=">-> $mdaggrminqueriesperip/$mdaggravgqueriesperip/$mdaggrmaxqueriesperip min/avg/max queries per querying IP (all reqs)\n"
+ msg+=">-> $mdaggrminqueriesperipfull/$mdaggravgqueriesperipfull/$mdaggrmaxqueriesperipfull min/avg/max queries per querying IP (full D/Ls only)\n"
+ msg+=">*MDQ:* $mdqcountfriendly requests* from $mdquniqueipfriendly IPs, $mdqtotalhr shipped.\n"
+ msg+=">-> * $mdqcountfullfriendly ($mdqfullpc%) were full D/Ls, of which $mdqfullcomprcountfriendly ($mdqfullcomprpc%) were compressed.\n"
+ msg+=">-> $mdqcountentityidfriendly ($mdqcountentityidpc%) entityId vs $mdqcountsha1friendly ($mdqcountsha1pc%) sha1 based queries\n"
+ msg+=">-> $mdqminqueriesperip/$mdqavgqueriesperip/$mdqmaxqueriesperip min/avg/max queries per querying IP\n"
+ msg+=">-> $mdqcountallentities queries for collection of all entities\n"
+ msg+=">*CDS:* $cdscountfriendly requests serviced (DS: $cdsdscount / WAYF: $cdswayfcount).\n"
+ msg+=">*Wugen:* $wugencount WAYFless URLs generated, $wugennewsubs new subscriptions.\n"
+ msg+=">*Test IdP:* $testidplogincount logins to $testidpspcount SPs.\n"
+ msg+=">*Test SP:* $testsplogincount logins from $testspidpcount IdPs."
+
+else
+ #
+ # Monthly/yearly message, usually output via email
+ # (plain text; "$date" is "YYYY-MM" for monthly runs -- hence the "-01"
+ # appended below to make a parseable date -- and "YYYY" for yearly runs.
+ # NOTE(review): presumed from the date arithmetic here; confirm against the
+ # argument handling earlier in the script.)
+ #
+ msg="==========\n"
+ if [[ "$timeperiod" == "month" ]]; then
+ msg+="= Monthly UKf systems stats for $(date -d $date-01 '+%b %Y')\n"
+ else
+ msg+="= Yearly UKf systems stats for $date\n"
+ fi
+ msg+="==========\n"
+ msg+="\n-----\n"
+ msg+="Metadata aggregate distribution:\n"
+ msg+="-> $mdaggrcountfriendly requests* from $mdaggruniqueipfriendly clients, $mdaggrtotalhr shipped.\n"
+ msg+="--> * $mdaggrcountfullfriendly ($mdaggrfullpc%) were full downloads, of which $mdaggrcountfullcomprfriendly ($mdaggrfullcomprpc%) were compressed.\n"
+ msg+="--> ukfederation-metadata.xml: $mdaggrmaintotalhr of data actually shipped; would have been an estimated $mdaggrmaintotalestnocompresshr without compression, and $mdaggrmaintotalestnocompressnocgethr without compression or conditional gets.\n"
+ msg+="-> IPv4: $mdaggrv4pc% vs IPv6: $mdaggrv6pc%\n"
+ msg+="-> Server distribution: md1: $mdaggrmd1pc% md2: $mdaggrmd2pc% md3: $mdaggrmd3pc%\n"
+ msg+="-> $mdaggrminqueriesperip/$mdaggravgqueriesperip/$mdaggrmaxqueriesperip min/avg/max queries per querying IP (all reqs)\n"
+ msg+="-> $mdaggrminqueriesperipfull/$mdaggravgqueriesperipfull/$mdaggrmaxqueriesperipfull min/avg/max queries per querying IP (full D/Ls only)\n"
+ msg+="\nRequests per published aggregate\n"
+ msg+="-> * ukfederation-metadata.xml = $mdaggrmaincountfriendly requests ($mdaggrmainpc% of total)\n"
+ msg+="-> * ukfederation-back.xml = $mdaggrbackcountfriendly requests ($mdaggrbackpc% of total)\n"
+ msg+="-> * ukfederation-test.xml = $mdaggrtestcountfriendly requests ($mdaggrtestpc% of total)\n"
+ msg+="-> * ukfederation-export.xml = $mdaggrexportcountfriendly requests ($mdaggrexportpc% of total)\n"
+ msg+="-> * ukfed'-export-preview.xml = $mdaggrexportpreviewcountfriendly requests ($mdaggrexportpreviewpc% of total)\n"
+ msg+="-> * ukfederation-cdsall.xml = $mdaggrcdsallcountfriendly requests ($mdaggrcdsallpc% of total)\n"
+ msg+="-> * ukfederation-wayf.xml = $mdaggrwayfcountfriendly requests ($mdaggrwayfpc% of total)\n"
+ msg+="\nTop 10 downloaders (full downloads only):\n"
+ msg+="$mdaggrtoptenipsbycountdetailed\n"
+ msg+="\n-----\n"
+ msg+="MDQ:\n"
+ msg+="-> $mdqcountfriendly requests* from $mdquniqueipfriendly clients, $mdqtotalhr shipped.\n"
+ msg+="--> * $mdqcountfullfriendly ($mdqfullpc%) were full downloads, of which $mdqfullcomprcountfriendly ($mdqfullcomprpc%) were compressed.\n"
+ msg+="-> $mdqcountentityidfriendly ($mdqcountentityidpc%) entityId vs $mdqcountsha1friendly ($mdqcountsha1pc%) sha1 based queries\n"
+ msg+="-> IPv4: $mdqv4pc% vs IPv6: $mdqv6pc%\n"
+ msg+="-> $mdqminqueriesperip min/$mdqavgqueriesperip avg/$mdqmaxqueriesperip max queries per querying IP\n"
+ msg+="-> $mdqcountallentities queries for collection of all entities\n"
+ msg+="\nTop 10 queryers:\n"
+ msg+="$mdqtoptenipsbycountdetailed\n"
+ msg+="\nTop 10 entities queried for:\n"
+ msg+="$mdqtoptenqueriesbycount\n"
+ msg+="\n-----\n"
+ msg+="Central Discovery Service:\n"
+ msg+="-> $cdscountfriendly total requests serviced\n"
+ msg+="-> IPv4: $cdsv4pc% vs IPv6: $cdsv6pc%\n"
+ msg+="-> Server distribution: shib-cds1: $cds1pc% shib-cds2: $cds2pc% shib-cds3: $cds3pc%\n"
+ msg+="-> DS: $cdsdscount / WAYF: $cdswayfcount\n"
+ msg+="\n-----\n"
+ msg+="Wugen:\n"
+ msg+="-> $wugencount WAYFless URLs generated\n"
+ msg+="-> $wugennewsubs new subscriptions.\n"
+ msg+="\n-----\n"
+ msg+="Test IdP usage:\n"
+ msg+="-> $testidplogincount logins to $testidpspcount SPs.\n"
+ msg+="\n-> Logins per test user:\n"
+ msg+="$testidplogincountbyuser\n"
+ msg+="\n-> Top 10 SPs logged into:\n"
+ msg+="$testidptoptenspsbycount\n"
+ msg+="\n-----\n"
+ msg+="Test SP usage:\n"
+ msg+="-> $testsplogincount logins from $testspidpcount IdPs.\n"
+ msg+="\n-> Top 10 IdPs logged in from:\n"
+ msg+="$testsptoptenidpsbycount\n"
+ msg+="\n-----"
+fi
+
+
+
+
+# =====
+# = Output the message.
+# =====
+
+
+echo -e "$msg"
+exit 0
\ No newline at end of file
diff --git a/utilities/stats-sync.sh b/utilities/stats-sync.sh
new file mode 100755
index 00000000..af5f02b2
--- /dev/null
+++ b/utilities/stats-sync.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# This script will sync the logfiles from all of the backend servers into a central location on repo
+#
+
+# Set some common options
+logslocation="/var/stats"
+
+
+# Logs from API
+
+# Logs from MD servers
+rsync -at --exclude modsec* stats@md1:/var/log/httpd/* $logslocation/md/md1/
+rsync -at --exclude modsec* stats@md2:/var/log/httpd/* $logslocation/md/md2/
+rsync -at --exclude modsec* stats@md3:/var/log/httpd/* $logslocation/md/md3/
+
+# Logs from CDS servers
+rsync -at --exclude modsec* stats@shib-cds1:/var/log/httpd/* $logslocation/cds/shib-cds1/
+rsync -at --exclude modsec* stats@shib-cds2:/var/log/httpd/* $logslocation/cds/shib-cds2/
+rsync -at --exclude modsec* stats@shib-cds3:/var/log/httpd/* $logslocation/cds/shib-cds3/
+
+# Logs from websites
+rsync -at --exclude modsec* stats@web1:/var/log/httpd/* $logslocation/www/web1/
+rsync -at --exclude modsec* stats@web2:/var/log/httpd/* $logslocation/www/web2/
+
+# Logs from Wugen
+rsync -at --exclude modsec* stats@wugen:/var/log/httpd/* $logslocation/wugen/
+rsync -at stats@wugen:/opt/wugen/logs/urlgenerator-* $logslocation/wugen/
+
+# Logs from Test IdP
+rsync -at --exclude modsec* stats@test-idp:/var/log/httpd/* $logslocation/test-idp/
+rsync -at stats@test-idp:/opt/shibboleth-idp/logs/idp-audit* $logslocation/test-idp/
+
+# Logs from Test SP
+rsync -at --exclude modsec* stats@test-sp:/var/log/httpd/* $logslocation/test-sp/
+rsync -at stats@test-sp:/var/log/shibboleth/shibd* $logslocation/test-sp/
+rsync -at stats@test-sp:/var/log/shibboleth/transaction* $logslocation/test-sp/
+
+# Exit happily
+exit 0
\ No newline at end of file