Permalink
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
inc-meta/utilities/normalise_fragment
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
See ukf/ukf-meta#71.
executable file
136 lines (111 sloc)
4.78 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
'''
Normalises a fragment file.

The assumption is that the input file has already been pretty-printed to
some extent. This script normalises it by adjusting the start of the
file:

    * ensures that the file starts with an appropriate XML declaration
    * arranges for all appropriate namespaces to appear on the EntityDescriptor
    * arranges for an appropriate collection of schemaLocation values
    * puts any ID and entityID attributes in the right place

The script also modifies any use of the shibmeta prefix to the modern
shibmd form.

None of this can really be done by any means within XML itself, so this
is a pure text processing application.

With no command-line arguments, the script acts as a filter.

With one command-line argument, the script overwrites the named file.

With two command-line arguments, the script reads from one file and
writes to another.

This script was developed under Python 2.7, but will probably work under 2.6.
Let me know if that turns out not to be the case.
'''
import re | |
import sys | |
from string import Template | |
# | |
# Template to use for the start of the file, up to the EntityDescriptor. | |
# | |
# Note that the indentation in this template should be four *spaces* per | |
# level, independent of the type of indentation used for the rest of the | |
# script. | |
# | |
# NOTE(review): the continuation-line indentation below was reconstructed
# from the "four spaces per level" rule stated for this template; confirm
# against the canonical copy of the file before relying on exact whitespace.
#
# Placeholders: ${ID} and ${entityID} are filled in by Template.substitute().
ED_TEMPLATE = Template('''<?xml version="1.0" encoding="UTF-8"?>
<EntityDescriptor xmlns="urn:oasis:names:tc:SAML:2.0:metadata"
    xmlns:alg="urn:oasis:names:tc:SAML:metadata:algsupport"
    xmlns:ds="http://www.w3.org/2000/09/xmldsig#"
    xmlns:idpdisc="urn:oasis:names:tc:SAML:profiles:SSO:idp-discovery-protocol"
    xmlns:init="urn:oasis:names:tc:SAML:profiles:SSO:request-init"
    xmlns:mdattr="urn:oasis:names:tc:SAML:metadata:attribute"
    xmlns:mdrpi="urn:oasis:names:tc:SAML:metadata:rpi"
    xmlns:mdui="urn:oasis:names:tc:SAML:metadata:ui"
    xmlns:remd="http://refeds.org/metadata"
    xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
    xmlns:shibmd="urn:mace:shibboleth:metadata:1.0"
    xmlns:ukfedlabel="http://ukfederation.org.uk/2006/11/label"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="urn:oasis:names:tc:SAML:2.0:metadata saml-schema-metadata-2.0.xsd
        urn:oasis:names:tc:SAML:metadata:algsupport sstc-saml-metadata-algsupport-v1.0.xsd
        urn:oasis:names:tc:SAML:metadata:attribute sstc-metadata-attr.xsd
        urn:oasis:names:tc:SAML:metadata:rpi saml-metadata-rpi-v1.0.xsd
        urn:oasis:names:tc:SAML:metadata:ui sstc-saml-metadata-ui-v1.0.xsd
        urn:oasis:names:tc:SAML:profiles:SSO:idp-discovery-protocol sstc-saml-idp-discovery.xsd
        urn:oasis:names:tc:SAML:profiles:SSO:request-init sstc-request-initiation.xsd
        urn:oasis:names:tc:SAML:2.0:assertion saml-schema-assertion-2.0.xsd
        urn:mace:shibboleth:metadata:1.0 shibboleth-metadata-1.0.xsd
        http://ukfederation.org.uk/2006/11/label uk-fed-label.xsd
        http://refeds.org/metadata refeds-metadata.xsd
        http://www.w3.org/2001/04/xmlenc# xenc-schema.xsd
        http://www.w3.org/2009/xmlenc11# xenc-schema-11.xsd
        http://www.w3.org/2000/09/xmldsig# xmldsig-core-schema.xsd"
    ID="${ID}" entityID="${entityID}">
''')
def ingest(input):
    '''Read the given input file and split it into header and remainder.

    The header is everything up to and including the opening
    <EntityDescriptor ...> tag, together with any spaces or tabs and the
    single newline that directly follow it. The remainder is everything
    after that point.

    input -- a readable file-like object; only its read() method is used.

    Returns a (header, remainder) tuple of strings.

    Raises ValueError if the text contains no EntityDescriptor start tag.
    '''
    text = input.read()
    # DOTALL lets "." span line breaks, so the first group can swallow the
    # XML declaration and anything else preceding the start tag. The
    # trailing-blank matcher is greedy on purpose: a lazy one gives the
    # whitespace back to the remainder group and leaves a stray blank line.
    pattern = re.compile(r"^(.*<EntityDescriptor[^>]+?>[ \t]*\n?)(.*)$",
                         re.DOTALL)
    match = pattern.match(text)
    if match is None:
        raise ValueError("no <EntityDescriptor ...> start tag found in input")
    return match.groups()
def extract(header):
    '''Extract ID and entityID attributes from the header.

    header -- the text up to and including the EntityDescriptor start tag.

    Returns an (ID, entityID) tuple of the raw attribute values, exactly
    as they appear between the quotes in the header.

    Raises ValueError if either attribute is absent.
    '''
    def _attribute(name):
        # \b stops "ID" from matching the tail of "entityID". The \1
        # back-reference makes the closing quote the same character as
        # the opening one, so a value may contain the other quote kind.
        found = re.search(r"\b" + name + r"\s*=\s*(['\"])(.*?)\1", header)
        if found is None:
            raise ValueError(
                "EntityDescriptor header has no %s attribute" % name)
        return found.group(2)

    entityID = _attribute("entityID")
    ID = _attribute("ID")
    return ID, entityID
def construct_header(ID, entityID):
    '''Build the replacement file header for one entity.

    Fills the ${ID} and ${entityID} placeholders of ED_TEMPLATE with the
    given values and returns the resulting text.
    '''
    values = {"ID": ID, "entityID": entityID}
    return ED_TEMPLATE.substitute(values)
def fix_scope_prefix(text):
    '''Return text with each "shibmeta:" prefix rewritten as "shibmd:".

    Only occurrences that begin at a word boundary are rewritten.
    '''
    legacy_prefix = re.compile(r"\bshibmeta:")
    return legacy_prefix.sub("shibmd:", text)
def usage():
    '''Display usage information for script.

    Writes a single usage line to standard output.
    '''
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('''Usage: normalise_fragment.py [[infile] outfile]''')
def main(args):
    '''Command-line application.

    args -- the command-line operands, without the program name:
        []               read standard input, write standard output
        [name]           read the named file, then overwrite it
        [infile, outfile] read infile, write outfile

    With more than two operands the usage line is printed and the
    process exits with status 1.
    '''
    if len(args) > 2:
        usage()
        sys.exit(1)

    # Read the whole input up front. In the one-operand case the same path
    # is reopened for writing below, so it must be fully read and closed
    # before that happens.
    if not args:
        head, remainder = ingest(sys.stdin)
    else:
        # open() rather than file(): file() does not exist on Python 3.
        with open(args[0], "r") as infile:
            head, remainder = ingest(infile)

    ID, entityID = extract(head)
    new_text = construct_header(ID, entityID) + fix_scope_prefix(remainder)

    # The output is only opened once the new text is complete, so a
    # failure above never truncates the file being rewritten.
    if not args:
        # Flush rather than close: the standard streams are not ours.
        sys.stdout.write(new_text)
        sys.stdout.flush()
    else:
        # args[-1] is the input path itself when only one name was given.
        with open(args[-1], "w") as outfile:
            outfile.write(new_text)
if __name__ == "__main__":
    # Hand main() only the operands, not the program name in argv[0].
    cli_args = sys.argv[1:]
    main(cli_args)