#!/usr/bin/perl
#
# sift_entityids.pl -- prototype script to verify entity IDs against known
# registries.
#
# Provenance: added as attic/sift_entityids.pl in commit
# c83ef1811e7455381f3da5094692af7e84c48369 (Ian Young, 2011-04-12).
#
# Usage: sift_entityids.pl [file ...]
#
# Reads one entity ID per line from the named files (or standard input when
# no files are given), derives a DNS domain from each entity ID and checks
# whether that domain ends in one of the registry suffixes listed below.
# Lines for which no domain can be derived, and domains that match no listed
# registry, are reported on standard output; matching lines print nothing.
#
# TODO: the final version needs to use the public suffix list instead of the
# hand-maintained @registries table.
#

use strict;
use warnings;

# Registry suffixes, each with its leading dot, compared against the end of
# the (lower-cased) domain.  All entries must be lower case.
my @registries = (
    # gTLDs
    '.com',
    '.edu',
    '.net',
    '.org',
    '.info',

    # ccTLDs which allow top-level registration
    '.es',
    '.eu',
    '.nl',
    '.tv',

    # ccTLD: cn
    '.edu.cn',

    # ccTLD: jp
    '.ac.jp',

    # ccTLD: my
    '.edu.my',

    # ccTLD: uk
    '.ac.uk',
    '.bl.uk',
    '.co.uk',
    '.gov.uk',
    '.org.uk',
    '.parliament.uk',
);

# extract_domain($entity_id)
#
# Returns the domain embedded in an entity ID, or nothing (undef in scalar
# context) when the entity ID has none of the recognised shapes:
#
#   http(s)://HOST[:port][/path]                         -> HOST
#   urn:mace:ac.uk:sdss.ac.uk:provider:(service|identity):DOMAIN[:...]
#                                                        -> DOMAIN
#   urn:mace:eduserv.org.uk:athens:federation:(uk|beta)  -> eduserv.org.uk
#   urn:mace:eduserv.org.uk:athens:provider:DOMAIN       -> DOMAIN
#
# Literal dots in the URN prefixes are escaped so that they only match a
# real '.', not an arbitrary character.
sub extract_domain {
    my ($entity_id) = @_;

    return $1
        if $entity_id =~ m{\Ahttps?://([^:/]+)};
    return $1
        if $entity_id =~ m{\Aurn:mace:ac\.uk:sdss\.ac\.uk:provider:(?:service|identity):([^:]+)};
    return 'eduserv.org.uk'
        if $entity_id =~ m{\Aurn:mace:eduserv\.org\.uk:athens:federation:(?:uk|beta)\z};
    return $1
        if $entity_id =~ m{\Aurn:mace:eduserv\.org\.uk:athens:provider:(.*)};

    return;
}

# matches_known_registry($domain)
#
# Returns 1 when $domain ends in one of the @registries suffixes, 0
# otherwise.  The comparison is done on a lower-cased copy because DNS names
# are case-insensitive.
sub matches_known_registry {
    my ($domain) = @_;

    my $candidate = lc $domain;
    for my $registry (@registries) {
        my $suffix_length = length $registry;

        # A domain shorter than the suffix can never match; checking first
        # keeps the negative substr offset inside the string.
        next if length($candidate) < $suffix_length;
        return 1 if substr($candidate, -$suffix_length) eq $registry;
    }
    return 0;
}

# main()
#
# Processes every input line: strips the line ending, extracts the domain
# and prints a "***" diagnostic for each line that cannot be parsed or whose
# domain matches no listed registry.
sub main {
    while (my $line = <>) {
        # Remove only an actual line ending (LF or CRLF).  The previous
        # chop() also ate the last character of a final line that had no
        # trailing newline.
        $line =~ s/\r?\n\z//;

        my $domain = extract_domain($line);
        if (!defined $domain) {
            print "*** can't extract domain from $line\n";
            next;
        }

        next if matches_known_registry($domain);
        print "*** no registry match for $domain\n";
    }
    return;
}

# Run only when executed as a script, so the helpers can be loaded and
# exercised from a test without consuming input.
main() unless caller;

1;