Skip to content

Commit

Permalink
Add githook and an ant target to push it to metadata servers
Browse files Browse the repository at this point in the history
See ukf-meta#244
  • Loading branch information
Alex Stuart committed Jul 14, 2020
1 parent bf312b2 commit 48fbe76
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 0 deletions.
35 changes: 35 additions & 0 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@
<property name="mdx.dir" value="${basedir}/mdx"/>
<property name="rules.dir" value="${mdx.dir}/_rules"/>
<property name="utilities.dir" value="${basedir}/utilities"/>
<property name="githook.dir" value="${utilities.dir}/githooks"/>

<!--
Location of externally supplied tool bundles.
Expand Down Expand Up @@ -330,6 +331,7 @@
-->
<property name="mdq.cache" value="mdqcache.tar.gz"/>
<property name="mdaggr.stats" value="ukfederation-stats.html"/>
<property name="post-receive-githook" value="post-receive"/>


<!--
Expand Down Expand Up @@ -631,6 +633,7 @@
git.data.merge.masterintoimmediate,
git.data.merge.immediateintodeferred,
git.data.allbranches.pushtoorigin,
scp.githook,
publish.mdqcache,
publish.md,
publish.otherfiles,
Expand Down Expand Up @@ -2144,6 +2147,38 @@
***********************************************
-->

<target name="scp.githook">
    <!--
        The metadata servers use a post-receive githook to fix up the outputs
        of metadata aggregation, so copy it to /tmp on each MD dist host.
        CDI is not included: its container already contains the githook.
    -->
    <echo>Pushing post-receive githook to MD dist.</echo>

    <echo>-> MD-NE-01</echo>
    <scp failonerror="true" remoteTodir="${md.user}@${md.dist.host-ne-01.name}:/tmp" keyfile="~/.ssh/id_rsa" knownhosts="~/.ssh/known_hosts">
        <fileset dir="${githook.dir}" includes="${post-receive-githook}"/>
    </scp>

    <echo>-> MD-NE-02</echo>
    <scp failonerror="true" remoteTodir="${md.user}@${md.dist.host-ne-02.name}:/tmp" keyfile="~/.ssh/id_rsa" knownhosts="~/.ssh/known_hosts">
        <fileset dir="${githook.dir}" includes="${post-receive-githook}"/>
    </scp>

    <echo>-> MD-WE-01</echo>
    <scp failonerror="true" remoteTodir="${md.user}@${md.dist.host-we-01.name}:/tmp" keyfile="~/.ssh/id_rsa" knownhosts="~/.ssh/known_hosts">
        <fileset dir="${githook.dir}" includes="${post-receive-githook}"/>
    </scp>

    <echo>-> MD-WE-02</echo>
    <scp failonerror="true" remoteTodir="${md.user}@${md.dist.host-we-02.name}:/tmp" keyfile="~/.ssh/id_rsa" knownhosts="~/.ssh/known_hosts">
        <fileset dir="${githook.dir}" includes="${post-receive-githook}"/>
    </scp>
</target>

<target name="publish.md">
<!--
Push metadata files for the UK Federation to the MD dist servers
Expand Down
92 changes: 92 additions & 0 deletions utilities/githooks/post-receive
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash
#
# post-receive hook: checks out the content that was just pushed, then
# prepares the metadata aggregates and the MDQ cache and publishes them to
# the Apache directories configured below.

# Git provides the following as part of STDIN when invoking this script: <oldrev> <newrev> <refname>
# Only the first ref-update line is consumed; -r keeps any backslash literal.
read -r oldrev newrev refname

# Set the location of the git repo and the apache directories to serve content from
gitdir=/var/git/ukf-products
apacheaggrdir=/var/www/html/metadata.uou
apachemdqdir=/var/www/html/mdq.uou/entities

# Set the location of the temporary mdq cache dir
mdqcachedir=/tmp/mdqcache

# This Git repo has had the latest stuff pushed to it, but it hasn't checked it out yet. So let's do it.
# If the checkout fails the work tree does not match the pushed commit, so
# stop rather than publish it.
if ! git --work-tree="$gitdir" --git-dir="$gitdir/.git" checkout -f; then
  echo "ERROR: could not check out $gitdir; nothing has been published." >&2
  exit 1
fi

# Make a gzipped version of each aggregate (written next to it as <name>.xml.gz)
echo -n "Gzipping each aggregate... "
for f in "$gitdir"/aggregates/*.xml; do
  # When nothing matches, the glob is left as a literal pattern; skip it
  # instead of asking gzip to read a file that does not exist.
  [[ -f "$f" ]] || continue
  gzip -9 < "$f" > "$f.gz"
done
echo "Done."

# The MDQ cache should have been SCPed to /tmp and be sitting there happily.
# First, we should untar it into a freshly created cache directory.
echo -n "Untarring mdq cache... "
# ${var:?} aborts the script instead of running rm -rf with an empty path.
rm -rf -- "${mdqcachedir:?}"
# A failure in any of these steps leaves an empty or partial cache directory,
# which the later "rsync --delete" would mirror over the published entities.
# Stop here instead. The group runs in the current shell, so the cd persists.
if ! { mkdir -- "$mdqcachedir" && cd -- "$mdqcachedir" && tar xzf /tmp/mdqcache.tar.gz; }; then
  echo "ERROR: could not unpack /tmp/mdqcache.tar.gz into $mdqcachedir; aborting." >&2
  exit 1
fi
echo "Done."

# Make a gzipped version of each per-entity fragment; also create symlink
# to the XML file and its .gz version named from the SHA1 hash of the entityId
echo -n "Gzipping each fragment file, and symlinking to the file and the .gz... "
# Every gzip/mv/ln below uses a relative name, so it must only ever run
# inside the cache directory.
cd -- "$mdqcachedir" || {
  echo "ERROR: cannot change directory to $mdqcachedir; aborting." >&2
  exit 1
}
for f in "$mdqcachedir"/*.xml; do
  # When nothing matches, the glob is left as a literal pattern; skip it.
  [[ -f "$f" ]] || continue

  # First we're going to figure out some stuff about the request and how it'll
  # map to other versions of the name

  # Convert the /full/path/and/filename.xml to just filename.xml
  filename=${f##*/}

  # And then filename.xml to just filename (i.e. the % encoded entityId)
  entityidpercentencoded=${filename%.*}

  # Un-%encode the entityId: "+" becomes a space and every "%HH" becomes
  # "\xHH", which printf's %b then turns into the corresponding byte.
  entityid=${entityidpercentencoded//+/ }
  entityid=$(printf '%b' "${entityid//%/\\x}")

  # Calculate the sha1 hash of the entityId. The value is passed quoted so
  # that glob characters or whitespace in it cannot change what gets hashed.
  # The digest is the last field of openssl's output, with or without a
  # "(stdin)= " style prefix.
  entityidsha1=$(printf '%s' "$entityid" | openssl sha1 | awk '{print $NF}')

  # Now we're actually going to do something with that!

  # Create the gzipped version of the file
  gzip -9 < "$filename" > "x_gz-$filename.gz"

  # Remove .xml from the filenames
  mv -f -- "$filename" "$entityidpercentencoded"
  mv -f -- "x_gz-$filename.gz" "x_gz-$entityidpercentencoded.gz"

  # Create the symlinks to the XML file and the gzipped version
  ln -s -- "$entityidpercentencoded" "{sha1}$entityidsha1"
  ln -s -- "x_gz-$entityidpercentencoded.gz" "x_gz-{sha1}$entityidsha1.gz"
done
echo "Done."

# Get the timestamp of the commit (seconds since the epoch).
# An empty $newrev falls back to HEAD, which is what "git show" displays when
# it is given no revision at all.
mtime=$(git --work-tree="$gitdir" --git-dir="$gitdir/.git" show "${newrev:-HEAD}" --quiet --pretty=format:%ct)

# Set the timestamp on each of the files to that of the commit
echo -n "Setting the timestamp on each file to that of the commit... "
if [[ "$mtime" =~ ^[0-9]+$ ]]; then
  # "{} +" hands many paths to each touch invocation instead of starting one
  # process per file.
  find "$gitdir" -regextype posix-extended -regex '.*\.(xml|gz)' -exec touch -d "@$mtime" {} +
  find "$mdqcachedir" -exec touch -d "@$mtime" {} +
else
  # Without a usable timestamp every touch would fail; report it once and
  # leave the file times as they are.
  echo "WARNING: no commit timestamp available for '$newrev'; file times left unchanged." >&2
fi
echo "Done."

# Put files into the correct directory
echo -n "Rsyncing files to the appropriate apache directory... "
rsync -at "$gitdir"/aggregates/*.{xml,gz} "$apacheaggrdir"
# --delete makes the destination mirror the cache directory, so syncing from
# an empty or missing cache would remove every published entity. Only sync
# when the cache contains at least one entry.
if [[ -n "$(find "$mdqcachedir" -mindepth 1 -print -quit 2>/dev/null)" ]]; then
  rsync -at --delete "$mdqcachedir/" "$apachemdqdir"
else
  echo "WARNING: $mdqcachedir is empty or missing; $apachemdqdir left untouched." >&2
fi
echo "Done."

# Remove the temporary files: the generated .gz files under the git
# directory and the unpacked mdq cache.
echo -n "Removing temporary files... "
# "{} +" removes the files in batches rather than one rm process per file.
find "$gitdir" -name "*.gz" -exec rm -f -- {} +
# ${var:?} aborts the script instead of running rm -rf with an empty path.
rm -rf -- "${mdqcachedir:?}"
echo "Done."

0 comments on commit 48fbe76

Please sign in to comment.