<?php
/**
* COmanage Match Match Service
*
* Portions licensed to the University Corporation for Advanced Internet
* Development, Inc. ("UCAID") under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership.
*
* UCAID licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* @link http://www.internet2.edu/comanage COmanage Project
* @package match
* @since COmanage Match v1.0.0
* @license Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
*/
declare(strict_types = 1);
namespace App\Lib\Match;
use Cake\Log\Log;
use Cake\ORM\TableRegistry;
use Cake\Utility\Hash;
use \App\Lib\Enum\ConfidenceModeEnum;
use \App\Lib\Enum\ReferenceIdEnum;
use \App\Lib\Enum\SearchTypeEnum;
use \App\Lib\Enum\StatusEnum;
use \App\Lib\Enum\TrustModeEnum;
class MatchService { //extends PostgresService {
use \App\Lib\Traits\DatabaseTrait;
protected $mgConfig = null;
protected $mgTable = "";
/**
 * Record the specified request as a brand new entry and mint a Reference ID
 * for it. Callers are expected to have already searched the Matchgrid and
 * found no matching record.
 *
 * @since  COmanage Match v1.0.0
 * @param  string           $sor        SOR Label
 * @param  string           $sorid      SOR Record ID
 * @param  AttributeManager $attributes Request Attributes
 * @return string                       Newly assigned Reference ID
 */
public function assignReferenceId(string $sor, string $sorid, AttributeManager $attributes) {
  $newReferenceId = $this->generateReferenceId();

  Log::write(
    'debug',
    "{$sor}/{$sorid} Generated reference ID for request: {$newReferenceId}"
  );

  // upsert() rather than insert(): a forced re-match (the Reference ID having
  // been blanked out via the UI) lands on a row that already exists.
  $this->upsert($sor, $sorid, $attributes, $newReferenceId);

  return $newReferenceId;
}
/**
 * Attach a Reference ID to the specified record, which may or may not already
 * be in the Matchgrid as a pending record.
 *
 * @since  COmanage Match v1.0.0
 * @param  string           $sor         SOR Label
 * @param  string           $sorid       SOR Record ID
 * @param  AttributeManager $attributes  Request Attributes
 * @param  string           $referenceId Requested Reference ID, or "new" to assign a new Reference ID
 * @return string                        The Reference ID now attached to the record
 */
public function attachReferenceId(string $sor, string $sorid, AttributeManager $attributes, string $referenceId) {
  $refId = $referenceId;

  if($referenceId === 'new') {
    // Assign a new reference ID and upsert
    $refId = $this->generateReferenceId();

    // Log the ID that was actually generated, not the "new" placeholder
    Log::write('debug', $sor . "/" . $sorid . " Generated reference ID for request: " . $refId);
  } else {
    Log::write('debug', $sor . "/" . $sorid . " Attaching reference ID for request: " . $refId);
  }

  $this->upsert($sor, $sorid, $attributes, $refId);

  return $refId;
}
/**
 * Work out, for each candidate, which configured attributes differ from the
 * original request. Intended to help frontend rendering; the comparison is a
 * simplified version of what search() does and may not agree with it exactly
 * (eg: dictionary lookups and Rule Attribute settings are not considered).
 *
 * The candidate that is itself the original request maps to an empty array.
 * Other candidates only get an entry when at least one attribute differs.
 *
 * @since  COmanage Match v1.1.0
 * @param  array $original   Original request data
 * @param  array $candidates Set of candidates, as returned by getRawResults()
 * @return array             An array of arrays, indexed by request ID and then attribute
 */
public function diffCandidates(array $original, array $candidates) {
  // Index the attribute configuration by column name for direct lookup
  $configByName = [];

  foreach($this->mgConfig->attributes as $attrCfg) {
    $configByName[$attrCfg->name] = $attrCfg;
  }

  // Apply the attribute's comparison settings to a single value
  $normalize = static function($v, $cfg) {
    if($cfg->alphanumeric) {
      $v = preg_replace('/[^A-Za-z0-9]/', '', $v);
    }

    if(!$cfg->case_sensitive) {
      $v = mb_strtolower($v ?: "");
    }

    // Null equivalents: a value with no letters and no digits 1-9 counts as blank
    if($cfg->null_equivalents && !preg_match('/[1-9[:alpha:]]/', $v)) {
      $v = "";
    }

    return $v;
  };

  $differences = [];

  foreach($candidates as $candidate) {
    $candidateId = $candidate['id'];

    if($candidateId == $original['id']) {
      // The candidate is the original request itself, nothing to compare
      $differences[$candidateId] = [];
      continue;
    }

    foreach($candidate as $column => $rawValue) {
      // Columns without an Attribute configuration are metadata
      // (eg: "sorid") and are not compared
      if(empty($configByName[$column])) {
        continue;
      }

      $cfg = $configByName[$column];

      $left = $normalize(empty($original[$column]) ? '' : $original[$column], $cfg);
      $right = $normalize(empty($rawValue) ? '' : $rawValue, $cfg);

      if($right != $left) {
        $differences[$candidateId][] = $column;
      }
    }
  }

  return $differences;
}
/**
 * Produce a Reference ID using the generator selected by the current
 * Matchgrid's settings: the Sequence generator when so configured,
 * otherwise the Uuid generator.
 *
 * @since  COmanage Match v1.0.0
 * @return string Newly generated Reference ID
 */
protected function generateReferenceId() {
  $settingsTable = TableRegistry::getTableLocator()->get('MatchgridSettings');
  $method = $settingsTable->getReferenceIdMethod($this->mgConfig->id);

  $generator = ($method == ReferenceIdEnum::Sequence)
             ? new \App\Lib\Identifier\Sequence
             : new \App\Lib\Identifier\Uuid;

  return $generator->generate($this->mgConfig, $this->dbcxn);
}
/**
 * Look up the Reference ID stored on the specified match request.
 *
 * @since  COmanage Match v1.0.0
 * @param  int    $id Match Request ID
 * @return string     Reference ID, or null if no record found or Reference ID not set
 */
public function getReferenceIdForRequest(int $id) {
  $query = $this->dbcxn->Prepare(
    "SELECT referenceid\nFROM " . $this->mgTable . "\nWHERE id=?"
  );
  $query->bindValue(1, $id);

  $referenceId = $query->executeQuery()->fetchOne();

  // A missing row (false) and an unset Reference ID both come back as null
  if(!$referenceId) {
    return null;
  }

  return $referenceId;
}
/**
 * Fetch a single match request by its row ID.
 *
 * @since  COmanage Match v1.0.0
 * @param  int           $id Match Request ID
 * @return ResultManager     Result Manager, holding the row if one was found
 * @throws RuntimeException
 */
public function getRequest(int $id) {
  $manager = new ResultManager;
  $manager->setConfig($this->mgConfig->attributes);

  $query = $this->dbcxn->Prepare(
    "SELECT *\nFROM " . $this->mgTable . "\nWHERE id=?"
  );
  $query->bindValue(1, $id);

  $record = $query->executeQuery()->fetchAssociative();

  if(empty($record)) {
    // Nothing found, hand back the empty Result Manager
    return $manager;
  }

  $manager->add($record);

  return $manager;
}
/**
 * Find the row ID of the existing record for $sor + $sorid, if any.
 *
 * @since  COmanage Match v1.0.0
 * @param  string $sor   Requesting System of Record
 * @param  string $sorid Requesting SOR's Identifier
 * @return int           Row ID or NULL
 */
public function getRequestIdForSorId(string $sor, string $sorid) {
  $query = $this->dbcxn->Prepare(
    "SELECT id\nFROM " . $this->mgTable . "\nWHERE sor=?\nAND sorid=?"
  );

  foreach([$sor, $sorid] as $offset => $param) {
    // DBAL placeholders are numbered from 1
    $query->bindValue($offset + 1, $param);
  }

  $found = $query->executeQuery()->fetchOne();

  if(!$found) {
    return null;
  }

  return (int)$found;
}
/**
 * Fetch all match requests of a given status. A request counts as resolved
 * when its resolution_time is set; any $status other than "resolved" selects
 * the requests whose resolution_time is NULL.
 *
 * @since  COmanage Match v1.0.0
 * @param  string        $status "pending" or "resolved"
 * @return ResultManager         Result Manager
 * @throws RuntimeException
 */
public function getRequests(string $status) {
  $manager = new ResultManager;
  $manager->setConfig($this->mgConfig->attributes);

  $negation = "";

  if($status == 'resolved') {
    $negation = "NOT ";
  }

  $query = $this->dbcxn->Prepare(
    "SELECT *\nFROM " . $this->mgTable . "\nWHERE resolution_time IS " . $negation . " NULL"
  );
  $rows = $query->executeQuery();

  while($record = $rows->fetchAssociative()) {
    $manager->add($record);
  }

  return $manager;
}
/**
 * Fetch every match request currently attached to a given Reference ID.
 *
 * @since  COmanage Match v1.0.0
 * @param  string        $referenceId Reference ID
 * @return ResultManager              Result Manager
 * @throws RuntimeException
 */
public function getRequestsForReferenceId(string $referenceId) {
  $manager = new ResultManager;
  $manager->setConfig($this->mgConfig->attributes);

  $query = $this->dbcxn->Prepare(
    "SELECT *\nFROM " . $this->mgTable . "\nWHERE referenceid=?"
  );
  $query->bindValue(1, $referenceId);

  $rows = $query->executeQuery();

  while($record = $rows->fetchAssociative()) {
    $manager->add($record);
  }

  return $manager;
}
/**
 * Determine the number of rows in the Matchgrid.
 *
 * @since  COmanage Match v1.0.0
 * @param  string $sor SOR Label, if provided only count rows for this SOR
 * @return int         The number of rows counted
 */
public function getRowCount(string $sor="") {
  $filterBySor = ($sor !== "");

  $sql = "SELECT COUNT(*)
          FROM " . $this->mgTable;

  if($filterBySor) {
    $sql .= " WHERE sor=?";
  }

  $stmt = $this->dbcxn->Prepare($sql);

  if($filterBySor) {
    $stmt->bindValue(1, $sor);
  }

  $resultSet = $stmt->executeQuery();

  // fetchOne() hands back whatever the driver produced; cast so the
  // documented int contract holds for callers.
  return (int)$resultSet->fetchOne();
}
/**
 * Fetch the row currently stored for an SOR record.
 *
 * @since  COmanage Match v1.0.0
 * @param  string        $sor   SOR Label
 * @param  string        $sorid SOR Record Identifier
 * @return ResultManager        Result Manager, holding the row if one was found
 * @throws RuntimeException
 */
public function getSorAttributes(string $sor, string $sorid) {
  $manager = new ResultManager;
  $manager->setConfig($this->mgConfig->attributes);

  $query = $this->dbcxn->Prepare(
    "SELECT *\nFROM " . $this->mgTable . "\nWHERE sor=?\nAND sorid=?"
  );

  foreach([$sor, $sorid] as $offset => $param) {
    // DBAL placeholders are numbered from 1
    $query->bindValue($offset + 1, $param);
  }

  $record = $query->executeQuery()->fetchAssociative();

  if(empty($record)) {
    return $manager;
  }

  $manager->add($record);

  return $manager;
}
/**
 * List the SOR IDs of every row belonging to a given SOR.
 *
 * @since  COmanage Match v1.0.0
 * @param  string $sor SOR Label
 * @return array       Array of SOR IDs, which may be empty if none found
 * @throws RuntimeException
 */
public function getSorIds(string $sor) {
  $query = $this->dbcxn->Prepare(
    "SELECT sorid\nFROM " . $this->mgTable . "\nWHERE sor=?"
  );
  $query->bindValue(1, $sor);

  $rows = $query->executeQuery();

  return $rows->fetchFirstColumn();
}
/**
 * Insert a new record into the Matchgrid.
 *
 * The request time is set to now (UTC). If a Reference ID is supplied the row
 * is considered resolved and the resolution time is set to the same instant,
 * otherwise the row is inserted as pending.
 *
 * @since  COmanage Match v1.0.0
 * @param  string           $sor         SOR Label
 * @param  string           $sorid       SOR Record ID
 * @param  AttributeManager $attributes  Request Attributes
 * @param  string|null      $referenceId Reference ID to store, or null for a pending record
 * @return mixed                         Row ID, as returned by the database
 * @throws RuntimeException              If the database did not return a row ID
 */
protected function insert(string $sor, string $sorid, AttributeManager $attributes, ?string $referenceId=null) {
  // For most cases, code will either call upsert() or do other sanity checking
  // such that insert shouldn't typically fail if there is a row for $sor+$sorid
  // already in the matchgrid.

  // Request time is now, resolution time is also now if a referenceId was specified.
  $requestTime = gmdate('Y-m-d H:i:s');
  $resolutionTime = ($referenceId ? $requestTime : null);

  $attrs = ['sor', 'sorid', 'referenceid', 'request_time', 'resolution_time'];
  $vals = [$sor, $sorid, $referenceId, $requestTime, $resolutionTime];

  // Walk the list of configured attributes and build a list
  foreach($this->mgConfig->attributes as $attr) {
    // Only add this attribute if there is a value specified
    $val = $attributes->getValueByAttribute($attr);

    if($val) {
      $attrs[] = $attr->name;
      $vals[] = $val;
    }
  }

  // "RETURNING id" is not portable SQL
  $sql = "INSERT INTO " . $this->mgTable . " (" . implode(",", $attrs) . ")
          VALUES (" . str_repeat("?,", count($vals)-1) . "?)
          RETURNING id";

  $stmt = $this->dbcxn->Prepare($sql);

  foreach($vals as $i => $v) {
    // Note that DBAL counts from 1, but PHP counts from 0
    $stmt->bindValue($i + 1, $v);
  }

  // Normally we'd use executeStatement for an INSERT, but we want the
  // RETURNING value back
  // See https://stackoverflow.com/questions/32048634/how-to-get-the-value-of-an-update-returning-query-in-postgresql-in-doctrine
  $rstmt = $stmt->executeQuery();
  $rowid = $rstmt->fetchOne();

  if(!$rowid) {
    // There is no legacy "dbc" handle in this class to ask for an error
    // message, so report the failure directly.
    $msg = $sor . "/" . $sorid . " Insert did not return a row ID";

    Log::write('error', $msg);
    throw new \RuntimeException($msg);
  }

  Log::write('debug', "Inserted new matchgrid entry at row ID " . $rowid);

  return $rowid;
}
/**
 * Store a record in the Matchgrid without a Reference ID, ie as pending.
 *
 * @since  COmanage Match v1.0.0
 * @param  string           $sor        SOR Label
 * @param  string           $sorid      SOR Record ID
 * @param  AttributeManager $attributes Request Attributes
 * @return string                       Row ID
 */
public function insertPending(string $sor, string $sorid, AttributeManager $attributes) {
  // upsert() is used because an SOR may issue an update match attributes
  // request for a record that does not yet have a match reference ID.
  $pendingRowId = $this->upsert($sor, $sorid, $attributes);

  return $pendingRowId;
}
/**
 * Merge deprecated referenceIds to a primary ID: every row currently carrying
 * one of $referenceIds is repointed at $targetId.
 *
 * @since  COmanage Match v1.0.0
 * @param  string $targetId     Target Reference ID (to keep)
 * @param  array  $referenceIds Array of deprecated referenceIds (to merge); keys are ignored
 * @return bool                 True on success (including when there was nothing to merge)
 * @throws RuntimeException
 */
public function merge(string $targetId, array $referenceIds) {
  if(empty($referenceIds)) {
    // Nothing to merge. Bail out before building the IN () list, since
    // str_repeat() rejects a negative repeat count.
    return true;
  }

  $sql = "UPDATE " . $this->mgTable . "
          SET referenceid=?
          WHERE referenceid IN (" . str_repeat("?,", count($referenceIds)-1) . "?)";

  $stmt = $this->dbcxn->Prepare($sql);

  // Note that DBAL counts from 1. The target ID fills the first placeholder,
  // then the deprecated IDs follow in iteration order regardless of how the
  // incoming array is keyed.
  $position = 1;
  $stmt->bindValue($position, $targetId);

  foreach($referenceIds as $deprecatedId) {
    $position++;
    $stmt->bindValue($position, $deprecatedId);
  }

  $stmt->executeStatement();

  return true;
}
/**
 * Delete the matchgrid entry for an SOR record.
 *
 * @since  COmanage Match v1.0.0
 * @param  string  $sor   SOR Label
 * @param  string  $sorid SOR Record Identifier
 * @return boolean        True if an entry was removed, false if no entry was found
 * @throws RuntimeException
 */
public function remove(string $sor, string $sorid) {
  // RETURNING is a Postgres SQL Extension
  $query = $this->dbcxn->Prepare(
    "DELETE\nFROM " . $this->mgTable . "\nWHERE sor=?\nAND sorid=?\nRETURNING id"
  );

  foreach([$sor, $sorid] as $offset => $param) {
    // DBAL placeholders are numbered from 1
    $query->bindValue($offset + 1, $param);
  }

  // This should only ever match zero or one rows
  $deletedId = $query->executeQuery()->fetchOne();

  return (bool)$deletedId;
}
/**
 * Clear the Reference ID (and resolution time) stored for an SOR record.
 *
 * @since  COmanage Match v1.0.0
 * @param  string  $sor   SOR Label
 * @param  string  $sorid SOR Record Identifier
 * @return boolean        Always true once the statement has executed
 * @throws RuntimeException
 */
public function removeReferenceId(string $sor, string $sorid) {
  $query = $this->dbcxn->Prepare(
    "UPDATE " . $this->mgTable . "\nSET referenceid=null,\nresolution_time=null\nWHERE sor=?\nAND sorid=?"
  );

  foreach([$sor, $sorid] as $offset => $param) {
    // DBAL placeholders are numbered from 1
    $query->bindValue($offset + 1, $param);
  }

  $query->executeStatement();

  return true;
}
/**
 * Perform a search of the matchgrid according to the current configuration.
 *
 * Each rule of the requested confidence mode is translated into one SELECT.
 * Clauses generated for the same request attribute (eg: when Crosscheck is in
 * use) are ORed together, and the per-attribute groups are ANDed. Rule
 * attributes of search type Skip or Invalidate are not searched on here.
 *
 * A rule is skipped entirely when a required attribute has no value, when an
 * attribute's search type lacks its supporting configuration (search distance,
 * attribute map, or substring bounds), or when the rule ends up with no bound
 * values to search on.
 *
 * @since  COmanage Match v1.0.0
 * @param  string           $mode       Confidence Mode (a ConfidenceModeEnum value)
 * @param  string           $sor        SOR Label
 * @param  string           $sorid      SOR ID
 * @param  AttributeManager $attributes Search attributes
 * @param  bool             $skipSor    If true, do not match against existing entries from $sor
 * @return ResultManager                Result Manager
 * @throws \LogicException              If a rule attribute has an unknown search type
 * @throws RuntimeException
 */
protected function search(string $mode,
                          string $sor,
                          string $sorid,
                          AttributeManager $attributes,
                          bool $skipSor=false): ResultManager {
  $results = new ResultManager;
  $results->setConfidenceMode($mode);
  $results->setConfig($this->mgConfig->attributes);

  $ruleObjs = ($mode == ConfidenceModeEnum::Canonical ? "canonical" : "potential") . "_rules";

  Log::write('debug', $sor . "/" . $sorid . " Searching with confidence mode " . $mode);

  foreach($this->mgConfig->$ruleObjs as $rule) {
    // We generate SQL fragments for each rule attribute, then we OR together
    // any SQL fragments for the same attribute (eg: if Crosscheck is in use,
    // we want any one of them to succeed) and then AND together the SQL
    // fragment across attributes (any attributes specified must match).

    // Each of 'sql' and 'vals' will hold an array of arrays, keyed on the
    // attribute id
    $attrSql = [
      'sql' => [],
      'vals' => []
    ];

    foreach($rule->rule_attributes as $ruleattr) {
      if($ruleattr->search_type == SearchTypeEnum::Skip
         // We skip invalidate rules as well, these are handled in searchReferenceId()
         || $ruleattr->search_type == SearchTypeEnum::Invalidate) {
        continue;
      }

      // If we don't have a value for this attribute then we can't process this rule
      $val = $attributes->getValueByAttribute($ruleattr->attribute);

      if(!$val) {
        if($ruleattr->required) {
          Log::write('debug', $sor . "/" . $sorid . " No value found for required attribute " . $ruleattr->attribute->name . ", skipping rule " . $rule->name);
          continue 2;
        }

        if(!$ruleattr->match_empty) {
          Log::write('debug', $sor . "/" . $sorid . " No value found for optional attribute " . $ruleattr->attribute->name . ", ignoring it");
          continue;
        }
      }

      // Decide now whether a value will be bound for this clause. The value
      // may be normalized to an empty string below, but the clause will still
      // carry a placeholder that must be filled.
      $hasValue = (bool)$val;

      // From here, we use the Crosscheck Attribute if it is specified instead
      // of the original Attribute. In other words, Crosscheck allows a single
      // attribute from the API message to map to multiple database columns.
      $attribute = $ruleattr->crosscheck_attribute ?: $ruleattr->attribute;

      // Note we key on the original Attribute ID, since if multiple
      // configurations are specified for it we want to OR them together
      $attrId = $ruleattr->attribute->id;

      $attrclause = "";

      // The column name
      $colclause = $attribute->name;

      if(!$hasValue) {
        // We skip all the column construction stuff below since we're
        // comparing a NULL or empty value, so configurations like search
        // distance or mapping don't apply or make sense.
        $attrclause = $colclause . " IS NULL";
      } else {
        // If the attribute is case insensitive, we insert LOWER clauses.
        // mb_strtolower keeps the PHP side multibyte-aware, in line with
        // diffCandidates().
        if(!$attribute->case_sensitive) {
          $colclause = "LOWER(" . $colclause . ")";
          $val = mb_strtolower($val);
        }

        // If the attribute is alphanumeric only, we strip out non-alphanumeric characters
        if($attribute->alphanumeric) {
          $colclause = "REGEXP_REPLACE(" . $colclause . ", '[^A-Za-z0-9]', '', 'g')";
          $val = preg_replace('/[^A-Za-z0-9]/', '', $val);
        }

        // XXX we only want search_type=E for canonical rules (should we enforce this here, or just at config time?)
        // XXX complain if there are no Exact rules? Maybe in the UI during configuration?
        // XXX document in wiki https://spaces.at.internet2.edu/display/COmanage/Match+Attributes
        //     how we handle all this

        // NOTE: switch counts as a loop level for "continue", so skipping
        // the whole rule from inside a case requires "continue 3"
        // (switch -> rule attributes -> rules).
        switch($ruleattr->search_type) {
          case SearchTypeEnum::Distance:
            if(empty($attribute->search_distance)) {
              // We're configured for search distance but we don't have a value
              Log::write('debug', $sor . "/" . $sorid . " No search distance configured for attribute " . $attribute->name . ", skipping rule " . $rule->name);
              continue 3;
            }

            $maxdistance = (int)($attribute->search_distance)+1;
            $attrclause = "LEVENSHTEIN_LESS_EQUAL("
                        . $colclause
                        . ",?,"
                        . $attribute->search_distance
                        . ") < "
                        . $maxdistance;
            break;
          case SearchTypeEnum::Exact:
            $attrclause = $colclause . "=?";
            break;
          case SearchTypeEnum::Mapping:
            if(empty($attribute->attribute_map_id)) {
              // We're configured for attribute mapping, but we don't have a Map
              Log::write('debug', $sor . "/" . $sorid . " No Attribute Map configured for attribute " . $attribute->name . ", skipping rule " . $rule->name);
              continue 3;
            }

            $qclause = (!$attribute->case_sensitive ? "LOWER(query)" : "query");

            $attrclause = "(" . $colclause . "
                           IN (SELECT value
                               FROM attribute_mappings
                               WHERE attribute_map_id=" . $attribute->attribute_map_id ."
                               AND " . $qclause . "=?)
                           OR " . $colclause . "=?)";

            // We need two copies of $val in the param list ($val has already
            // been lowercased above if the attribute is case insensitive).
            // The second copy is added after the switch.
            $attrSql['vals'][$attrId][] = $val;
            break;
          case SearchTypeEnum::Substring:
            if(empty($attribute->search_substr_from) || empty($attribute->search_substr_for)) {
              // We're configured for substring search but we don't have a value
              Log::write('debug', $sor . "/" . $sorid . " Substring search values not properly configured for attribute " . $attribute->name . ", skipping rule " . $rule->name);
              continue 3;
            }

            $attrclause = "SUBSTRING("
                        . $colclause
                        . " FROM "
                        . $attribute->search_substr_from
                        . " FOR "
                        . $attribute->search_substr_for
                        . ") = SUBSTRING(? FROM "
                        . $attribute->search_substr_from
                        . " FOR "
                        . $attribute->search_substr_for
                        . ")";
            break;
          default:
            // Fully qualified: an unqualified name would resolve inside
            // this file's namespace
            throw new \LogicException(__('match.er.search_type', [$ruleattr->search_type]));
        }
      }

      $attrSql['sql'][$attrId][] = $attrclause;

      if($hasValue) {
        $attrSql['vals'][$attrId][] = $val;
      }
    }

    if(empty($attrSql['vals'])) {
      // We need at least one attribute to search on. If we didn't process
      // any in the request, complain.
      Log::write('debug', $sor . "/" . $sorid . " No searchable attributes found in request, skipping rule " . $rule->name);
      continue;
    }

    // Start building the actual SQL
    $sql = "SELECT *
            FROM " . $this->mgTable . "
            WHERE referenceid IS NOT NULL";  // Don't match pending requests

    $vals = [];

    if($skipSor) {
      $sql .= " AND sor <> ?";
      $vals[] = $sor;
    }

    foreach(array_keys($attrSql['sql']) as $attrId) {
      if(count($attrSql['sql'][$attrId]) > 1) {
        // We OR together all of the clauses
        $sql .= " AND (" . implode(" OR ", $attrSql['sql'][$attrId]) . ")";
      } else {
        // We simply append the clause
        $sql .= " AND " . $attrSql['sql'][$attrId][0];
      }

      if(!empty($attrSql['vals'][$attrId])) {
        $vals = array_merge($vals, $attrSql['vals'][$attrId]);
      }
    }

    Log::write('debug', $sor . "/" . $sorid . " SQL: " . $sql);

    $stmt = $this->dbcxn->Prepare($sql);

    for($i = 1;$i <= count($vals);$i++) {
      // Note that DBAL counts from 1, but PHP counts from 0
      $stmt->bindValue($i, $vals[$i-1]);
    }

    $resultSet = $stmt->executeQuery();

    $count = 0;

    while($row = $resultSet->fetchAssociative()) {
      $results->add($row, $rule->name);
      $count++;
    }

    if($count == 0) {
      // Record that this rule completed successfully. We do this to indirectly
      // validate the attributes passed in the request body, since otherwise we'd
      // have to recalculate the attributes against the rules (which we already
      // do here).
      $results->add([], $rule->name);
    }

    Log::write('debug', $sor . "/" . $sorid . " Matched " . $count . " candidate(s) using rule " . $rule->name);

    if($mode == ConfidenceModeEnum::Canonical && $count > 0) {
      // We stop processing Canonical rules if any match
      break;
    }
  }

  return $results;
}
/**
 * Attempt to obtain a reference ID based on search attributes.
 *
 * Canonical rules are searched first. A single canonical match is returned
 * as-is unless it is downgraded, either because a Rule Attribute in
 * "Invalidate" mode disagrees with the matched row, or because the SOR's
 * Trust Mode is Potential and the match came from the requesting SOR itself.
 * More than one canonical match is returned relabelled as potential. In the
 * remaining cases (no canonical match, or a downgraded one) potential rules
 * are searched, with any downgraded match added under a pseudo-rule name
 * ("_invalidate" or "_trustmode").
 *
 * @since COmanage Match v1.0.0
 * @param string $sor Requesting System of Record
 * @param string $sorid Requesting SOR's Identifier
 * @param AttributeManager $attributes Attribute Manager, holding search attributes
 * @return ResultManager Result Manager
 */
public function searchReferenceId(string $sor, string $sorid, AttributeManager $attributes) {
// Before we start, pull the SystemOfRecord configuration.
$SystemsOfRecord = TableRegistry::getTableLocator()->get('SystemsOfRecord');
$trustMode = $SystemsOfRecord->getTrustMode($this->mgConfig->id, $sor);
$sorMatch = null; // Only used if TrustMode == Potential
if($trustMode == TrustModeEnum::Trust) {
Log::write('debug', $sor . "/" . $sorid . " Trust Mode enabled, ignoring existing records in the same SOR");
}
// First try canonical matches
$canonicalMatches = $this->search(
mode: ConfidenceModeEnum::Canonical,
sor: $sor,
sorid: $sorid,
attributes: $attributes,
skipSor: $trustMode == TrustModeEnum::Trust
);
// Was a canonical match downgraded? Stays false if not, otherwise becomes an
// array holding the pseudo-rule name ('rule') and the matched row ('attrs').
$downgraded = false;
switch($canonicalMatches->count()) {
case 1:
// Before returning this canonical match, we need to perform some checks that
// might demote the result to potential. Note that $results may have more than
// one row even for a canonical match, since there may be multiple matchgrid
// entries that point to the same Reference ID.
$results = $canonicalMatches->getRawResults();
// First, check if any Attribute Rules in "Invalidate" mode will demote this result.
foreach($results as $rowId => $attrs) {
// Find the rule that generated this match.
$rulename = $canonicalMatches->getRuleForResult($rowId);
foreach($this->mgConfig->canonical_rules as $rule) {
if($rule->name == $rulename) {
// This is the correct rule, check the Rule Attributes. Note there could
// be more than one Rule Attribute with Search Type "Invalidate".
foreach($rule->rule_attributes as $ruleAttribute) {
if($ruleAttribute->search_type == SearchTypeEnum::Invalidate) {
// The name of the attribute for this Rule Attribute, eg "dob".
// This is the column name, not the API name.
$ruleAttributeName = $ruleAttribute->attribute->name;
// The value in the search request
$searchValue = $attributes->getValueByAttribute($ruleAttribute->attribute);
// The value in the matched row
$rowValue = $results[$rowId][$ruleAttributeName];
// We only perform the validation check if both values are not empty.
// The comparison is strict, so the values must be identical in both
// type and content to be considered the same.
if(!empty($searchValue)
&& !empty($rowValue)
&& ($searchValue !== $rowValue)) {
// The returned value does not match, invalidate the request and drop
// the result to potential
Log::write('debug', $sor . "/" . $sorid . " Invalidate mode for $ruleAttributeName ($rulename) downgrading result to Potential");
$downgraded = [
'rule' => "_invalidate",
'attrs' => $attrs
];
// No need to continue with any loop: this exits the rule attribute,
// rule, and result row loops at once
break 3;
}
}
}
// We found the rule we're looking for, no need to continue with the loop
break;
}
}
// If we have multiple rows, we should have the same rule for each row,
// so we can stop after the first.
break;
}
if(!$downgraded) {
// Next we need to handle Potential Trust Mode, check to see if any result row
// matched the SOR.
if($trustMode != TrustModeEnum::Potential) {
// Exact match, return
return $canonicalMatches;
}
foreach($results as $rowId => $attrs) {
if($attrs['sor'] == $sor) {
$sorMatch = $attrs;
break;
}
}
if(!$sorMatch) {
// We matched against a _different_ SOR, so return a canonical match
return $canonicalMatches;
}
$downgraded = [
'rule' => "_trustmode",
'attrs' => $sorMatch
];
Log::write('debug', $sor . "/" . $sorid . " Trust Mode downgrading result to Potential");
}
break;
case 0:
// Fall through and try potential matches
break;
default:
// Multiple canonical matches: demote to potential
Log::write('debug', $sor . "/" . $sorid . " Found " . $canonicalMatches->count() . " canonical matches, downgrading to potential");
$canonicalMatches->setConfidenceMode(ConfidenceModeEnum::Potential);
// XXX should we continue with other potential rules and merge results?
return $canonicalMatches;
break;
}
// Next try potential matches
$potentialMatches = $this->search(
mode: ConfidenceModeEnum::Potential,
sor: $sor,
sorid: $sorid,
attributes: $attributes,
skipSor: $trustMode == TrustModeEnum::Trust
);
// Add in the downgraded potential match from above, if configured
if($downgraded !== false) {
// We use a pseudo-rule name
$potentialMatches->add($downgraded['attrs'], $downgraded['rule']);
}
// The calling code generally checks to see if any rules successfully ran,
// since if there were no valid attributes or rules we treat that as an error.
// If no potential rules ran, we return the canonicalMatches result instead.
// (If no canonical rules ran, it doesn't matter which we return, but if at
// least one did, this is how we indicate at least something tried to happen.)
// See also CO-2127.
if(empty($potentialMatches->getSuccessfulRules())) {
return $canonicalMatches;
}
return $potentialMatches;
}
/**
 * Load the configuration of the specified (active) matchgrid, together with
 * its attributes, settings, and canonical and potential rules, and remember
 * it along with the matchgrid's prefixed table name.
 *
 * @since  COmanage Match v1.0.0
 * @param  int $matchgridId Matchgrid ID
 * @throws Cake\Datasource\Exception\RecordNotFoundException
 */
public function setConfig(int $matchgridId) {
  // Attributes are already pulled at the top level, but repeating them under
  // each rule makes them easier to access in search()
  $ruleAttributeAssociations = [
    'Attributes' => 'AttributeGroups',
    'CrosscheckAttributes' => 'AttributeGroups'
  ];

  $associations = [
    'Attributes' => 'AttributeGroups',
    'CanonicalRules' => [
      'RuleAttributes' => $ruleAttributeAssociations,
      'sort' => ['CanonicalRules.ordr' => 'ASC']
    ],
    'MatchgridSettings',
    'PotentialRules' => [
      'RuleAttributes' => $ruleAttributeAssociations,
      'sort' => ['PotentialRules.ordr' => 'ASC']
    ]
  ];

  $matchgridsTable = TableRegistry::getTableLocator()->get('Matchgrids');

  $query = $matchgridsTable->findById($matchgridId);
  $query = $query->where(['status' => StatusEnum::Active]);
  $query = $query->contain($associations);

  $this->mgConfig = $query->firstOrFail();
  $this->mgTable = $this->mgConfig->prefixed_table_name;
}
/**
 * Update an existing Matchgrid record.
 *
 * When a Reference ID is supplied, it is stored and the resolution time is
 * set to now (UTC). Configured attributes are only written when the request
 * carries attributes, and only those with a value; sor and sorid are never
 * updated. The request time is left untouched. If there is nothing to write,
 * no statement is issued.
 *
 * @since  COmanage Match v1.0.0
 * @param  int              $rowid       Row ID
 * @param  AttributeManager $attributes  Request Attributes
 * @param  string|null      $referenceId Reference ID to store, or null to leave it unchanged
 * @return int                           Row ID
 * @throws RuntimeException
 */
protected function update(int $rowid, AttributeManager $attributes, ?string $referenceId=null) {
  // XXX create a history record

  $attrs = [];
  $vals = [];

  if($referenceId) {
    // A Reference ID is being assigned, so the record is resolved as of now.
    // (Without one we're probably just updating the attributes of an existing
    // record, ie: Update Match Attributes.) We don't update request time.
    $attrs = ['referenceid', 'resolution_time'];
    $vals = [$referenceId, gmdate('Y-m-d H:i:s')];
  }

  // Walk the list of configured attributes and build a list, but only if attributes
  // were provided in the request. (eg: Reassign Reference Identifier does not require
  // attributes to be provided.)
  if($attributes->attributesAvailable()) {
    foreach($this->mgConfig->attributes as $attr) {
      // By default, this array shouldn't have sor or sorid. However, we'll double
      // check in case an admin tries to explicitly list it... we don't want the
      // ability to update sor or sorid via this interface.
      if(in_array(strtolower($attr->name), ['sor', 'sorid'], true)) {
        continue;
      }

      // Only add this attribute if there is a value specified
      $val = $attributes->getValueByAttribute($attr);

      if($val) {
        $attrs[] = $attr->name;
        $vals[] = $val;
      }
    }
  }

  if(empty($attrs)) {
    // Nothing to write. An UPDATE with an empty SET list is not valid SQL,
    // so leave the row as it is.
    Log::write('debug', "No changes to apply to matchgrid entry at row ID " . $rowid);

    return $rowid;
  }

  $sql = "UPDATE " . $this->mgTable . " SET ";

  foreach($attrs as $a) {
    $sql .= $a . "=?,";
  }

  // Toss the last comma
  $sql = rtrim($sql, ",");

  $sql .= " WHERE id=?";
  $vals[] = $rowid;

  $stmt = $this->dbcxn->Prepare($sql);

  foreach($vals as $i => $v) {
    // Note that DBAL counts from 1, but PHP counts from 0
    $stmt->bindValue($i + 1, $v);
  }

  $stmt->executeStatement();

  Log::write('debug', "Updated matchgrid entry at row ID " . $rowid);

  // Return $rowid for consistency with insert()
  return $rowid;
}
/**
 * Replace the attribute values stored on an existing Matchgrid record,
 * leaving its Reference ID alone.
 *
 * @since  COmanage Match v1.0.0
 * @param  int              $rowid      Row ID
 * @param  AttributeManager $attributes Request Attributes
 * @return string                       Row ID
 * @throws RuntimeException
 */
public function updateSorAttributes(int $rowid, AttributeManager $attributes) {
  // No Reference ID is passed, so update() only touches the attributes
  $updatedRowId = $this->update($rowid, $attributes);

  return $updatedRowId;
}
/**
 * Insert or update a record in the Matchgrid.
 *
 * Runs inside a transaction: the row for $sor + $sorid is looked up (and
 * locked) first, then update() is used if it exists and insert() otherwise.
 * Any failure rolls the transaction back.
 *
 * @since  COmanage Match v1.0.0
 * @param  string           $sor         SOR Label
 * @param  string           $sorid       SOR Record ID
 * @param  AttributeManager $attributes  Request Attributes
 * @param  string|null      $referenceId Reference ID to store, or null for none
 * @return mixed                         Row ID
 * @throws RuntimeException              On any failure; the original error is attached as "previous"
 */
protected function upsert(string $sor, string $sorid, AttributeManager $attributes, ?string $referenceId=null) {
  // Dispatch to insert() or update()
  $rowid = null;

  $this->dbcxn->beginTransaction();

  try {
    // SELECT FOR UPDATE only locks rows of matching records. ie: On an insert
    // it won't lock anything. But the unique constraint on sor+sorid should
    // prevent duplicate inserts.
    $sql = "SELECT id
            FROM " . $this->mgTable . "
            WHERE sor=?
            AND sorid=?
            FOR UPDATE";

    $stmt = $this->dbcxn->Prepare($sql);
    $stmt->bindValue(1, $sor);
    $stmt->bindValue(2, $sorid);

    $resultSet = $stmt->executeQuery();
    $rowid = $resultSet->fetchOne();

    if($rowid !== false) {
      // $rowid should be the same before and after
      $rowid = $this->update((int)$rowid, $attributes, $referenceId);
    } else {
      $rowid = $this->insert($sor, $sorid, $attributes, $referenceId);
    }

    $this->dbcxn->commit();
  }
  catch(\Throwable $e) {
    // Catch Errors (eg: TypeError) as well as Exceptions, so the transaction
    // is never left open.
    $this->dbcxn->rollBack();

    Log::write('error', $sor . "/" . $sorid . " Upsert error: " . $e->getMessage());

    // Keep the original error reachable via getPrevious(). The code is fixed
    // at 0 because driver exceptions may carry non-integer codes.
    throw new \RuntimeException($e->getMessage(), 0, $e);
  }

  return $rowid;
}
}