diff --git a/app/src/Lib/Match/AttributeManager.php b/app/src/Lib/Match/AttributeManager.php index 3cebf9622..c0fc478aa 100644 --- a/app/src/Lib/Match/AttributeManager.php +++ b/app/src/Lib/Match/AttributeManager.php @@ -32,6 +32,8 @@ namespace App\Lib\Match; +use Cake\Log\Log; + class AttributeManager { /* We treat attribute groups (eg: preferred name vs official name) the same * way as attribute types (eg: identifier/national vs identifier/network), @@ -85,6 +87,8 @@ public function getRequestedReferenceId() { */ public function getValueByAttribute(\App\Model\Entity\Attribute $attribute) { + $ret = null; + // We can have three types of attributes here: // (1) simple, eg "dateOfBirth", context = "_default" (or NULL) // (2) typed, eg "identifiers:identifier/national", context = type (eg: "national") @@ -92,19 +96,29 @@ public function getValueByAttribute(\App\Model\Entity\Attribute $attribute) { // The attribute group name needs to match the type, eg for names/type=official // define attribute_groups:name as "official". We'll need a better solution though - // if we ever had two different types of "official" attributes. + // if we ever had two different types of "official" attributes. if($attribute->attribute_group_id) { // Type 3 (Grouped) - return $this->getValueByContext($attribute->api_name, $attribute->attribute_group->name); + $ret = $this->getValueByContext($attribute->api_name, $attribute->attribute_group->name); } elseif(strpos($attribute->api_name, '/') !== false) { // Type 2 (Typed) $a = explode('/', $attribute->api_name); - return $this->getValueByContext($a[0], $a[1]); + $ret = $this->getValueByContext($a[0], $a[1]); } else { // Type 1 (Simple) - return $this->getValueByContext($attribute->api_name); + $ret = $this->getValueByContext($attribute->api_name); } + + if($ret + && $attribute->null_equivalents + && !preg_match('/[1-9[:alpha:]]/', $ret)) { + // We require one alphabetic character or a number other than 0, otherwise return null + Log::write('debug', "Ignoring null equivalent value '" . $ret . "' for attribute " . $attribute->name); + return null; + } + + return $ret; } /** diff --git a/app/src/Lib/Match/MatchService.php b/app/src/Lib/Match/MatchService.php index 3df78e176..1ad7a4f1b 100644 --- a/app/src/Lib/Match/MatchService.php +++ b/app/src/Lib/Match/MatchService.php @@ -455,7 +455,6 @@ protected function search(string $mode, Log::write('debug', $sor . "/" . $sorid . " Searching with confidence mode " . $mode); foreach($this->mgConfig->$ruleObjs as $rule) { -// XXX need to implement strtolower, preg_replace, etc (see PoC buildAttributeSql) $sql = "SELECT * FROM " . $this->mgTable . " WHERE referenceid IS NOT NULL"; // Don't match pending requests @@ -471,17 +470,32 @@ protected function search(string $mode, $val = $attributes->getValueByAttribute($ruleattr->attribute); if(!$val) { + if($ruleattr->attribute->required) { + Log::write('debug', $sor . "/" . $sorid . " No value found for required attribute " . $ruleattr->attribute->name . " skipping rule " . $rule->name); + throw new \RuntimeException(__('match.er.attr.req', [$ruleattr->attribute->name])); + } + Log::write('debug', $sor . "/" . $sorid . " No value found for attribute " . $ruleattr->attribute->name . " skipping rule " . $rule->name); continue 2; } - $andclause = ""; $colclause = ""; // The column name $colclause = $ruleattr->attribute->name; + // If the attribute is case insensitive, we insert LOWER clauses + if(!$ruleattr->attribute->case_sensitive) { + $colclause = "LOWER(" . $colclause . ")"; + } + + // If the attribute is alphanumeric only, we strip out non-alphanumeric characters + if($ruleattr->attribute->alphanumeric) { + $colclause = "REGEXP_REPLACE(" . $colclause . ", '[^A-Za-z0-9]', '', 'g')"; + $val = preg_replace('/[^A-Za-z0-9]/', '', $val); + } + // XXX we only want search_type=E for canonical rules (should we enforce this here, or just at config time?) // XXX complain if there are no Exact rules? Maybe in the UI during configuration? // XXX document in wiki https://spaces.at.internet2.edu/display/COmanage/Match+Attributes @@ -521,6 +535,14 @@ protected function search(string $mode, $vals[] = $val; } + if(count($vals) == 0) { + // We need at least one attribute to search on. If we didn't process + // any in the request, complain. + + Log::write('debug', $sor . "/" . $sorid . " No searchable attributes found in request, skipping rule " . $rule->name); + continue; + } + LOG::write('debug', $sor . "/" . $sorid . " SQL: " . $sql); $stmt = $this->dbc->Prepare($sql); @@ -533,10 +555,20 @@ protected function search(string $mode, $count = 0; while($row = $r->fetchRow()) { - $results->add($row); + + $results->add($row, $rule->name); $count++; } + if($count == 0) { + // Record that this rule completed successfully. We do this to indirectly + // validate the attributes passed in the request body, since otherwise we'd + // have to recalculate the attributes against the rules (which we already + // do here). + + $results->add([], $rule->name); + } + Log::write('debug', $sor . "/" . $sorid . " Matched " . $count . " candidate(s) using rule " . $rule->name); if($mode == ConfidenceModeEnum::Canonical && $count > 0) { @@ -573,6 +605,7 @@ public function searchReferenceId(string $sor, string $sorid, AttributeManager $ break; default: // Multiple canonical matches: demote to potential + Log::write('debug', $sor . "/" . $sorid . " Found " . $canonicalMatches->count() . " canonical matches, downgrading to potential"); $canonicalMatches->setConfidenceMode(ConfidenceModeEnum::Potential); // XXX should we continue with other potential rules and merge results? return $canonicalMatches; diff --git a/app/src/Locale/en_US/default.po b/app/src/Locale/en_US/default.po index 3007b8432..f1744bf23 100644 --- a/app/src/Locale/en_US/default.po +++ b/app/src/Locale/en_US/default.po @@ -158,6 +158,9 @@ msgstr "Suspended" msgid "match.er.args" msgstr "Incorrect arguments provided to {0}" +msgid "match.er.attr.req" +msgstr "Required attribute {0} not found in request" + msgid "match.er.build" msgstr "Error applying matchgrid schema: {0}" @@ -203,6 +206,9 @@ msgstr "Request ID {0} already resolved" msgid "match.er.reconcile.notfound" msgstr "Request ID {0} not found" +msgid "match.er.rules.unsuccessful" +msgstr "No rules successfully completed" + msgid "match.er.save" msgstr "Save Failed ({0})" diff --git a/app/src/Template/Attributes/fields.inc b/app/src/Template/Attributes/fields.inc index 2b3b0be8d..a8d9b24af 100644 --- a/app/src/Template/Attributes/fields.inc +++ b/app/src/Template/Attributes/fields.inc @@ -35,7 +35,8 @@ if($action == 'add' || $action == 'edit') { print $this->Field->control('alphanumeric', [], false); print $this->Field->control('case_sensitive', [], false); - print $this->Field->control('invalidates', [], false); +// CO-1762 +// print $this->Field->control('invalidates', [], false); print $this->Field->control('null_equivalents', [], false); print $this->Field->control('required', [], false);