From fba23ecce2732f1b27222a07da46f4e91e310f62 Mon Sep 17 00:00:00 2001 From: Benn Oshrin Date: Sun, 15 Aug 2021 11:08:39 -0400 Subject: [PATCH] Implement "match empty" (CO-2136) --- app/config/schema/schema.json | 3 +- app/src/Lib/Match/MatchService.php | 134 ++++++++++-------- app/src/Locale/en_US/default.po | 3 + app/src/Model/Table/RuleAttributesTable.php | 9 +- app/src/Template/Element/Flash/default.ctp | 2 +- app/src/Template/Element/Flash/error.ctp | 2 +- .../Template/Element/Flash/information.ctp | 2 +- app/src/Template/Element/Flash/success.ctp | 2 +- app/src/Template/RuleAttributes/fields.inc | 31 +++- app/src/Template/Rules/fields.inc | 2 +- 10 files changed, 122 insertions(+), 68 deletions(-) diff --git a/app/config/schema/schema.json b/app/config/schema/schema.json index 1eccbb93a..8f7f6b3b2 100644 --- a/app/config/schema/schema.json +++ b/app/config/schema/schema.json @@ -184,7 +184,8 @@ "attribute_id": { "type": "integer", "foreignkey": { "table": "attributes", "column": "id" } }, "crosscheck_attribute_id": { "type": "integer", "foreignkey": { "table": "attributes", "column": "id" } }, "search_type": { "type": "string", "size": 2 }, - "required": { "type": "boolean" } + "required": { "type": "boolean" }, + "match_empty": { "type": "boolean" } }, "indexes": { "rule_attributes_i1": { diff --git a/app/src/Lib/Match/MatchService.php b/app/src/Lib/Match/MatchService.php index d94ddc671..14689870f 100644 --- a/app/src/Lib/Match/MatchService.php +++ b/app/src/Lib/Match/MatchService.php @@ -543,8 +543,10 @@ protected function search(string $mode, continue 2; } - Log::write('debug', $sor . "/" . $sorid . " No value found for optional attribute " . $ruleattr->attribute->name . ", ignoring it"); - continue; + if(!$ruleattr->match_empty) { + Log::write('debug', $sor . "/" . $sorid . " No value found for optional attribute " . $ruleattr->attribute->name . ", ignoring it"); + continue; + } } // From here, we use the Crosscheck Attribute if it is specified instead @@ -559,68 +561,78 @@ protected function search(string $mode, // The column name $colclause = $attribute->name; - // If the attribute is case insensitive, we insert LOWER clauses - if(!$attribute->case_sensitive) { - $colclause = "LOWER(" . $colclause . ")"; - $val = strtolower($val); - } - - // If the attribute is alphanumeric only, we strip out non-alphanumeric characters - if($attribute->alphanumeric) { - $colclause = "REGEXP_REPLACE(" . $colclause . ", '[^A-Za-z0-9]', '', 'g')"; - $val = preg_replace('/[^A-Za-z0-9]/', '', $val); - } - - // XXX we only want search_type=E for canonical rules (should we enforce this here, or just at config time?) - // XXX complain if there are no Exact rules? Maybe in the UI during configuration? - // XXX document in wiki https://spaces.at.internet2.edu/display/COmanage/Match+Attributes - // how we handle all this - switch($ruleattr->search_type) { - case SearchTypeEnum::Distance: - $maxdistance = (int)($attribute->search_distance)+1; - $attrclause = "LEVENSHTEIN_LESS_EQUAL(" - . $colclause - . ",?," - . $attribute->search_distance - . ") < " - . $maxdistance; - break; - case SearchTypeEnum::Exact: - $attrclause = $colclause . "=?"; - break; - case SearchTypeEnum::Mapping: - $qclause = (!$attribute->case_sensitive ? "LOWER(query)" : "query"); - $attrclause = "(" . $colclause . " - IN (SELECT value - FROM attribute_mappings - WHERE attribute_map_id=" . $attribute->attribute_map_id ." - AND " . $qclause . "=?) - OR " . $colclause . "=?)"; - // We need two copies of $val in the param list - $attrSql['vals'][$ruleattr->attribute->id][] = (!$attribute->case_sensitive ? strtolower($val) : $val); - break; - case SearchTypeEnum::Substring: - $attrclause = "SUBSTRING(" - . $colclause - . " FROM " - . $attribute->search_substr_from - . " FOR " - . $attribute->search_substr_for - . ") = SUBSTRING(? FROM " - . $attribute->search_substr_from - . " FOR " - . $attribute->search_substr_for - . ")"; - break; - default: - throw new LogicException(__('match.er.search_type', [$ruleattr->search_type])); - break; + if(!$val) { + // We skip all the column construction stuff before since we're + // comparing a NULL or empty value, so configurations like search + // distance or mapping don't apply or make sense. + + $attrclause = $colclause . " IS NULL"; + } else { + // If the attribute is case insensitive, we insert LOWER clauses + if(!$attribute->case_sensitive) { + $colclause = "LOWER(" . $colclause . ")"; + $val = strtolower($val); + } + + // If the attribute is alphanumeric only, we strip out non-alphanumeric characters + if($attribute->alphanumeric) { + $colclause = "REGEXP_REPLACE(" . $colclause . ", '[^A-Za-z0-9]', '', 'g')"; + $val = preg_replace('/[^A-Za-z0-9]/', '', $val); + } + + // XXX we only want search_type=E for canonical rules (should we enforce this here, or just at config time?) + // XXX complain if there are no Exact rules? Maybe in the UI during configuration? + // XXX document in wiki https://spaces.at.internet2.edu/display/COmanage/Match+Attributes + // how we handle all this + switch($ruleattr->search_type) { + case SearchTypeEnum::Distance: + $maxdistance = (int)($attribute->search_distance)+1; + $attrclause = "LEVENSHTEIN_LESS_EQUAL(" + . $colclause + . ",?," + . $attribute->search_distance + . ") < " + . $maxdistance; + break; + case SearchTypeEnum::Exact: + $attrclause = $colclause . "=?"; + break; + case SearchTypeEnum::Mapping: + $qclause = (!$attribute->case_sensitive ? "LOWER(query)" : "query"); + $attrclause = "(" . $colclause . " + IN (SELECT value + FROM attribute_mappings + WHERE attribute_map_id=" . $attribute->attribute_map_id ." + AND " . $qclause . "=?) + OR " . $colclause . "=?)"; + // We need two copies of $val in the param list + $attrSql['vals'][$ruleattr->attribute->id][] = (!$attribute->case_sensitive ? strtolower($val) : $val); + break; + case SearchTypeEnum::Substring: + $attrclause = "SUBSTRING(" + . $colclause + . " FROM " + . $attribute->search_substr_from + . " FOR " + . $attribute->search_substr_for + . ") = SUBSTRING(? FROM " + . $attribute->search_substr_from + . " FOR " + . $attribute->search_substr_for + . ")"; + break; + default: + throw new LogicException(__('match.er.search_type', [$ruleattr->search_type])); + break; + } } // Note here we revert to using the original Attribute ID, since if // multiple configurations are specified for it we want to OR them together $attrSql['sql'][$ruleattr->attribute->id][] = $attrclause; - $attrSql['vals'][$ruleattr->attribute->id][] = $val; + if($val) { + $attrSql['vals'][$ruleattr->attribute->id][] = $val; + } } if(empty($attrSql['vals'])) { @@ -650,7 +662,9 @@ protected function search(string $mode, $sql .= " AND " . $attrSql['sql'][$attrId][0]; } - $vals = array_merge($vals, $attrSql['vals'][$attrId]); + if(!empty($attrSql['vals'][$attrId])) { + $vals = array_merge($vals, $attrSql['vals'][$attrId]); + } } LOG::write('debug', $sor . "/" . $sorid . " SQL: " . $sql); diff --git a/app/src/Locale/en_US/default.po b/app/src/Locale/en_US/default.po index c928a6072..f773338b1 100644 --- a/app/src/Locale/en_US/default.po +++ b/app/src/Locale/en_US/default.po @@ -423,6 +423,9 @@ msgstr "Required" msgid "match.fd.resolution_mode" msgstr "Resolution Mode" +msgid "match.fd.RuleAttributes.match_empty" +msgstr "Match Empty Values" + msgid "match.fd.search_distance" msgstr "Search Distance" diff --git a/app/src/Model/Table/RuleAttributesTable.php b/app/src/Model/Table/RuleAttributesTable.php index 26e9c6c60..c9ff90924 100644 --- a/app/src/Model/Table/RuleAttributesTable.php +++ b/app/src/Model/Table/RuleAttributesTable.php @@ -132,7 +132,14 @@ public function validationDefault(Validator $validator) { 'toggle', [ 'rule' => [ 'boolean' ] ] ); - $validator->notEmpty('required'); + $validator->allowEmpty('required'); + + $validator->add( + 'match_empty', + 'toggle', + [ 'rule' => [ 'boolean' ] ] + ); + $validator->allowEmpty('match_empty'); return $validator; } diff --git a/app/src/Template/Element/Flash/default.ctp b/app/src/Template/Element/Flash/default.ctp index c8bced80c..77e07c22c 100644 --- a/app/src/Template/Element/Flash/default.ctp +++ b/app/src/Template/Element/Flash/default.ctp @@ -16,7 +16,7 @@ ', $filteredMessage); print ""; diff --git a/app/src/Template/Element/Flash/error.ctp b/app/src/Template/Element/Flash/error.ctp index 39d510ec7..1b46f51ea 100644 --- a/app/src/Template/Element/Flash/error.ctp +++ b/app/src/Template/Element/Flash/error.ctp @@ -5,7 +5,7 @@ if(!empty($message)) { // Strip tags then escape quotes before handing Flash message to noty.js - $filteredMessage = filter_var(filter_var($message,FILTER_SANITIZE_STRING,FILTER_FLAG_NO_ENCODE_QUOTES),FILTER_SANITIZE_MAGIC_QUOTES); + $filteredMessage = filter_var(filter_var($message,FILTER_SANITIZE_STRING,FILTER_FLAG_NO_ENCODE_QUOTES),FILTER_SANITIZE_ADD_SLASHES); // Replace all newlines with html breaks $filteredMessage = str_replace(array("\r", "\n"), '
', $filteredMessage); print ""; diff --git a/app/src/Template/Element/Flash/information.ctp b/app/src/Template/Element/Flash/information.ctp index edf52fbd1..600ec08c3 100644 --- a/app/src/Template/Element/Flash/information.ctp +++ b/app/src/Template/Element/Flash/information.ctp @@ -5,7 +5,7 @@ if(!empty($message)) { // Strip tags then escape quotes before handing Flash message to noty.js - $filteredMessage = filter_var(filter_var($message,FILTER_SANITIZE_STRING,FILTER_FLAG_NO_ENCODE_QUOTES),FILTER_SANITIZE_MAGIC_QUOTES); + $filteredMessage = filter_var(filter_var($message,FILTER_SANITIZE_STRING,FILTER_FLAG_NO_ENCODE_QUOTES),FILTER_SANITIZE_ADD_SLASHES); // Replace all newlines with html breaks $filteredMessage = str_replace(array("\r", "\n"), '
', $filteredMessage); print ""; diff --git a/app/src/Template/Element/Flash/success.ctp b/app/src/Template/Element/Flash/success.ctp index 864d55de3..e82d381b2 100644 --- a/app/src/Template/Element/Flash/success.ctp +++ b/app/src/Template/Element/Flash/success.ctp @@ -5,7 +5,7 @@ if(!empty($message)) { // Strip tags then escape quotes before handing Flash message to noty.js - $filteredMessage = filter_var(filter_var($message,FILTER_SANITIZE_STRING,FILTER_FLAG_NO_ENCODE_QUOTES),FILTER_SANITIZE_MAGIC_QUOTES); + $filteredMessage = filter_var(filter_var($message,FILTER_SANITIZE_STRING,FILTER_FLAG_NO_ENCODE_QUOTES),FILTER_SANITIZE_ADD_SLASHES); // Replace all newlines with html breaks $filteredMessage = str_replace(array("\r", "\n"), '
', $filteredMessage); print ""; diff --git a/app/src/Template/RuleAttributes/fields.inc b/app/src/Template/RuleAttributes/fields.inc index 923238725..9e0fac9e4 100644 --- a/app/src/Template/RuleAttributes/fields.inc +++ b/app/src/Template/RuleAttributes/fields.inc @@ -25,10 +25,39 @@ * @license Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) */ +use \App\Lib\Enum\ConfidenceModeEnum; +use \App\Lib\Enum\SearchTypeEnum; +?> + +Field->control('attribute_id', ['empty' => true]); print $this->Field->control('crosscheck_attribute_id', ['empty' => true], false, __('match.fd.RuleAttributes.crosscheck_attribute_id')); print $this->Field->control('search_type', ['empty' => true]); - print $this->Field->control('required'); + print $this->Field->control('required', ['onChange'=>'fields_update_gadgets();'], false); + print $this->Field->control('match_empty', [], false); } \ No newline at end of file diff --git a/app/src/Template/Rules/fields.inc b/app/src/Template/Rules/fields.inc index a21030a87..cc72f6a38 100644 --- a/app/src/Template/Rules/fields.inc +++ b/app/src/Template/Rules/fields.inc @@ -30,5 +30,5 @@ if($action == 'add' || $action == 'edit') { print $this->Field->control('name'); print $this->Field->control('description', [], false); print $this->Field->control('confidence_mode', ['empty' => true]); - print $this->Field->control('ordr'); + print $this->Field->control('ordr', [], false); } \ No newline at end of file