Skip to content

Commit

Permalink
Pipeline EmailAddress and Identifier Match Strategies (CFM-375)
Browse files Browse the repository at this point in the history
  • Loading branch information
Benn Oshrin committed Jan 16, 2024
1 parent 4ff6d37 commit 39027ed
Show file tree
Hide file tree
Showing 9 changed files with 242 additions and 23 deletions.
8 changes: 5 additions & 3 deletions app/config/schema/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,8 @@
"description": {},
"status": {},
"match_strategy": { "type": "string", "size": 2 },
"match_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } },
"match_email_address_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } },
"match_identifier_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } },
"match_server_id": { "type": "integer", "foreignkey": { "table": "servers", "column": "id" } },
"sync_status_on_delete": { "type": "string", "size": 2 },
"sync_affiliation_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } },
Expand All @@ -618,12 +619,13 @@
},
"indexes": {
"pipelines_i1": { "columns": [ "co_id" ] },
"pipelines_i2": { "needed": false, "columns": [ "match_type_id" ] },
"pipelines_i2": { "needed": false, "columns": [ "match_email_address_type_id" ] },
"pipelines_i3": { "needed": false, "columns": [ "match_server_id" ] },
"pipelines_i4": { "needed": false, "columns": [ "sync_affiliation_type_id" ] },
"pipelines_i5": { "needed": false, "columns": [ "sync_cou_id" ] },
"pipelines_i6": { "needed": false, "columns": [ "sync_replace_cou_id" ] },
"pipelines_i7": { "needed": false, "columns": [ "sync_identifier_type_id" ] }
"pipelines_i7": { "needed": false, "columns": [ "sync_identifier_type_id" ] },
"pipelines_i8": { "needed": false, "columns": [ "match_identifier_type_id" ] }
}
},

Expand Down
8 changes: 8 additions & 0 deletions app/resources/locales/en_US/field.po
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
# When adding entries to this file, group non-model specific translations at the top,
# then model specific translations alphabetically by model.

msgid "action"
msgstr "Action"

msgid "actions"
msgstr "{0,plural,=1{Action} other{Actions}}"

Expand Down Expand Up @@ -461,6 +464,11 @@ msgstr "Start Summary"

msgid "Jobs.start_time"
msgstr "Started"

msgid "Pipelines.match_email_address_type_id"
msgstr "Email Address Type"

msgid "Pipelines.match_identifier_type_id"
msgstr "Identifier Type"

msgid "Pipelines.match_strategy"
msgstr "Match Strategy"
Expand Down
5 changes: 4 additions & 1 deletion app/resources/locales/en_US/result.po
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,14 @@ msgstr "Person Role status recalculated from {0} to {1}"
msgid "Pipelines.complete"
msgstr "Pipeline {0} complete for EIS {1} source key {2}"

msgid "Pipelines.matched"
msgstr "Pipeline {0} ({1}) matched EIS {2} ({3}) source key {4} to Person using Match Strategy {5}"

msgid "Pipelines.ei.added"
msgstr "Created new External Identity via Pipeline {0} ({1}) using Source {2} ({3}) Key {4}"

msgid "Pipelines.started"
msgstr "Pipeline {0} started for EIS {1} source key {2}"
msgstr "Pipeline {0} ({1}) started for EIS {2} ({3}) source key {4}"

msgid "saved"
msgstr "Saved"
Expand Down
1 change: 1 addition & 0 deletions app/src/Lib/Enum/ActionEnum.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class ActionEnum extends StandardEnum {
const MVEAEdited = 'EMVE';
const NamePrimary = 'PNAM';
const PersonAddedPipeline = 'ACPL';
const PersonMatchedPipeline = 'MCPL';
const PersonPipelineComplete = 'CCPL';
const PersonPipelineStarted = 'SCPL';
const PersonStatusRecalculated = 'RCPS';
Expand Down
25 changes: 25 additions & 0 deletions app/src/Model/Table/EmailAddressesTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,31 @@ public function localAfterSave(\Cake\Event\EventInterface $event, \Cake\Datasour
return true;
}

/**
 * Look up a Person ID from an email address and email address type ID.
 * Only verified addresses that are attached to a Person can be used for
 * lookups. The address is compared case insensitively. If more than one
 * row matches, the first one returned by the query is used.
 *
 * @since  COmanage Registry v5.0.0
 * @param  int    $typeId     Email Address Type ID
 * @param  string $identifier Email Address
 * @return int                Person ID
 * @throws \Cake\Datasource\Exception\RecordNotFoundException
 */

public function lookupPerson(int $typeId, string $identifier): int {
  // The second parameter is called $identifier for consistency with IdentifiersTable::lookupPerson()

  // The stored value is folded by the database's LOWER(), so fold the search
  // value with the multibyte-aware function. strtolower() only folds ASCII
  // letters, which would prevent an address containing non-ASCII upper case
  // characters from ever matching.
  // NOTE(review): assumes the internal mbstring encoding matches the database
  // encoding (typically UTF-8) -- confirm for the deployment.
  $emailAddress = $this->find()
    ->where([
      'LOWER(mail)' => mb_strtolower($identifier),
      'type_id' => $typeId,
      'verified' => true,
      'person_id IS NOT NULL'
    ])
    ->firstOrFail();

  return $emailAddress->person_id;
}

/**
* Perform a keyword search.
*
Expand Down
2 changes: 1 addition & 1 deletion app/src/Model/Table/HistoryRecordsTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public function initialize(array $config): void {
'ActorPeople' => ['Names' => ['queryBuilder' => function ($q) {
return $q->where(['primary_name' => true]);
}]],
'ExternalIdentities' => ['PrimaryName'],
'ExternalIdentities',
'Groups'
]);

Expand Down
1 change: 1 addition & 0 deletions app/src/Model/Table/IdentifiersTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ public function localAfterSave(\Cake\Event\EventInterface $event, \Cake\Datasour

/**
* Look up a Person ID from an identifier and identifier type ID.
* Only active Identifiers can be used for lookups.
*
* @since COmanage Registry v5.0.0
* @param int $typeId Identifier Type ID
Expand Down
181 changes: 166 additions & 15 deletions app/src/Model/Table/PipelinesTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,14 @@ public function initialize(array $config): void {
->setClassName('Servers')
->setForeignKey('match_server_id')
->setProperty('match_server');
$this->belongsTo('MatchTypes')
$this->belongsTo('MatchEmailAddressTypes')
->setClassName('Types')
->setForeignKey('match_type_id')
->setProperty('match_type');
->setForeignKey('match_email_address_type_id')
->setProperty('match_email_address_type');
$this->belongsTo('MatchIdentifierTypes')
->setClassName('Types')
->setForeignKey('match_identifier_type_id')
->setProperty('match_identifier_type');
$this->belongsTo('SyncAffiliationTypes')
->setClassName('Types')
->setForeignKey('sync_affiliation_type_id')
Expand All @@ -110,6 +114,14 @@ public function initialize(array $config): void {
$this->setRequiresCO(true);

$this->setAutoViewVars([
'matchEmailAddressTypes' => [
'type' => 'type',
'attribute' => 'EmailAddresses.type'
],
'matchIdentifierTypes' => [
'type' => 'type',
'attribute' => 'Identifiers.type'
],
'matchStrategies' => [
'type' => 'enum',
'class' => 'MatchStrategyEnum'
Expand Down Expand Up @@ -455,20 +467,22 @@ public function execute(

// (2) Match against an existing Person or create a new Person, in
// accordance with the Pipeline's Match Strategy
$person = $this->obtainPerson(
$personInfo = $this->obtainPerson(
$pipeline,
$eis,
$eisRecord['record'],
$eisBackendRecord['entity_data']
);

$person = $personInfo['person'];

// We can't record the start history until we have a Person entity
$this->Cos->People->ExternalIdentities->recordHistory(
entity: $person,
action: ActionEnum::PersonPipelineStarted,
comment: __d('result',
'Pipelines.started',
[$id, $eisId, $eisBackendRecord['source_key']])
[$pipeline->description, $id, $eis->description, $eisId, $eisBackendRecord['source_key']])
);

// (3) Create or update an External Identity based on the sync strategy
Expand All @@ -481,6 +495,19 @@ public function execute(
$eisBackendRecord['entity_data']
);

// If the Person record was matched (meaning it isn't new) create a
// History Record here, now that we have an External Identity

if($personInfo['status'] == 'matched') {
$this->Cos->People->ExternalIdentities->recordHistory(
entity: $person,
action: ActionEnum::PersonMatchedPipeline,
comment: __d('result',
'Pipelines.matched',
[$pipeline->description, $id, $eis->description, $eisId, $eisBackendRecord['source_key'], $personInfo['strategy']])
);
}

// (4) Sync the External Identity attributes with the Person record
$person = $this->syncPerson(
$pipeline,
Expand Down Expand Up @@ -813,15 +840,17 @@ protected function mapIdentifier(int $typeId, string $identifier): ?int {
* @param ExternalIdentitySource $eis External Identity Source
* @param ExtIdentitySourceRecord $eisRecord External Identity Source Record
* @param array $eisAttributes Attributes provided by EIS Backend
* @return Person Person, possibly newly created
* @return array 'person': Person object
* 'status': 'linked', 'created', 'matched'
* 'strategy': If status = 'matched', the MatchStrategy
*/

protected function obtainPerson(
Pipeline $pipeline,
ExternalIdentitySource $eis,
ExtIdentitySourceRecord $eisRecord,
array $eisAttributes
): Person {
): array {
// Shorthand...
$sourceKey = $eisRecord->source_key;

Expand All @@ -830,37 +859,142 @@ protected function obtainPerson(

if(!empty($eisRecord->external_identity_id)) {
$this->llog('trace', "Using previously linked Person " . $eisRecord->external_identity->person->id . " for EIS " . $eis->description . " (" . $eis->id . ") source key $sourceKey");
return $eisRecord->external_identity->person;
return [
'person' => $eisRecord->external_identity->person,
'status' => 'linked'
];
}

// There isn't a Person associated with the request, run the configured
// Match Strategy to see if one exists

$personId = null;
$person = null;
$referenceId = null;

$this->llog('trace', "Using Match Strategy " . $pipeline->match_strategy . " for EIS " . $eis->description . " (" . $eis->id . ") source key $sourceKey");

switch($pipeline->match_strategy) {
case MatchStrategyEnum::EmailAddress:
case MatchStrategyEnum::Identifier:
$person = $this->searchByAttribute(
$eis,
$eisRecord,
$pipeline->match_strategy,
($pipeline->match_strategy == MatchStrategyEnum::EmailAddress
? $pipeline->match_email_address_type_id
: $pipeline->match_identifier_type_id),
$eisAttributes
);
break;
case MatchStrategyEnum::External:
// XXX If we get a reference ID, attach it to the $eisRecord here CFM-33
case MatchStrategyEnum::Identifier:
throw new \RuntimeException('NOT IMPLEMENTED');
break;
case MatchStrategyEnum::NoMatching:
// No matching configured, so just fall through and create a new Person
break;
}

if(!$personId) {
if(!$person) {
// We didn't find an existing Person, so create a new one
$this->llog('trace', "No existing Person found, creating new Person record for EIS " . $eis->description . " (" . $eis->id . ") source key $sourceKey");

$person = $this->createPersonFromEIS($pipeline, $eis, $eisRecord, $eisAttributes);
return [
'person' => $this->createPersonFromEIS($pipeline, $eis, $eisRecord, $eisAttributes),
'status' => 'created'
];
}

return $person;
return [
'person' => $person,
'status' => 'matched',
'strategy' => $pipeline->match_strategy
];
}

/**
 * Search for an existing Person using an attribute provided in the EIS Record.
 *
 * The attribute of the configured type is pulled from $attributes and handed
 * to lookupPerson() on the EmailAddresses or Identifiers table, depending on
 * the Match Strategy. Only the first attribute of the requested type is used.
 *
 * @since  COmanage Registry v5.0.0
 * @param  ExternalIdentitySource  $eis             External Identity Source
 * @param  ExtIdentitySourceRecord $eisRecord       External Identity Source Record
 * @param  string                  $matchStrategy   MatchStrategyEnum::EmailAddress or MatchStrategyEnum::Identifier
 * @param  int                     $attributeTypeId Type ID of the attribute to search on
 * @param  array                   $attributes      Attributes provided by EIS Backend
 * @return Person|null                              Person if found, null otherwise
 * @throws \InvalidArgumentException                If $matchStrategy is not attribute based
 */

protected function searchByAttribute(
  ExternalIdentitySource $eis,
  ExtIdentitySourceRecord $eisRecord,
  string $matchStrategy,
  int $attributeTypeId,
  array $attributes
): ?Person {
  // By the time the Pipeline is called, $attributes (while an array) should be
  // normalized to the Registry data model (though we haven't yet called
  // mapAttributesToCO).

  // First map the search type ID from the configuration to the expected API string

  $Types = TableRegistry::getTableLocator()->get('Types');

  $typeLabel = $Types->getTypeLabel($attributeTypeId);

  // Work out which table performs the lookup, which key of $attributes holds
  // the candidate values, and which field of a candidate holds the search string

  [$tableAlias, $attributeKey, $valueField] = match($matchStrategy) {
    MatchStrategyEnum::EmailAddress => ['EmailAddresses', 'email_addresses', 'mail'],
    MatchStrategyEnum::Identifier   => ['Identifiers', 'identifiers', 'identifier'],
    default => throw new \InvalidArgumentException("Unknown Match Strategy '" . $matchStrategy . "' in PipelinesTable::searchByAttribute()")
  };

  $SearchTable = TableRegistry::getTableLocator()->get($tableAlias);

  // Make sure we have a valid search item. A missing candidate, or a candidate
  // without the value field, is treated the same as an empty value.

  $candidates = Hash::extract($attributes, "$attributeKey.{n}[type=$typeLabel]");
  $searchString = $candidates[0][$valueField] ?? null;

  if(empty($searchString)) {
    $this->llog('trace', "No attribute found of type $typeLabel for Match Strategy, creating new Person record for EIS " . $eis->description . " (" . $eis->id . ") source key " . $eisRecord->source_key);
    return null;
  }

  // Perform the search

  $personId = null;

  try {
    $personId = $SearchTable->lookupPerson($attributeTypeId, $searchString);
  }
  catch(\Cake\Datasource\Exception\RecordNotFoundException $e) {
    // No match, which is not an error: the caller will create a new Person
    return null;
  }

  if(empty($personId)) {
    return null;
  }

  // For consistency with createPersonFromEIS, we retrieve the Person and Names.
  // syncExternalIdentity will pull whatever Person attributes it actually needs.

  // AR-Pipeline-2 Pipeline Person Matching ignores the existing Person status.
  $person = $SearchTable->People->get($personId, ['contain' => ['Names']]);

  // We can't record history yet since we don't have an External Identity
  // (we'll do that in execute()), but we can at least log

  $this->llog('trace', "Matched to existing Person ID $personId using Match Strategy $matchStrategy and search string '$searchString' for EIS " . $eis->description . " (" . $eis->id . ") source key " . $eisRecord->source_key);

  return $person;
}

/**
Expand Down Expand Up @@ -1660,10 +1794,27 @@ public function validationDefault(Validator $validator): Validator {
]);
$validator->notEmptyString('match_strategy');

$validator->add('match_type_id', [
$validator->add('match_email_address_type_id', [
'content' => ['rule' => 'isInteger']
]);
$validator->notEmptyString(
field: 'match_email_address_type_id',
when: function ($context) {
return (!empty($context['data']['match_strategy'])
&& ($context['data']['match_strategy'] == MatchStrategyEnum::EmailAddress));
}
);

$validator->add('match_identifier_type_id', [
'content' => ['rule' => 'isInteger']
]);
$validator->allowEmptyString('match_type_id');
$validator->notEmptyString(
field: 'match_identifier_type_id',
when: function ($context) {
return (!empty($context['data']['match_strategy'])
&& ($context['data']['match_strategy'] == MatchStrategyEnum::Identifier));
}
);

$validator->add('match_server_id', [
'content' => ['rule' => 'isInteger']
Expand Down
Loading

0 comments on commit 39027ed

Please sign in to comment.