From c2d6cd30f755fd1d5b06ed8f45c220764c2f47eb Mon Sep 17 00:00:00 2001 From: Benn Oshrin Date: Tue, 2 Jul 2024 06:30:14 -0400 Subject: [PATCH] Initial implementation of SqlSource (CFM-312) and SyncJob (CFM-372) --- .../resources/locales/en_US/sql_connector.po | 51 + .../src/Controller/SqlSourcesController.php | 40 + .../src/Lib/Enum/SqlSourceTableModeEnum.php | 37 + .../src/Model/Entity/SqlSource.php | 49 + .../src/Model/Table/SqlProvisionersTable.php | 3 +- .../src/Model/Table/SqlSourcesTable.php | 1021 +++++++++++++++++ .../SqlConnector/src/config/plugin.json | 32 + .../templates/SqlSources/fields-nav.inc | 31 + .../templates/SqlSources/fields.inc | 106 ++ app/config/schema/schema.json | 3 +- .../resources/locales/en_US/core_job.po | 42 + app/plugins/CoreJob/src/Lib/Jobs/SyncJob.php | 570 +++++++++ app/plugins/CoreJob/src/config/plugin.json | 22 +- .../src/Model/Table/SqlServersTable.php | 3 +- app/resources/locales/en_US/error.po | 6 + app/resources/locales/en_US/field.po | 15 + app/src/Command/JobCommand.php | 11 + app/src/Controller/AppController.php | 11 +- app/src/Lib/Enum/SyncModeEnum.php | 3 +- app/src/Lib/Events/CoIdEventListener.php | 84 ++ .../Table/ExternalIdentitySourcesTable.php | 137 ++- app/src/Model/Table/JobsTable.php | 29 +- app/src/Model/Table/PipelinesTable.php | 34 +- .../ExternalIdentitySources/fields.inc | 5 +- .../ExternalIdentitySources/search.php | 2 +- app/templates/element/form/nameDiv.php | 17 +- 26 files changed, 2309 insertions(+), 55 deletions(-) create mode 100644 app/availableplugins/SqlConnector/src/Controller/SqlSourcesController.php create mode 100644 app/availableplugins/SqlConnector/src/Lib/Enum/SqlSourceTableModeEnum.php create mode 100644 app/availableplugins/SqlConnector/src/Model/Entity/SqlSource.php create mode 100644 app/availableplugins/SqlConnector/src/Model/Table/SqlSourcesTable.php create mode 100644 app/availableplugins/SqlConnector/templates/SqlSources/fields-nav.inc create mode 100644 
app/availableplugins/SqlConnector/templates/SqlSources/fields.inc create mode 100644 app/plugins/CoreJob/src/Lib/Jobs/SyncJob.php create mode 100644 app/src/Lib/Events/CoIdEventListener.php diff --git a/app/availableplugins/SqlConnector/resources/locales/en_US/sql_connector.po b/app/availableplugins/SqlConnector/resources/locales/en_US/sql_connector.po index ceb0662fa..fdf0f1eb3 100644 --- a/app/availableplugins/SqlConnector/resources/locales/en_US/sql_connector.po +++ b/app/availableplugins/SqlConnector/resources/locales/en_US/sql_connector.po @@ -25,15 +25,66 @@ msgid "controller.SqlProvisioners" msgstr "{0,plural,=1{SQL Provisioner} other{SQL Provisioners}}" +msgid "enumeration.SqlSourceTableModeEnum.FL" +msgstr "Flat" + +msgid "enumeration.SqlSourceTableModeEnum.RL" +msgstr "Relational" + msgid "error.table_prefix" msgstr "Table Name Prefix must be alphanumeric and end with an underscore" +msgid "error.SqlSources.threshold" +msgstr "Aborting sync due to {0}% of records changed (threshold is {1}%)" + +msgid "error.SqlSources.type" +msgstr "Type must be set when using Flat Mode" + msgid "field.SqlProvisioners.table_prefix" msgstr "Table Name Prefix" msgid "field.SqlProvisioners.table_prefix.desc" msgstr "Prefix used when constructing table names, must be alphanumeric and end with an underscore (_)" +msgid "field.SqlSources.address_type_id" +msgstr "Address Type" + +msgid "field.SqlSources.email_address_type_id" +msgstr "Email Address Type" + +msgid "field.SqlSources.identifier_type_id" +msgstr "Identifier Type" + +msgid "field.SqlSources.name_type_id" +msgstr "Name Type" + +msgid "field.SqlSources.pronouns_type_id" +msgstr "Pronouns Type" + +msgid "field.SqlSources.source_table" +msgstr "Source Table" + +msgid "field.SqlSources.table_mode" +msgstr "Table Mode" + +msgid "field.SqlSources.telephone_number_type_id" +msgstr "Telephone Number Type" + +msgid "field.SqlSources.threshold_check" +msgstr "Check Threshold" + +msgid "field.SqlSources.threshold_check.desc" 
+msgstr "If set, and this percentage of records have changed, a full sync will not be performed" + +msgid "field.SqlSources.threshold_override" +msgstr "Threshold Override" + +msgid "field.SqlSources.threshold_override.desc" +msgstr "If set, and the next run exceeds the Check Threshold, complete the sync anyway" + +msgid "field.SqlSources.url_type_id" +msgstr "URL Type" + msgid "operation.reapply" msgstr "Reapply Target Database Schema" diff --git a/app/availableplugins/SqlConnector/src/Controller/SqlSourcesController.php b/app/availableplugins/SqlConnector/src/Controller/SqlSourcesController.php new file mode 100644 index 000000000..091938103 --- /dev/null +++ b/app/availableplugins/SqlConnector/src/Controller/SqlSourcesController.php @@ -0,0 +1,40 @@ + [ + 'SqlSources.id' => 'asc' + ] + ]; +} diff --git a/app/availableplugins/SqlConnector/src/Lib/Enum/SqlSourceTableModeEnum.php b/app/availableplugins/SqlConnector/src/Lib/Enum/SqlSourceTableModeEnum.php new file mode 100644 index 000000000..7dce47d40 --- /dev/null +++ b/app/availableplugins/SqlConnector/src/Lib/Enum/SqlSourceTableModeEnum.php @@ -0,0 +1,37 @@ + + */ + protected $_accessible = [ + '*' => true, + 'id' => false, + 'slug' => false, + ]; +} diff --git a/app/availableplugins/SqlConnector/src/Model/Table/SqlProvisionersTable.php b/app/availableplugins/SqlConnector/src/Model/Table/SqlProvisionersTable.php index b8902a411..b1b0db3c4 100644 --- a/app/availableplugins/SqlConnector/src/Model/Table/SqlProvisionersTable.php +++ b/app/availableplugins/SqlConnector/src/Model/Table/SqlProvisionersTable.php @@ -786,7 +786,8 @@ public function validationDefault(Validator $validator): Validator { $this->registerStringValidation($validator, $schema, 'table_prefix', true); - // Table prefixes must be alphanumeric and end in an underscore + // Table prefixes must be alphanumeric and end in an underscore. + // (We don't use validateSqlIdentifier because of the trailing underscore requirement.) 
$validator->add('table_prefix', [ 'format' => [ 'rule' => function ($value, $context) { diff --git a/app/availableplugins/SqlConnector/src/Model/Table/SqlSourcesTable.php b/app/availableplugins/SqlConnector/src/Model/Table/SqlSourcesTable.php new file mode 100644 index 000000000..24fca2141 --- /dev/null +++ b/app/availableplugins/SqlConnector/src/Model/Table/SqlSourcesTable.php @@ -0,0 +1,1021 @@ +addBehavior('Changelog'); + $this->addBehavior('Log'); + $this->addBehavior('Timestamp'); + + $this->setTableType(\App\Lib\Enum\TableTypeEnum::Configuration); + + // Define associations + $this->belongsTo('ExternalIdentitySources'); + $this->belongsTo('Servers'); + $this->belongsTo('AddressTypes') + ->setClassName('Types') + ->setForeignKey('address_type_id') + ->setProperty('address_type'); + $this->belongsTo('EmailAddressTypes') + ->setClassName('Types') + ->setForeignKey('email_address_type_id') + ->setProperty('email_address_type'); + $this->belongsTo('IdentifierTypes') + ->setClassName('Types') + ->setForeignKey('identifier_type_id') + ->setProperty('identifier_type'); + $this->belongsTo('NameTypes') + ->setClassName('Types') + ->setForeignKey('name_type_id') + ->setProperty('name_type'); + $this->belongsTo('PronounsTypes') + ->setClassName('Types') + ->setForeignKey('pronouns_type_id') + ->setProperty('pronouns_type'); + $this->belongsTo('TelephoneNumberTypes') + ->setClassName('Types') + ->setForeignKey('telephone_number_type_id') + ->setProperty('telephone_number_type'); + $this->belongsTo('UrlTypes') + ->setClassName('Types') + ->setForeignKey('url_type_id') + ->setProperty('url_type'); + + $this->setDisplayField('server_id'); + + $this->setPrimaryLink(['external_identity_source_id']); + $this->setRequiresCO(true); + + $this->setAutoViewVars([ + 'addressTypes' => [ + 'type' => 'type', + 'attribute' => 'Addresses.type' + ], + 'emailAddressTypes' => [ + 'type' => 'type', + 'attribute' => 'EmailAddresses.type' + ], + 'identifierTypes' => [ + 'type' => 'type', + 
'attribute' => 'Identifiers.type' + ], + 'nameTypes' => [ + 'type' => 'type', + 'attribute' => 'Names.type' + ], + 'pronounsTypes' => [ + 'type' => 'type', + 'attribute' => 'Pronouns.type' + ], + 'servers' => [ + 'type' => 'select', + 'model' => 'Servers', + 'where' => ['plugin' => 'CoreServer.SqlServers'] + ], + 'telephoneNumberTypes' => [ + 'type' => 'type', + 'attribute' => 'TelephoneNumbers.type' + ], + 'urlTypes' => [ + 'type' => 'type', + 'attribute' => 'Urls.type' + ], + 'tableModes' => [ + 'type' => 'enum', + 'class' => 'SqlConnector.SqlSourceTableModeEnum' + ] + ]); + + $this->setPermissions([ + // Actions that operate over an entity (ie: require an $id) + 'entity' => [ + 'delete' => false, // Delete the pluggable object instead + 'edit' => ['platformAdmin', 'coAdmin'], + 'view' => ['platformAdmin', 'coAdmin'] + ], + // Actions that operate over a table (ie: do not require an $id) + 'table' => [ + 'add' => false, //['platformAdmin', 'coAdmin'], + 'index' => ['platformAdmin', 'coAdmin'] + ] + ]); + } + + /** + * Obtain the set of changed records from the source database. + * + * @since COmanage Registry v5.0.0 + * @param ExternalIdentitySource $source External Identity Source + * @param int $lastStart Timestamp of last run + * @param int $curStart Timestamp of current run + * @param bool $count If true, return a count of changed records + * @return int|array|bool An array of changed source keys, or a count of changed source keys (if $count), or false + */ + + protected function getChanges( + \App\Model\Entity\ExternalIdentitySource $source, + int $lastStart, + int $curStart, + bool $count=false + ): int|array|bool { + $SourceTable = $this->getRecordTable($source->sql_source); + + // Our first preference for building a changelist is the use of the modified column + // on the table (or for relational mode the primary table) + + if($SourceTable->getSchema()->getColumnType('modified')) { + $this->llog('trace', "Calculating changes via modified timestamp for " . 
$source->description); + + $query = $SourceTable->find('list', [ + 'keyField' => 'source_key', + 'valueField' => 'modified' + ]) + ->where([ + 'modified >' => date('Y-m-d H:i:s', $lastStart), + 'modified <=' => date('Y-m-d H:i:s', $curStart) + ]); + + if($count) { + return $query->count(); + } else { + $records = $query->toArray(); + + return array_keys($records); + } + } + + // If there is an _archive table defined, use that to perform a diff. + // If not, this will return false. At that point, we don't have an efficient way + // of determining a changelist, so we fall back to the default behavior. + + // NOTYETIMPLEMENTED return $this->getChangeListFromArchive(); + + return false; + } + + /** + * Obtain the set of changed records from the source database. + * + * @since COmanage Registry v5.0.0 + * @param ExternalIdentitySource $source External Identity Source + * @param int $lastStart Timestamp of last run + * @param int $curStart Timestamp of current run + * @return array|bool An array of changed source keys, or false + */ + + public function getChangeList( + \App\Model\Entity\ExternalIdentitySource $source, + int $lastStart, // timestamp of last run + int $curStart // timestamp of current run + ): array|bool { + return $this->getChanges($source, $lastStart, $curStart); + } + + /** + * Obtain the full set of records from the source database. 
+ * + * @since COmanage Registry v5.0.0 + * @param ExternalIdentitySource $source External Identity Source + * @param bool $count If true, return a count of records + * @return int|array An array of source keys, or a count of source keys (if $count) + */ + + protected function getInventory( + \App\Model\Entity\ExternalIdentitySource $source, + bool $count=false + ): int|array { + $SourceTable = $this->getRecordTable($source->sql_source); + + $query = $SourceTable->find('list', [ + 'keyField' => 'source_key', + 'valueField' => 'modified' + ]); + + if($count) { + return $query->count(); + } else { + $records = $query->toArray(); + + return array_keys($records); + } + } + + /** + * Obtain a Table for Source Records. + * + * @since COmanage Registry v5.0.0 + * @param SqlSource $SqlSource SqlSource configuration entity + * @param string $relatedModel If specified, obtain the Table for this related model + * @param bool $archive If true, get the archive table version + * @return Table Cake Table + */ + + protected function getRecordTable( + \SqlConnector\Model\Entity\SqlSource $SqlSource, + $relatedModel=null, + $archive=false + ) { + // We need a special database connection to talk to the inbound server. + // The configuration gets passed to TableRegistry, but Cake only allows a + // given Table Alias to be passed a configuration once. If we're called + // multiple times with different configurations (eg: during a sync process) + // this can be problematic, so we need to append the server ID to both + // the database connection and the table alias to ensure the correct + // connections are maintained and retrieved. + + $cxnLabel = "sqlsource" . $SqlSource->server_id; +// XXX add support for archive + $sourceAlias = "SourceRecord" . $relatedModel . $SqlSource->server_id; + $sourceTableName = $SqlSource->source_table; + + if(!empty($relatedModel)) { + $sourceTableName .= "_" . 
Inflector::tableize($relatedModel); + } + + // To avoid some overhead, we also cache tables on a per server basis. + if(!empty($this->tableCache[$cxnLabel][$sourceTableName])) { + return $this->tableCache[$cxnLabel][$sourceTableName]; + } + + $SqlServer = TableRegistry::getTableLocator()->get('CoreServer.SqlServers'); + + $SqlServer->connect($SqlSource->server_id, $cxnLabel); + + $options = [ + 'table' => $sourceTableName, + 'alias' => $sourceAlias, + 'connection' => ConnectionManager::get($cxnLabel) + ]; + + $SourceTable = TableUtilities::getTableFromRegistry( + alias: $sourceAlias, + options: $options + ); + + $this->tableCache[$cxnLabel][$sourceTableName] = $SourceTable; + + return($SourceTable); + } + + /** + * Obtain the full set of records from the source database. + * + * @since COmanage Registry v5.0.0 + * @param ExternalIdentitySource $source External Identity Source + * @return array An array of source keys + */ + + public function inventory( + \App\Model\Entity\ExternalIdentitySource $source + ): array { + return $this->getInventory($source); + } + + /** + * Map a Type ID back to its label. + * + * @since COmanage Registry v5.0.0 + * @param int $typeId Type ID + * @return string Type label + */ + + protected function mapTypeToLabel(int $typeId): string { + // We maintain a local cache since we'll probably look these up a lot. + + if(empty($this->typeCache[$typeId])) { + $Types = TableRegistry::getTableLocator()->get('Types'); + + $this->typeCache[$typeId] = $Types->getTypeLabel($typeId); + } + + return $this->typeCache[$typeId]; + } + + /** + * Perform checks before a Sync Job proceeds. 
+   *
+   * When threshold_check is configured, the percentage of changed records is
+   * compared against it. Reaching the threshold aborts the sync, unless
+   * threshold_override is set, in which case the override is cleared and saved
+   * (it applies to one run only) and the sync is allowed to proceed.
+   *
+   * The check is skipped (with a log entry) when the change count cannot be
+   * calculated or when the source table holds no records.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  ExternalIdentitySource $source    External Identity Source
+   * @param  int                    $lastStart Timestamp of last run
+   * @param  int                    $curStart  Timestamp of current run
+   * @throws RuntimeException                  If the threshold is reached and no override is set
+   */
+
+  public function preRunChecks(
+    \App\Model\Entity\ExternalIdentitySource $source,
+    int $lastStart,
+    int $curStart
+  ) {
+    $cfg = $source->sql_source;
+
+    // If a threshold is set, check to make sure less than that many records changed
+    // (by percent). With no (positive) threshold there is nothing to check.
+
+    if(empty($cfg->threshold_check) || $cfg->threshold_check <= 0) {
+      return;
+    }
+
+    // threshold_check is not supported if neither modified timestamps nor
+    // archive tables are available, since we can't efficiently calculate diffs.
+
+    // In flat mode, any number of changes to a single row will count as "1"
+    // change out of the total count (number of rows in table).
+    // In relational mode, each change in each related table will count as "1"
+    // change (so name + email address = 2 changes), but total count is still the
+    // same (so it's possible for more than 100% of records to change).
+
+    $changeCount = $this->getChanges(
+      source: $source,
+      lastStart: $lastStart,
+      curStart: $curStart,
+      count: true
+    );
+
+    if($changeCount === false) {
+      // Could not calculate the diff count, probably because neither modified
+      // timestamps nor archive tables are in use.
+      $this->llog('error', "Could not calculate change set, ignoring threshold_check for " . $source->description);
+      return;
+    }
+
+    $totalCount = $this->getInventory(source: $source, count: true);
+
+    if($totalCount < 1) {
+      // An empty source table would make the percentage calculation divide by
+      // zero (a DivisionByZeroError under PHP 8), so there is nothing to compare.
+      $this->llog('trace', "No records found, ignoring threshold_check for " . $source->description);
+      return;
+    }
+
+    $percent = (int)round(($changeCount * 100) / $totalCount);
+
+    $this->llog('trace', "$percent% of records changed ($changeCount of $totalCount) for " . $source->description);
+
+    if($percent < $cfg->threshold_check) {
+      // Below the threshold, the sync may proceed
+      return;
+    }
+
+    if(!empty($cfg->threshold_override)) {
+      // Override is set, clear the flag and allow this run to proceed
+      $this->llog('trace', "$percent% of records changed, threshold is " . $cfg->threshold_check . "%, but override is set, so continuing with sync for " . $source->description);
+
+      $cfg->threshold_override = false;
+      $this->save($cfg);
+      return;
+    }
+
+    // Threshold met, abort
+    $this->llog('error', "$percent% of records changed, threshold is " . $cfg->threshold_check . "%, aborting sync for " . $source->description);
+    throw new \RuntimeException(__d('sql_connector', 'error.SqlSources.threshold', [$percent, $cfg->threshold_check]));
+  }
+
+  /**
+   * Perform tasks following a Sync Job.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  ExternalIdentitySource $source External Identity Source
+   */
+
+  public function postRunTasks(
+    \App\Model\Entity\ExternalIdentitySource $source
+  ) {
+    // XXX If archive tables are in use, update them here
+  }
+
+  /**
+   * Convert one or more records from the SqlSource data to a record suitable for
+   * construction of an Entity. This call is for use with Flat Mode.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  SqlSource $SqlSource SqlSource configuration entity
+   * @param  array     $results   Array of SqlSource records (in entity format)
+   * @return array                Entity record (in array format)
+   */
+
+  protected function resultsToEntityData(
+    \SqlConnector\Model\Entity\SqlSource $SqlSource,
+    array $results
+  ): array {
+    // Because the EIS Pipeline code expects a type label instead of a type ID
+    // we have to map back to the type label even though we have type IDs from
+    // the plugin configuration. (This applies to flat mode only.)
+
+    // Build the External Identity as an array
+    $eidata = [];
+
+    // There is some inherent ambiguity in supporting multiple roles via flat mode,
+    // especially around MVEAs and single valued attributes (ie: date_of_birth).
+    // We make very specific design decisions here that are most consistent with the
+    // data model; use cases not met here should use Relational mode.
+
+    $eidata['date_of_birth'] = null;
+
+    foreach($results as $result) {
+      // Start with the role key and other role specific attributes
+      // that require no special handling
+      $eirdata = [
+        // We use the row ID as the role_key, even if there is only one role
+        'role_key'            => $result->id,
+        'affiliation'         => $result->affiliation,
+        'department'          => $result->department,
+        'manager_identifier'  => $result->manager_identifier,
+        'organization'        => $result->organization,
+        'sponsor_identifier'  => $result->sponsor_identifier,
+        'title'               => $result->title,
+        'valid_from'          => $result->valid_from,
+        'valid_through'       => $result->valid_through
+      ];
+
+      if(!empty($result->date_of_birth) && empty($eidata['date_of_birth'])) {
+        // We take the first DoB we see. Multiple rows should have the same
+        // DoB, if they don't that's a problem in the data that needs to be fixed.
+
+        // We have to convert the DateTime back to a string
+        $eidata['date_of_birth'] = $result->date_of_birth->format('Y-m-d');
+      }
+
+      // MVEAs that have a foreign key to EIR get attached to the EIR
+
+      if(!empty($result->address)) {
+        $eirdata['addresses'][] = [
+          'street' => $result->address,
+          'type'   => $this->mapTypeToLabel($SqlSource->address_type_id)
+        ];
+      }
+
+      if(!empty($result->telephone_number)) {
+        $eirdata['telephone_numbers'][] = [
+          'number' => $result->telephone_number,
+          'type'   => $this->mapTypeToLabel($SqlSource->telephone_number_type_id)
+        ];
+      }
+
+      if(!empty($result->url)) {
+        $eirdata['urls'][] = [
+          'url'  => $result->url,
+          'type' => $this->mapTypeToLabel($SqlSource->url_type_id)
+        ];
+      }
+
+      // Any field beginning a_ is an AdHoc Attribute. They are collected on the
+      // role, so the role's list is the one that must start out empty.
+      // The identity level key is still set (it stays empty here) so the shape of
+      // the returned array does not change for consumers that read it.
+
+      $eidata['ad_hoc_attributes'] = [];
+      $eirdata['ad_hoc_attributes'] = [];
+
+      foreach($result->getVisible() as $field) {
+        if(strncmp($field, "a_", 2)==0) {
+          $eirdata['ad_hoc_attributes'][] = [
+            // Remove the a_ from the column name to construct the tag
+            'tag'   => substr($field, 2),
+            'value' => $result->$field
+          ];
+        }
+      }
+
+      // MVEAs that do not have a foreign key to EIR get attached to the EI,
+      // but we need to check for duplicates. The check for existing names
+      // is a bit more complicated than the simple check we can do
+      // for the other MVEAs, which have only one meaningful attribute.
+
+      $nameFound = false;
+
+      foreach($eidata['names'] ?? [] as $n) {
+        // NOTE(review): loose comparison treats NULL and '' as the same value;
+        // presumably intended for optional name parts, confirm before tightening.
+        if($n['honorific'] == $result->honorific
+           && $n['given'] == $result->given
+           && $n['middle'] == $result->middle
+           && $n['family'] == $result->family
+           && $n['suffix'] == $result->suffix) {
+          $nameFound = true;
+          break;
+        }
+      }
+
+      if(!$nameFound) {
+        $eidata['names'][] = [
+          'honorific' => $result->honorific,
+          'given'     => $result->given,
+          'middle'    => $result->middle,
+          'family'    => $result->family,
+          'suffix'    => $result->suffix,
+          'type'      => $this->mapTypeToLabel($SqlSource->name_type_id)
+        ];
+      }
+
+      // For the single attribute MVEAs we compare against the values already
+      // collected using a strict comparison. A loose comparison would treat
+      // numeric strings such as "0123" and "123" as the same identifier, and a
+      // path expression built from the value would break on characters like "]".
+
+      if(!empty($result->mail)
+         && !in_array($result->mail, array_column($eidata['email_addresses'] ?? [], 'mail'), true)) {
+        $eidata['email_addresses'][] = [
+          'mail' => $result->mail,
+          'type' => $this->mapTypeToLabel($SqlSource->email_address_type_id)
+        ];
+      }
+
+      if(!empty($result->identifier)
+         && !in_array($result->identifier, array_column($eidata['identifiers'] ?? [], 'identifier'), true)) {
+        $eidata['identifiers'][] = [
+          'identifier' => $result->identifier,
+          'type'       => $this->mapTypeToLabel($SqlSource->identifier_type_id)
+        ];
+      }
+
+      // Pronouns attach to the EI as well, so a value repeated on several role
+      // rows is only recorded once.
+      if(!empty($result->pronouns)
+         && !in_array($result->pronouns, array_column($eidata['pronouns'] ?? [], 'pronouns'), true)) {
+        $eidata['pronouns'][] = [
+          'pronouns' => $result->pronouns,
+          'type'     => $this->mapTypeToLabel($SqlSource->pronouns_type_id)
+        ];
+      }
+
+      $eidata['external_identity_roles'][] = $eirdata;
+    }
+
+    return $eidata;
+  }
+
+  /**
+   * Convert a record from the SqlSource data to a record suitable for
+   * construction of an Entity. This call is for use with Relational Mode.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  SqlSource $SqlSource SqlSource configuration entity
+   * @param  Entity    $result    SqlSource record
+   * @return array                Entity record (in array format)
+   */
+
+  protected function resultToEntityData(
+    \SqlConnector\Model\Entity\SqlSource $SqlSource,
+    \Cake\ORM\Entity $result
+  ): array {
+    // Build the External Identity as an array
+    $eidata = [];
+
+    $eidata['date_of_birth'] = null;
+
+    if(!empty($result->date_of_birth)) {
+      // We have to convert the DateTime back to a string
+      $eidata['date_of_birth'] = $result->date_of_birth->format('Y-m-d');
+    }
+
+    // Convert the entities back to arrays
+    foreach([
+      'addresses',
+      'email_addresses',
+      'external_identity_roles',
+      'identifiers',
+      'names',
+      'pronouns',
+      'telephone_numbers',
+      'urls'
+    ] as $m) {
+      if(empty($result->$m)) {
+        continue;
+      }
+
+      foreach($result->$m as $n) {
+        $a = $n->toArray();
+
+        // source_key as the foreign key just adds noise in the array
+        unset($a['source_key']);
+
+        // id is the de facto role key
+        unset($a['id']);
+
+        $eidata[$m][] = $a;
+      }
+    }
+
+    return $eidata;
+  }
+
+  /**
+   * Retrieve a record from the External Identity Source.
+   *
+   * In Flat Mode every row matching the source key is read (one row per role).
+   * In Relational Mode the primary row is read first and the rows from the
+   * related tables are then attached to it; related tables that cannot be
+   * queried are skipped with a log entry.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  ExternalIdentitySource $source     EIS Entity with instantiated plugin configuration
+   * @param  string                 $source_key Backend source key for requested record
+   * @return array                              Array of source_key, source_record, and entity_data
+   * @throws InvalidArgumentException           If no record exists for $source_key
+   */
+
+  public function retrieve(
+    \App\Model\Entity\ExternalIdentitySource $source,
+    string $source_key
+  ): array {
+    $ret = ['source_key' => $source_key];
+
+    // Establish a connection to the source database
+    $PrimaryTable = $this->getRecordTable($source->sql_source);
+
+    if($source->sql_source->table_mode == SqlSourceTableModeEnum::Flat) {
+      try {
+        $rows = $PrimaryTable->find()
+                             ->where(['source_key' => $source_key])
+                             // We support retrieving more than one row for
+                             // multi-role support in flat mode
+                             ->all()
+                             ->toArray();
+      }
+      catch(\Exception $e) {
+        throw new \InvalidArgumentException(__d('error', 'notfound', [$source_key]), 0, $e);
+      }
+
+      if(empty($rows)) {
+        // A query matching no rows does not throw, so without this check an
+        // unknown source key would come back as an empty record instead of
+        // being reported as not found.
+        throw new \InvalidArgumentException(__d('error', 'notfound', [$source_key]));
+      }
+
+      // The conversion is deliberately outside the try block, so a failure while
+      // building the entity data is not misreported as a missing record.
+      $ret['entity_data'] = $this->resultsToEntityData($source->sql_source, $rows);
+      $ret['source_record'] = json_encode($rows);
+
+      return $ret;
+    }
+
+    // Relational mode, pull data from the associated models. It's easier to just
+    // retrieve the related models ourselves than to try to force containable to do it.
+
+    // Be careful with $result (the entity we're building), $results (the MVEA set
+    // returned from a query), and $r (the iteration of the MVEA set).
+
+    // This will hold the initial entity, and then we'll attach related
+    // entities to it manually.
+    $result = null;
+
+    try {
+      $result = $PrimaryTable->find()
+                             ->where(['source_key' => $source_key])
+                             ->firstOrFail();
+    }
+    catch(\Exception $e) {
+      throw new \InvalidArgumentException(__d('error', 'notfound', [$source_key]), 0, $e);
+    }
+
+    // From here on out if a table doesn't exist we simply ignore it.
+
+    // We pull roles before the MVEAs because we'll manually process each MVEA record,
+    // and if it attaches to a role we need to put it in the right place.
+
+    try {
+      $RoleTable = $this->getRecordTable($source->sql_source, "Role");
+
+      $results = $RoleTable->find()
+                           ->where(['source_key' => $source_key])
+                           ->all();
+
+      $result->external_identity_roles = [];
+
+      foreach($results as $r) {
+        // Note we key the role on its ID to make it easier to work with
+        $r->role_key = $r->id;
+        $result->external_identity_roles[ $r->id ] = $r;
+      }
+    }
+    catch(\Exception $e) {
+      // Strictly speaking, we should fail here, since this table is documented
+      // as required
+      $this->llog('trace', "Could not find Roles table for " . $source->description);
+    }
+
+    // MVEAs that do not have a Role FK
+    foreach(['EmailAddress', 'Identifiers', 'Name', 'Pronouns'] as $model) {
+      $table = Inflector::tableize($model);
+
+      try {
+        $RelatedTable = $this->getRecordTable($source->sql_source, $model);
+
+        $results = $RelatedTable->find()
+                                ->where(['source_key' => $source_key])
+                                ->all();
+
+        $result->$table = [];
+
+        foreach($results as $r) {
+          $result->$table[] = $r;
+        }
+      }
+      catch(\Exception $e) {
+        $this->llog('trace', "Could not find $model table for " . $source->description . ", skipping");
+      }
+    }
+
+    // MVEAs that do have a Role FK
+    foreach(['Addresses', 'TelephoneNumbers', 'Url'] as $model) {
+      $table = Inflector::tableize($model);
+
+      try {
+        $RelatedTable = $this->getRecordTable($source->sql_source, $model);
+
+        $results = $RelatedTable->find()
+                                ->where(['source_key' => $source_key])
+                                ->all();
+
+        // These models can attach either to the External Identity or the
+        // External Identity Role, depending on role_id being set
+        $result->$table = [];
+
+        foreach($results as $r) {
+          if(empty($r->role_id)) {
+            $result->$table[] = $r;
+            continue;
+          }
+
+          if(!isset($result->external_identity_roles[$r->role_id])) {
+            // The record points at a role we did not load (or the roles table
+            // could not be read). Assigning a property on the missing role would
+            // raise an Error that the catch below does not handle, so skip it.
+            $this->llog('trace', "Ignoring $model record for unknown role " . $r->role_id . " for " . $source->description);
+            continue;
+          }
+
+          if(!isset($result->external_identity_roles[$r->role_id]->$table)) {
+            $result->external_identity_roles[$r->role_id]->$table = [];
+          }
+
+          $result->external_identity_roles[$r->role_id]->$table[] = $r;
+        }
+      }
+      catch(\Exception $e) {
+        $this->llog('trace', "Could not find $model table for " . $source->description . ", skipping");
+      }
+    }
+
+    // Now that we're done, create the structure the interface expects.
+    $ret['entity_data'] = $this->resultToEntityData($source->sql_source, $result);
+    $ret['source_record'] = json_encode($result);
+
+    return $ret;
+  }
+
+  /**
+   * Search the External Identity Source.
+   *
+   * The search string is matched exactly against source keys and identifiers,
+   * case insensitively against email addresses, and as a case insensitive
+   * substring against given and family names. An empty search string returns
+   * no results.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  ExternalIdentitySource $source      EIS Entity with instantiated plugin configuration
+   * @param  array                  $searchAttrs Array of search attributes and values, as configured by searchAttributes()
+   * @return array                               Array of matching records, keyed on source key
+   * @throws InvalidArgumentException
+   */
+
+  public function search(
+    \App\Model\Entity\ExternalIdentitySource $source,
+    array $searchAttrs
+  ): array {
+    $ret = [];
+
+    $q = (string)($searchAttrs['q'] ?? '');
+
+    if($q === '') {
+      // Without a search string the substring conditions would match every
+      // record in the source, so there is nothing sensible to return.
+      return $ret;
+    }
+
+    // The SQL side lowercases with LOWER(), so lowercase the search string in a
+    // multibyte aware way to keep non-ASCII names and addresses comparable.
+    $lcq = mb_strtolower($q);
+
+    if($source->sql_source->table_mode == SqlSourceTableModeEnum::Flat) {
+      // Flat Mode
+
+      // Establish a connection to the source database
+      $SourceTable = $this->getRecordTable($source->sql_source);
+
+      // We use OR to search each supported field, but we don't substring identifiers
+
+      $whereClause = [
+        'OR' => [
+          // source_key and identifiers require exact search
+          'source_key'          => $q,
+          'identifier'          => $q,
+          // email requires case insensitive exact search
+          'LOWER(mail)'         => $lcq,
+          // other fields allow substrings
+          'LOWER(given) LIKE'   => '%' . $lcq . '%',
+          'LOWER(family) LIKE'  => '%' . $lcq . '%'
+        ]
+      ];
+
+      $results = $SourceTable->find()
+                             ->where($whereClause)
+                             ->all();
+
+      // Because we allow multiple rows to describe multiple Roles for the same
+      // External Identity, handling search results gets a bit more complicated.
+      // We'll group the results by source_key, then process each source_key once,
+      // even if it appears multiple times, so that the "combined" record is displayed.
+
+      $groupedResults = $results->groupBy('source_key')->toArray();
+
+      $sourceKeys = array_keys($groupedResults);
+      sort($sourceKeys);
+
+      foreach($sourceKeys as $source_key) {
+        $ret[$source_key] = $this->resultsToEntityData($source->sql_source, $groupedResults[$source_key]);
+      }
+
+      return $ret;
+    }
+
+    // Relational searches are a bit more complicated, but basically we'll
+    // perform a search on each supported attribute and then OR the results
+    // together. We call retrieve() on each resulting record to ensure we
+    // have a consistent set of attributes.
+
+    // Each entry is the related model to search (null for the primary table)
+    // and the conditions to apply to it.
+    $searches = [
+      // Start with source_key, the only attribute in the primary table
+      [null,           ['source_key' => $q]],
+      // Identifiers are case sensitive
+      ['Identifier',   ['identifier' => $q]],
+      // Email addresses are case insensitive
+      ['EmailAddress', ['LOWER(mail)' => $lcq]],
+      // Names allow substrings
+      ['Names',        ['OR' => [
+        'LOWER(given) LIKE'  => '%' . $lcq . '%',
+        'LOWER(family) LIKE' => '%' . $lcq . '%'
+      ]]]
+    ];
+
+    // Collect the matching source keys first (in order of first appearance) so
+    // each record is only retrieved once, even if several attributes match.
+    $matchedKeys = [];
+
+    foreach($searches as [$model, $conditions]) {
+      try {
+        $results = $this->getRecordTable($source->sql_source, $model)
+                        ->find()
+                        ->where($conditions)
+                        ->all();
+
+        foreach($results as $result) {
+          $matchedKeys[ $result->source_key ] = true;
+        }
+      }
+      catch(\Exception $e) {
+        if($model === null) {
+          // The primary table is required
+          throw $e;
+        }
+
+        // As with retrieve(), related tables that can't be queried are ignored
+        $this->llog('trace', "Could not search $model table for " . $source->description . ", skipping");
+      }
+    }
+
+    foreach(array_keys($matchedKeys) as $source_key) {
+      // Array keys that look like integers are converted by PHP, so cast back
+      $data = $this->retrieve($source, (string)$source_key);
+
+      $ret[$source_key] = $data['entity_data'];
+    }
+
+    return $ret;
+  }
+
+  /**
+   * Obtain the set of searchable attributes for this backend.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @return array Array of searchable attributes and localized descriptions
+   */
+
+  public function searchableAttributes(): array {
+    // In v4 we accepted structured search attributes (name, email, etc), but
+    // with CSV v2 (the only currently supported format) it's not clear what
+    // the benefit of this is anymore, so for PE we switch to a simple search
+    // string.
+
+    return [
+      'q' => __d('field', 'search.placeholder')
+    ];
+  }
+
+  /**
+   * Validate that a type is set for Flat mode.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  string $value   Value to validate
+   * @param  array  $context Validation context, which must include the schema definition
+   * @return mixed           True if $value validates, or an error string otherwise
+   */
+
+  public function validateSqlSourceType($value, array $context) {
+    // When in Flat mode, Type IDs must be set for the various MVEAs.
+
+    if(empty($value)
+       && isset($context['data']['table_mode'])
+       && $context['data']['table_mode'] == SqlSourceTableModeEnum::Flat) {
+      return __d('sql_connector', 'error.SqlSources.type');
+    }
+
+    return true;
+  }
+
+  /**
+   * Set validation rules.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  Validator $validator Validator
+   * @return Validator            Validator
+   * @throws InvalidArgumentException
+   * @throws RecordNotFoundException
+   */
+
+  public function validationDefault(Validator $validator): Validator {
+    $schema = $this->getSchema();
+
+    $validator->add('external_identity_source_id', [
+      'content' => ['rule' => 'isInteger']
+    ]);
+    $validator->notEmptyString('external_identity_source_id');
+
+    $validator->add('server_id', [
+      'content' => ['rule' => 'isInteger']
+    ]);
+    $validator->notEmptyString('server_id');
+
+    $validator->add('table_mode', [
+      'content' => ['rule' => ['inList', SqlSourceTableModeEnum::getConstValues()]]
+    ]);
+    $validator->notEmptyString('table_mode');
+
+    $validator->add('source_table', [
+      'content' => [
+        'rule' => 'validateSqlIdentifier',
+        'provider' => 'table'
+      ]
+    ]);
+    $validator->notEmptyString('source_table');
+
+    // These all effectively become required when table_mode is flat
+    foreach([
+      'address_type_id',
+      'email_address_type_id',
+      'identifier_type_id',
+      'name_type_id',
+      'pronouns_type_id',
+      'telephone_number_type_id',
+      'url_type_id',
+    ] as $field) {
+      $validator->add($field, [
+        'content' => [
+          'rule' => ['validateSqlSourceType'],
+          'provider' => 'table'
+        ]
+      ]);
+    }
+
+    $validator->add('threshold_check', [
+      'content' => ['rule' => 'isInteger']
+    ]);
+    $validator->add('threshold_check', [
+      'range' => ['rule' => ['range', 0, 100]]
+    ]);
+    $validator->allowEmptyString('threshold_check');
+
+    return $validator;
+  }
+}
\ No newline at end of file
diff --git a/app/availableplugins/SqlConnector/src/config/plugin.json b/app/availableplugins/SqlConnector/src/config/plugin.json
index bbed1e07a..5d32cb333 100644
--- a/app/availableplugins/SqlConnector/src/config/plugin.json
+++ b/app/availableplugins/SqlConnector/src/config/plugin.json
@@ -2,6 +2,9 @@
   "types": {
     "provisioner": [
       "SqlProvisioners"
+    ],
+    "source": [
+      "SqlSources"
     ]
   },
   "schema": {
@@ -16,6 +19,35 @@
"indexes": { "sql_provisioners_i1": { "columns": [ "provisioning_target_id" ]} } + }, + "sql_sources": { + "columns": { + "id": {}, + "external_identity_source_id": {}, + "server_id": { "notnull": false }, + "table_mode": { "type": "string", "size": 2 }, + "source_table": { "type": "string", "size": 80 }, + "address_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "email_address_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "identifier_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "name_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "pronouns_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "telephone_number_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "url_type_id": { "type": "integer", "foreignkey": { "table": "types", "column": "id" } }, + "threshold_check": { "type": "integer" }, + "threshold_override": { "type": "boolean" } + }, + "indexes": { + "sql_sources_i1": { "columns": [ "external_identity_source_id" ]}, + "sql_sources_i2": { "columns": [ "server_id" ]}, + "sql_sources_i3": { "needed": false, "columns": [ "address_type_id" ]}, + "sql_sources_i4": { "needed": false, "columns": [ "email_address_type_id" ]}, + "sql_sources_i5": { "needed": false, "columns": [ "identifier_type_id" ]}, + "sql_sources_i6": { "needed": false, "columns": [ "name_type_id" ]}, + "sql_sources_i7": { "needed": false, "columns": [ "pronouns_type_id" ]}, + "sql_sources_i8": { "needed": false, "columns": [ "telephone_number_type_id" ]}, + "sql_sources_i9": { "needed": false, "columns": [ "url_type_id" ]} + } } } }, diff --git a/app/availableplugins/SqlConnector/templates/SqlSources/fields-nav.inc b/app/availableplugins/SqlConnector/templates/SqlSources/fields-nav.inc new file mode 100644 index 000000000..1ac382831 --- /dev/null +++ 
b/app/availableplugins/SqlConnector/templates/SqlSources/fields-nav.inc @@ -0,0 +1,31 @@ + 'plugin', + 'active' => 'plugin' + ]; \ No newline at end of file diff --git a/app/availableplugins/SqlConnector/templates/SqlSources/fields.inc b/app/availableplugins/SqlConnector/templates/SqlSources/fields.inc new file mode 100644 index 000000000..f0b65af0b --- /dev/null +++ b/app/availableplugins/SqlConnector/templates/SqlSources/fields.inc @@ -0,0 +1,106 @@ + + +element('form/listItem', [ + 'arguments' => [ + 'fieldName' => 'server_id' + ] + ]); + + print $this->element('form/listItem', [ + 'arguments' => [ + 'fieldName' => 'table_mode', + 'fieldOptions' => [ + 'onChange' => 'updateGadgets()' + ], + 'fieldType' => 'select' + ] + ]); + + foreach([ + 'source_table', + 'address_type_id', + 'email_address_type_id', + 'identifier_type_id', + 'name_type_id', + 'pronouns_type_id', + 'telephone_number_type_id', + 'url_type_id' + ] as $field) { + print $this->element('form/listItem', [ + 'arguments' => [ + 'fieldName' => $field, + 'fieldOptions' => [ + 'required' => false + ] + ] + ]); + } + + print $this->element('form/listItem', [ + 'arguments' => [ + 'fieldName' => 'threshold_check' + ] + ]); + + print $this->element('form/listItem', [ + 'arguments' => [ + 'fieldName' => 'threshold_override' + ] + ]); +} diff --git a/app/config/schema/schema.json b/app/config/schema/schema.json index b024a06d7..54d0239a2 100644 --- a/app/config/schema/schema.json +++ b/app/config/schema/schema.json @@ -702,7 +702,8 @@ "status": {}, "sor_label": { "type": "string", "size": 40 }, "pipeline_id": { "type": "integer", "foreignkey": { "table": "pipelines", "column": "id" }, "notnull": true }, - "hash_source_record": { "type": "boolean" } + "hash_source_record": { "type": "boolean" }, + "suppress_noop_logs": { "type": "boolean" } }, "indexes": { "external_identity_sources_i1": { "columns": [ "co_id" ] }, diff --git a/app/plugins/CoreJob/resources/locales/en_US/core_job.po 
b/app/plugins/CoreJob/resources/locales/en_US/core_job.po index 4839f6e49..4deabe791 100644 --- a/app/plugins/CoreJob/resources/locales/en_US/core_job.po +++ b/app/plugins/CoreJob/resources/locales/en_US/core_job.po @@ -37,6 +37,18 @@ msgstr "Model to provision" msgid "opt.provisioner.provisioning_target_id" msgstr "Provisioning Target ID" +msgid "opt.sync.co_id" +msgstr "CO ID (to process all sources)" + +msgid "opt.sync.external_identity_source_id" +msgstr "External Identity Source ID (to process a single source)" + +msgid "opt.sync.force" +msgstr "If true, force records to process even if no changes have been detected" + +msgid "opt.sync.source_keys" +msgstr "Source Keys to process, comma separated (requires external_identity_source_id)" + msgid "Assigner.cancel_summary" msgstr "Job canceled after reviewing {0} entities and assigning {1} Identifier(s)" @@ -66,3 +78,33 @@ msgstr "Reprovisioned" msgid "Provisioner.start_summary" msgstr "Reprovisioning {0} {1} entities for Provisioning Target {2}" + +msgid "Sync.error.disabled" +msgstr "This EIS is currently disabled" + +msgid "Sync.error.post_run_tasks" +msgstr "Post-Run Tasks failed: {0}" + +msgid "Sync.error.pre_run_checks" +msgstr "Pre-Run Checks failed: {0}" + +msgid "Sync.finish_summary" +msgstr "Sync Finished" + +msgid "Sync.finish_summary.count" +msgstr "Sync Finished ({0} new, {1} updated, {2} errors, {3} total)" + +msgid "Sync.record.new" +msgstr "New record loaded from source" + +msgid "Sync.record.unchanged" +msgstr "Source record is unchanged, no changes were processed" + +msgid "Sync.record.updated" +msgstr "Record updated based on updated source record" + +msgid "Sync.start_summary.eis" +msgstr "Processing EIS {0} in {1} mode" + +msgid "Sync.start_summary.keys" +msgstr "Processing EIS {0} with {1} requested record(s)" diff --git a/app/plugins/CoreJob/src/Lib/Jobs/SyncJob.php b/app/plugins/CoreJob/src/Lib/Jobs/SyncJob.php new file mode 100644 index 000000000..363200269 --- /dev/null +++ 
b/app/plugins/CoreJob/src/Lib/Jobs/SyncJob.php @@ -0,0 +1,570 @@ + [ + 'help' => __d('core_job', 'opt.sync.external_identity_source_id'), + 'type' => 'fk', + 'required' => false + ], + 'force' => [ + 'help' => __d('core_job', 'opt.sync.force'), + 'type' => 'bool', + 'required' => false + ], +// XXX addd reference_id + 'source_keys' => [ + 'help' => __d('core_job', 'opt.sync.source_keys'), + 'type' => 'string', + 'required' => false + ] + ]; + } + + /** + * Perform a full sync (Full or Update modes) of an External Identity Source. + * + * @since COmanage Registry v5.0.0 + */ + + protected function fullSync() { + // Flag the job as started, before beginning pre-run checks. + $this->runContext->JobsTable->start( + job: $this->runContext->job, + summary: __d('core_job', 'Sync.start_summary.eis', [$this->runContext->parameters['external_identity_source_id'], __d('enumeration', 'SyncModeEnum.'.$this->runContext->eis->status)]) + ); + + $this->llog('trace', "Beginning sync of EIS " . $this->runContext->eis->description + . " in mode " . $this->runContext->eis->status + . " (job " . $this->runContext->job->id . ")"); + + // Determine the last run time for this sync. In v4, we queried the CoJob table to + // determine the last run of a specific source. However, this is a bit tricky in PE + // given how parameters are used to configure a given Job, so we instead maintain + // our own metadata about when a job was last run (started) to pass to preRunChecks, + // getChangeList, etc. + + $lastStart = $this->lastStart($this->runContext->eis->id); + + // Note the start_time for last run calculations. + $curStart = time(); + + // Next see if there are pre-run checks. The plugin can interrupt the sync job, + // but preRunChecks() will handle that. + + if(!$this->preRunChecks(lastStart: $lastStart, curStart: $curStart)) { + // preRunChecks() will have finish()d the job, so we don't need to do anything here. + return; + } + + // Now perform the actual sync. 
Start by pulling the list of known source keys.
+    // We maintain this in memory as a simple hash since this _should_ fit within the
+    // memory requirements of our larger expected deployments, and it's significantly
+    // simpler to perform diff calculations this way. This might need to be refactored
+    // at some point...
+
+    $knownKeys = $this->runContext->EISTable->getKnownSourceKeys($this->runContext->eis->id);
+
+    // Flip the array since it's faster to check for a key than a value
+    $knownKeysHash = array_flip($knownKeys);
+
+    // We'll start by assuming the record count is the known source key count, but in full
+    // mode we'll override this to be the inventory count
+    $this->runContext->count = count($knownKeysHash);
+
+    $this->llog('trace', "EIS " . $this->runContext->eis->description . " has "
+                         . count($knownKeys) . " known source key(s) already synced");
+
+    // We first update any already sync'd records. This should also handle deletes.
+    // If the plugin supports changelists, this might also include new records.
+
+    if($this->runContext->eis->status == SyncModeEnum::Full
+       || $this->runContext->eis->status == SyncModeEnum::Update) {
+      // Try to get a changelist from the Plugin. If force is set, we can't use
+      // getChangeList since we need to process all records.
+
+      $changeList = false;
+
+      if($this->runContext->force) {
+        $this->llog('trace', "EIS " . $this->runContext->eis->description
+                             . " skipping changelist call due to force mode");
+      } else {
+        $changeList = $this->runContext->EISTable->getChangeList(
+          $this->runContext->eis->id,
+          $lastStart,
+          $curStart
+        );
+
+        // Remove any duplicate keys (a non-array result means changelists are unsupported)
+        $changeList = is_array($changeList) ? array_unique($changeList) : false;
+      }
+
+      if($changeList !== false) {
+        $this->llog('trace', "EIS " . $this->runContext->eis->description . " plugin returned "
+                             . count($changeList) . " updated record(s)");
+      } else {
+        // We couldn't get a changelist, so we iterate over all known entries.
+
+        $this->llog('trace', "EIS " .
$this->runContext->eis->description + . " plugin does not support changelist, iterating over all known source keys"); + + $changeList = $knownKeys; + } + + // Process updates. It's possible that getChangeList will return new records + // that we haven't seen yet, so we'll check $knownKeys and only process those + // records that we know about. (New records will be handled below.) + + foreach($changeList as $sourceKey) { + if(isset($knownKeysHash[$sourceKey])) { + $this->llog('trace', "EIS " . $this->runContext->eis->description + . " updating entry $sourceKey"); + + $this->syncRecord($sourceKey); + } else { + $this->llog('trace', "EIS " . $this->runContext->eis->description + . " skipping changelist entry $sourceKey on update since record is not already synced"); + } + } + } + + // Next, for Full mode only, look for records that haven't yet been synced. + // We do this by comparing the plugin's inventory with our cached inventory, + // and processing any records the plugin reported that we didn't know about. + + if($this->runContext->eis->status == SyncModeEnum::Full) { + $allKeys = $this->runContext->EISTable->inventory($this->runContext->eis->id); + + $this->runContext->count = count($allKeys); + + $newKeys = array_diff($allKeys, $knownKeys); + + foreach($newKeys as $sourceKey) { + $this->llog('trace', "EIS " . $this->runContext->eis->description + . " processing new entry $sourceKey"); + + $this->syncRecord($sourceKey); + } + } + + // Last run time is updated by postRunTasks, which won't update the time on failure. + // Because processing has basically finished at this point, plugins can't abort + // processing via postRunTasks. 
+ + $this->postRunTasks(curStart: $curStart); + + // Report the Job as finished + + $this->runContext->JobsTable->finish( + job: $this->runContext->job, + summary: __d( + 'core_job', + 'Sync.finish_summary.count', + [$this->runContext->created, $this->runContext->updated, $this->runContext->errors, $this->runContext->count] + ) + ); + } + + /** + * Cache the current run context. + * + * @since COmanage Registry v5.0.0 + * @param JobsTable $JobsTable JobsTable + * @param JobHistoryRecordsTable $JobHistoryRecordsTable JobHistoryRecordsTable + * @param Job $job Current Job + * @param array $parameters Job Parameters (from the command line) + * @param int $eisId External Identity Source ID + */ + + protected function getRunContext( + \App\Model\Table\JobsTable $JobsTable, + \App\Model\Table\JobHistoryRecordsTable $JobHistoryRecordsTable, + \App\Model\Entity\Job $job, + array $parameters, + int $eisId=null + ): \StdClass { + // We use $runContext so we don't have to pass a complicated set of parameters around. + + // It's not yet clear if we'll need to support updating the run context + // for multiple EIS (ie: full sync for all EIS in a CO), so for now we + // simply return the runContext if it's already set. + + if(!empty($this->runContext)) { + return $this->runContext; + } + + $this->runContext = new \StdClass(); + + // Table handles + $this->runContext->JobsTable = $JobsTable; + $this->runContext->JobHistoryRecordsTable = $JobHistoryRecordsTable; + $this->runContext->EISTable = TableRegistry::getTableLocator()->get('ExternalIdentitySources'); + + // Stuff below here might need to be reset when we have a multi-EIS job running + // (ie: run all sync jobs for a CO) + // The Job + $this->runContext->job = $job; + $this->runContext->parameters = $parameters; + $this->runContext->force = isset($parameters['force']) && $parameters['force']; + + // The EIS + if($eisId) { + // We're working with a specific EIS, pull it here but don't check status. 
+
+      $this->runContext->eis = $this->runContext->EISTable->get(
+        $eisId,
+        ['contain' => 'SqlSources'] // NOTE(review): hard-codes the SqlConnector relation -- confirm intent
+      );
+    }
+
+    // Record counts
+    $this->runContext->count = 0;        // Number of records to process
+    $this->runContext->created = 0;      // Number of records created
+    $this->runContext->updated = 0;      // Number of updated records
+    $this->runContext->unchanged = 0;    // Number of unchanged records
+    $this->runContext->errors = 0;       // Number of errors encountered
+    $this->runContext->lastPct = 0;      // Last % update
+
+    return $this->runContext;
+  }
+
+  /**
+   * Determine the last start time of an EIS (Full) Sync.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  int $eisId ExternalIdentitySource ID
+   * @return int        Last start time, or 0 if not yet run
+   */
+
+  protected function lastStart(
+    int $eisId
+  ): int {
+    // There's no formal model, but autovivification should suffice
+    $LastRunTable = TableRegistry::getTableLocator()->get('CoreJob.SyncJobLastRuns');
+
+    $lastRun = $LastRunTable->find()
+                            ->where(['external_identity_source_id' => $eisId])
+                            ->first();
+
+    return !empty($lastRun->start_time) ? (int)$lastRun->start_time->toUnixString() : 0;
+  }
+
+  /**
+   * Perform Pre Run Checks.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  int $lastStart Time of the most recent run of this Job
+   * @param  int $curStart  Time of the start of the current run of this Job
+   * @return bool           true if the Job should continue, false otherwise
+   */
+
+  protected function preRunChecks(
+    int $lastStart,
+    int $curStart
+  ): bool {
+    $Plugin = TableRegistry::getTableLocator()->get($this->runContext->eis->plugin);
+
+    if(method_exists($Plugin, 'preRunChecks')) {
+      $this->llog('trace', "Running Pre-Run Checks for EIS " . $this->runContext->eis->id
+                           . " (job " . $this->runContext->job->id . ")");
+
+      try {
+        $Plugin->preRunChecks(
+          source: $this->runContext->eis,
+          lastStart: $lastStart,
+          curStart: $curStart
+        );
+      }
+      catch(\Exception $e) {
+        // Checks failed: finish the job and return false
+
+        $this->runContext->JobsTable->finish(
+          job: $this->runContext->job,
+          summary: __d('core_job', 'Sync.error.pre_run_checks', $e->getMessage()),
+          result: JobStatusEnum::Failed
+        );
+
+        return false;
+      }
+    } else {
+      $this->llog('trace', "No Pre-RunChecks defined for EIS " . $this->runContext->eis->description
+                           . " (job " . $this->runContext->job->id . ")");
+    }
+
+    return true;
+  }
+
+  /**
+   * Perform Post Run Tasks
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  int $curStart Time of the start of the current run of this Job
+   */
+
+  protected function postRunTasks(
+    int $curStart
+  ) {
+    // Update sync_job_last_runs
+
+    $LastRunTable = TableRegistry::getTableLocator()->get('CoreJob.SyncJobLastRuns');
+
+    $lastRun = $LastRunTable->find()
+                            ->where(['external_identity_source_id' => $this->runContext->eis->id])
+                            ->first();
+
+    if($lastRun) {
+      // Update the existing row (NOTE(review): job_id is not refreshed here -- confirm that is intended)
+      $lastRun->start_time = $curStart;
+    } else {
+      $lastRun = $LastRunTable->newEntity([
+        'external_identity_source_id' => $this->runContext->eis->id,
+        'job_id' => $this->runContext->job->id,
+        'start_time' => $curStart
+      ]);
+    }
+
+    $LastRunTable->save($lastRun);
+
+    // Call the plugin if it has anything it wants to do
+
+    $Plugin = TableRegistry::getTableLocator()->get($this->runContext->eis->plugin);
+
+    if(method_exists($Plugin, 'postRunTasks')) {
+      $this->llog('trace', "Running Post-Run Tasks for EIS " . $this->runContext->eis->description
+                           . " (job " . $this->runContext->job->id . ")");
+
+      try {
+        $Plugin->postRunTasks(
+          source: $this->runContext->eis
+        );
+      }
+      catch(\Exception $e) {
+        // Tasks failed: record an error but keep going
+
+        $this->runContext->JobHistoryRecordsTable->record(
+          jobId: $this->runContext->job->id,
+          recordKey: null,
+          comment: __d('core_job', 'Sync.error.post_run_tasks', $e->getMessage()),
+          status: JobStatusEnum::Failed
+        );
+      }
+    } else {
+      $this->llog('trace', "No Post-Run Tasks defined for EIS " . $this->runContext->eis->id
+                           . " (job " . $this->runContext->job->id . ")");
+    }
+  }
+
+  /**
+   * Run the requested Job.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  JobsTable $JobsTable                           Jobs table, for updating the Job status
+   * @param  JobHistoryRecordsTable $JobHistoryRecordsTable Job History Records table, for recording additional history
+   * @param  Job $job                                       Job entity
+   * @param  array $parameters                              Parameters for this Job
+   */
+
+  public function run(
+    \App\Model\Table\JobsTable $JobsTable,
+    \App\Model\Table\JobHistoryRecordsTable $JobHistoryRecordsTable,
+    \App\Model\Entity\Job $job,
+    array $parameters
+  ) {
+    if(!empty($parameters['external_identity_source_id'])) {
+      $this->getRunContext(
+        $JobsTable,
+        $JobHistoryRecordsTable,
+        $job,
+        $parameters,
+        (int)$parameters['external_identity_source_id']
+      );
+
+      if($this->runContext->eis->status == SyncModeEnum::Disabled) {
+        throw new \InvalidArgumentException(__d('core_job', 'Sync.error.disabled'));
+      }
+
+      if(!empty($parameters['source_keys'])) {
+        // We're processing a comma separated list of source keys
+
+        $keys = explode(',', $parameters['source_keys']);
+
+        $this->runContext->count = count($keys);
+
+        $JobsTable->start(
+          job: $job,
+          summary: __d('core_job', 'Sync.start_summary.keys', [$parameters['external_identity_source_id'], $this->runContext->count])
+        );
+
+        foreach($keys as $key) {
+          if(!$this->syncRecord($key)) {
+            break;
+          }
+        }
+
+        $JobsTable->finish(
+          job: $job,
+          summary: __d(
+            'core_job',
+            'Sync.finish_summary.count',
+
[$this->runContext->created, $this->runContext->updated, $this->runContext->errors, $this->runContext->count]
+          )
+        );
+      } else {
+        // We're processing all records within this EIS.
+
+        $this->fullSync();
+      }
+    } elseif(!empty($parameters['co_id'])) {
+      // We're processing all EIS within this CO
+
+      // In v4, we create a new Job record for each EIS - can we do the same thing here?
+      // Or maybe create a sub-job and record that in the parent Job?
+      // XXX pull only EIS in "Full" or "Update" mode
+      // Note eis is stored in runContext, may need to update that on each run,
+      // or just ignore it entirely
+      // - should be able to reset context then call fullSync() for each EIS
+      // XXX check for cancellations between EIS
+      throw new \RuntimeException('NOT IMPLEMENTED');
+
+      $JobsTable->finish(job: $job, summary: __d('core_job', 'Sync.finish_summary'));
+    }
+  }
+
+  /**
+   * Sync a single record.
+   *
+   * @since  COmanage Registry v5.0.0
+   * @param  string $key Source Key to process
+   * @return bool        True if processing should continue, false otherwise
+   */
+
+  protected function syncRecord(string $key): bool {
+    // comment, status, and sync result (left null if sync() throws) for HistoryRecords
+    $c = "unknown";
+    $s = JobStatusEnum::Failed;
+    $result = null;
+    try {
+      $result = $this->runContext->EISTable->sync(
+        id: (int)$this->runContext->parameters['external_identity_source_id'],
+        sourceKey: $key,
+        force: $this->runContext->force
+      );
+
+      switch($result) {
+        case 'new':
+          $this->runContext->created++;
+          $s = JobStatusEnum::Complete;
+          $c = __d('core_job', 'Sync.record.new');
+          break;
+        case 'unchanged':
+          $this->runContext->unchanged++;
+          $s = JobStatusEnum::Complete;
+          $c = __d('core_job', 'Sync.record.unchanged');
+          break;
+        case 'updated':
+          $this->runContext->updated++;
+          $s = JobStatusEnum::Complete;
+          $c = __d('core_job', 'Sync.record.updated');
+          break;
+        default:
+          $this->runContext->errors++;
+      }
+    }
+    catch(\Exception $e) {
+      $c = $e->getMessage();
+      $this->runContext->errors++;
+    }
+
+    // unchanged results are considered "no-ops", which we might be configured to not log
+    if($result != 'unchanged'
+       || !isset($this->runContext->eis->suppress_noop_logs)
+       || !$this->runContext->eis->suppress_noop_logs) {
+      $this->runContext->JobHistoryRecordsTable->record(
+        jobId: $this->runContext->job->id,
+        recordKey: $key,
+        comment: $c,
+        status: $s
+      );
+    }
+
+    // Check to see if the Job was canceled, or update the percent complete
+    if($this->runContext->JobsTable->isCanceled($this->runContext->job->id)) {
+      // The Job was already marked Canceled, but we can optionally add a History Record
+      $this->runContext->JobHistoryRecordsTable->record(
+        jobId: $this->runContext->job->id,
+        recordKey: "",
+        comment: __d(
+          'core_job',
+          'Sync.finish_summary.count',
+          [$this->runContext->created, $this->runContext->updated, $this->runContext->errors, $this->runContext->count]
+        ),
+        status: JobStatusEnum::Canceled
+      );
+
+      return false;
+    } else {
+      // Maybe update % complete
+
+      $processed = $this->runContext->created + $this->runContext->unchanged + $this->runContext->updated + $this->runContext->errors;
+      $newPct = ($this->runContext->count > 0
+                 ?
(int)round(($processed * 100) / $this->runContext->count) + : 0); + + if($newPct > $this->runContext->lastPct) { + $this->runContext->JobsTable->setPercentComplete(job: $this->runContext->job, percent: $newPct); + $this->runContext->lastPct = $newPct; + } + } + + return true; + } +} \ No newline at end of file diff --git a/app/plugins/CoreJob/src/config/plugin.json b/app/plugins/CoreJob/src/config/plugin.json index 662b33d90..c23f5b520 100644 --- a/app/plugins/CoreJob/src/config/plugin.json +++ b/app/plugins/CoreJob/src/config/plugin.json @@ -2,7 +2,27 @@ "types": { "job": [ "AssignerJob", - "ProvisionerJob" + "ProvisionerJob", + "SyncJob" ] + }, + "schema": { + "tables": { + "sync_job_last_runs": { + "comment": "This is a meta-table for SyncJob, and does not have a corresponding MVC", + "columns": { + "id": {}, + "external_identity_source_id": {}, + "job_id": { "type": "integer", "foreignkey": { "table": "jobs", "column": "id" } }, + "start_time": { "type": "datetime" } + }, + "indexes": { + "sync_job_last_runs_i1": { "columns": [ "external_identity_source_id" ] }, + "sync_job_last_runs_i2": { "needed": false, "columns": [ "job_id" ] } + }, + "changelog": false, + "timestamps": false + } + } } } \ No newline at end of file diff --git a/app/plugins/CoreServer/src/Model/Table/SqlServersTable.php b/app/plugins/CoreServer/src/Model/Table/SqlServersTable.php index 7d3c9aaa1..546717899 100644 --- a/app/plugins/CoreServer/src/Model/Table/SqlServersTable.php +++ b/app/plugins/CoreServer/src/Model/Table/SqlServersTable.php @@ -165,7 +165,8 @@ public function connect(int $serverId, string $name): bool { // when we try to connect. 
$dbconfig['className'] = 'CakeDC\OracleDriver\Database\OracleConnection'; - $dbconfig['driver'] = 'CakeDC\OracleDriver\Database\Driver\OracleOCI'; # For OCI8 + $dbconfig['driver'] = 'CakeDC\OracleDriver\Database\Driver\OracleOCI'; // For OCI8 + $dbconfig['quoteIdentifiers'] = true; // Use 'CakeDC\\OracleDriver\\Database\\Driver\\OraclePDO' for PDO_OCI, but CakeDC // recommends OCI8 diff --git a/app/resources/locales/en_US/error.po b/app/resources/locales/en_US/error.po index fa4c480b6..fbb338b83 100644 --- a/app/resources/locales/en_US/error.po +++ b/app/resources/locales/en_US/error.po @@ -196,6 +196,12 @@ msgstr "Invalid value \"{0}\"" msgid "Jobs.failed.abnormal" msgstr "The Job terminated unexpectedly" +msgid "Jobs.plugin.parameter.bool" +msgstr "Boolean values may only be 0 or 1" + +msgid "Jobs.plugin.parameter.fk" +msgstr "Record {0} is not in CO {1}" + msgid "Jobs.plugin.parameter.int" msgstr "Provided value is not an integer" diff --git a/app/resources/locales/en_US/field.po b/app/resources/locales/en_US/field.po index 5afb97386..83ed93afb 100644 --- a/app/resources/locales/en_US/field.po +++ b/app/resources/locales/en_US/field.po @@ -167,6 +167,9 @@ msgstr "Email Address" msgid "manager" msgstr "Manager" +msgid "manager_identifier" +msgstr "Manager Identifier" + msgid "middle" msgstr "Middle" @@ -243,6 +246,9 @@ msgstr "Source Record" msgid "sponsor" msgstr "Sponsor" +msgid "sponsor_identifier" +msgstr "Sponsor Identifier" + msgid "starts_at" msgstr "Starts at:" @@ -387,9 +393,18 @@ msgstr "Limit Global Search Scope" msgid "CoSettings.search_global_limited_models.desc" msgstr "If true, Global Search will only search Names, Email Addresses, and Identifiers. This may result in faster searches for larger deployments." +msgid "ExternalIdentitySources.hash_source_record" +msgstr "Hash Source Records" + msgid "ExternalIdentitySources.source_record.empty" msgstr "The source record is empty. This suggests the record is no longer available from the datasource." 
+msgid "ExternalIdentitySources.suppress_noop_logs" +msgstr "Suppress No-op Logs" + +msgid "ExternalIdentitySources.suppress_noop_logs.desc" +msgstr "Do not record Job History Records for records that were unchanged or not processed" + msgid "GroupMembers.source" msgstr "Membership Source" diff --git a/app/src/Command/JobCommand.php b/app/src/Command/JobCommand.php index 97e52b399..8eed31355 100644 --- a/app/src/Command/JobCommand.php +++ b/app/src/Command/JobCommand.php @@ -34,8 +34,10 @@ use Cake\Console\ConsoleIo; use Cake\Console\ConsoleOptionParser; use Cake\Datasource\ConnectionManager; +use Cake\Event\EventManager; use Cake\Utility\Security; use App\Lib\Enum\JobStatusEnum; +use App\Lib\Events\CoIdEventListener; class JobCommand extends Command { @@ -148,6 +150,12 @@ public function execute(Arguments $args, ConsoleIo $io) $maxjobs = 100; foreach($coIds as $coId) { + // We probably need to do something like this (from synchronous running, below) + // $CoIdEventListener = new CoIdEventListener((int)$args->getOption('co_id')); + // EventManager::instance()->on($CoIdEventListener); + // but this wouldn't remove the previous $coId, so for now Sync Jobs can't be run + // via the queue. See CFM-400. 
+ // We start counting from 1 rather than 0 to simplify console output for($i = 1;$i <= $parallel;$i++) { $io->out(__d('command', 'job.run.start', [$i, $parallel, $coId])); @@ -255,6 +263,9 @@ public function execute(Arguments $args, ConsoleIo $io) $params[ $p[0] ] = $p[1]; } + $CoIdEventListener = new CoIdEventListener((int)$args->getOption('co_id')); + EventManager::instance()->on($CoIdEventListener); + $job = $JobTable->register( coId: (int)$args->getOption('co_id'), plugin: $args->getOption('job'), diff --git a/app/src/Controller/AppController.php b/app/src/Controller/AppController.php index 8b7562454..01fd2ab98 100644 --- a/app/src/Controller/AppController.php +++ b/app/src/Controller/AppController.php @@ -564,12 +564,11 @@ protected function setCO() { $this->$modelsName->setCurCoId((int)$coid); } - /* This doesn't work for the current model since it has already been - initialized, but it could be an option for related models later... - (eg when we try to save a name via EIS or EF). But see also the new - approach below. - $CoIdEventListener = new CoIdEventListener($coid); - EventManager::instance()->on($CoIdEventListener);*/ + // This doesn't work for the current model since it has already been + // initialized, but it should work for related models later... + // (eg when we try to save a name via EIS or EF). But see also CFM-400. + $CoIdEventListener = new CoIdEventListener((int)$coid); + EventManager::instance()->on($CoIdEventListener); // Walk through the first level associations and pass the CO ID to them, // as well. 
We could ultimately cascade this via the table once we have diff --git a/app/src/Lib/Enum/SyncModeEnum.php b/app/src/Lib/Enum/SyncModeEnum.php index cd2ef5b4a..8ddca75a7 100644 --- a/app/src/Lib/Enum/SyncModeEnum.php +++ b/app/src/Lib/Enum/SyncModeEnum.php @@ -33,6 +33,7 @@ class SyncModeEnum extends StandardEnum { const Disabled = 'X'; const Full = 'F'; const Manual = 'M'; - const Query = 'Q'; +// Query is not yet supported (CFM-372) + // const Query = 'Q'; const Update = 'U'; } \ No newline at end of file diff --git a/app/src/Lib/Events/CoIdEventListener.php b/app/src/Lib/Events/CoIdEventListener.php new file mode 100644 index 000000000..aadb65c0f --- /dev/null +++ b/app/src/Lib/Events/CoIdEventListener.php @@ -0,0 +1,84 @@ +coId = $coId; + } + + /** + * Before save event listener. + * + * @since COmanage Registry v5.0.0 + * @param Event $event Cake Event + * @param EntityInterface $entity Entity subject of the event (ie: object to be saved) + * @param ArrayObject $options Save options + */ + + public function atInitialize(Event $event) { + $table = $event->getSubject(); + + if(method_exists($table, "acceptsCoId") + && $table->acceptsCoId()) { + $table->setCurCoId($this->coId); + } + } + + /** + * Define the list of implemented events. + * + * @since COmanage Registry v5.0.0 + * @return array Array of implemented events and associated configuration. 
+ */ + + public function implementedEvents(): array { + return [ + 'Model.initialize' => [ + 'callable' => 'atInitialize' + ] + ]; + } +} diff --git a/app/src/Model/Table/ExternalIdentitySourcesTable.php b/app/src/Model/Table/ExternalIdentitySourcesTable.php index e7519a967..345eed39a 100644 --- a/app/src/Model/Table/ExternalIdentitySourcesTable.php +++ b/app/src/Model/Table/ExternalIdentitySourcesTable.php @@ -48,6 +48,9 @@ class ExternalIdentitySourcesTable extends Table { use \App\Lib\Traits\PrimaryLinkTrait; use \App\Lib\Traits\TableMetaTrait; use \App\Lib\Traits\ValidationTrait; + + // Cache of the EIS configuration, keyed on id + protected $eisCache = null; /** * Perform Cake Model initialization. @@ -123,26 +126,113 @@ public function initialize(array $config): void { } /** - * Retrieve a record from an External Identity Source. + * Obtain the changelist from the backend, if supported. * * @since COmanage Registry v5.0.0 * @param int $id External Identity Source ID - * @param string $source_key EIS Backend Source Key - * @return array Array of source_key, source_record, and entity_data + * @param int $lastStart Timestamp of last run + * @param int $curStart Timestamp of current run + * @return array|bool Array of updated source keys, or false if not supported + */ + + public function getChangeList( + int $id, + int $lastStart, + int $curStart + ): array|bool { + $source = $this->getEIS($id); + + // We directly retrieve the table object here rather than use $this->$model + // because the latter is actually an instance of \Cake\ORM\Association\HasOne, + // so we can't tell if the plugin has implemented getChangeList that way. + $Plugin = TableRegistry::getTableLocator()->get($source->plugin); + + if(method_exists($Plugin, 'getChangeList')) { + return $Plugin->getChangeList($source, $lastStart, $curStart); + } + + return false; + } + + /** + * Get an EIS configuration, possibly via the cache. 
+ * + * @since COmanage Registry v5.0.0 + * @param int $id External Identity Source ID + * @return ExternalIdentitySource */ - public function retrieve(int $id, string $source_key): array { + protected function getEIS(int $id) { // We want to pull the plugin configuration along with the EIS, to make // the query simpler we contain all possible relations, which will // usually only be a small number. - $source = $this->get($id, ['contain' => $this->getPluginRelations()]); + if(empty($this->eisCache[$id])) { + $this->eisCache[$id] = $this->get($id, ['contain' => $this->getPluginRelations()]); + } + + return $this->eisCache[$id]; + } + + /** + * Obtain all known source keys for an EIS. + * + * @since COmanage Registry v5.0.0 + * @param int $id External Identity Source ID + * @return array Source keys + */ + + public function getKnownSourceKeys(int $id): array { + // For now we don't use an iterator (like PaginatedSqlIterator) because + // even for the larger deployments we expect to work with, the array of + // source keys _should_ fit in memory (for a reasonably sized VM/etc). + + $records = $this->ExtIdentitySourceRecords + ->find('list', [ + 'keyField' => 'source_key', + 'valueField' => 'external_identity_id' + ]) + ->where(['external_identity_source_id' => $id]) + ->toArray(); + + return array_keys($records); + } + + /** + * Obtain the inventory from the backend. + * + * @since COmanage Registry v5.0.0 + * @param int $id External Identity Source ID + * @return array Array of all source keys + */ + + public function inventory( + int $id, + ): array|bool { + $source = $this->getEIS($id); + + $pModel = StringUtilities::pluginModel($source->plugin); + + return $this->$pModel->inventory($source); + } + + /** + * Retrieve a record from an External Identity Source. 
+ * + * @since COmanage Registry v5.0.0 + * @param int $id External Identity Source ID + * @param string $sourceKey EIS Backend Source Key + * @return array Array of source_key, source_record, and entity_data + */ + + public function retrieve(int $id, string $sourceKey): array { + $source = $this->getEIS($id); $pModel = StringUtilities::pluginModel($source->plugin); - $record = $this->$pModel->retrieve($source, $source_key); + $record = $this->$pModel->retrieve($source, $sourceKey); // Inject the source key so every backend doesn't have to do this - $record['entity_data']['source_key'] = $source_key; + $record['entity_data']['source_key'] = $sourceKey; return $record; } @@ -157,10 +247,7 @@ public function retrieve(int $id, string $source_key): array { */ public function search(int $id, array $attrs): array { - // We want to pull the plugin configuration along with the EIS, to make - // the query simpler we contain all possible relations, which will - // usually only be a small number. - $source = $this->get($id, ['contain' => $this->getPluginRelations()]); + $source = $this->getEIS($id); $pModel = StringUtilities::pluginModel($source->plugin); @@ -185,23 +272,25 @@ public function searchableAttributes(int $id) { * * @since COmanage Registry v5.0.0 * @param int $id External Identity Source ID - * @param string $source_key EIS Backend Source Key + * @param string $sourceKey EIS Backend Source Key + * @param bool $force Whether to force the full Pipeline to run even if the backend record didn't change + * @return string Record status (new, unchanged, unknown, updated) */ - public function sync(int $id, string $source_key) { + public function sync(int $id, string $sourceKey, bool $force=true): string { // All work is actually handled by the Pipeline, but we need our configuration // to know which Pipeline. 
- $eis = $this->get($id); + $source = $this->getEIS($id); // Also get the current record from the Backend, which might have been deleted - $eisBackendRecord = $this->retrieve($id, $source_key); + $eisBackendRecord = $this->retrieve($id, $sourceKey); - $this->Pipelines->execute( - id: $eis->pipeline_id, + return $this->Pipelines->execute( + id: $source->pipeline_id, eisId: $id, eisBackendRecord: $eisBackendRecord, // Force the full Pipeline run even if the backend record didn't change - force: true + force: $force ); } @@ -221,7 +310,7 @@ public function validationDefault(Validator $validator): Validator { ]); $validator->notEmptyString('co_id'); - $this->registerStringValidation($validator, $schema, 'description', false); + $this->registerStringValidation($validator, $schema, 'description', true); $validator->add('status', [ 'content' => ['rule' => ['inList', SyncModeEnum::getConstValues()]] @@ -237,6 +326,16 @@ public function validationDefault(Validator $validator): Validator { ]); $validator->notEmptyString('pipeline_id'); + $validator->add('hash_source_record', [ + 'content' => ['rule' => ['boolean']] + ]); + $validator->allowEmptyString('hash_source_record'); + + $validator->add('suppress_noop_logs', [ + 'content' => ['rule' => ['boolean']] + ]); + $validator->allowEmptyString('suppress_noop_logs'); + return $validator; } } \ No newline at end of file diff --git a/app/src/Model/Table/JobsTable.php b/app/src/Model/Table/JobsTable.php index f6be504fe..f12721016 100644 --- a/app/src/Model/Table/JobsTable.php +++ b/app/src/Model/Table/JobsTable.php @@ -481,6 +481,8 @@ public function register( } $cxn->rollback(); + + $this->llog(level: 'error', msg: rtrim($err, ",")); throw new \InvalidArgumentException(rtrim($err, ",")); } @@ -618,8 +620,31 @@ protected function validateJobParameters(string $plugin, int $coId, array $param switch($pluginParameters[$p]['type']) { case 'bool': case 'boolean': - throw new \RuntimeException('not implemented'); -// XXX implement + // 
Because we want code that uses these parameters to be able to do + // something like if($params['p']), we only accept values that PHP + // will correctly parse in that context. For simplicity, we allow only + // 0 and 1. + if($val != 0 && $val != 1) { + $ret[$p] = __d('error', 'Jobs.plugin.parameter.bool'); + } + break; + case 'fk': + // The provided parameter must be in $coId. We don't actually need + // to verify the format since the value either exists in the database + // or it doesn't. + $className = StringUtilities::foreignKeyToClassName($p); + $Table = TableRegistry::getTableLocator()->get($className); + + $vals = explode(',', $val); + + foreach($vals as $v) { + $entity = $Table->get($v); + + if($Table->calculateCoForRecord($entity) != $coId) { + $ret[$p] = __d('error', 'Jobs.plugin.parameter.fk', [$v, $coId]); + break; + } + } break; case 'int': case 'integer': diff --git a/app/src/Model/Table/PipelinesTable.php b/app/src/Model/Table/PipelinesTable.php index 09f1fa28d..084f91ff8 100644 --- a/app/src/Model/Table/PipelinesTable.php +++ b/app/src/Model/Table/PipelinesTable.php @@ -140,8 +140,8 @@ public function initialize(array $config): void { ], 'syncIdentifierTypes' => [ 'type' => 'select', -// XXX We need to filter this to just Person Identifiers - 'model' => 'Types' + 'model' => 'Types', + 'where' => ['attribute' => 'Identifiers.type'] ], 'syncReplaceCous' => [ 'type' => 'select', @@ -441,6 +441,7 @@ protected function duplicateFilterEntityData($entity): array { * @param int $eisId Exxternal Identity Source ID * @param array $eisBackendRecord Record returned by EIS Backend * @param bool $force Force the Pipeline to run all steps, even if no changes were detected + * @return string Record status (new, unchanged, unknown, updated) */ public function execute( @@ -448,7 +449,7 @@ public function execute( int $eisId, array $eisBackendRecord, bool $force=false - ) { + ): string { // Start with our configuration(s) $pipeline = $this->get($id); $eis =
$this->ExternalIdentitySources->get($eisId); @@ -473,7 +474,7 @@ public function execute( $this->llog('trace', "Record for EIS $eisId source key " . $eisBackendRecord['source_key'] . " is unchanged, stopping Pipeline"); $cxn->commit(); - return; + return $eisRecord['status']; } // (2) Match against an existing Person or create a new Person, in @@ -567,6 +568,8 @@ public function execute( $this->llog('trace', "Pipeline $id complete for EIS $eisId source key " . $eisBackendRecord['source_key']); $cxn->commit(); + + return $eisRecord['status']; } catch(\Exception $e) { $cxn->rollback(); @@ -597,6 +600,9 @@ protected function manageEISRecord( ): array { $status = 'unknown'; + // Are we supposed to use record hashes instead? + $useHash = isset($eis->hash_source_record) && $eis->hash_source_record; + // Do we already have an EISRecord for this source_key? $eisRecord = $this->ExternalIdentitySources->ExtIdentitySourceRecords ->find() @@ -613,17 +619,20 @@ protected function manageEISRecord( // EIS record as changed, even if it's not material to the attributes // that construct the External Identity. -// XXX update this to test hashed value, once implemented - if((empty($eisRecord->source_record) && !empty($sourceRecord)) - || (!empty($eisRecord->source_record) && empty($sourceRecord)) - || (!empty($eisRecord->source_record) && !empty($sourceRecord) - && $eisRecord->source_record != $sourceRecord)) { + if((empty($eisRecord->source_record) && !empty($sourceRecord)) // New record + || (!empty($eisRecord->source_record) && empty($sourceRecord)) // Deleted record + || (!empty($eisRecord->source_record) && !empty($sourceRecord) // Updated record? + // Note when $useHash we don't md5 the $eisRecord because it was + // stored as an md5 hash. (This does mean the first time we sync + // a record after hash_source_record is enabled we'll reprocess it + // even if nothing changed.) 
+ && (($useHash && ($eisRecord->source_record != md5($sourceRecord))) + || (!$useHash && ($eisRecord->source_record != $sourceRecord))))) { // We have an update of some form or another, including, possibly, a delete $this->llog('trace', "Updating Record for EIS " . $eis->description . " (" . $eis->id . ") source key $sourceKey"); - // XXX support hashing here - $eisRecord->source_record = $sourceRecord; + $eisRecord->source_record = $useHash ? md5($sourceRecord) : $sourceRecord; $eisRecord->last_update = date('Y-m-d H:i:s', time()); $status = 'updated'; @@ -640,8 +649,7 @@ protected function manageEISRecord( ->newEntity([ 'external_identity_source_id' => $eis->id, 'source_key' => $sourceKey, -// XXX support hashing here - 'source_record' => $sourceRecord, + 'source_record' => $useHash ? md5($sourceRecord) : $sourceRecord, 'last_update' => date('Y-m-d H:i:s', time()) ]); diff --git a/app/templates/ExternalIdentitySources/fields.inc b/app/templates/ExternalIdentitySources/fields.inc index 12d631fd0..1f35ac8c4 100644 --- a/app/templates/ExternalIdentitySources/fields.inc +++ b/app/templates/ExternalIdentitySources/fields.inc @@ -33,7 +33,10 @@ if($vv_action == 'add' || $vv_action == 'edit') { 'status', 'plugin', 'pipeline_id', - 'sor_label', +// Not yet implemented +// 'sor_label', + 'hash_source_record', + 'suppress_noop_logs' ] as $field) { $params = [ 'arguments' => [ diff --git a/app/templates/ExternalIdentitySources/search.php b/app/templates/ExternalIdentitySources/search.php index 65c166613..2f61e4e23 100644 --- a/app/templates/ExternalIdentitySources/search.php +++ b/app/templates/ExternalIdentitySources/search.php @@ -118,7 +118,7 @@ - + diff --git a/app/templates/element/form/nameDiv.php b/app/templates/element/form/nameDiv.php index 06ab77505..dfd74c93c 100644 --- a/app/templates/element/form/nameDiv.php +++ b/app/templates/element/form/nameDiv.php @@ -58,10 +58,14 @@ [$label, $desc] = $this->Field->calculateLabelAndDescription($fn); $label = 
$vv_field_arguments['fieldLabel'] ?? $label; -// Override the default required behavior if the field has the required -// option set -$optionsRequired = isset($vv_field_arguments['fieldOptions']['required']) - && $vv_field_arguments['fieldOptions']['required']; +// We determine if a field is required by first getting the "expected" value +// from FieldHelper, then overriding that value if an argument was passed in. +$isRequired = $this->Field->isReqField($fn); + +if(isset($vv_field_arguments['fieldOptions']['required'])) { + // Use this value (which could be either false or true) + $isRequired = $vv_field_arguments['fieldOptions']['required']; +} // Extra class required for the grouped controls elements if(isset($groupedControls)) { @@ -88,10 +92,7 @@ /* * Required Span */ - if($this->Field->isEditable() - && - ($this->Field->isReqField($fn) || $optionsRequired) - ) { + if($this->Field->isEditable() && $isRequired) { print $this->element('form/requiredSpan', [], [ 'cache' => '_html_elements', ]);