From 54fba95864601a7b3d8c39ec409fd4b2e1bd263a Mon Sep 17 00:00:00 2001 From: Ioannis Igoumenos Date: Thu, 19 Feb 2026 11:40:57 +0000 Subject: [PATCH] Improve transmogrification process of selected tables --- .../Transmogrify/config/schema/tables.json | 156 ++++++++++++------ .../src/Command/TransmogrifyCommand.php | 96 ++++++++--- .../src/Lib/Traits/CacheTrait.php | 15 +- .../src/Lib/Traits/RowTransformationTrait.php | 17 ++ 4 files changed, 203 insertions(+), 81 deletions(-) diff --git a/app/plugins/Transmogrify/config/schema/tables.json b/app/plugins/Transmogrify/config/schema/tables.json index f0836d800..634189241 100644 --- a/app/plugins/Transmogrify/config/schema/tables.json +++ b/app/plugins/Transmogrify/config/schema/tables.json @@ -51,7 +51,8 @@ "created": "&mapNow", "modified": "&mapNow" }, - "cache": [["co_id", "attribute", "value"]] + "cache": [["co_id", "attribute", "value"]], + "dependencies": ["cos"] }, "co_settings": { "source": "cm_co_settings", @@ -86,7 +87,8 @@ "group_create_admin_only": null, "t_and_c_return_url_allowlist": null, "population_hide": null - } + }, + "dependencies": ["cos"] }, "api_users": { "source": "cm_api_users", @@ -95,12 +97,14 @@ "cache": ["co_id"], "fieldMap": { "password": "api_key" - } + }, + "dependencies": ["cos"] }, "cous": { "source": "cm_cous", "displayField": "name", - "sqlSelect": "couSqlSelect" + "sqlSelect": "couSqlSelect", + "dependencies": ["cos"] }, "servers": { "source": "cm_servers", @@ -111,7 +115,8 @@ "fieldMap": { "plugin": "&mapServerTypeToPlugin", "server_type": null - } + }, + "dependencies": ["cos"] }, "http_servers": { "source": "cm_http_servers", @@ -127,7 +132,8 @@ "serverurl": "url", "ssl_verify_peer": "skip_ssl_verification", "ssl_verify_host": null - } + }, + "dependencies": ["servers"] }, "oauth2_servers": { "source": "cm_oauth2_servers", @@ -139,7 +145,8 @@ "fieldMap": { "serverurl": "url", "proxy": null - } + }, + "dependencies": ["servers"] }, "sql_servers": { "source": "cm_sql_servers", @@ 
-149,7 +156,8 @@ "addChangelog": true, "fieldMap": { "dbport": "port" - } + }, + "dependencies": ["servers"] }, "match_servers": { "source": "cm_match_servers", @@ -166,7 +174,8 @@ "auth_type": "?BA", "ssl_verify_peer": "skip_ssl_verification", "ssl_verify_host": null - } + }, + "dependencies": ["servers"] }, "match_server_attributes": { "source": "cm_match_server_attributes", @@ -180,7 +189,8 @@ "fieldMap": { "type_id": "&mapMatchAttributeTypeId", "type": null - } + }, + "dependencies": ["match_servers", "types"] }, "message_templates": { "source": "cm_co_message_templates", @@ -191,7 +201,8 @@ "message_body": "body_text", "message_body_html": "body_html", "co_message_template_id": "message_template_id" - } + }, + "dependencies": ["cos"] }, "pipelines": { "source": "cm_co_pipelines", @@ -215,7 +226,8 @@ "sync_identifier_type": null, "establish_clusters": null, "co_enrollment_flow_id": null - } + }, + "dependencies": ["cos", "types"] }, "external_identity_sources": { "source": "cm_org_identity_sources", @@ -233,7 +245,8 @@ "sync_on_user_login": null, "eppn_identifier_type": null, "eppn_suffix": null - } + }, + "dependencies": ["cos", "pipelines"] }, "orcid_sources": { "source": "cm_orcid_sources", @@ -251,14 +264,16 @@ "name_type_id": "&mapToDefaultNameTypeId", "telephone_number_type_id": "&mapToDefaultTelephoneNumberTypeId" }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["external_identity_sources", "types"] }, "orcid_tokens": { "source": "cm_orcid_tokens", "displayField": "id", "plugin": "OrcidSource", "cache": ["orcid_identifier", "orcid_source_id"], - "addChangelog": true + "addChangelog": true, + "dependencies": ["orcid_sources"] }, "env_sources": { "source": "cm_env_sources", @@ -289,7 +304,8 @@ "env_identifier_network_login": null, "duplicate_mode": null, "default_affiliation": null - } + }, + "dependencies": ["external_identity_sources", "types"] }, "api_sources": { "source": "cm_api_sources", @@ -302,7 +318,8 @@ "poll_mode": null, 
"kafka_server_id": null }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["external_identity_sources"] }, "api_source_endpoints": { "source": "cm_api_sources", @@ -315,7 +332,8 @@ "poll_mode": null, "kafka_server_id": null }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["external_identity_sources"] }, "file_sources": { "source": "cm_file_sources", @@ -330,7 +348,8 @@ "format": "=C3", "threshold_warn": "threshold_check" }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["external_identity_sources"] }, "sql_sources": { "source": "cm_sql_sources", @@ -356,7 +375,8 @@ "telephone_number_type": null, "url_type": null }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["external_identity_sources", "types"] }, "authenticators": { "source": "cm_authenticators", @@ -366,13 +386,15 @@ "plugin": "&mapAuthenticatorPlugin", "co_message_template_id": "message_template_id" }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["cos", "message_templates"] }, "ssh_key_authenticators": { "source": "cm_ssh_key_authenticators", "displayField": "id", "cache": ["authenticator_id"], - "addChangelog": true + "addChangelog": true, + "dependencies": ["authenticators"] }, "password_authenticators": { "source": "cm_password_authenticators", @@ -387,7 +409,8 @@ "fieldMap": { "password_source": "source_mode" }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["authenticators"] }, "identifier_assignments": { "source": "cm_co_identifier_assignments", @@ -416,7 +439,8 @@ "identifier_type": null, "email_type": null }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["cos", "groups", "types"] }, "format_assigner_sequences": { "source": "cm_co_sequential_identifier_assignments", @@ -424,13 +448,15 @@ "fieldMap": { "co_identifier_assignment_id": "format_assigner_id" }, - "addChangelog": false + "addChangelog": false, + "dependencies": ["identifier_assignments"] }, "__NOTES__": 
"DATA MIGRATIONS", "authentication_events": { "source": "cm_authentication_events", "displayField": "authenticated_identifier", - "canSkip": "true" + "canSkip": "true", + "dependencies": [] }, "people": { "source": "cm_co_people", @@ -439,7 +465,8 @@ "fieldMap": { "co_person_id": "person_id", "status": "&mapPersonStatus" - } + }, + "dependencies": ["cos"] }, "authenticator_statuses": { "source": "cm_authenticator_statuses", @@ -451,7 +478,8 @@ "fieldMap": { "co_person_id": "person_id" }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["people", "authenticators"] }, "ssh_keys": { "source": "cm_ssh_keys", @@ -459,7 +487,8 @@ "cache": ["person_id", "ssh_key_authenticator_id"], "fieldMap": { "co_person_id": "person_id" - } + }, + "dependencies": ["people", "ssh_key_authenticators"] }, "passwords": { "source": "cm_passwords", @@ -469,7 +498,8 @@ "fieldMap": { "co_person_id": "person_id", "password_type": "type" - } + }, + "dependencies": ["people", "password_authenticators"] }, "person_roles": { "source": "cm_co_person_roles", @@ -486,7 +516,8 @@ "ou": "department", "source_org_identity_id": null, "affiliation": null - } + }, + "dependencies": ["people", "types"] }, "external_identities": { "source": "cm_org_identities", @@ -506,7 +537,8 @@ "valid_from": null, "valid_through": null, "co_id": null - } + }, + "dependencies": ["people"] }, "groups": { "source": "cm_co_groups", @@ -520,7 +552,8 @@ "introduction": null, "auto": null }, - "postTable": "createOwnersGroups" + "postTable": "createOwnersGroups", + "dependencies": ["cos", "cous"] }, "group_nestings": { "source": "cm_co_group_nestings", @@ -530,7 +563,8 @@ "co_group_id": "group_id", "target_co_group_id": "target_group_id", "co_group_nesting_id": "group_nesting_id" - } + }, + "dependencies": ["groups"] }, "group_members": { "source": "cm_co_group_members", @@ -545,7 +579,8 @@ "source_org_identity_id": null, "member": null, "owner": null - } + }, + "dependencies": ["groups", "people", 
"group_nestings"] }, "names": { "source": "cm_names", @@ -557,7 +592,8 @@ "co_person_id": "person_id", "org_identity_id": "external_identity_id", "type": null - } + }, + "dependencies": ["people", "external_identities", "types"] }, "email_addresses": { "source": "cm_email_addresses", @@ -571,7 +607,8 @@ "co_department_id": null, "organization_id": null, "type": null - } + }, + "dependencies": ["people", "external_identities", "types"] }, "identifiers": { "source": "cm_identifiers", @@ -589,7 +626,8 @@ "organization_id": null, "type": null, "language": null - } + }, + "dependencies": ["groups", "people", "external_identities", "types"] }, "urls": { "source": "cm_urls", @@ -603,7 +641,8 @@ "organization_id": null, "type": null, "language": null - } + }, + "dependencies": ["people", "external_identities", "types"] }, "addresses": { "source": "cm_addresses", @@ -616,7 +655,8 @@ "co_department_id": null, "organization_id": null, "type": null - } + }, + "dependencies": ["person_roles", "external_identities", "types"] }, "telephone_numbers": { "source": "cm_telephone_numbers", @@ -629,7 +669,8 @@ "co_department_id": null, "organization_id": null, "type": null - } + }, + "dependencies": ["person_roles", "external_identities", "types"] }, "ad_hoc_attributes": { "source": "cm_ad_hoc_attributes", @@ -641,7 +682,8 @@ "co_department_id": null, "organization_id": null }, - "postTable": "migrateExtendedAttributesToAdHocAttributes" + "postTable": "migrateExtendedAttributesToAdHocAttributes", + "dependencies": ["person_roles", "external_identities"] }, "notifications": { "source": "cm_co_notifications", @@ -663,7 +705,8 @@ "source_id": null, "source_arg0": null, "source_val0": null - } + }, + "dependencies": ["people", "groups"] }, "history_records": { "source": "cm_history_records", @@ -679,7 +722,8 @@ "action": "&mapHistoryAction", "co_email_list_id": null, "co_service_id": null - } + }, + "dependencies": ["people", "person_roles", "groups", "external_identities"] }, "jobs": { 
"source": "cm_co_jobs", @@ -695,7 +739,8 @@ "job_type_fk": null, "job_mode": null }, - "preRow": "validateJobIsTransmogrifiable" + "preRow": "validateJobIsTransmogrifiable", + "dependencies": ["cos"] }, "job_history_records": { "source": "cm_co_job_history_records", @@ -706,7 +751,8 @@ "co_job_id": "job_id", "co_person_id": "person_id", "org_identity_id": "external_identity_id" - } + }, + "dependencies": ["jobs", "people", "external_identities"] }, "enrollment_flows": { "source": "cm_co_enrollment_flows", @@ -762,7 +808,8 @@ "theme_stacking": null, "establish_authenticators": null, "establish_cluster_accounts": null - } + }, + "dependencies": ["cos", "cous", "groups", "message_templates"] }, "petitions": { "source": "cm_co_petitions", @@ -790,7 +837,8 @@ "enrollee_co_person_role_id": null, "token": null, "co_id": null - } + }, + "dependencies": ["enrollment_flows", "cous", "people"] }, "petition_meta_hist_recs": { "source": "cm_co_petitions", @@ -811,7 +859,8 @@ "reference_identifier": null, "petitioner_co_person_id": null, "enrollee_co_person_id": null - } + }, + "dependencies": ["enrollment_flows", "external_identities", "person_roles", "people", "petitions"] }, "petition_hist_attrs": { "source": "cm_co_petition_attributes", @@ -823,7 +872,8 @@ "attribute_foreign_key": null, "co_petition_attribute_id": null }, - "addChangelog": true + "addChangelog": true, + "dependencies": ["petitions"] }, "ext_identity_source_records": { "source": "cm_org_identity_source_records", @@ -838,7 +888,8 @@ "reference_identifier": "reference_identifier", "org_identity_source_record_id": "ext_identity_source_record_id", "co_petition_id": null - } + }, + "dependencies": ["external_identity_sources", "external_identities", "people"] }, "api_source_records": { "source": "cm_api_source_records", @@ -846,6 +897,7 @@ "plugin": "ApiConnector", "fieldMap": { "sorid": "source_key" - } + }, + "dependencies": ["api_sources"] } } diff --git 
a/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php b/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php index eab8bfacb..e20e33e0a 100644 --- a/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php +++ b/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php @@ -108,6 +108,7 @@ public function __construct( * @param array $argv * @param ConsoleIo $io * @return int + * @since COmanage Registry v5.2.0 */ public function run(array $argv, ConsoleIo $io): int { @@ -266,11 +267,27 @@ public function execute(Arguments $args, ConsoleIo $io): int return $code; } + // Determine the actual list of tables to process. + // If specific tables are selected, we recursively resolve dependencies and sort them + // according to the configuration order. + // Otherwise, we process all tables in configuration order. + $tablesToProcess = !empty($selected) + ? $this->resolveDependencies($selected) + : array_keys($this->tables); + + if (!empty($selected)) { + $this->cmdPrinter->info('Tables to process (including dependencies):'); + foreach ($tablesToProcess as $table) { + $this->cmdPrinter->info(' - ' . $table); + } + } + // Register the current version for future upgrade purposes $this->metaTable = TableRegistry::getTableLocator()->get('Meta'); $this->metaTable->setUpgradeVersion(); - // Track remaining selected tables (if any) so we can exit early when done + // Track remaining selected tables (if any) so we can exit early when done. + // Note: $selected contains only the explicitly requested tables, not dependencies. $pendingSelected = []; if (!empty($selected)) { $pendingSelected = array_fill_keys($selected, true); @@ -287,8 +304,7 @@ public function execute(Arguments $args, ConsoleIo $io): int } } - $allTables = array_keys($this->tables); - foreach ($allTables as $t) { + foreach ($tablesToProcess as $t) { // Check per-table skip configuration and optionally prompt user $canSkipCfg = $this->tables[$t]['canSkip'] ?? 
null; if (filter_var($canSkipCfg, FILTER_VALIDATE_BOOLEAN)) { @@ -315,7 +331,6 @@ public function execute(Arguments $args, ConsoleIo $io): int // Initializations per table migration $outboundTableEmpty = true; - $skipTableTransmogrification = false; $inboundQualifiedTableName = $this->inconn->qualifyTableName($this->tables[$t]['source']); $outboundQualifiedTableName = $this->outconn->qualifyTableName($t); $Model = TableRegistry::getTableLocator()->get($t); @@ -342,19 +357,8 @@ public function execute(Arguments $args, ConsoleIo $io): int $this->cmdPrinter->warning("Table (" . $t . ") is not empty. We will not overwrite existing data."); } - // Skip tables not in the selected subset if specified - // We will print a warning, and we will parse all the tables because we need to construct the cache. - // Nevertheless, we will not allow any database processing - if ( - !empty($selected) - && !in_array($t, $selected) - ) { - $skipTableTransmogrification = true; - $this->cmdPrinter->warning("Skipping Transmogrification. Table ($t) is not in the selected subset."); - } - - // Mark the table as skipped if it is not empty and not selected - $this->cache['skipInsert'][$outboundQualifiedTableName] = !$outboundTableEmpty || $skipTableTransmogrification; + // Mark the table as skipped if it is not empty since we have to process all the tables in the $tablesToProcess array + $this->cache['skipInsert'][$outboundQualifiedTableName] = !$outboundTableEmpty; $this->cache['current'] = $outboundQualifiedTableName; /* * End of checks @@ -442,7 +446,8 @@ public function execute(Arguments $args, ConsoleIo $io): int $this->runPostRowHook($t, $origRow, $row); } - // Store row data in cache for potential later use + // Store row data in cache for potential later use. + // This happens even if insert is skipped (which is the goal of handling dependencies). 
$this->cacheResults($t, $row, $origRow); } catch (ForeignKeyConstraintViolationException $e) { // A foreign key associated with this record did not load, so we can't @@ -496,7 +501,7 @@ public function execute(Arguments $args, ConsoleIo $io): int $this->runPostTableHook($t); } - // If user selected a subset, exit as soon as all selected tables are processed + // If user selected a subset, exit as soon as all explicitly selected tables are processed if (!empty($pendingSelected) && isset($pendingSelected[$t])) { unset($pendingSelected[$t]); if (empty($pendingSelected)) { @@ -506,10 +511,10 @@ public function execute(Arguments $args, ConsoleIo $io): int } // Prompt for confirmation before processing table - $tables = array_keys($this->tables); - $currentIndex = array_search($t, $tables); - if (isset($tables[$currentIndex + 1])) { - $this->cmdPrinter->info("Next table to process: " . $tables[$currentIndex + 1]); + // Note: we use $tablesToProcess for the index lookup to find the next table correctly + $currentIndex = array_search($t, $tablesToProcess); + if (isset($tablesToProcess[$currentIndex + 1])) { + $this->cmdPrinter->info("Next table to process: " . $tablesToProcess[$currentIndex + 1]); } else { $this->cmdPrinter->out(PHP_EOL . "Table import complete. Exiting."); } @@ -654,6 +659,48 @@ private function buildSelectedTables(Arguments $args): array return array_values(array_unique($selected)); }
+  /**
+   * Resolve the selected tables together with their transitive dependencies.
+   *
+   * @param  array $selected Explicitly selected tables
+   * @return array Tables to process, each placed after the tables it depends on
+   */
+  private function resolveDependencies(array $selected): array
+  {
+    // Breadth-first walk of the dependency graph; $needed doubles as the visited set.
+    $needed = array_fill_keys($selected, true);
+    $queue = $selected;
+    while ($queue !== []) {
+      $table = array_shift($queue);
+      foreach ($this->tables[$table]['dependencies'] ?? [] as $dependency) {
+        if (!isset($needed[$dependency])) {
+          $needed[$dependency] = true;
+          $queue[] = $dependency;
+          $this->cmdPrinter?->verbose("Adding dependency table '$dependency' for '$table'");
+        }
+      }
+    }
+
+    // Start from configuration order; names that are not configured tables drop out here.
+    $pending = array_values(array_intersect(array_keys($this->tables), array_keys($needed)));
+    $ordered = [];
+    while ($pending !== []) {
+      // Emit the first pending table with no pending dependency (e.g. cous before groups).
+      $next = $pending[0]; // Kept only when a dependency cycle leaves no table ready
+      foreach ($pending as $candidate) {
+        $deps = $this->tables[$candidate]['dependencies'] ?? [];
+        if (array_diff(array_intersect($deps, $pending), [$candidate]) === []) {
+          $next = $candidate;
+          break;
+        }
+      }
+      $ordered[] = $next;
+      $pending = array_values(array_diff($pending, [$next]));
+    }
+
+    return $ordered;
+  }
+
/** * Validate selected tables against config and warn about partial migration. * Returns exit code on error, or null if OK. @@ -689,6 +736,7 @@ private function maybeValidateSelectedTables(array $selected, ConsoleIo $io): ?i * the corresponding model will be "CoreServer.Servers".
* * @return void + * @since COmanage Registry v5.2.0 */ protected function pluginBootstrap(): void { @@ -772,6 +820,7 @@ protected function pluginBootstrap(): void * @param string $tableName Name of table to check * @return bool True if table exists * @throws \Exception + * @since COmanage Registry v5.2.0 */ protected function tableExists(string $tableName): bool { @@ -794,6 +843,7 @@ protected function tableExists(string $tableName): bool * @param string $currentTable Logical target table name (eg, 'job_history_records') * @param array $row Row to insert * @return bool True if the row should be skipped, false otherwise + * @since COmanage Registry v5.2.0 */ private function skipIfRejectedParent(string $currentTable, array $row): bool { diff --git a/app/plugins/Transmogrify/src/Lib/Traits/CacheTrait.php b/app/plugins/Transmogrify/src/Lib/Traits/CacheTrait.php index 8148df656..198c51f85 100644 --- a/app/plugins/Transmogrify/src/Lib/Traits/CacheTrait.php +++ b/app/plugins/Transmogrify/src/Lib/Traits/CacheTrait.php @@ -120,19 +120,22 @@ protected function cacheFieldById(string $table, array $row, string $field): voi * Cache results as configured for the specified table. * * @since COmanage Registry v5.2.0 - * @param string $table Table to cache - * @param array $row Row of table data - * @param array $orinRow Original Row of table data + * @param string $table Table to cache + * @param array $row Row of table data + * @param array $orinRow Original Row of table data + * @param array $cacheConfig Optional cache configuration (overrides tables.json) */ - protected function cacheResults(string $table, array $row, array $orinRow): void + protected function cacheResults(string $table, array $row, array $orinRow, ?array $cacheConfig = []): void { - if (empty($this->tables[$table]['cache'])) { + $config = !empty($cacheConfig) ? $cacheConfig : ($this->tables[$table]['cache'] ?? []); + + if (empty($config)) { return; } // Cache the requested fields. 
For now, at least, we key on row ID only. - foreach ($this->tables[$table]['cache'] as $field) { + foreach ($config as $field) { if (is_array($field)) { $this->cacheCompositeKey($table, $row, $field); } else { diff --git a/app/plugins/Transmogrify/src/Lib/Traits/RowTransformationTrait.php b/app/plugins/Transmogrify/src/Lib/Traits/RowTransformationTrait.php index 2e46be62c..fb0736bb2 100644 --- a/app/plugins/Transmogrify/src/Lib/Traits/RowTransformationTrait.php +++ b/app/plugins/Transmogrify/src/Lib/Traits/RowTransformationTrait.php @@ -546,7 +546,24 @@ protected function createIdentifierAssignmentPluginRecord(array $originRow, arra try { $this->outconn->beginTransaction(); $this->outconn->insert($qualifiedTableName, $formatAssignerRow); + // Get the ID of the inserted record + if (!method_exists($this->outconn, 'lastInsertId')) { + throw new \RuntimeException('Could not retrieve Format Assigner ID'); + } + $formatAssignerId = (int)$this->outconn->lastInsertId(); + $this->outconn->commit(); + + // Cache the result so we can map it later (e.g. for format_assigner_sequences) + // We manually populate the cache since this insertion happens outside the main loop + + // Cache by ID + $this->cacheResults( + 'format_assigners', + array_merge(['id' => $formatAssignerId], $formatAssignerRow), + $formatAssignerRow, + ['identifier_assignment_id', 'id'], + ); } catch (\Throwable $e) { $this->outconn->rollBack(); throw new \RuntimeException("Failed to create Format Assigner record for IdentifierAssignment $identifierAssignmentId: " . $e->getMessage());