From 23e64e15d11a80cb4d6eefdb12dd7f9ea5843611 Mon Sep 17 00:00:00 2001 From: Ioannis Igoumenos Date: Fri, 27 Feb 2026 08:57:29 +0000 Subject: [PATCH] Drop non unique indexes on data loading --- .../src/Command/TransmogrifyCommand.php | 58 ++- .../src/Lib/Util/IndexManager.php | 329 ++++++++++++++++++ .../Transmogrify/src/TransmogrifyPlugin.php | 4 +- 3 files changed, 389 insertions(+), 2 deletions(-) create mode 100644 app/plugins/Transmogrify/src/Lib/Util/IndexManager.php diff --git a/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php b/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php index c1f98f485..dfa4f99fd 100644 --- a/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php +++ b/app/plugins/Transmogrify/src/Command/TransmogrifyCommand.php @@ -53,6 +53,7 @@ use Transmogrify\Lib\Util\CommandLinePrinter; use Transmogrify\Lib\Util\DbInfoPrinter; use Transmogrify\Lib\Util\GroupsHealth; +use Transmogrify\Lib\Util\IndexManager; use Transmogrify\Lib\Util\OrgIdentitiesHealth; use Transmogrify\Lib\Util\RawSqlQueries; use Transmogrify\Service\ConfigLoaderService; @@ -86,6 +87,9 @@ class TransmogrifyCommand extends BaseCommand { protected ?CommandLinePrinter $cmdPrinter = null; + /** @var float Start time of command execution */ + private float $startTime; + /** @var string Absolute path to plugin root directory */ private string $pluginRoot; @@ -98,6 +102,7 @@ class TransmogrifyCommand extends BaseCommand { public function __construct( private DbInfoService $dbInfoService, private ConfigLoaderService $configLoader, + private IndexManager $indexManager, ) { $this->pluginRoot = dirname(__DIR__, 2); parent::__construct(); @@ -219,9 +224,15 @@ public function execute(Arguments $args, ConsoleIo $io): int $this->args = $args; $this->io = $io; - // Now that BaseCommand set verbosity, construct the printer so it can detect it correctly + // Now that BaseCommand set verbosity, construct the printer so it can detect it correctly $this->cmdPrinter = new CommandLinePrinter($io, 'green', 50, true); + // Start tracking execution time + $this->startTime = microtime(true); + + // Initialize the index manager with the live target connection and printer + $this->indexManager->initialize($this->outconn, $this->cmdPrinter); + // Validate "info" option combinations and handle errors $code = $this->validateInfoOptions($io); if ($code !== null) { @@ -408,6 +419,21 @@ public function execute(Arguments $args, ConsoleIo $io): int $this->cache['error'] = 0; $this->cache['warns'] = 0; + // Drop non-PK indexes before the bulk load to avoid per-row index maintenance overhead. + // They will be recreated in a single pass after all rows are inserted. + $indexesDisabled = false; + if ($this->cache['skipInsert'][$outboundQualifiedTableName] === false) { + try { + $this->indexManager->disableIndexes($outboundQualifiedTableName); + $indexesDisabled = $this->indexManager->hasSavedIndexes($outboundQualifiedTableName); + } catch (\Throwable $e) { + $this->cmdPrinter->warning( + 'Could not drop indexes on ' . $outboundQualifiedTableName . ': ' . $e->getMessage() + ); + $this->cmdPrinter->warning('Proceeding with indexes in place (slower).'); + } + } + while ($row = $stmt->fetchAssociative()) { if (!empty($row[$this->tables[$t]['displayField']])) { $displayMessage = "$t " . $row[$this->tables[$t]['displayField']]; @@ -492,6 +518,20 @@ public function execute(Arguments $args, ConsoleIo $io): int * FINISH PROGRESS */ + // Recreate indexes that were dropped before the bulk load + if ($indexesDisabled) { + try { + $this->indexManager->enableIndexes($outboundQualifiedTableName); + } catch (\Throwable $e) { + $this->cmdPrinter->error( + 'Failed to recreate indexes on ' . $outboundQualifiedTableName . ': ' . $e->getMessage() + ); + $this->cmdPrinter->error( + 'You may need to manually recreate indexes. Run: bin/cake database' + ); + } + } + // Output final warning and error counts for the table $this->cmdPrinter->warning(sprintf('Warnings: %d', $this->cache['warns'])); $this->cmdPrinter->error(sprintf('Errors: %d', $this->cache['error'])); @@ -527,6 +567,22 @@ public function execute(Arguments $args, ConsoleIo $io): int $this->cmdPrinter->out('Running assignUuids task via UpgradeCommand...'); $this->executeCommand(UpgradeCommand::class, ['-D', '-X', '-t', 'assignUuids'], $this->io); + // Display total execution time + $executionTime = microtime(true) - $this->startTime; + + $hours = floor($executionTime / 3600); + $minutes = floor(($executionTime % 3600) / 60); + $seconds = $executionTime % 60; + + $formatted = sprintf( + '%02d:%02d:%02d', + (int)$hours, + (int)$minutes, + (int)$seconds + ); + + $this->cmdPrinter->out(sprintf('Total execution time: %s (HH:MM:SS)', $formatted)); + return BaseCommand::CODE_SUCCESS; } diff --git a/app/plugins/Transmogrify/src/Lib/Util/IndexManager.php b/app/plugins/Transmogrify/src/Lib/Util/IndexManager.php new file mode 100644 index 000000000..3f56e6df0 --- /dev/null +++ b/app/plugins/Transmogrify/src/Lib/Util/IndexManager.php @@ -0,0 +1,329 @@ + + */ + protected array $savedIndexes = []; + + /** @var DBALConnection|null */ + protected ?DBALConnection $conn = null; + + /** @var CommandLinePrinter|null */ + protected ?CommandLinePrinter $printer = null; + + /** + * Initialize the manager with a live connection and optional printer. + * Must be called before disableIndexes()/enableIndexes(). + * + * @param DBALConnection $conn DBAL connection to the target database + * @param CommandLinePrinter|null $printer Optional printer for log output + * @return self Fluent interface + * @since COmanage Registry v5.2.0 + */ + public function initialize(DBALConnection $conn, ?CommandLinePrinter $printer = null): self + { + $this->conn = $conn; + $this->printer = $printer; + return $this; + } + + /** + * Drop non-primary, non-constraint indexes on a table, saving their definitions + * so they can be recreated later with enableIndexes(). + * + * Primary keys and indexes that back unique/foreign-key constraints are preserved + * to maintain referential integrity during the bulk load. + * + * @param string $qualifiedTableName Schema-qualified table name (e.g. "public.cos") + * @throws \RuntimeException If initialize() has not been called + * @since COmanage Registry v5.2.0 + */ + public function disableIndexes(string $qualifiedTableName): void + { + $this->assertInitialized(); + + if ($this->conn->isPostgreSQL()) { + $this->disablePostgresIndexes($qualifiedTableName); + } else { + $this->disableMysqlIndexes($qualifiedTableName); + } + } + + /** + * Recreate previously disabled indexes for a table. + * + * @param string $qualifiedTableName Schema-qualified table name (e.g. "public.cos") + * @throws \RuntimeException If initialize() has not been called + * @since COmanage Registry v5.2.0 + */ + public function enableIndexes(string $qualifiedTableName): void + { + $this->assertInitialized(); + + if ($this->conn->isPostgreSQL()) { + $this->enablePostgresIndexes($qualifiedTableName); + } else { + $this->enableMysqlIndexes($qualifiedTableName); + } + + unset($this->savedIndexes[$qualifiedTableName]); + } + + /** + * Check whether any saved index definitions exist for the given table. + * + * @param string $qualifiedTableName Schema-qualified table name + * @return bool + * @since COmanage Registry v5.2.0 + */ + public function hasSavedIndexes(string $qualifiedTableName): bool + { + return !empty($this->savedIndexes[$qualifiedTableName]); + } + + // ---------- PostgreSQL ---------- + + /** + * Drop non-PK, non-constraint indexes on a PostgreSQL table. + * + * We query pg_indexes and exclude any index name that appears in pg_constraint + * (which covers primary keys, unique constraints, and foreign keys). + * + * @param string $qualifiedTableName Schema-qualified table name + */ + protected function disablePostgresIndexes(string $qualifiedTableName): void + { + [$schema, $table] = $this->parseQualifiedName($qualifiedTableName, 'public'); + + $sql = <<conn->executeQuery($sql, ['schema' => $schema, 'table' => $table]); + $indexes = $stmt->fetchAllAssociative(); + + if (empty($indexes)) { + $this->printer?->verbose("No droppable indexes found on $qualifiedTableName"); + return; + } + + $this->savedIndexes[$qualifiedTableName] = $indexes; + $this->printer?->info(sprintf( + 'Dropping %d index(es) on %s for bulk load performance', + count($indexes), + $qualifiedTableName + )); + + foreach ($indexes as $idx) { + $dropSql = 'DROP INDEX IF EXISTS ' . $schema . '."' . $idx['indexname'] . '"'; + $this->printer?->verbose(' ' . $dropSql); + $this->conn->executeStatement($dropSql); + } + } + + /** + * Recreate previously saved PostgreSQL indexes. + * + * @param string $qualifiedTableName Schema-qualified table name + */ + protected function enablePostgresIndexes(string $qualifiedTableName): void + { + if (empty($this->savedIndexes[$qualifiedTableName])) { + return; + } + + $count = count($this->savedIndexes[$qualifiedTableName]); + $this->printer?->info(sprintf( + 'Recreating %d index(es) on %s', + $count, + $qualifiedTableName + )); + + foreach ($this->savedIndexes[$qualifiedTableName] as $idx) { + $this->printer?->verbose(' ' . $idx['indexdef']); + $this->conn->executeStatement($idx['indexdef']); + } + } + + // ---------- MySQL / MariaDB ---------- + + /** + * Drop non-PK, non-UNIQUE indexes on a MySQL/MariaDB table. + * + * UNIQUE indexes are preserved to maintain data integrity during the bulk load. + * + * @param string $qualifiedTableName Qualified table name (db.table) + */ + protected function disableMysqlIndexes(string $qualifiedTableName): void + { + [$database, $table] = $this->parseQualifiedName($qualifiedTableName); + + $whereDb = $database !== null + ? 'TABLE_SCHEMA = :database' + : 'TABLE_SCHEMA = DATABASE()'; + + $sql = << $table]; + if ($database !== null) { + $params['database'] = $database; + } + + $stmt = $this->conn->executeQuery($sql, $params); + $indexes = $stmt->fetchAllAssociative(); + + if (empty($indexes)) { + $this->printer?->verbose("No droppable indexes found on $qualifiedTableName"); + return; + } + + $this->savedIndexes[$qualifiedTableName] = $indexes; + $this->printer?->info(sprintf( + 'Dropping %d index(es) on %s for bulk load performance', + count($indexes), + $qualifiedTableName + )); + + $qualifiedTarget = $database !== null + ? '`' . $database . '`.`' . $table . '`' + : '`' . $table . '`'; + + foreach ($indexes as $idx) { + $dropSql = 'DROP INDEX `' . $idx['INDEX_NAME'] . '` ON ' . $qualifiedTarget; + $this->printer?->verbose(' ' . $dropSql); + $this->conn->executeStatement($dropSql); + } + } + + /** + * Recreate previously saved MySQL indexes. + * + * @param string $qualifiedTableName Qualified table name (db.table) + */ + protected function enableMysqlIndexes(string $qualifiedTableName): void + { + if (empty($this->savedIndexes[$qualifiedTableName])) { + return; + } + + [$database, $table] = $this->parseQualifiedName($qualifiedTableName); + + $qualifiedTarget = $database !== null + ? '`' . $database . '`.`' . $table . '`' + : '`' . $table . '`'; + + $count = count($this->savedIndexes[$qualifiedTableName]); + $this->printer?->info(sprintf( + 'Recreating %d index(es) on %s', + $count, + $qualifiedTableName + )); + + foreach ($this->savedIndexes[$qualifiedTableName] as $idx) { + $cols = '`' . implode('`, `', explode(',', $idx['idx_columns'])) . '`'; + + $createSql = 'CREATE INDEX `' . $idx['INDEX_NAME'] . '` ON ' . $qualifiedTarget . ' (' . $cols . ')'; + $this->printer?->verbose(' ' . $createSql); + $this->conn->executeStatement($createSql); + } + } + + // ---------- Helpers ---------- + + /** + * Split a possibly-qualified table name into [prefix, table]. + * + * @param string $qualifiedTableName + * @param string|null $default Default prefix when not qualified + * @return array{0: string|null, 1: string} + */ + protected function parseQualifiedName(string $qualifiedTableName, ?string $default = null): array + { + $parts = explode('.', $qualifiedTableName, 2); + if (count($parts) === 2) { + return [$parts[0], $parts[1]]; + } + return [$default, $parts[0]]; + } + + /** + * Guard that initialize() has been called. + * + * @throws \RuntimeException + */ + protected function assertInitialized(): void + { + if ($this->conn === null) { + throw new \RuntimeException('IndexManager::initialize() must be called before use.'); + } + } +} diff --git a/app/plugins/Transmogrify/src/TransmogrifyPlugin.php b/app/plugins/Transmogrify/src/TransmogrifyPlugin.php index 8a41b336a..79d706339 100644 --- a/app/plugins/Transmogrify/src/TransmogrifyPlugin.php +++ b/app/plugins/Transmogrify/src/TransmogrifyPlugin.php @@ -11,6 +11,7 @@ use Cake\Routing\RouteBuilder; use Transmogrify\Command\TransmogrifyCommand; use Transmogrify\Command\TransmogrifySourceToTargetCommand; +use Transmogrify\Lib\Util\IndexManager; use Transmogrify\Service\ConfigLoaderService; use Transmogrify\Service\DbInfoService; @@ -97,9 +98,10 @@ public function services(ContainerInterface $container): void // Register services so the container can resolve them (constructor autowiring) $container->add(DbInfoService::class); $container->add(ConfigLoaderService::class); + $container->add(IndexManager::class); $container->add(TransmogrifyCommand::class) - ->addArguments([DbInfoService::class, ConfigLoaderService::class]); + ->addArguments([DbInfoService::class, ConfigLoaderService::class, IndexManager::class]); $container->add(TransmogrifySourceToTargetCommand::class) ->addArgument(ConfigLoaderService::class); }