diff --git a/app/plugins/CoreAssigner/resources/locales/en_US/core_assigner.po b/app/plugins/CoreAssigner/resources/locales/en_US/core_assigner.po index 7a3608bb4..cb54c17b3 100644 --- a/app/plugins/CoreAssigner/resources/locales/en_US/core_assigner.po +++ b/app/plugins/CoreAssigner/resources/locales/en_US/core_assigner.po @@ -58,6 +58,9 @@ msgstr "Collision Mode" msgid "field.FormatAssigners.collision_mode.desc" msgstr "How to assign collision numbers when required" +msgid "field.FormatAssigners.enable_transliteration" +msgstr "Enable Transliteration" + msgid "field.FormatAssigners.format" msgstr "Format" diff --git a/app/plugins/CoreAssigner/src/Model/Table/FormatAssignersTable.php b/app/plugins/CoreAssigner/src/Model/Table/FormatAssignersTable.php index 08b2f7dbc..a16a26c94 100644 --- a/app/plugins/CoreAssigner/src/Model/Table/FormatAssignersTable.php +++ b/app/plugins/CoreAssigner/src/Model/Table/FormatAssignersTable.php @@ -122,7 +122,8 @@ public function assign($ia, $entity): string { $entity, // If no format is specified, default to "(#)". $ia->format_assigner->format ?? "(#)", - $ia->format_assigner->permitted_characters + $ia->format_assigner->permitted_characters, + (bool)$ia->format_assigner->enable_transliteration ); // Now that we've got our base, loop until we get a unique identifier. @@ -345,19 +346,27 @@ protected function selectSequences( * string used in identifier assignment. 
* * @since COmanage Registry v5.0.0 - * @param EntityInterface $entity Entity to assign Identifier for - * @param string $format Identifier assignment format - * @param PermittedCharactersEnum $permitted Acceptable characters for substituted parameters - * @return string Identifier with paramaters substituted + * @param EntityInterface $entity Entity to assign Identifier for + * @param string $format Identifier assignment format + * @param PermittedCharactersEnum $permitted Acceptable characters for substituted parameters + * @param boolean $transliterate Whether to apply transliteration in constructing the identifier base + * @return string Identifier with parameters substituted * @throws RuntimeException */ protected function substituteParameters( $entity, string $format, - string $permitted + string $permitted, + bool $transliterate=false ): string { $base = ""; + + if($transliterate) { + // Transliteration is performed by the entity when enabled. NOTE(review): this change adds no call that disables it again, so the flag may stay set on primary_name after assignment; primary_name is also assumed to be non-null here - confirm. + + $entity->primary_name->enableTransliteration(true); + } // For random letter generation ('h', 'r', 'R') $randomCharSet = array( @@ -561,6 +570,11 @@ public function validationDefault(Validator $validator): Validator { ]); $validator->notEmptyString('permitted_characters'); + $validator->add('enable_transliteration', [ + 'content' => ['rule' => ['boolean']] + ]); + $validator->allowEmptyString('enable_transliteration'); + return $validator; } } diff --git a/app/plugins/CoreAssigner/src/config/plugin.json b/app/plugins/CoreAssigner/src/config/plugin.json index 882dbad71..b94193d5c 100644 --- a/app/plugins/CoreAssigner/src/config/plugin.json +++ b/app/plugins/CoreAssigner/src/config/plugin.json @@ -16,7 +16,8 @@ "minimum": { "type": "integer" }, "maximum": { "type": "integer" }, "collision_mode": { "type": "string", "size": 2 }, - "permitted_characters": { "type": "string", "size": 2 } + "permitted_characters": { "type": "string", "size": 2 }, + "enable_transliteration": { "type": "boolean" } }, "indexes": { 
"format_assigners_i1": { "columns": [ "identifier_assignment_id" ]} diff --git a/app/plugins/CoreAssigner/templates/FormatAssigners/fields.inc b/app/plugins/CoreAssigner/templates/FormatAssigners/fields.inc index 0e824547c..681a95a7b 100644 --- a/app/plugins/CoreAssigner/templates/FormatAssigners/fields.inc +++ b/app/plugins/CoreAssigner/templates/FormatAssigners/fields.inc @@ -67,6 +67,7 @@ if($vv_action == 'edit') { 'minimum_length', 'minimum', 'maximum', + 'enable_transliteration' ] as $field) { print $this->element('form/listItem', [ 'arguments' => [ diff --git a/app/resources/locales/en_US/enumeration.po b/app/resources/locales/en_US/enumeration.po index 6456faffb..6dc5f5ea0 100644 --- a/app/resources/locales/en_US/enumeration.po +++ b/app/resources/locales/en_US/enumeration.po @@ -568,6 +568,15 @@ msgstr "Street, City, State, Postal Code" msgid "RequiredAddressFieldsEnum.street,locality,state,postal_code,country" msgstr "Street, City, State, Postal Code, Country" +msgid "RequiredEnum.NP" +msgstr "Not Permitted" + +msgid "RequiredEnum.O" +msgstr "Optional" + +msgid "RequiredEnum.R" +msgstr "Required" + msgid "RequiredNameFieldsEnum.given" msgstr "Given" diff --git a/app/src/Model/Entity/Name.php b/app/src/Model/Entity/Name.php index c08e31093..97b51a67f 100644 --- a/app/src/Model/Entity/Name.php +++ b/app/src/Model/Entity/Name.php @@ -47,6 +47,9 @@ class Name extends Entity { 'full_name' ]; + // Enable Transliteration? This could be refactored into a trait if other entities support it + protected $transliterate = false; + /** * Generate a full (common) name. * @@ -104,7 +107,40 @@ protected function _getFullName($showHonorific = false) { return $cn; } + + /** + * Accessor method to obtain possibly transliterated family name. 
+ * + * @since COmanage Registry v5.1.0 + * @param string $family Family name + */ + + protected function _getFamily($family) { + return $this->maybeTransliterate($family); + } + /** + * Accessor method to obtain possibly transliterated given name. + * + * @since COmanage Registry v5.1.0 + * @param string $given Given name + */ + + protected function _getGiven($given) { + return $this->maybeTransliterate($given); + } + + /** + * Accessor method to obtain possibly transliterated middle name. + * + * @since COmanage Registry v5.1.0 + * @param string $middle Middle name + */ + + protected function _getMiddle($middle) { + return $this->maybeTransliterate($middle); + } + /** * Determine if this entity record can be deleted. * @@ -115,6 +151,66 @@ protected function _getFullName($showHonorific = false) { public function canDelete(): bool { return $this->notPrimary(); } + + /** + * Set (or disable) transliteration when returning fields from this Entity. + * + * @since COmanage Registry v5.1.0 + * @param bool $enable If true, enable transliteration; if false, disable it (the entity starts with transliteration disabled) + */ + + public function enableTransliteration(bool $enable) { + $this->transliterate = $enable; + } + + /** + * Maybe transliterate the requested string (if enabled). + * + * @since COmanage Registry v5.1.0 + * @param string|null $s String to transliterate + * @return string|null Possibly transliterated string, or null when $s is null, "", or "0" (all are falsy to the !$s check) + */ + + protected function maybeTransliterate(?string $s): ?string { + if(!$s) { + return null; + } + + if($this->transliterate) { + // The PHP transliteration library is basically a wrapper around unicode libraries. + // The documentation is extremely technical and has a fairly steep learning curve. + // A background in linguistics helps, but only somewhat. + // + // https://unicode-org.github.io/icu/userguide/transforms/general/ + // http://www.unicode.org/reports/tr15/#Norm_Forms + // + // Any-Latin will convert any script to a Latin representation, which might still + // have composed characters, such as é. 
We shouldn't actually use "Any", though, since + // by default Japanese Kanji (which are Chinese derived characters) will be + // transliterated using Chinese guidance. Unfortunately there isn't a better + // option available, and the transliterator library basically gives up and doesn't + // try to address Japanese. + // + // NFKD will decompose and separate, so (eg) the "ﬁ" ligature (U+FB01) becomes "f" and "i", + // and å becomes just a. For identifier assignment, this is preferable... in the + // unlikely event someone pastes in the "ﬁ" ligature we really want "fi". + // + // We could perform other transformations here, such as converting to lowercase, + // but for the sake of functional compartmentalization we don't. + // + // Note this approach isn't without problems. For example, Kanji in Japanese can + // translate to multiple words each with different pronunciations, and therefore + // different transliterations. Or, different European speakers might prefer + // different transliterations, eg å to a or aa. As such, this feature is + // experimental pending real world feedback. NOTE(review): Transliterator::create() can return null and transliterate() can return false on failure; neither case is handled below - confirm whether a fallback is wanted. + + $txid = "Any-Latin; NFKD; [:Nonspacing Mark:] Remove; NFKC"; + + return \Transliterator::create($txid)->transliterate($s); + } else { + return $s; + } + } /** * Determine if this is not a Primary Name.