Skip to content

Commit

Permalink
Identifier Assignment Transliteration (CFM-163)
Browse files Browse the repository at this point in the history
  • Loading branch information
Benn Oshrin committed Apr 16, 2025
1 parent 42b7dd2 commit 1b04a06
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ msgstr "Collision Mode"
msgid "field.FormatAssigners.collision_mode.desc"
msgstr "How to assign collision numbers when required"

msgid "field.FormatAssigners.enable_transliteration"
msgstr "Enable Transliteration"

msgid "field.FormatAssigners.format"
msgstr "Format"

Expand Down
26 changes: 20 additions & 6 deletions app/plugins/CoreAssigner/src/Model/Table/FormatAssignersTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ public function assign($ia, $entity): string {
$entity,
// If no format is specified, default to "(#)".
$ia->format_assigner->format ?? "(#)",
$ia->format_assigner->permitted_characters
$ia->format_assigner->permitted_characters,
(bool)$ia->format_assigner->enable_transliteration
);

// Now that we've got our base, loop until we get a unique identifier.
Expand Down Expand Up @@ -345,19 +346,27 @@ protected function selectSequences(
* string used in identifier assignment.
*
* @since COmanage Registry v5.0.0
* @param EntityInterface $entity Entity to assign Identifier for
* @param string $format Identifier assignment format
* @param PermittedCharactersEnum $permitted Acceptable characters for substituted parameters
* @return string Identifier with parameters substituted
* @param EntityInterface $entity Entity to assign Identifier for
* @param string $format Identifier assignment format
* @param PermittedCharactersEnum $permitted Acceptable characters for substituted parameters
* @param boolean $transliterate Whether to apply transliteration in constructing the identifier base
* @return string Identifier with parameters substituted
* @throws RuntimeException
*/

protected function substituteParameters(
$entity,
string $format,
string $permitted
string $permitted,
bool $transliterate=false
): string {
$base = "";

if($transliterate) {
// Transliteration is performed by the entity when enabled

$entity->primary_name->enableTransliteration(true);
}

// For random letter generation ('h', 'r', 'R')
$randomCharSet = array(
Expand Down Expand Up @@ -561,6 +570,11 @@ public function validationDefault(Validator $validator): Validator {
]);
$validator->notEmptyString('permitted_characters');

$validator->add('enable_transliteration', [
'content' => ['rule' => ['boolean']]
]);
$validator->allowEmptyString('enable_transliteration');

return $validator;
}
}
3 changes: 2 additions & 1 deletion app/plugins/CoreAssigner/src/config/plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"minimum": { "type": "integer" },
"maximum": { "type": "integer" },
"collision_mode": { "type": "string", "size": 2 },
"permitted_characters": { "type": "string", "size": 2 }
"permitted_characters": { "type": "string", "size": 2 },
"enable_transliteration": { "type": "boolean" }
},
"indexes": {
"format_assigners_i1": { "columns": [ "identifier_assignment_id" ]}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ if($vv_action == 'edit') {
'minimum_length',
'minimum',
'maximum',
'enable_transliteration'
] as $field) {
print $this->element('form/listItem', [
'arguments' => [
Expand Down
9 changes: 9 additions & 0 deletions app/resources/locales/en_US/enumeration.po
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,15 @@ msgstr "Street, City, State, Postal Code"
msgid "RequiredAddressFieldsEnum.street,locality,state,postal_code,country"
msgstr "Street, City, State, Postal Code, Country"

msgid "RequiredEnum.NP"
msgstr "Not Permitted"

msgid "RequiredEnum.O"
msgstr "Optional"

msgid "RequiredEnum.R"
msgstr "Required"

msgid "RequiredNameFieldsEnum.given"
msgstr "Given"

Expand Down
96 changes: 96 additions & 0 deletions app/src/Model/Entity/Name.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ class Name extends Entity {
'full_name'
];

// Whether the family, given, and middle name accessors return transliterated
// values. Disabled by default; toggled via enableTransliteration().
// This could be refactored into a trait if other entities support it.
protected $transliterate = false;

/**
* Generate a full (common) name.
*
Expand Down Expand Up @@ -104,7 +107,40 @@ protected function _getFullName($showHonorific = false) {

return $cn;
}

/**
 * Accessor for the family name, which is transliterated when transliteration
 * has been enabled on this entity.
 *
 * @since  COmanage Registry v5.1.0
 * @param  string $family Family name as stored
 * @return string|null    Family name, possibly transliterated
 */

protected function _getFamily($family) {
  $familyName = $this->maybeTransliterate($family);

  return $familyName;
}

/**
 * Accessor for the given name, which is transliterated when transliteration
 * has been enabled on this entity.
 *
 * @since  COmanage Registry v5.1.0
 * @param  string $given Given name as stored
 * @return string|null   Given name, possibly transliterated
 */

protected function _getGiven($given) {
  $givenName = $this->maybeTransliterate($given);

  return $givenName;
}

/**
 * Accessor for the middle name, which is transliterated when transliteration
 * has been enabled on this entity.
 *
 * @since  COmanage Registry v5.1.0
 * @param  string $middle Middle name as stored
 * @return string|null    Middle name, possibly transliterated
 */

protected function _getMiddle($middle) {
  $middleName = $this->maybeTransliterate($middle);

  return $middleName;
}

/**
* Determine if this entity record can be deleted.
*
Expand All @@ -115,6 +151,66 @@ protected function _getFullName($showHonorific = false) {
public function canDelete(): bool {
  // Deletion is permitted exactly when notPrimary() reports true
  $deletable = $this->notPrimary();

  return $deletable;
}

/**
 * Set (or disable) transliteration when returning fields from this Entity.
 *
 * The flag is stored on this entity instance and is consulted by the family,
 * given, and middle name accessors each time they are read. It stays in effect
 * until this method is called again.
 *
 * @since  COmanage Registry v5.1.0
 * @param  bool $enable If true, enable transliteration (default is false)
 */

public function enableTransliteration(bool $enable) {
$this->transliterate = $enable;
}

/**
 * Maybe transliterate the requested string (if enabled).
 *
 * @since  COmanage Registry v5.1.0
 * @param  string|null $s String to transliterate
 * @return string|null    Possibly transliterated string, or null if $s is null or empty
 * @throws \RuntimeException If the transliterator cannot be created or the transliteration fails
 */

protected function maybeTransliterate(?string $s): ?string {
  // Test explicitly for null/empty rather than truthiness, since "0" is falsy
  // in PHP and would otherwise be silently discarded.
  if($s === null || $s === '') {
    return null;
  }

  if(!$this->transliterate) {
    return $s;
  }

  // The PHP transliteration library is basically a wrapper around unicode libraries.
  // The documentation is extremely technical and has a fairly steep learning curve.
  // A background in linguistics helps, but only somewhat.
  //
  // https://unicode-org.github.io/icu/userguide/transforms/general/
  // http://www.unicode.org/reports/tr15/#Norm_Forms
  //
  // Any-Latin will convert any script to a Latin representation, which might still
  // have composed characters, such as é. We shouldn't actually use "Any", though, since
  // by default Japanese Kanji (which are Chinese derived characters) will be
  // transliterated using Chinese guidance. Unfortunately there isn't a better
  // option available, and the transliterator library basically gives up and doesn't
  // try to address Japanese.
  //
  // NFKD will decompose and separate, so (eg) the "ﬁ" ligature (U+FB01) becomes the
  // two letters "f" and "i", and å becomes "a" followed by a combining mark, which
  // the "[:Nonspacing Mark:] Remove" step then strips, leaving just "a". For
  // identifier assignment, this is preferable... in the unlikely event someone
  // pastes in the ligature we really want the plain letters "fi".
  //
  // We could perform other transformations here, such as converting to lowercase,
  // but for the sake of functional compartmentalization we don't.
  //
  // Note this approach isn't without problems. For example, Kanji in Japanese can
  // translate to multiple words each with different pronunciations, and therefore
  // different transliterations. Or, different European speakers might prefer
  // different transliterations, eg å to a or aa. As such, this feature is
  // experimental pending real world feedback.

  $txid = "Any-Latin; NFKD; [:Nonspacing Mark:] Remove; NFKC";

  // create() returns null on failure, so check before dereferencing
  $transliterator = \Transliterator::create($txid);

  if($transliterator === null) {
    throw new \RuntimeException(
      "Could not create transliterator (" . $txid . "): " . intl_get_error_message()
    );
  }

  // transliterate() returns false on failure, which must not leak out as a name
  $result = $transliterator->transliterate($s);

  if($result === false) {
    throw new \RuntimeException(
      "Transliteration failed: " . $transliterator->getErrorMessage()
    );
  }

  return $result;
}

/**
* Determine if this is not a Primary Name.
Expand Down

0 comments on commit 1b04a06

Please sign in to comment.