Skip to content

Commit

Permalink
[Emoji] Add the "text" locale
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolas-grekas committed Apr 4, 2024
1 parent d86cece commit 93c2e4b
Show file tree
Hide file tree
Showing 134 changed files with 12,306 additions and 4,073 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ CHANGELOG
7.1
---

* Add the component
* Extract the component from symfony/intl
* Add the `gitlab` locale to `EmojiTransliterator`
* Add the `text` locale to `EmojiTransliterator`
4 changes: 3 additions & 1 deletion EmojiTransliterator.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ final class EmojiTransliterator extends \Transliterator
'emoji-github' => 'github-emoji',
'emoji-gitlab' => 'gitlab-emoji',
'emoji-slack' => 'slack-emoji',
'emoji-text' => 'text-emoji',
'github-emoji' => 'emoji-github',
'gitlab-emoji' => 'emoji-gitlab',
'slack-emoji' => 'emoji-slack',
'text-emoji' => 'emoji-text',
];

public readonly string $id;
Expand Down Expand Up @@ -119,7 +121,7 @@ public function transliterate(string $string, int $start = 0, int $end = -1): st
}

// Here we rely on intl to validate the $string, $start and $end arguments
// and to slice the string. Slicing is done by replacing the part if $string
// and to slice the string. Slicing is done by replacing the part of $string
// between $start and $end by a unique cookie that can be reliably used to
// identify which part of $string should be transliterated.

Expand Down
160 changes: 67 additions & 93 deletions Resources/bin/build.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,15 @@
$emojisCodePoints = Builder::getEmojisCodePoints();
Builder::saveRules(Builder::buildRules($emojisCodePoints));
Builder::saveRules(Builder::buildStripRules($emojisCodePoints));
Builder::saveRules(Builder::buildGitHubRules($emojisCodePoints));
Builder::saveRules(Builder::buildGitlabRules($emojisCodePoints));
Builder::saveRules(Builder::buildSlackRules($emojisCodePoints));

// Names of the short-code sources. Each name is used twice below: to assemble the
// Builder::build{name}Maps() method name and to form the "emoji-{name}" / "{name}-emoji" ids.
$emojiMaps = ['slack', 'github', 'gitlab'];

foreach ($emojiMaps as $map) {
// Dynamic static call: the method name is built from the map name.
$maps = Builder::{"build{$map}Maps"}($emojisCodePoints);
// With $reverse = true, createRules() returns a pair of arrays; array_combine() keys
// that pair by id so that saveRules() receives both directions in a single call.
Builder::saveRules(array_combine(["emoji-$map", "$map-emoji"], Builder::createRules($maps, true)));
}

// The "text" rules are derived from the same list of sources.
Builder::saveRules(Builder::buildTextRules($emojisCodePoints, $emojiMaps));

final class Builder
{
Expand All @@ -45,10 +51,10 @@ public static function getEmojisCodePoints(): array
throw new \DomainException("Could not parse line: \"$line\".");
}

$codePoints = strtolower(trim($matches['codePoints']));
$codePoints = str_replace(' ', '-', trim($matches['codePoints']));
$emojisCodePoints[$codePoints] = $matches['emoji'];
// We also add a version without the "Zero Width Joiner"
$codePoints = str_replace('200d ', '', $codePoints);
$codePoints = str_replace('-200D-', '-', $codePoints);
$emojisCodePoints[$codePoints] = $matches['emoji'];
}

Expand All @@ -67,7 +73,6 @@ public static function buildRules(array $emojisCodePoints): Generator
->name('*.xml')
;

$ignored = [];
$mapsByLocale = [];

foreach ($files as $file) {
Expand All @@ -89,16 +94,8 @@ public static function buildRules(array $emojisCodePoints): Generator
continue;
}
$parts = preg_split('//u', $emoji, -1, \PREG_SPLIT_NO_EMPTY);
$emojiCodePoints = implode(' ', array_map('dechex', array_map('mb_ord', $parts)));
$emojiCodePoints = strtoupper(implode('-', array_map('dechex', array_map('mb_ord', $parts))));
if (!array_key_exists($emojiCodePoints, $emojisCodePoints)) {
$ignored[] = [
'locale' => $locale,
'emoji' => $emoji,
'name' => $name,
];
continue;
}
if (!self::testEmoji($emoji, $locale, $emojiCodePoints)) {
continue;
}
$codePointsCount = mb_strlen($emoji);
Expand Down Expand Up @@ -128,108 +125,83 @@ public static function buildRules(array $emojisCodePoints): Generator
}
}

public static function buildGitHubRules(array $emojisCodePoints): iterable
public static function buildGitHubMaps(array $emojisCodePoints): array
{
$emojis = json_decode((new Filesystem())->readFile(__DIR__.'/vendor/github-emojis.json'), true, flags: JSON_THROW_ON_ERROR);

$ignored = [];
$maps = [];

foreach ($emojis as $shortCode => $url) {
$emojiCodePoints = str_replace('-', ' ', strtolower(basename(parse_url($url, \PHP_URL_PATH), '.png')));
$emojiCodePoints = strtoupper(basename(parse_url($url, \PHP_URL_PATH), '.png'));

if (!array_key_exists($emojiCodePoints, $emojisCodePoints)) {
$ignored[] = [
'emojiCodePoints' => $emojiCodePoints,
'shortCode' => $shortCode,
];
continue;
}
$emoji = $emojisCodePoints[$emojiCodePoints];
if (!self::testEmoji($emoji, 'github', $emojiCodePoints)) {
continue;
}
$codePointsCount = mb_strlen($emoji);
$maps[$codePointsCount][$emoji] = ":$shortCode:";
$emojiPriority = mb_strlen($emoji) << 1;
$maps[$emojiPriority + 1][":$shortCode:"] = $emoji;
}

$maps = self::createRules($maps);

return ['emoji-github' => $maps, 'github-emoji' => array_flip($maps)];
return $maps;
}

public static function buildGitlabRules(array $emojisCodePoints): iterable
public static function buildGitlabMaps(array $emojisCodePoints): array
{
$emojis = json_decode((new Filesystem())->readFile(__DIR__.'/vendor/gitlab-emojis.json'), true, flags: JSON_THROW_ON_ERROR);

$ignored = [];
$maps = [];

foreach ($emojis as $emojiItem) {
$emojiCodePoints = strtolower($emojiItem['unicode']);
if (!array_key_exists($emojiCodePoints, $emojisCodePoints)) {
$ignored[] = [
'emojiCodePoints' => $emojiCodePoints,
'name' => $emojiItem['name'],
];
continue;
}
$emoji = $emojisCodePoints[$emojiCodePoints];
if (!self::testEmoji($emoji, 'gitlab', $emojiCodePoints)) {
continue;
$emoji = $emojiItem['moji'];
$emojiPriority = mb_strlen($emoji) << 1;
$maps[$emojiPriority + 1][$emojiItem['shortname']] = $emoji;

foreach ($emojiItem['aliases'] as $alias) {
$maps[$emojiPriority][$alias] = $emoji;
}
$codePointsCount = mb_strlen($emoji);
$maps[$codePointsCount][$emoji] = $emojiItem['shortname'];
}

$maps = self::createRules($maps);

return ['emoji-gitlab' => $maps, 'gitlab-emoji' => array_flip($maps)];
return $maps;
}

public static function buildSlackRules(array $emojisCodePoints): iterable
public static function buildSlackMaps(array $emojisCodePoints): array
{
$emojis = json_decode((new Filesystem())->readFile(__DIR__.'/vendor/slack-emojis.json'), true, flags: JSON_THROW_ON_ERROR);

$ignored = [];
$emojiSlackMaps = [];
$slackEmojiMaps = [];
$maps = [];

foreach ($emojis as $data) {
$emojiCodePoints = str_replace('-', ' ', strtolower($data['unified']));
$shortCode = $data['short_name'];
$shortCodes = $data['short_names'];
$shortCodes = array_map(fn ($v) => ":$v:", $shortCodes);
$emoji = $emojisCodePoints[$data['unified']];
$emojiPriority = mb_strlen($emoji) << 1;
$maps[$emojiPriority + 1][":{$data['short_name']}:"] = $emoji;

if (!array_key_exists($emojiCodePoints, $emojisCodePoints)) {
$ignored[] = [
'emojiCodePoints' => $emojiCodePoints,
'shortCode' => $shortCode,
];
continue;
foreach ($data['short_names'] as $shortName) {
$maps[$emojiPriority][":$shortName:"] = $emoji;
}
$emoji = $emojisCodePoints[$emojiCodePoints];
if (!self::testEmoji($emoji, 'slack', $emojiCodePoints)) {
continue;
}
$codePointsCount = mb_strlen($emoji);
$emojiSlackMaps[$codePointsCount][$emoji] = ":$shortCode:";
foreach ($shortCodes as $short_name) {
$slackEmojiMaps[$codePointsCount][$short_name] = $emoji;
}

return $maps;
}

/**
 * Builds the rules for the "emoji-text" and "text-emoji" ids.
 *
 * The maps returned by each build{Locale}Maps() method are merged per priority key,
 * with every "_" in a text key replaced by "-". Because of `??=`, the first locale
 * that defines a given text under a given priority wins; later ones do not overwrite it.
 *
 * @param array $emojiCodePoints passed through unchanged to each build{Locale}Maps() method
 * @param array $locales         names used to resolve the build{Locale}Maps() methods
 *
 * @return iterable rules keyed by 'emoji-text' and 'text-emoji' (see the review note below)
 */
public static function buildTextRules(array $emojiCodePoints, array $locales): iterable
{
$maps = [];

foreach ($locales as $locale) {
foreach (self::{"build{$locale}Maps"}($emojiCodePoints) as $emojiPriority => $map) {
foreach ($map as $text => $emoji) {
// Normalise underscores to hyphens; keep the first emoji seen for a given text.
$maps[$emojiPriority][str_replace('_', '-', $text)] ??= $emoji;
}
}
}

// NOTE(review): the next return references $emojiSlackMaps / $slackEmojiMaps, which are
// never defined in this method, and it makes the statements after it unreachable. It looks
// like a removed line carried over from the diff rendering - confirm against the real file.
return ['emoji-slack' => self::createRules($emojiSlackMaps), 'slack-emoji' => self::createRules($slackEmojiMaps)];
// createRules() with $reverse = true yields a two-element array, destructured here.
[$map, $reverse] = self::createRules($maps, true);

return ['emoji-text' => $map, 'text-emoji' => $reverse];
}

public static function buildStripRules(array $emojisCodePoints): iterable
{
$maps = [];
foreach ($emojisCodePoints as $codePoints => $emoji) {
if (!self::testEmoji($emoji, 'strip', $codePoints)) {
continue;
}
$codePointsCount = mb_strlen($emoji);
$maps[$codePointsCount][$emoji] = '';
foreach ($emojisCodePoints as $emoji) {
$maps[mb_strlen($emoji)][$emoji] = '';
}

return ['emoji-strip' => self::createRules($maps)];
Expand Down Expand Up @@ -269,24 +241,26 @@ public static function saveRules(iterable $rulesByLocale): void
$fs->dumpFile($file, preg_replace('/QUICK_CHECK = .*;/m', "QUICK_CHECK = {$quickCheck};", $fs->readFile($file)));
}

private static function testEmoji(string $emoji, string $locale, string $codePoints): bool
public static function createRules(array $maps, bool $reverse = false): array
{
if (!Transliterator::createFromRules("\\$emoji > test ;")) {
printf('Could not create transliterator for "%s" in "%s" locale. Code Point: "%s". Error: "%s".'."\n", $emoji, $locale, $codePoints, intl_get_error_message());
// We must sort the maps by the number of code points, because the order really matters:
// 🫶🏼 must be before 🫶
krsort($maps);

return false;
if (!$reverse) {
return array_merge(...$maps);
}

return true;
}
$emojiText = $textEmoji = [];

private static function createRules(array $maps): array
{
// We must sort the maps by the number of code points, because the order really matters:
// 🫶🏼 must be before 🫶
krsort($maps);
$maps = array_merge(...$maps);
foreach ($maps as $map) {
uksort($map, static fn ($a, $b) => strnatcmp(substr($a, 1, -1), substr($b, 1, -1)));
$textEmoji = array_merge($map, $textEmoji);

return $maps;
$map = array_flip($map);
$emojiText += $map;
}

return [$emojiText, $textEmoji];
}
}
1 change: 1 addition & 0 deletions Resources/data/emoji-af.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'gesin: vrou en meisie',
'😶‍🌫' => 'gesig in wolke',
'😮‍💨' => 'gesig asem uit',
'🙂‍↔' => 'kop wat horisontaal skud',
'🙂‍↕' => 'kop wat vertikaal skud',
'😵‍💫' => 'gesig met spiraaloë',
'❤‍🔥' => 'hart wat brand',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-am.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'ቤተሰብ: ሴት፣ ልጃገረድ',
'😶‍🌫' => 'ደመናማ ገጽ',
'😮‍💨' => 'ወደ ውጭ የሚተነፍስ ገጽ',
'🙂‍↔' => 'ራስ መንቀጥቀጥ አግድሞሽ',
'🙂‍↕' => 'ራስ መንቀጥቀጥ አቀባዊ',
'😵‍💫' => 'የዞሩ ዓይኖች ያለው ፊት',
'❤‍🔥' => 'በበስጭት ላይ ያለ ልብ',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-ar.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'أسرة: سيدة وفتاة',
'😶‍🌫' => 'وجه في الغيوم',
'😮‍💨' => 'وجه يزفر الهواء',
'🙂‍↔' => 'رأس يهتز أفقياً',
'🙂‍↕' => 'رأس يهتز عمودياً',
'😵‍💫' => 'وجه بعينين دائختين',
'❤‍🔥' => 'قلب يحترق',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-ar_sa.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'أسرة: سيدة وفتاة',
'😶‍🌫' => 'وجه في الغيوم',
'😮‍💨' => 'وجه يزفر الهواء',
'🙂‍↔' => 'رأس يهتز أفقياً',
'🙂‍↕' => 'رأس يهتز عمودياً',
'😵‍💫' => 'وجه بعينين دائختين',
'❤‍🔥' => 'قلب يحترق',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-as.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'পৰিয়াল: তিৰোতা, ছোৱালী',
'😶‍🌫' => 'মেঘত মুখ',
'😮‍💨' => 'নিশাহ এৰা মুখ',
'🙂‍↔' => 'অনুভূমিকভাৱে মূৰ জোকাৰি থকা',
'🙂‍↕' => 'উলম্বভাৱে মূৰ জোকাৰি থকা',
'😵‍💫' => 'ঘূৰি থকা চকু থকা মুখ',
'❤‍🔥' => 'জুইত থকা হৃদয়',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-az.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'ailə: qadın, qız uşağı',
'😶‍🌫' => 'buludlu üz',
'😮‍💨' => 'nəfəs alan üz',
'🙂‍↔' => 'başı üfüqi silkələmək',
'🙂‍↕' => 'başı şaquli silkələmək',
'😵‍💫' => 'spiral gözlü üz',
'❤‍🔥' => 'alovlu ürək',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-be.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'сям’я: жанчына дзяўчынка',
'😶‍🌫' => 'твар у аблоках',
'😮‍💨' => 'твар выдыхае',
'🙂‍↔' => 'гарызантальна круціць галавой',
'🙂‍↕' => 'вертыкальна ківае галавой',
'😵‍💫' => 'твар з вачыма-спіралямі',
'❤‍🔥' => 'сэрца ў агні',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-bg.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'семейство: жена и момиче',
'😶‍🌫' => 'лице в облаци',
'😮‍💨' => 'издишващо лице',
'🙂‍↔' => 'глава, която прави хоризонтални движения',
'🙂‍↕' => 'глава, която прави вертикални движения',
'😵‍💫' => 'лице със спираловидни очи',
'❤‍🔥' => 'сърце в пламъци',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-bn.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'পরিবার: মহিলা, মেয়ে',
'😶‍🌫' => 'মেঘে মুখ',
'😮‍💨' => 'নিশ্বাস ছাড়া মুখ',
'🙂‍↔' => 'এপাশ ওপাশ মাথা নাড়ানো',
'🙂‍↕' => 'উপর নীচে মাথা নাড়ানো',
'😵‍💫' => 'চোখ পাকানো মুখ',
'❤‍🔥' => 'আগুনে হৃদয়',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-bs.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'porodica: žena i djevojčica',
'😶‍🌫' => 'lice u oblacima',
'😮‍💨' => 'lice izdiše',
'🙂‍↔' => 'vrti glavom lijevo desno',
'🙂‍↕' => 'klima glavom',
'😵‍💫' => 'lice sa sprialnim očima',
'❤‍🔥' => 'zapaljeno srce',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-ca.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'família: dona i noia',
'😶‍🌫' => 'cara entre núvols',
'😮‍💨' => 'cara que exhala',
'🙂‍↔' => 'cap que tremola horitzontalment',
'🙂‍↕' => 'cap que tremola verticalment',
'😵‍💫' => 'cara amb ulls d’espiral',
'❤‍🔥' => 'cor en flames',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-chr.php
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,7 @@
'👩‍👧' => 'ᏏᏓᏁᎸᎢ: ᎠᎨᏯ, ᎠᎨᏳᏣ',
'😶‍🌫' => 'ᎤᎧᏛ ᎭᏫᏂ ᏧᎶᎩᎵ',
'😮‍💨' => 'ᎤᎧᏛ ᎤᏓᏅᏖᏙᏗ',
'🙂‍↔' => 'ᎠᎵᏍᏛᏂᎭ',
'🙂‍↕' => 'ᎤᏍᎫᏴᎢ',
'😵‍💫' => 'ᎤᎧᏛ ᏗᎩᏙᎵ ᎠᎦᎷᎦ ᎢᏳᏍᏗ',
'❤‍🔥' => 'ᎤᎾᏫ ᎠᎪᎲᏍᏗᏍᎪᎢ',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-cs.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'rodina: žena, dívka',
'😶‍🌫' => 'obličej v oblacích',
'😮‍💨' => 'vydechující obličej',
'🙂‍↔' => 'kroucení hlavou ze strany na stranu',
'🙂‍↕' => 'kývání hlavou nahoru a dolů',
'😵‍💫' => 'obličej se spirálami místo očí',
'❤‍🔥' => 'hořící srdce',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-cy.php
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@
'👩‍👧' => 'teulu: menyw, merch',
'😶‍🌫' => 'wyneb mewn cymylau',
'😮‍💨' => 'wyneb anadlu allan',
'🙂‍↔' => 'pen yn ysgwyd o ochr i ochr',
'🙂‍↕' => 'pen yn ysgwyd i fyny ac i lawr',
'😵‍💫' => 'wyneb â llygaid troellog',
'❤‍🔥' => 'calon ar dân',
Expand Down
1 change: 1 addition & 0 deletions Resources/data/emoji-da.php
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,7 @@
'👩‍👧' => 'familie: kvinde og pige',
'😶‍🌫' => 'ansigt i sky',
'😮‍💨' => 'udåndende ansigt',
'🙂‍↔' => 'ryster på hovedet vandret',
'🙂‍↕' => 'ryster på hovedet lodret',
'😵‍💫' => 'ansigt med spiraløjne',
'❤‍🔥' => 'brændende hjerte',
Expand Down
Loading

0 comments on commit 93c2e4b

Please sign in to comment.