Message ID | 20230721141101.3337118-1-colin@colino.net |
---|---|
State | New |
Headers | show |
Series | [v2] localedata: Translit common emojis to smileys [BZ #30649] | expand |
Hi folks, Ping :) I'd love a review on this! Thanks! > Add common emojis to the translit-able characters (mostly > faces and hearts), and translit them to old-fashioned > smileys. > > Author: Colin Leroy-Mira <colin@colino.net> > Signed-off-by: Colin Leroy-Mira <colin@colino.net> > --- > v2: Fix a wrong smiley, add unit test > localedata/Makefile | 3 + > localedata/locales/translit_emojis | 91 ++++++++++++++++++++ > localedata/locales/translit_neutral | 1 + > localedata/tst-iconv-emojis-trans.c | 124 ++++++++++++++++++++++++++++ > 4 files changed, 219 insertions(+) > create mode 100644 localedata/locales/translit_emojis > create mode 100644 localedata/tst-iconv-emojis-trans.c > > diff --git a/localedata/Makefile b/localedata/Makefile > index 3619b6d47e..5b6d10e33f 100644 > --- a/localedata/Makefile > +++ b/localedata/Makefile > @@ -164,6 +164,7 @@ tests = \ > bug-usesetlocale \ > tst-c-utf8-consistency \ > tst-digits \ > + tst-iconv-emojis-trans \ > tst-iconv-math-trans \ > tst-leaks \ > tst-mbswcs1 \ > @@ -320,6 +321,8 @@ LOCALES := \ > > include ../gen-locales.mk > > +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) > + > $(objpfx)tst-iconv-math-trans.out: $(gen-locales) > endif > > diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis > new file mode 100644 > index 0000000000..260aeedc35 > --- /dev/null > +++ b/localedata/locales/translit_emojis > @@ -0,0 +1,91 @@ > +escape_char / > +comment_char % > + > +% This file is part of the GNU C Library and contains locale data. > +% The Free Software Foundation does not claim any copyright interest > +% in the locale data contained in this file. The foregoing does not > +% affect the license of the GNU C Library as a whole. It does not > +% exempt you from the conditions of the license if your use would > +% otherwise be governed by that license. > + > +% Transliterations of emojis to ASCII smileys. > +% Generated algorithmically. 
> + > +LC_CTYPE > + > +translit_start > + > +<U2661> "<U003C><U0033>" % WHITE HEART SUIT > +<U2665> "<U003C><U0033>" % BLACK HEART SUIT > +<U2764> "<U003C><U0033>" % HEAVY BLACK HEART > +<U0001F499> "<U003C><U0033>" % BLUE HEART > +<U0001F493> "<U003C><U0033>" % BEATING HEART > +<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART > +<U0001F496> "<U003C><U0033>" % SPARKLING HEART > +<U0001F497> "<U003C><U0033>" % GROWING HEART > +<U0001F49A> "<U003C><U0033>" % GREEN HEART > +<U0001F49B> "<U003C><U0033>" % YELLOW HEART > +<U0001F49C> "<U003C><U0033>" % PURPLE HEART > +<U0001F5A4> "<U003C><U0033>" % BLACK HEART > +<U0001F9E1> "<U003C><U0033>" % ORANGE HEART > +<U0001F90D> "<U003C><U0033>" % WHITE HEART > +<U0001F90E> "<U003C><U0033>" % BROWN HEART > +<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE > +<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES > +<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY > +<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. 
☺) > +<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES > +<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT > +<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES > +<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO > +<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS > +<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE > +<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES > +<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD > +<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE > +<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES > +<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES > +<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE > +<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE > +<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE > +<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE > +<U0001F613> "<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT > +<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE > +<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE > +<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE > +<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE > +<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS > +<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES > +<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES > +<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE > +<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE > +<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES > +<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE > +<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE > +<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE > +<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE > 
+<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE > +<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE > +<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH > +<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE > +<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE > +<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE > +<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE > +<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH > +<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE > +<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT > +<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR > +<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE > +<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES > +<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY > +<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH > +<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPE EYES > +<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE > +<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES > +<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE > +<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE > +<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE > + > +translit_end > + > +END LC_CTYPE > diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral > index 72f66220b7..57412ae565 100644 > --- a/localedata/locales/translit_neutral > +++ b/localedata/locales/translit_neutral > @@ -17,6 +17,7 @@ translit_start > include "translit_circle";"" > include "translit_cjk_compat";"" > include "translit_compat";"" > +include "translit_emojis";"" > include "translit_font";"" > include "translit_fraction";"" > include "translit_narrow";"" > diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c > new file mode 100644 > index 
0000000000..89a32074d5 > --- /dev/null > +++ b/localedata/tst-iconv-emojis-trans.c > @@ -0,0 +1,124 @@ > +/* Test some emoji transliterations > + > + Copyright (C) 2019-2023 Free Software Foundation, Inc. > + This file is part of the GNU C Library. > + > + The GNU C Library is free software; you can redistribute it and/or > + modify it under the terms of the GNU Lesser General Public > + License as published by the Free Software Foundation; either > + version 2.1 of the License, or (at your option) any later version. > + > + The GNU C Library is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + Lesser General Public License for more details. > + > + You should have received a copy of the GNU Lesser General Public > + License along with the GNU C Library; if not, see > + <https://www.gnu.org/licenses/>. */ > + > +#include <iconv.h> > +#include <locale.h> > +#include <stdio.h> > +#include <string.h> > +#include <support/check.h> > + > +static int > +do_test (void) > +{ > + iconv_t cd; > + > + const int num_emojis = 70; > + > + const char str[] = "\u2661 \u2665 \u2764 \U0001F499 " > + "\U0001F493 \U0001F494 \U0001F496 " > + "\U0001F497 \U0001F49A \U0001F49B " > + "\U0001F49C \U0001F5A4 \U0001F9E1 " > + "\U0001F90D \U0001F90E \U0001F600 " > + "\U0001F601 \U0001F602 \U0001F603 " > + "\U0001F604 \U0001F605 \U0001F606 " > + "\U0001F607 \U0001F608 \U0001F609 " > + "\U0001F60A \U0001F60B \U0001F60C " > + "\U0001F60D \U0001F60E \U0001F60F " > + "\U0001F610 \U0001F611 \U0001F612 " > + "\U0001F613 \U0001F614 \U0001F615 " > + "\U0001F616 \U0001F617 \U0001F618 " > + "\U0001F619 \U0001F61A \U0001F61B " > + "\U0001F61C \U0001F61D \U0001F61E " > + "\U0001F61F \U0001F620 \U0001F621 " > + "\U0001F622 \U0001F623 \U0001F626 " > + "\U0001F627 \U0001F628 \U0001F629 " > + "\U0001F62D \U0001F62E \U0001F62F " > + "\U0001F630 \U0001F631 \U0001F632 " > + 
"\U0001F638 \U0001F639 \U0001F63A " > + "\U0001F63B \U0001F63C \U0001F63D " > + "\U0001F641 \U0001F642 \U0001F643"; > + > + const char expected[] = "<3 <3 <3 <3 <3 " > + "</3 <3 <3 <3 <3 " > + "<3 <3 <3 <3 <3 " > + ":-D :-D :'D :-D :-D " > + ":-D :-D O:-) >:) ;-) " > + ":-) :-P :-) :-* B-) " > + ";-) :-| :-| :-| :'-| " > + ":-| :-/ :-S :-* :-* " > + ":-* :-* :-P ;-P X-P " > + ":-( :-( >:-( :-( :'-( " > + "X-( :-O :-O :-O :-O " > + ":\"-( :-O :-O :'-O :-O " > + ":-O :-3 :'-3 :-3 :-3 " > + ";-3 :-3 :-( :-) (-:"; > + > + char *inptr = (char *) str; > + size_t inlen = strlen (str) + 1; > + char outbuf[500]; > + char *outptr = outbuf; > + size_t outlen = sizeof (outbuf); > + int result = 0; > + size_t n; > + > + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) > + FAIL_EXIT1 ("setlocale failed"); > + > + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); > + if (cd == (iconv_t) -1) > + FAIL_EXIT1 ("iconv_open failed"); > + > + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); > + if (n != num_emojis) > + { > + if (n == (size_t) -1) > + printf ("iconv() returned error: %m\n"); > + else > + printf ("iconv() returned %zd, expected %d\n", n, num_emojis); > + result = 1; > + } > + if (inlen != 0) > + { > + puts ("not all input consumed"); > + result = 1; > + } > + else if (inptr - str != strlen (str) + 1) > + { > + printf ("inptr wrong, advanced by %td\n", inptr - str); > + result = 1; > + } > + if (memcmp (outbuf, expected, sizeof (expected)) != 0) > + { > + printf ("result wrong: \"%.*s\", expected: \"%s\"\n", > + (int) (sizeof (outbuf) - outlen), outbuf, expected); > + result = 1; > + } > + else if (outlen != sizeof (outbuf) - sizeof (expected)) > + { > + printf ("outlen wrong: %zd, expected %zd\n", outlen, > + sizeof (outbuf) - sizeof (expected)); > + result = 1; > + } > + else > + printf ("output is \"%s\" which is OK\n", outbuf); > + > + return result; > +} > + > +#include <support/test-driver.c>
* Colin Leroy-Mira via Libc-alpha: > +♡ "/<3" % WHITE HEART SUIT > +♥ "/<3" % BLACK HEART SUIT > +❤ "/<3" % HEAVY BLACK HEART > +💙 "/<3" % BLUE HEART > +💓 "/<3" % BEATING HEART > +💔 "/<//3" % BROKEN HEART > +💖 "/<3" % SPARKLING HEART > +💗 "/<3" % GROWING HEART > +💚 "/<3" % GREEN HEART > +💛 "/<3" % YELLOW HEART > +💜 "/<3" % PURPLE HEART > +🖤 "/<3" % BLACK HEART > +🧡 "/<3" % ORANGE HEART > +🤍 "/<3" % WHITE HEART > +🤎 "/<3" % BROWN HEART > +😀 ":-D" % GRINNING FACE > +😁 ":-D" % GRINNING FACE WITH SMILING EYES > +😂 ":'D" % FACE WITH TEARS OF JOY > +😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺) > +😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES > +😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT > +😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES > +😇 "O:-)" % SMILING FACE WITH HALO > +😈 "/>:)" % SMILING FACE WITH HORNS > +😉 ";-)" % WINKING FACE > +😊 ":-)" % SMILING FACE WITH SMILING EYES > +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD > +😌 ":-)" % RELIEVED FACE > +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES > +😎 "B-)" % SMILING FACE WITH SUNGLASSES > +😏 ";-)" % SMIRKING FACE > +😐 ":-|" % NEUTRAL FACE > +😑 ":-|" % EXPRESSIONLESS FACE > +😒 ":-|" % UNAMUSED FACE > +😓 ":'-|" % FACE WITH COLD SWEAT > +😔 ":-|" % PENSIVE FACE > +😕 ":-//" % CONFUSED FACE > +😖 ":-S" % CONFOUNDED FACE > +😗 ":-*" % KISSING FACE > +😘 ":-*" % FACE THROWING A KISS > +😙 ":-*" % KISSING FACE WITH SMILING EYES > +😚 ":-*" % KISSING FACE WITH CLOSED EYES > +😛 ":-P" % FACE WITH STUCK-OUT TONGUE > +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE > +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES > +😞 ":-(" % DISAPPOINTED FACE > +😟 ":-(" % WORRIED FACE > +😠 "/>:-(" % ANGRY FACE > +😡 ":-(" % POUTING FACE > +😢 ":'-(" % CRYING FACE > +😣 "X-(" % PERSEVERING FACE > +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH > +😧 ":-O" % ANGUISHED FACE > +😨 ":-O" % FEARFUL FACE > +😩 ":-O" % WEARY FACE > +😭 ":<U0022>-(" % LOUDLY CRYING FACE > +😮 ":-O" % FACE WITH OPEN 
MOUTH > +😯 ":-O" % HUSHED FACE > +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT > +😱 ":-O" % FACE SCREAMING IN FEAR > +😲 ":-O" % ASTONISHED FACE > +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES > +😹 ":'-3" % CAT FACE WITH TEARS OF JOY > +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH > +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES > +😼 ";-3" % CAT FACE WITH WRY SMILE > +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES > +🙁 ":-(" % SLIGHTLY FROWNING FACE > +🙂 ":-)" % SLIGHTLY SMILING FACE > +🙃 "(-:" % UPSIDE-DOWN FACE Nowadays, you can use UTF-8 directly. It makes review much easier. The extra / above are escape characters. The transliterations seem mostly reasonable to me. Thanks, Florian
August 8, 2023 at 11:20 AM, "Florian Weimer" <fweimer@redhat.com> wrote: Hi, > Nowadays, you can use UTF-8 directly. It makes review much easier. The > extra / above are escape characters. Can I? I have seen no example of this in the code, and when I tried, the patch did not reach Patchwork, and Adhemerval suggested I resend it without UTF-8. > The transliterations seem mostly reasonable to me. Thanks!
* Colin Leroy-Mira via Libc-alpha: >> Nowadays, you can use UTF-8 directly. It makes review much easier. The >> extra / above are escape characters. > > Can I? I have seen no example of this in the code, and when I tried, > the patch did not reach Patchwork, and Adhemerval suggested I resend > it without UTF-8. Patchwork is optional. As long as Mailman distributes it, we should be fine. Patchwork bugs can be fixed (although whether this is a good use of our time is … debatable). Thanks, Florian
diff --git a/localedata/Makefile b/localedata/Makefile index 3619b6d47e..5b6d10e33f 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -164,6 +164,7 @@ tests = \ bug-usesetlocale \ tst-c-utf8-consistency \ tst-digits \ + tst-iconv-emojis-trans \ tst-iconv-math-trans \ tst-leaks \ tst-mbswcs1 \ @@ -320,6 +321,8 @@ LOCALES := \ include ../gen-locales.mk +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) + $(objpfx)tst-iconv-math-trans.out: $(gen-locales) endif diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis new file mode 100644 index 0000000000..260aeedc35 --- /dev/null +++ b/localedata/locales/translit_emojis @@ -0,0 +1,91 @@ +escape_char / +comment_char % + +% This file is part of the GNU C Library and contains locale data. +% The Free Software Foundation does not claim any copyright interest +% in the locale data contained in this file. The foregoing does not +% affect the license of the GNU C Library as a whole. It does not +% exempt you from the conditions of the license if your use would +% otherwise be governed by that license. + +% Transliterations of emojis to ASCII smileys. +% Generated algorithmically. 
+ +LC_CTYPE + +translit_start + +<U2661> "<U003C><U0033>" % WHITE HEART SUIT +<U2665> "<U003C><U0033>" % BLACK HEART SUIT +<U2764> "<U003C><U0033>" % HEAVY BLACK HEART +<U0001F499> "<U003C><U0033>" % BLUE HEART +<U0001F493> "<U003C><U0033>" % BEATING HEART +<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART +<U0001F496> "<U003C><U0033>" % SPARKLING HEART +<U0001F497> "<U003C><U0033>" % GROWING HEART +<U0001F49A> "<U003C><U0033>" % GREEN HEART +<U0001F49B> "<U003C><U0033>" % YELLOW HEART +<U0001F49C> "<U003C><U0033>" % PURPLE HEART +<U0001F5A4> "<U003C><U0033>" % BLACK HEART +<U0001F9E1> "<U003C><U0033>" % ORANGE HEART +<U0001F90D> "<U003C><U0033>" % WHITE HEART +<U0001F90E> "<U003C><U0033>" % BROWN HEART +<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE +<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES +<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY +<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. ☺) +<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES +<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT +<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES +<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO +<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS +<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE +<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES +<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD +<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE +<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES +<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES +<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE +<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE +<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE +<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE +<U0001F613> 
"<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT +<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE +<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE +<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE +<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE +<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS +<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES +<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES +<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE +<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE +<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES +<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE +<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE +<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE +<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE +<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE +<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE +<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH +<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE +<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE +<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE +<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE +<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH +<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE +<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT +<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR +<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE +<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES +<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY +<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH +<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPE EYES +<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE 
+<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES +<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE +<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE +<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE + +translit_end + +END LC_CTYPE diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral index 72f66220b7..57412ae565 100644 --- a/localedata/locales/translit_neutral +++ b/localedata/locales/translit_neutral @@ -17,6 +17,7 @@ translit_start include "translit_circle";"" include "translit_cjk_compat";"" include "translit_compat";"" +include "translit_emojis";"" include "translit_font";"" include "translit_fraction";"" include "translit_narrow";"" diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c new file mode 100644 index 0000000000..89a32074d5 --- /dev/null +++ b/localedata/tst-iconv-emojis-trans.c @@ -0,0 +1,124 @@ +/* Test some emoji transliterations + + Copyright (C) 2019-2023 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <iconv.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <support/check.h> + +static int +do_test (void) +{ + iconv_t cd; + + const int num_emojis = 70; + + const char str[] = "\u2661 \u2665 \u2764 \U0001F499 " + "\U0001F493 \U0001F494 \U0001F496 " + "\U0001F497 \U0001F49A \U0001F49B " + "\U0001F49C \U0001F5A4 \U0001F9E1 " + "\U0001F90D \U0001F90E \U0001F600 " + "\U0001F601 \U0001F602 \U0001F603 " + "\U0001F604 \U0001F605 \U0001F606 " + "\U0001F607 \U0001F608 \U0001F609 " + "\U0001F60A \U0001F60B \U0001F60C " + "\U0001F60D \U0001F60E \U0001F60F " + "\U0001F610 \U0001F611 \U0001F612 " + "\U0001F613 \U0001F614 \U0001F615 " + "\U0001F616 \U0001F617 \U0001F618 " + "\U0001F619 \U0001F61A \U0001F61B " + "\U0001F61C \U0001F61D \U0001F61E " + "\U0001F61F \U0001F620 \U0001F621 " + "\U0001F622 \U0001F623 \U0001F626 " + "\U0001F627 \U0001F628 \U0001F629 " + "\U0001F62D \U0001F62E \U0001F62F " + "\U0001F630 \U0001F631 \U0001F632 " + "\U0001F638 \U0001F639 \U0001F63A " + "\U0001F63B \U0001F63C \U0001F63D " + "\U0001F641 \U0001F642 \U0001F643"; + + const char expected[] = "<3 <3 <3 <3 <3 " + "</3 <3 <3 <3 <3 " + "<3 <3 <3 <3 <3 " + ":-D :-D :'D :-D :-D " + ":-D :-D O:-) >:) ;-) " + ":-) :-P :-) :-* B-) " + ";-) :-| :-| :-| :'-| " + ":-| :-/ :-S :-* :-* " + ":-* :-* :-P ;-P X-P " + ":-( :-( >:-( :-( :'-( " + "X-( :-O :-O :-O :-O " + ":\"-( :-O :-O :'-O :-O " + ":-O :-3 :'-3 :-3 :-3 " + ";-3 :-3 :-( :-) (-:"; + + char *inptr = (char *) str; + size_t inlen = strlen (str) + 1; + char outbuf[500]; + char *outptr = outbuf; + size_t outlen = sizeof (outbuf); + int result = 0; + size_t n; + + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + FAIL_EXIT1 ("setlocale failed"); + + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + if (cd == (iconv_t) -1) + FAIL_EXIT1 ("iconv_open failed"); + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (n != num_emojis) + { + if (n == (size_t) -1) + printf ("iconv() returned error: %m\n"); + else + printf 
("iconv() returned %zd, expected %d\n", n, num_emojis); + result = 1; + } + if (inlen != 0) + { + puts ("not all input consumed"); + result = 1; + } + else if (inptr - str != strlen (str) + 1) + { + printf ("inptr wrong, advanced by %td\n", inptr - str); + result = 1; + } + if (memcmp (outbuf, expected, sizeof (expected)) != 0) + { + printf ("result wrong: \"%.*s\", expected: \"%s\"\n", + (int) (sizeof (outbuf) - outlen), outbuf, expected); + result = 1; + } + else if (outlen != sizeof (outbuf) - sizeof (expected)) + { + printf ("outlen wrong: %zd, expected %zd\n", outlen, + sizeof (outbuf) - sizeof (expected)); + result = 1; + } + else + printf ("output is \"%s\" which is OK\n", outbuf); + + return result; +} + +#include <support/test-driver.c>
Add common emojis to the translit-able characters (mostly faces and hearts), and translit them to old-fashioned smileys. Author: Colin Leroy-Mira <colin@colino.net> Signed-off-by: Colin Leroy-Mira <colin@colino.net> --- v2: Fix a wrong smiley, add unit test localedata/Makefile | 3 + localedata/locales/translit_emojis | 91 ++++++++++++++++++++ localedata/locales/translit_neutral | 1 + localedata/tst-iconv-emojis-trans.c | 124 ++++++++++++++++++++++++++++ 4 files changed, 219 insertions(+) create mode 100644 localedata/locales/translit_emojis create mode 100644 localedata/tst-iconv-emojis-trans.c