From e91c0ce377731c90d3b0aea4786737343f4b061a Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Thu, 12 Apr 2001 12:55:41 +0000
Subject: [PATCH] Add UTF-32 encodings.

---
 ChangeLog                    |  15 +++++++
 NEWS                         |   5 +++
 NOTES                        |   2 +
 README                       |   1 +
 lib/converters.h             |   3 ++
 lib/encodings.def            |  15 +++++++
 lib/ucs4.h                   |  10 ++---
 lib/utf32.h                  |  91 +++++++++++++++++++++++++++++++++++++++++++
 lib/{ucs4.h => utf32be.h}    |  42 +++++++-------------
 lib/utf32le.h                |  55 ++++++++++++++++++++++++++
 man/iconv_open.3             |   3 +-
 tests/Makefile.in            |   3 ++
 tests/Makefile.msvc          |   3 ++
 tests/Makefile.os2           |   3 ++
 tests/UTF-32-snippet         | Bin 0 -> 1068 bytes
 tests/UTF-32-snippet.UTF-8   |   6 +++
 tests/UTF-32BE-snippet       | Bin 0 -> 1064 bytes
 tests/UTF-32BE-snippet.UTF-8 |   6 +++
 tests/UTF-32LE-snippet       | Bin 0 -> 1064 bytes
 tests/UTF-32LE-snippet.UTF-8 |   6 +++
 20 files changed, 236 insertions(+), 33 deletions(-)
 create mode 100644 lib/utf32.h
 copy lib/{ucs4.h => utf32be.h} (52%)
 create mode 100644 lib/utf32le.h
 create mode 100644 tests/UTF-32-snippet
 create mode 100644 tests/UTF-32-snippet.UTF-8
 create mode 100644 tests/UTF-32BE-snippet
 create mode 100644 tests/UTF-32BE-snippet.UTF-8
 create mode 100644 tests/UTF-32LE-snippet
 create mode 100644 tests/UTF-32LE-snippet.UTF-8

diff --git a/ChangeLog b/ChangeLog
index 4153ea3..1398539 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,20 @@
 2001-04-11  Bruno Haible  <haible@clisp.cons.org>
 
+        Implement and document UTF-32, UTF-32BE, UTF-32LE.
+        * lib/utf32.h, lib/utf32be.h, lib/utf32le.h: New files.
+        * lib/converters.h: Include them.
+        * lib/encodings.def (UTF-32, UTF-32BE, UTF-32LE): New encodings.
+        * README, man/iconv_open.3: Add UTF-32, UTF-32BE, UTF-32LE.
+        * tests/Makefile.in (check): Check UTF-32, UTF-32BE, UTF-32LE.
+        * tests/Makefile.os2 (check): Likewise.
+        * tests/Makefile.msvc (check): Likewise.
+        * tests/UTF-32*snippet*: New files.
+
+        * lib/ucs4.h (ucs4_mbtowc): Fix value of other-endian byte order.
+          (ucs4_wctomb): Allow any 31-bit codepoint.
+
+2001-04-11  Bruno Haible  <haible@clisp.cons.org>
+
         * tests/GB18030.TXT: Add mappings for all of U+0000..U+FFFF, including
           unassigned code points.
         * tests/table-from.c (main); When dumping GB18030, don't print code
diff --git a/NEWS b/NEWS
index 9e754d7..51c0157 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,8 @@
+New in 1.7:
+
+* Added UTF-32, UTF-32BE, UTF-32LE converters.
+* Fixed a bug in the byte order mark treatment of the UCS-4 decoder.
+
 New in 1.6:
 * The iconv program's -f and -t options are now optional.
 * Many more transliterations.
diff --git a/NOTES b/NOTES
index 2d8133d..47b2c51 100644
--- a/NOTES
+++ b/NOTES
@@ -290,6 +290,8 @@ A: libiconv, as an internationalization library, supports those character
        We implement these, because UTF-16 is still the favourite encoding of
        the president of the Unicode Consortium (for political reasons), and
        because they appear in RFC 2781.
+     * UTF-32, UTF-32BE, UTF-32LE
+       We implement these because they are part of Unicode 3.1.
      * UTF-7
        We implement this because it is essential functionality for mail
        applications.
diff --git a/README b/README
index 84e0023..93c0755 100644
--- a/README
+++ b/README
@@ -38,6 +38,7 @@ It provides support for the encodings:
         UCS-2, UCS-2BE, UCS-2LE
         UCS-4, UCS-4BE, UCS-4LE
         UTF-16, UTF-16BE, UTF-16LE
+        UTF-32, UTF-32BE, UTF-32LE
         UTF-7
         JAVA
     Full Unicode, in terms of `uint16_t' or `uint32_t'
diff --git a/lib/converters.h b/lib/converters.h
index 2d1ee51..3456724 100644
--- a/lib/converters.h
+++ b/lib/converters.h
@@ -107,6 +107,9 @@ struct conv_struct {
 #include "utf16.h"
 #include "utf16be.h"
 #include "utf16le.h"
+#include "utf32.h"
+#include "utf32be.h"
+#include "utf32le.h"
 #include "utf7.h"
 #include "ucs2internal.h"
 #include "ucs2swapped.h"
diff --git a/lib/encodings.def b/lib/encodings.def
index e6b64b8..60c4a93 100644
--- a/lib/encodings.def
+++ b/lib/encodings.def
@@ -112,6 +112,21 @@ DEFENCODING(( "UTF-16LE",               /* RFC 2781 */
             utf16le,
             { utf16le_mbtowc },           { utf16le_wctomb, NULL })
 
+DEFENCODING(( "UTF-32",                 /* Unicode 3.1 */
+            ),
+            utf32,
+            { utf32_mbtowc },             { utf32_wctomb, NULL })
+
+DEFENCODING(( "UTF-32BE",               /* Unicode 3.1 */
+            ),
+            utf32be,
+            { utf32be_mbtowc },           { utf32be_wctomb, NULL })
+
+DEFENCODING(( "UTF-32LE",               /* Unicode 3.1 */
+            ),
+            utf32le,
+            { utf32le_mbtowc },           { utf32le_wctomb, NULL })
+
 DEFENCODING(( "UTF-7",                  /* IANA, RFC 2152 */
               "UNICODE-1-1-UTF-7",      /* IANA, RFC 1642 */
               "csUnicode11UTF7",        /* IANA */
diff --git a/lib/ucs4.h b/lib/ucs4.h
index 927e994..e4f1c79 100644
--- a/lib/ucs4.h
+++ b/lib/ucs4.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1999-2000 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001 Free Software Foundation, Inc.
  * This file is part of the GNU LIBICONV Library.
  *
  * The GNU LIBICONV Library is free software; you can redistribute it
@@ -22,7 +22,7 @@
  * UCS-4
  */
 
-/* Here we accept 0000FFFE/0000FEFF marks as endianness indicators everywhere
+/* Here we accept FFFE0000/0000FEFF marks as endianness indicators everywhere
    in the stream, not just at the beginning. The default is big-endian. */
 /* The state is 0 if big-endian, 1 if little-endian. */
 static int
@@ -35,8 +35,8 @@ ucs4_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
                   ? s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24)
                   : (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]);
     s += 4; n -= 4; count += 4;
-    if (wc == 0xfeff) {
-    } else if (wc == 0xfffe) {
+    if (wc == 0x0000feff) {
+    } else if (wc == 0xfffe0000u) {
       state ^= 1;
     } else if (wc <= 0x7fffffff) {
       *pwc = wc;
@@ -53,7 +53,7 @@ ucs4_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
 static int
 ucs4_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
 {
-  if (wc != 0xfffe) {
+  if (wc <= 0x7fffffff) {
     if (n >= 4) {
       r[0] = (unsigned char) (wc >> 24);
       r[1] = (unsigned char) (wc >> 16);
diff --git a/lib/utf32.h b/lib/utf32.h
new file mode 100644
index 0000000..442a9a2
--- /dev/null
+++ b/lib/utf32.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * This file is part of the GNU LIBICONV Library.
+ *
+ * The GNU LIBICONV Library is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * The GNU LIBICONV Library is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
+ * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
+ * Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * UTF-32
+ */
+
+/* Specification: Unicode 3.1 Standard Annex #19 */
+
+/* Here we accept FFFE0000/0000FEFF marks as endianness indicators
+   everywhere in the stream, not just at the beginning. (This is contrary
+   to what #19 D36c specifies, but it allows concatenation of byte
+   sequences to work flawlessly, while disagreeing with #19 behaviour
+   only for strings containing U+FEFF characters, which is quite rare.)
+   The default is big-endian. */
+/* The state is 0 if big-endian, 1 if little-endian. */
+static int
+utf32_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
+{
+  state_t state = conv->istate;
+  int count = 0;
+  for (; n >= 4;) {
+    /* Cast before shifting by 24: a byte >= 0x80 promoted to int would
+       overflow, which is undefined (assumes ucs4_t is unsigned, >= 32 bits). */
+    ucs4_t wc = (state
+                  ? s[0] + (s[1] << 8) + (s[2] << 16) + ((ucs4_t) s[3] << 24)
+                  : ((ucs4_t) s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]);
+    count += 4;
+    if (wc == 0x0000feff) {
+      /* Byte-order mark in the current byte order: skip it. */
+    } else if (wc == 0xfffe0000u) {
+      state ^= 1; /* Byte-swapped mark: switch endianness. */
+    } else if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) {
+      *pwc = wc;
+      conv->istate = state;
+      return count; /* Includes the bytes of any skipped marks. */
+    } else
+      return RET_ILSEQ;
+    s += 4; n -= 4;
+  }
+  conv->istate = state;
+  return RET_TOOFEW(count);
+}
+
+/* We output UTF-32 in big-endian order, with byte-order mark. */
+/* The state is 0 at the beginning, 1 after the BOM has been written. */
+static int
+utf32_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
+{
+  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) {
+    int count = 0;
+    if (!conv->ostate) { /* Nothing written yet: emit the BOM first. */
+      if (n >= 4) {
+        r[0] = 0x00;
+        r[1] = 0x00;
+        r[2] = 0xFE;
+        r[3] = 0xFF;
+        r += 4; n -= 4; count += 4;
+      } else
+        return RET_TOOSMALL;
+    }
+    /* wc is already known to be below 0x110000, so no second range check
+       is needed and the most significant byte is always zero. */
+    if (n >= 4) {
+      r[0] = 0;
+      r[1] = (unsigned char) (wc >> 16);
+      r[2] = (unsigned char) (wc >> 8);
+      r[3] = (unsigned char) wc;
+      conv->ostate = 1; /* Set only once the character itself fits. */
+      return count+4;
+    } else
+      return RET_TOOSMALL;
+  }
+  return RET_ILSEQ;
+}
diff --git a/lib/ucs4.h b/lib/utf32be.h
similarity index 52%
copy from lib/ucs4.h
copy to lib/utf32be.h
index 927e994..21875a9 100644
--- a/lib/ucs4.h
+++ b/lib/utf32be.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 1999-2000 Free Software Foundation, Inc.
+ * Copyright (C) 1999-2001 Free Software Foundation, Inc.
  * This file is part of the GNU LIBICONV Library.
  *
  * The GNU LIBICONV Library is free software; you can redistribute it
@@ -19,49 +19,37 @@
  */
 
 /*
- * UCS-4
+ * UTF-32BE
  */
 
-/* Here we accept 0000FFFE/0000FEFF marks as endianness indicators everywhere
-   in the stream, not just at the beginning. The default is big-endian. */
-/* The state is 0 if big-endian, 1 if little-endian. */
+/* Specification: Unicode 3.1 Standard Annex #19 */
+
 static int
-ucs4_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
+utf32be_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
 {
-  state_t state = conv->istate;
-  int count = 0;
-  for (; n >= 4;) {
-    ucs4_t wc = (state
-                  ? s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24)
-                  : (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]);
-    s += 4; n -= 4; count += 4;
-    if (wc == 0xfeff) {
-    } else if (wc == 0xfffe) {
-      state ^= 1;
-    } else if (wc <= 0x7fffffff) {
+  if (n >= 4) { /* cast s[0]: a byte >= 0x80 would overflow int on << 24 */
+    ucs4_t wc = ((ucs4_t) s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
+    if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) {
       *pwc = wc;
-      conv->istate = state;
-      return count;
+      return 4; /* one character is always exactly four bytes */
     } else
       return RET_ILSEQ;
   }
-  conv->istate = state;
-  return RET_TOOFEW(count);
+  return RET_TOOFEW(0);
 }
 
-/* But we output UCS-4 in big-endian order, without byte-order mark. */
 static int
-ucs4_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
+utf32be_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
 {
-  if (wc != 0xfffe) {
+  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) {
     if (n >= 4) {
-      r[0] = (unsigned char) (wc >> 24);
+      r[0] = 0; /* wc < 0x110000, so bits 24..31 are always zero */
       r[1] = (unsigned char) (wc >> 16);
       r[2] = (unsigned char) (wc >> 8);
       r[3] = (unsigned char) wc;
       return 4;
     } else
       return RET_TOOSMALL;
-  } else
-    return RET_ILSEQ;
+  }
+  return RET_ILSEQ; /* surrogate (0xd800..0xdfff) or >= 0x110000 */
 }
diff --git a/lib/utf32le.h b/lib/utf32le.h
new file mode 100644
index 0000000..c065a1d
--- /dev/null
+++ b/lib/utf32le.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 1999-2001 Free Software Foundation, Inc.
+ * This file is part of the GNU LIBICONV Library.
+ *
+ * The GNU LIBICONV Library is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * The GNU LIBICONV Library is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
+ * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
+ * Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * UTF-32LE
+ */
+
+/* Specification: Unicode 3.1 Standard Annex #19 */
+
+static int
+utf32le_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
+{
+  /* Cast s[3]: a byte >= 0x80 promoted to int would overflow on << 24. */
+  if (n >= 4) {
+    ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + ((ucs4_t) s[3] << 24);
+    if (!(wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)))
+      return RET_ILSEQ; /* surrogate or >= 0x110000 */
+    *pwc = wc;
+    return 4;
+  }
+  return RET_TOOFEW(0);
+}
+
+static int
+utf32le_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
+{
+  /* Reject surrogates and anything at or above 0x110000. */
+  if (wc >= 0x110000 || (wc >= 0xd800 && wc < 0xe000))
+    return RET_ILSEQ;
+  /* One character always occupies exactly four output bytes. */
+  if (n < 4)
+    return RET_TOOSMALL;
+  r[0] = (unsigned char) wc;
+  r[1] = (unsigned char) (wc >> 8);
+  r[2] = (unsigned char) (wc >> 16);
+  r[3] = 0; /* wc < 0x110000, so bits 24..31 are always zero */
+  return 4;
+}
diff --git a/man/iconv_open.3 b/man/iconv_open.3
index 699fde7..a3735d9 100644
--- a/man/iconv_open.3
+++ b/man/iconv_open.3
@@ -9,7 +9,7 @@
 .\"   GNU glibc-2 source code and manual
 .\"   OpenGroup's Single Unix specification http://www.UNIX-systems.org/online.html
 .\"
-.TH ICONV_OPEN 3  "January 5, 2001" "GNU" "Linux Programmer's Manual"
+.TH ICONV_OPEN 3  "April 12, 2001" "GNU" "Linux Programmer's Manual"
 .SH NAME
 iconv_open \- allocate descriptor for character set conversion
 .SH SYNOPSIS
@@ -72,6 +72,7 @@ UTF-8
 UCS-2, UCS-2BE, UCS-2LE
 UCS-4, UCS-4BE, UCS-4LE
 UTF-16, UTF-16BE, UTF-16LE
+UTF-32, UTF-32BE, UTF-32LE
 UTF-7
 JAVA
 .fi
diff --git a/tests/Makefile.in b/tests/Makefile.in
index f27af9e..40f8887 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -41,6 +41,9 @@ check : all table-from table-to ../src/iconv
 	$(srcdir)/check-stateful $(srcdir) UTF-16
 	$(srcdir)/check-stateful $(srcdir) UTF-16BE
 	$(srcdir)/check-stateful $(srcdir) UTF-16LE
+	$(srcdir)/check-stateful $(srcdir) UTF-32
+	$(srcdir)/check-stateful $(srcdir) UTF-32BE
+	$(srcdir)/check-stateful $(srcdir) UTF-32LE
 	$(srcdir)/check-stateful $(srcdir) UTF-7
 #	/* 8-bit encodings */
 	$(srcdir)/check-stateless $(srcdir) ISO-8859-1
diff --git a/tests/Makefile.msvc b/tests/Makefile.msvc
index f340975..6bc3b18 100644
--- a/tests/Makefile.msvc
+++ b/tests/Makefile.msvc
@@ -63,6 +63,9 @@ check : all table-from.exe table-to.exe ../src/iconv.exe uniq-u.exe
 	$(srcdir)\check-stateful.bat $(srcdir) UTF-16
 	$(srcdir)\check-stateful.bat $(srcdir) UTF-16BE
 	$(srcdir)\check-stateful.bat $(srcdir) UTF-16LE
+	$(srcdir)\check-stateful.bat $(srcdir) UTF-32
+	$(srcdir)\check-stateful.bat $(srcdir) UTF-32BE
+	$(srcdir)\check-stateful.bat $(srcdir) UTF-32LE
 	$(srcdir)\check-stateful.bat $(srcdir) UTF-7
 #	/* 8-bit encodings */
 	$(srcdir)\check-stateless.bat $(srcdir) ISO-8859-1
diff --git a/tests/Makefile.os2 b/tests/Makefile.os2
index 3e1ed56..9cb324b 100644
--- a/tests/Makefile.os2
+++ b/tests/Makefile.os2
@@ -31,6 +31,9 @@ check : all table-from.exe table-to.exe ../src/iconv.exe genutf8.exe
 	$(srcdir)\check-stateful $(srcdir) UTF-16
 	$(srcdir)\check-stateful $(srcdir) UTF-16BE
 	$(srcdir)\check-stateful $(srcdir) UTF-16LE
+	$(srcdir)\check-stateful $(srcdir) UTF-32
+	$(srcdir)\check-stateful $(srcdir) UTF-32BE
+	$(srcdir)\check-stateful $(srcdir) UTF-32LE
 	$(srcdir)\check-stateful $(srcdir) UTF-7
 #	/* 8-bit encodings */
 	$(srcdir)\check-stateless $(srcdir) ISO-8859-1
diff --git a/tests/UTF-32-snippet b/tests/UTF-32-snippet
new file mode 100644
index 0000000000000000000000000000000000000000..6aa4dcb8453cb36ea9ab7aa09e97a374854829fa
GIT binary patch
literal 1068
zcwUv$)oxWm6oBDX&Mq!>$KBnF)Qh{`K|=z8K!SwyF>trwZ-cuOhnE1p%{hT!fnI#c
zTx|JgWhUbDmx#<E4s(Q~9OF1AILRqabB42=<2)C*$R#dwg{xfSIybl(5rh6yE#q_R
z7WT3&XD2-)FB9a^g$xP#bfp{J=|TEzFM895zVxGjLW(G+gi^{Vr-DkVsHTSg|7dMu
zf3Eic`)FeeyT5fu)KO0Zjk$N4BfHO57O|KmEM*zXS;0zHv6?lkWgY9;z(zK)IXXAv
z^gn51+P)*cw<ZM?QbaK&lu|}H6;x71H8q$~GJt{jb~2bD3}qO@8No<KF`6-qWgME4
z2~7M;YdW{~$j&%z-IbHB+ah~5CNY^QOl2C=nZZnEF`GHerIvZjX8{YTqn-vDX`-1H
zTI2V5{>iWBcu!<!y#1%%$iKyH?r@iT+~)xgdBkI$@RVmf=LIk6;1#cV!&~0*o)3KF
TQzv`VJ*M|<Yy7x>2h-lKBEM8k

literal 0
HcwPel00001

diff --git a/tests/UTF-32-snippet.UTF-8 b/tests/UTF-32-snippet.UTF-8
new file mode 100644
index 0000000..4229c88
--- /dev/null
+++ b/tests/UTF-32-snippet.UTF-8
@@ -0,0 +1,6 @@
+ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ          <- Greek
+𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝    <- Etruscan
+ABCDEFGHIJKLMNOPQRSTUVWXYZ        <- Latin
+АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ  <- Cyrillic
+𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈         <- Gothic
+אבגדהוזחטיךכלםמןנסעףפץצקרש        <- Hebrew
diff --git a/tests/UTF-32BE-snippet b/tests/UTF-32BE-snippet
new file mode 100644
index 0000000000000000000000000000000000000000..72e4761c60351ceea2d1840a8b1e7eae65e85681
GIT binary patch
literal 1064
zcwUv$)oxZn6oAoP{!?7)j=Q@TsTX&>gN6hGfdmQRF>trwZ-cu#Z^1hJClD;qi<Qj9
zk#BZpB65c#az{AIF^+SBlbqr-XE@6_&U1l_T;eiUxXLxIbAy}Qiikn~sh;uKx{bYT
z%j~3Q6y$;ex{xEGkgjy2J3UCB?L}|;(3gG`QA`P?lu=Fvl~hqp4YkzK{~xU_?9X}+
zu#XP5u=`tgL<5a9(VV^08u@*;vxvnkVJXX4&I(qtiq))PE$dj%1~#&Z&C$6Tr~gSC
z)Ak+ly)`MKm=a1UqnrvVsiK-1YN^AFk^v0Fx0AsPVJO2G&Im>_iqVW=EaT9WOkm<)
zTGP25iu{bz)?Jx&-4Xe-F^S1cVJg#@&J1QUi`mR!F7?b~J_}e#1C2D%Obe~F(H_6g
z^G|*~$9p0_<Ly87M&WJlaF=`B=K&9S#ABZDlxIBW1uuEUYu@mdcf98VANj;*zI3uT
Q-D7&+w#JY9cQEb!3fr+#Pyhe`

literal 0
HcwPel00001

diff --git a/tests/UTF-32BE-snippet.UTF-8 b/tests/UTF-32BE-snippet.UTF-8
new file mode 100644
index 0000000..4229c88
--- /dev/null
+++ b/tests/UTF-32BE-snippet.UTF-8
@@ -0,0 +1,6 @@
+ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ          <- Greek
+𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝    <- Etruscan
+ABCDEFGHIJKLMNOPQRSTUVWXYZ        <- Latin
+АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ  <- Cyrillic
+𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈         <- Gothic
+אבגדהוזחטיךכלםמןנסעףפץצקרש        <- Hebrew
diff --git a/tests/UTF-32LE-snippet b/tests/UTF-32LE-snippet
new file mode 100644
index 0000000000000000000000000000000000000000..b15f5c43f7a020869a5a1ad82f1e32f747d768f1
GIT binary patch
literal 1064
zcwUv$*G?5t6oBE4=Nt<*dPfm^!HNaNLdOQ!%bgMvjYbno43DAMz#jYC*c%q?@)mrP
zGYJVfF>>)G`(`r#tXca~B81EIa)qm0<2pC!qo14H;x>1<%RTP%fQLNdF;95PGpfS>
zsi!7Bw{ezZv6EyI5?LXOaU@8RVmuR=$Rx6v%oL_Fjp^i&OCI?YP)HHQlu$|;<y26~
zzgk;3ANOA19Niq@bllxQBTY2N&$Nd0ceb;YZER-;JL#a4E_Sh-J?v#4`#Hct4ske)
zZN|}iqQ<EGL>Ot!NrsS19{ChdND;-9P)Zr)R8YwbW>Up0s+r9k<}#1@EMOsvSj-Y?
zSjw_w=GJI#Js~~gsP$AVI(Nt2%l)SntYj6ntY!^sSw|h~*+4xT*~DhH&_E+iG}A&W
zZM0|DgG@&6@tKgG@$pf;&r>11;3cnk%^TkGj`w`vBLjTmGhg`1Am8}T4}S8C-~8b(
VL#eU#M!Oi@x1(X?{#}fFhXKv=QcwT@

literal 0
HcwPel00001

diff --git a/tests/UTF-32LE-snippet.UTF-8 b/tests/UTF-32LE-snippet.UTF-8
new file mode 100644
index 0000000..4229c88
--- /dev/null
+++ b/tests/UTF-32LE-snippet.UTF-8
@@ -0,0 +1,6 @@
+ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ          <- Greek
+𐌀𐌁𐌂𐌃𐌄𐌅𐌆𐌇𐌈𐌉𐌊𐌋𐌌𐌍𐌎𐌏𐌐𐌑𐌒𐌓𐌔𐌕𐌖𐌗𐌘𐌙𐌚𐌛𐌜𐌝    <- Etruscan
+ABCDEFGHIJKLMNOPQRSTUVWXYZ        <- Latin
+АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ  <- Cyrillic
+𐌰𐌱𐌲𐌳𐌴𐌵𐌶𐌷𐌸𐌹𐌺𐌻𐌼𐌽𐌾𐌿𐍀𐍁𐍂𐍃𐍄𐍅𐍆𐍇𐍈         <- Gothic
+אבגדהוזחטיךכלםמןנסעףפץצקרש        <- Hebrew
-- 
2.11.4.GIT