From d9e18e7939613345d0de9a1a1de80684c7bec39b Mon Sep 17 00:00:00 2001
From: Angel Ortega <angel@triptico.com>
Date: Thu, 23 Jul 2009 11:06:50 +0200
Subject: [PATCH] New document mp_encoding.txt.

---
 doc/mp_encoding.txt | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 doc/mp_index.txt    |   1 +
 makefile.in         |   3 +-
 3 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 doc/mp_encoding.txt

diff --git a/doc/mp_encoding.txt b/doc/mp_encoding.txt
new file mode 100644
index 0000000..de8b46d
--- /dev/null
+++ b/doc/mp_encoding.txt
@@ -0,0 +1,139 @@
+Minimum Profit character encoding support
+=========================================
+
+This document describes the character encodings supported by the
+Minimum Profit text editor and the autodetection tests it performs.
+
+None (default locale)
+---------------------
+
+The following steps are performed on input:
+
+ * If any utf BOM is found, it sets the document encoding to one of
+   `utf-8bom', `utf-16le', `utf-16be', `utf-32le' or `utf-32be';
+ * Otherwise, if an explicit utf-8 sequence is detected, it sets the
+   document encoding to `utf-8';
+ * Otherwise, if some character is found with the high bit (bit 7) set
+   (that is, a non-ASCII character) that does not conform to the utf-8
+   standard, it sets the document encoding to `8bit';
+ * In any other case, no encoding is forced, and the file is read using
+   the locale conversion functions.
+
+On output, the document is saved using the locale conversion functions.
+
+utf-8
+-----
+
+The following steps are performed on input:
+
+ * If a utf-8 BOM is found, it sets the document encoding to `utf-8bom';
+ * In any other case, utf-8 is assumed as the character encoding and any
+   invalid character combination is converted to the `?' character.
+
+On output, it saves the document using the utf-8 encoding without a BOM
+prefix.
+
+utf-8bom
+--------
+
+On input, if no utf-8 BOM is found, the content is still read as utf-8,
+but the document encoding is not changed to `utf-8'.
+
+On output, it saves the document using the utf-8 encoding with a BOM
+prefix.
+
+8bit
+----
+
+No character conversion is done on input or output.
+
+iso8859-1
+---------
+
+Characters are treated as being encoded using the iso8859-1 character set,
+that is, no real conversion is done. This mode is really identical to
+`8bit'.
+
+Aliases: `latin1'.
+
+utf-16
+------
+
+On input, it tries to determine the endianness of the document by reading
+the BOM; if a valid one is found, encoding is set to `utf-16le' or
+`utf-16be'; if none is found, it assumes `utf-16le'.
+
+On output, it behaves like `utf-16le'.
+
+Aliases: `ucs-2'.
+
+utf-16le
+--------
+
+On input, it assumes utf-16 little endian characters.
+
+On output, it saves the document using the utf-16 little endian encoding
+with a BOM prefix.
+
+Aliases: `ucs-2le'.
+
+utf-16be
+--------
+
+On input, it assumes utf-16 big endian characters.
+
+On output, it saves the document using the utf-16 big endian encoding
+with a BOM prefix.
+
+Aliases: `ucs-2be'.
+
+utf-32
+------
+
+On input, it tries to determine the endianness of the document by reading
+the BOM; if a valid one is found, encoding is set to `utf-32le' or
+`utf-32be'; if none is found, it assumes `utf-32le'.
+
+On output, it behaves like `utf-32le'.
+
+Aliases: `ucs-4'.
+
+utf-32le
+--------
+
+On input, it assumes utf-32 little endian characters.
+
+On output, it saves the document using the utf-32 little endian encoding
+with a BOM prefix.
+
+Aliases: `ucs-4le'.
+
+utf-32be
+--------
+
+On input, it assumes utf-32 big endian characters.
+
+On output, it saves the document using the utf-32 big endian encoding
+with a BOM prefix.
+
+Aliases: `ucs-4be'.
+
+Iconv support
+-------------
+
+If Minimum Profit is compiled with support for the `iconv' library, many
+more encodings will be available. There is no easy way of knowing their
+names; the underlying system may provide the `iconv --list' command to
+list them.
+
+End of line markers
+-------------------
+
+Though not directly related to character encodings, the Minimum Profit text
+editor remembers the end of line marker found inside each document, and uses
+it when saving it afterwards. This helps in maintaining document
+compatibility and portability. This behaviour can be disabled by setting
+the `mp.config.keep_eol' configuration directive to 0.
+
+----
+Angel Ortega <angel@triptico.com>
diff --git a/doc/mp_index.txt b/doc/mp_index.txt
index a749c54..d482cb2 100644
--- a/doc/mp_index.txt
+++ b/doc/mp_index.txt
@@ -8,6 +8,7 @@ General Documentation
  * ./mp_configuration.html (Minimum Profit Configuration Directives).
  * ./mp_actions.html (Minimum Profit Action Reference).
  * ./mp_keycodes.html (Minimum Profit Keycodes).
+ * ./mp_encoding.html (Minimum Profit character encoding support).
  * ./mp_interactive_dialog_boxes.html (Creating interactive dialog boxes).
  * ./mp_cookbook.html (Minimum Profit Cookbook).
  * ./mp_api.html (Minimum Profit API).
diff --git a/makefile.in b/makefile.in
index 911da59..95c7146 100644
--- a/makefile.in
+++ b/makefile.in
@@ -9,7 +9,8 @@ ADD_DOCS=AUTHORS README COPYING RELEASE_NOTES \
 GRUTATXT_DOCS=doc/mp_index.html doc/mp_cookbook.html doc/mp_internals.html \
 	doc/mp_configuration.html doc/mp_data_model.html \
 	doc/mp_man.html doc/mp_interactive_dialog_boxes.html \
-	doc/mp_actions.html doc/mp_keycodes.html $(APPNAME).1
+	doc/mp_actions.html doc/mp_keycodes.html \
+	doc/mp_encoding.html $(APPNAME).1
 MP_DOCCER_DOCS=doc/mp_api.txt
 G_AND_MP_DOCS=doc/mp_api.html
 
-- 
2.11.4.GIT