1 ;;; po-compat.el --- basic support of PO translation files -*- coding: latin-1; -*-
3 ;; Copyright (C) 1995-1999, 2000-2002 Free Software Foundation, Inc.
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
7 ;; Bruno Haible <bruno@clisp.org>.
8 ;; Keywords: i18n, files
10 ;; This file is part of GNU gettext.
12 ;; GNU gettext is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
17 ;; GNU gettext is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
29 ;; Emacs 21.2 and newer already contain this file, under the name po.el,
30 ;; and without portability hassles.
32 ;; This package makes sure visiting PO files decodes them correctly,
33 ;; according to the Charset= header in the PO file. For more support
34 ;; for editing PO files, see po-mode.el.
38 ;;; Emacs portability matters.
;; Identify which Emacs variety is being used.
;; This file supports:
;;   - XEmacs (version 19 and above)    -> po-XEMACS = t,
;;   - GNU Emacs (version 20 and above) -> po-EMACS20 = t,
;;   - GNU Emacs (version 19)           -> no flag.
;;
;; At most one of the two flags ends up non-nil.  The detection is wrapped
;; in `eval-and-compile' so that the flags are also set while this file is
;; being byte-compiled, not only when it is loaded.
(eval-and-compile
  (cond
   ;; XEmacs and its Lucid ancestor announce themselves in `emacs-version'.
   ((string-match "XEmacs\\|Lucid" emacs-version)
    (setq po-EMACS20 nil
          po-XEMACS t))
   ;; GNU Emacs: the version string sorts after "19" and the `faces'
   ;; feature is present.
   ((and (string-lessp "19" emacs-version)
         (featurep 'faces))
    (setq po-EMACS20 t
          po-XEMACS nil))
   ;; Anything else: neither flag is set.
   (t
    (setq po-EMACS20 nil
          po-XEMACS nil))))
;; Handle missing `with-temp-buffer' function.
;;
;; When the running Emacs already provides `with-temp-buffer', simply alias
;; it.  Otherwise define an equivalent macro.
(eval-and-compile
  (if (fboundp 'with-temp-buffer)
      (fset 'po-with-temp-buffer (symbol-function 'with-temp-buffer))

    (defmacro po-with-temp-buffer (&rest forms)
      "Create a temporary buffer, and evaluate FORMS there like `progn'.
The value of the last form in FORMS is returned.  The buffer that was
current beforehand is made current again and the temporary buffer is
killed afterwards, even if FORMS exit non-locally."
      ;; Uninterned symbols keep the expansion from capturing variables
      ;; that FORMS might use.
      (let ((curr-buffer (make-symbol "curr-buffer"))
            (temp-buffer (make-symbol "temp-buffer")))
        `(let ((,curr-buffer (current-buffer))
               (,temp-buffer (get-buffer-create
                              (generate-new-buffer-name " *po-temp*"))))
           (unwind-protect
               (progn
                 (set-buffer ,temp-buffer)
                 ,@forms)
             ;; Cleanup: restore the caller's buffer, then kill the
             ;; temporary one unless FORMS already killed it.
             (set-buffer ,curr-buffer)
             (and (buffer-name ,temp-buffer)
                  (kill-buffer ,temp-buffer))))))))
;; Table mapping charset names, as they appear (upper-cased) in the
;; "Content-Type: ...; charset=NAME" header line of a PO file, to the symbol
;; naming the coding system used to decode the file.  Keys are strings and
;; are looked up with `assoc', so they must be spelled in upper case.
(defconst po-content-type-charset-alist
  '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
    ;; "ASCII" is the charset name the lookup functions in this file fall
    ;; back to when a PO file declares no charset at all.
    ("ASCII" . undecided)
    ("ANSI_X3.4-1968" . undecided)
    ("US-ASCII" . undecided)
    ("ISO-8859-1" . iso-8859-1)
    ("ISO_8859-1" . iso-8859-1)
    ("ISO-8859-2" . iso-8859-2)
    ("ISO_8859-2" . iso-8859-2)
    ("ISO-8859-3" . iso-8859-3)
    ("ISO_8859-3" . iso-8859-3)
    ("ISO-8859-4" . iso-8859-4)
    ("ISO_8859-4" . iso-8859-4)
    ("ISO-8859-5" . iso-8859-5)
    ("ISO_8859-5" . iso-8859-5)
    ("ISO-8859-7" . iso-8859-7)
    ("ISO_8859-7" . iso-8859-7)
    ("ISO-8859-8" . iso-8859-8)
    ("ISO_8859-8" . iso-8859-8)
    ("ISO-8859-9" . iso-8859-9)
    ("ISO_8859-9" . iso-8859-9)
    ("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
    ("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
    ("CP437" . cp437) ; requires Emacs 20
    ("CP775" . cp775) ; requires Emacs 20
    ("CP850" . cp850) ; requires Emacs 20
    ("CP852" . cp852) ; requires Emacs 20
    ("CP855" . cp855) ; requires Emacs 20
    ("CP857" . cp857) ; requires Emacs 20
    ("CP861" . cp861) ; requires Emacs 20
    ("CP862" . cp862) ; requires Emacs 20
    ("CP864" . cp864) ; requires Emacs 20
    ("CP865" . cp865) ; requires Emacs 20
    ("CP866" . cp866) ; requires Emacs 21
    ("CP869" . cp869) ; requires Emacs 20
    ("CP1250" . cp1250) ; requires Emacs 20
    ("CP1251" . cp1251) ; requires Emacs 20
    ("CP1252" . iso-8859-1) ; approximation
    ("CP1253" . cp1253) ; requires Emacs 20
    ("CP1254" . iso-8859-9) ; approximation
    ("CP1255" . iso-8859-8) ; approximation
    ("CP1257" . cp1257) ; requires Emacs 20
    ("GB2312" . cn-gb-2312) ; also named 'gb2312' in XEmacs 21 or Emacs 21
                            ; also named 'euc-cn' in Emacs 20 or Emacs 21
    ("SHIFT_JIS" . shift_jis)
    ("TIS-620" . tis-620) ; requires Emacs 20 or Emacs 21
    ("VISCII" . viscii) ; requires Emacs 20 or Emacs 21
    ;("GEORGIAN-PS" . ??)
    ("UTF-8" . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
    )
  "How to convert a GNU libc/libiconv canonical charset name as seen in
Content-Type into a Mule coding system.")
(defun po-find-charset (filename)
  "Return PO file charset value.
FILENAME is the PO file to inspect.  The value is the charset name, as a
string, taken from the \"Content-Type: text/plain; charset=...\" line of
the file, or nil when no such line is found.

Raw chunks of FILENAME are inserted into the current buffer and searched
forward from point, so this must be called with an empty scratch buffer
current."
  (let ((charset-regexp
         "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        ;; Becomes non-nil once a read returns fewer bytes than requested,
        ;; i.e. the end of the file has been reached.
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      ;; Append at the end of the buffer, but keep point where it is so the
      ;; searches still start from there.
      (save-excursion
        (goto-char (point-max))
        ;; Buffer positions start at 1 and file offsets at 0, hence `1-'.
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          (setq short-read (< (nth 1 pair) 4096)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file has been loaded and there is no charset line.
          (short-read nil)
          ;; We've found the first msgid; maybe, only a part of the msgstr
          ;; value was loaded.  Load the next 1024 bytes; if charset still
          ;; isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally filename nil
                                               (1- (point))
                                               (1- (+ (point) 1024))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
;; GNU Emacs flavour of the lookup.  It is only defined when the flavour
;; detection earlier in this file set `po-EMACS20'.
(if po-EMACS20
    (defun po-find-file-coding-system-guts (operation filename)
      "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

OPERATION is the I/O primitive being performed and FILENAME the file it
acts on.  The value is nil unless OPERATION is `insert-file-contents' and
FILENAME exists.  Otherwise it is a one-element list (DECODING), that is,
a pair whose ENCODING part is nil.  DECODING is `no-conversion' when the
charset named in the file cannot be mapped to a coding system."
      (and (eq operation 'insert-file-contents)
           (file-exists-p filename)
           ;; The header is scanned in a scratch buffer.
           (po-with-temp-buffer
             (let* ((coding-system-for-read 'no-conversion)
                    (charset (or (po-find-charset filename) "ascii"))
                    (charset-upper (upcase charset))
                    (charset-lower (downcase charset))
                    ;; Preferred: the explicit table entry for this charset.
                    (candidate
                     (cdr (assoc charset-upper po-content-type-charset-alist)))
                    ;; Fallback: an already interned symbol spelled like the
                    ;; lower-cased charset name.
                    (try-symbol (or candidate (intern-soft charset-lower)))
                    (try-string
                     (if try-symbol (symbol-name try-symbol) charset-lower)))
               (list (cond ((and try-symbol (coding-system-p try-symbol))
                            try-symbol)
                           ;; A "cpNNN" name that is not a coding system yet:
                           ;; set it up on demand if this Emacs supports that
                           ;; codepage.  The codepage helpers are not part of
                           ;; every Emacs, hence the `fboundp' guard.
                           ;; NOTE(review): assumes `codepage-setup' defines
                           ;; a coding system named cpNNN -- confirm.
                           ((and (fboundp 'cp-supported-codepages)
                                 (string-match "\\`cp[1-9][0-9][0-9]?\\'"
                                               try-string)
                                 (assoc (substring try-string 2)
                                        (cp-supported-codepages)))
                            (codepage-setup (substring try-string 2))
                            (intern try-string))
                           (t
                            'no-conversion))))))))
;; XEmacs flavour of the lookup.  It is only defined when the flavour
;; detection earlier in this file set `po-XEMACS', so it does not override
;; the definition meant for GNU Emacs.
(if po-XEMACS
    (defun po-find-file-coding-system-guts (operation filename)
      "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

OPERATION is the I/O primitive being performed and FILENAME the file it
acts on.  The value is nil unless OPERATION is `insert-file-contents' and
FILENAME exists.  Otherwise it is a one-element list (DECODING), that is,
a pair whose ENCODING part is nil.  DECODING is `no-conversion' when the
charset named in the file cannot be mapped to a coding system."
      (and (eq operation 'insert-file-contents)
           (file-exists-p filename)
           ;; The header is scanned in a scratch buffer.
           (po-with-temp-buffer
             (let ((coding-system-for-read 'no-conversion))
               (let* ((charset (or (po-find-charset filename)
                                   "ascii"))
                      (charset-upper (upcase charset))
                      ;; Symbol spelled like the lower-cased charset name.
                      (charset-lower (intern (downcase charset))))
                 ;; Prefer the explicit table entry; otherwise accept the
                 ;; charset name itself if it names a known coding system.
                 (list (or (cdr (assoc charset-upper
                                       po-content-type-charset-alist))
                           (if (memq charset-lower (coding-system-list))
                               charset-lower
                             'no-conversion)))))))))
;; Entry point taking a single argument list.  It is only defined when the
;; flavour detection earlier in this file set `po-EMACS20'.
;; NOTE(review): presumably this matches how GNU Emacs calls functions
;; found in `file-coding-system-alist' -- confirm.
(if po-EMACS20
    (defun po-find-file-coding-system (arg-list)
      "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

ARG-LIST is a list whose first element is the I/O operation and whose
second element is the file name; both are handed on to
`po-find-file-coding-system-guts', whose value is returned."
      (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list)))))
;; Entry point taking the operation and the file name as two separate
;; arguments.  It is only defined when the flavour detection earlier in this
;; file set `po-XEMACS', so it does not override the one-argument variant.
;; NOTE(review): presumably this matches how XEmacs calls functions found
;; in `file-coding-system-alist' -- confirm.
(if po-XEMACS
    (defun po-find-file-coding-system (operation filename)
      "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through file-coding-system-alist, before the file is visited for real.

OPERATION and FILENAME are handed on unchanged to
`po-find-file-coding-system-guts', whose value is returned."
      (po-find-file-coding-system-guts operation filename)))
250 ;;; Testing this file:
;; For each emacsimpl in { emacs, xemacs } do
;;   For each pofile in {
;;     cs.po           ; gettext/po/cs.el, charset=ISO-8859-2
;;     cs-modified.po  ; gettext/po/cs.el, charset=ISO_8859-2
;;     de.po           ; gettext/po/de.el, charset=UTF-8, if $emacsimpl = emacs
;;   } do
;;     Start $emacsimpl
;;     M-x load-file po-compat.el RET
;;     C-x C-f $pofile RET
;;     Verify charset marker in status line ('2' = ISO-8859-2, 'u' = UTF-8).
263 ;;; po-compat.el ends here