3 declare(strict_types
=1);
7 use function array_filter
;
8 use function array_intersect
;
9 use function array_map
;
15 use function function_exists
;
18 use function mb_convert_encoding
;
19 use function mb_convert_kana
;
20 use function mb_detect_encoding
;
21 use function mb_list_encodings
;
22 use function preg_replace
;
23 use function str_contains
;
24 use function str_starts_with
;
25 use function strtolower
;
26 use function strtoupper
;
31 * Encoding conversion helper class
/** Engine identifier: no charset conversion is performed. */
public const ENGINE_NONE = 'none';

/** Engine identifier: conversion is done with iconv(). */
public const ENGINE_ICONV = 'iconv';

/** Engine identifier: conversion is done with mb_convert_encoding(). */
public const ENGINE_MBSTRING = 'mbstring';
/**
 * Chosen encoding engine
 *
 * Null means that no engine has been selected yet.
 *
 * @var self::ENGINE_NONE|self::ENGINE_ICONV|self::ENGINE_MBSTRING|null
 */
private static string|null $engine = null;
47 * Map of conversion engine configurations
49 * Each entry contains:
51 * - function to detect
53 * - extension name to warn when missing
55 private const ENGINE_MAP
= [
57 'mbstring' => 'mb_convert_encoding',
/**
 * Order of automatic detection of engines
 *
 * The first engine in this list whose conversion function exists is used.
 */
private const ENGINE_ORDER = [self::ENGINE_ICONV, self::ENGINE_MBSTRING];
/**
 * Kanji encodings list
 *
 * Comma-separated encoding names, used as the candidate list when
 * detecting the encoding of a string.
 */
private static string $kanjiEncodings = 'ASCII,SJIS,EUC-JP,JIS';
/**
 * Initializes encoding engine detecting available backends.
 *
 * The configured RecodingEngine wins when it is a known engine whose
 * conversion function exists. Otherwise the engines listed in ENGINE_ORDER
 * are probed in order, and ENGINE_NONE is selected when none is usable.
 */
public static function initEngine(): void
{
    $engine = Config::getInstance()->config->RecodingEngine;

    /* Use user configuration */
    if (isset(self::ENGINE_MAP[$engine])) {
        if (function_exists(self::ENGINE_MAP[$engine])) {
            self::$engine = $engine;

            return;
        }

        // The configured engine is known but its function is unavailable.
        Core::warnMissingExtension($engine);
    }

    /* Autodetection */
    foreach (self::ENGINE_ORDER as $engine) {
        if (function_exists(self::ENGINE_MAP[$engine])) {
            self::$engine = $engine;

            return;
        }
    }

    /* Fallback to none conversion */
    self::$engine = self::ENGINE_NONE;
}
/**
 * Setter for engine. Use with caution, mostly useful for testing.
 *
 * @param self::ENGINE_NONE|self::ENGINE_ICONV|self::ENGINE_MBSTRING $engine
 */
public static function setEngine(string $engine): void
{
    self::$engine = $engine;
}
/**
 * Checks whether there is any charset conversion supported
 *
 * Selects an engine first when none has been chosen yet.
 *
 * @return bool true unless the selected engine is ENGINE_NONE
 */
public static function isSupported(): bool
{
    if (self::$engine === null) {
        self::initEngine();
    }

    return self::$engine !== self::ENGINE_NONE;
}
/**
 * Converts encoding of text according to parameters with detected
 * conversion function.
 *
 * @param string $srcCharset  source charset
 * @param string $destCharset target charset
 * @param string $what        what to convert
 *
 * @return string converted text
 */
public static function convertString(
    string $srcCharset,
    string $destCharset,
    string $what,
): string {
    // Nothing to do when source and target charset are the same.
    if ($srcCharset === $destCharset) {
        return $what;
    }

    if (self::$engine === null) {
        self::initEngine();
    }

    $config = Config::getInstance();

    // The configured extra parameters are appended to the target charset
    // for iconv, but only when they start with '//'.
    $iconvExtraParams = '';
    if (str_starts_with($config->config->IconvExtraParams, '//')) {
        $iconvExtraParams = $config->config->IconvExtraParams;
    }

    return match (self::$engine) {
        self::ENGINE_ICONV => iconv($srcCharset, $destCharset . $iconvExtraParams, $what),
        self::ENGINE_MBSTRING => mb_convert_encoding($what, $destCharset, $srcCharset),
        // No conversion engine: hand the text back unchanged.
        default => $what,
    };
}
/**
 * Detects whether Kanji encoding is available
 *
 * @return bool true when the current language is 'ja'
 */
public static function canConvertKanji(): bool
{
    return Current::$lang === 'ja';
}
/**
 * Getter for Kanji encodings. Use with caution, mostly useful for testing.
 *
 * @return string Kanji encodings list
 */
public static function getKanjiEncodings(): string
{
    return self::$kanjiEncodings;
}
/**
 * Setter for Kanji encodings. Use with caution, mostly useful for testing.
 *
 * @param string $value Kanji encodings list
 */
public static function setKanjiEncodings(string $value): void
{
    self::$kanjiEncodings = $value;
}
/**
 * Reverses SJIS & EUC-JP position in the encoding codes list
 *
 * The list is always reset to one of two fixed orders, chosen by looking
 * at the second entry of the current list.
 */
public static function kanjiChangeOrder(): void
{
    $parts = explode(',', self::$kanjiEncodings);

    // A list with fewer than two entries has no second element; treat it
    // like the SJIS-first order instead of reading an undefined offset.
    self::$kanjiEncodings = ($parts[1] ?? '') === 'EUC-JP'
        ? 'ASCII,SJIS,EUC-JP,JIS'
        : 'ASCII,EUC-JP,SJIS,JIS';
}
/**
 * Kanji string encoding convert
 *
 * @param string $str  the string to convert
 * @param string $enc  the destination encoding code
 * @param string $kana set 'kana' convert to JIS-X208-kana
 *
 * @return string the converted string
 */
public static function kanjiStrConv(string $str, string $enc, string $kana): string
{
    // No target encoding and no kana conversion requested.
    if ($enc === '' && $kana === '') {
        return $str;
    }

    // Detect the current encoding among the Kanji candidates; fall back to
    // utf-8 when detection fails.
    $stringEncoding = mb_detect_encoding($str, self::$kanjiEncodings);
    if ($stringEncoding === false) {
        $stringEncoding = 'utf-8';
    }

    if ($kana === 'kana') {
        $str = mb_convert_kana($str, 'KV', $stringEncoding);
    }

    // Re-encode only when a target is given and differs from the detected one.
    if ($stringEncoding !== $enc && $enc !== '') {
        return mb_convert_encoding($str, $enc, $stringEncoding);
    }

    return $str;
}
234 * Kanji file encoding convert
236 * @param string $file the name of the file to convert
237 * @param string $enc the destination encoding code
238 * @param string $kana set 'kana' convert to JIS-X208-kana
240 * @return string the name of the converted file
242 public static function kanjiFileConv(string $file, string $enc, string $kana): string
244 if ($enc === '' && $kana === '') {
248 $tmpfname = (string) tempnam(Config
::getInstance()->getUploadTempDir(), $enc);
249 $fpd = fopen($tmpfname, 'wb');
250 if ($fpd === false) {
254 $fps = fopen($file, 'r');
255 if ($fps === false) {
259 self
::kanjiChangeOrder();
260 while (! feof($fps)) {
261 $line = fgets($fps, 4096);
262 if ($line === false) {
266 $dist = self
::kanjiStrConv($line, $enc, $kana);
270 self
::kanjiChangeOrder();
/**
 * Defines radio form fields to switch between encoding modes
 *
 * @return string HTML code for the radio controls
 */
public static function kanjiEncodingForm(): string
{
    return (new Template())->render('encoding/kanji_encoding_form');
}
291 * Lists available encodings.
295 public static function listEncodings(): array
297 if (self
::$engine === null) {
301 /* Most engines do not support listing */
302 $config = Config
::getInstance();
303 if (self
::$engine != self
::ENGINE_MBSTRING
) {
304 return array_filter($config->settings
['AvailableCharsets'], static function (string $charset): bool {
305 // Removes any ignored character
306 $normalizedCharset = strtoupper((string) preg_replace(['/[^A-Za-z0-9\-\/]/'], '', $charset));
308 // The character set ISO-2022-CN-EXT can be vulnerable (CVE-2024-2961).
309 return ! str_contains($normalizedCharset, 'ISO-2022-CN-EXT')
310 && ! str_contains($normalizedCharset, 'ISO2022CNEXT');
314 return array_intersect(
315 array_map(strtolower(...), mb_list_encodings()),
316 $config->settings
['AvailableCharsets'],