1 /*-----------------------------------------------------------------------
3 * The PostgreSQL routine for string to ascii conversion.
5 * Portions Copyright (c) 1999-2025, PostgreSQL Global Development Group
8 * src/backend/utils/adt/ascii.c
10 *-----------------------------------------------------------------------
14 #include "mb/pg_wchar.h"
15 #include "utils/ascii.h"
16 #include "utils/fmgrprotos.h"
19 static void pg_to_ascii(unsigned char *src
, unsigned char *src_end
,
20 unsigned char *dest
, int enc
);
21 static text
*encode_to_ascii(text
*data
, int enc
);
29 pg_to_ascii(unsigned char *src
, unsigned char *src_end
, unsigned char *dest
, int enc
)
32 const unsigned char *ascii
;
36 * relevant start for an encoding
44 * ISO-8859-1 <range: 160 -- 255>
46 ascii
= (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
49 else if (enc
== PG_LATIN2
)
52 * ISO-8859-2 <range: 160 -- 255>
54 ascii
= (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
57 else if (enc
== PG_LATIN9
)
60 * ISO-8859-15 <range: 160 -- 255>
62 ascii
= (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
65 else if (enc
== PG_WIN1250
)
68 * Window CP1250 <range: 128 -- 255>
70 ascii
= (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
76 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
77 errmsg("encoding conversion from %s to ASCII not supported",
78 pg_encoding_to_char(enc
))));
79 return; /* keep compiler quiet */
85 for (x
= src
; x
< src_end
; x
++)
90 *dest
++ = ' '; /* bogus 128 to 'range' */
92 *dest
++ = ascii
[*x
- range
];
99 * The text datum is overwritten in-place, therefore this coding method
100 * cannot support conversions that change the string length!
104 encode_to_ascii(text
*data
, int enc
)
106 pg_to_ascii((unsigned char *) VARDATA(data
), /* src */
107 (unsigned char *) (data
) + VARSIZE(data
), /* src end */
108 (unsigned char *) VARDATA(data
), /* dest */
115 * convert to ASCII - enc is set as 'name' arg.
119 to_ascii_encname(PG_FUNCTION_ARGS
)
121 text
*data
= PG_GETARG_TEXT_P_COPY(0);
122 char *encname
= NameStr(*PG_GETARG_NAME(1));
123 int enc
= pg_char_to_encoding(encname
);
127 (errcode(ERRCODE_UNDEFINED_OBJECT
),
128 errmsg("%s is not a valid encoding name", encname
)));
130 PG_RETURN_TEXT_P(encode_to_ascii(data
, enc
));
134 * convert to ASCII - enc is set as int4
138 to_ascii_enc(PG_FUNCTION_ARGS
)
140 text
*data
= PG_GETARG_TEXT_P_COPY(0);
141 int enc
= PG_GETARG_INT32(1);
143 if (!PG_VALID_ENCODING(enc
))
145 (errcode(ERRCODE_UNDEFINED_OBJECT
),
146 errmsg("%d is not a valid encoding code", enc
)));
148 PG_RETURN_TEXT_P(encode_to_ascii(data
, enc
));
152 * convert to ASCII - current enc is DatabaseEncoding
156 to_ascii_default(PG_FUNCTION_ARGS
)
158 text
*data
= PG_GETARG_TEXT_P_COPY(0);
159 int enc
= GetDatabaseEncoding();
161 PG_RETURN_TEXT_P(encode_to_ascii(data
, enc
));
165 * Copy a string in an arbitrary backend-safe encoding, converting it to a
166 * valid ASCII string by replacing non-ASCII bytes with '?'. Otherwise the
167 * behavior is identical to strlcpy(), except that we don't bother with a
170 * This must not trigger ereport(ERROR), as it is called in postmaster.
174 ascii_safe_strlcpy(char *dest
, const char *src
, size_t destsiz
)
176 if (destsiz
== 0) /* corner case: no room for trailing nul */
179 while (--destsiz
> 0)
181 /* use unsigned char here to avoid compiler warning */
182 unsigned char ch
= *src
++;
186 /* Keep printable ASCII characters */
187 if (32 <= ch
&& ch
<= 127)
189 /* White-space is also OK */
190 else if (ch
== '\n' || ch
== '\r' || ch
== '\t')
192 /* Everything else is replaced with '?' */