merged tag ooo/DEV300_m102
[LibreOffice.git] / sal / textenc / tenchelp.c
blob73495b05a98b848b5081bf04004f0f45e16e40a5
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 #include "tenchelp.h"
29 #include "unichars.h"
30 #include "rtl/textcvt.h"
31 #include "sal/types.h"
/* Forward declarations of the helpers that are private to this file. */

/* Stores the one-byte substitute for an undefined character in pBuf;
   yields sal_False when nMaxLen is 0. */
33 static sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
34 sal_Char * pBuf,
35 sal_Size nMaxLen);
/* Stores the one-byte substitute for an invalid sequence in pBuf;
   yields sal_False when nMaxLen is 0. */
37 static sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
38 sal_Char * pBuf,
39 sal_Size nMaxLen);
/* Non-zero when c belongs to a character class that nFlags asks to ignore
   (zero-width, control/format, or private use). */
41 static int ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags);
43 sal_Bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
44 sal_Char * pBuf,
45 sal_Size nMaxLen)
47 if (nMaxLen == 0)
48 return sal_False;
49 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
51 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
52 *pBuf = 0x00;
53 break;
55 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
56 default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
57 *pBuf = 0x3F;
58 break;
60 case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
61 *pBuf = 0x5F;
62 break;
64 return sal_True;
67 sal_Bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
68 sal_Char * pBuf,
69 sal_Size nMaxLen)
71 if (nMaxLen == 0)
72 return sal_False;
73 switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
75 case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
76 *pBuf = 0x00;
77 break;
79 case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
80 default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
81 *pBuf = 0x3F;
82 break;
84 case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
85 *pBuf = 0x5F;
86 break;
88 return sal_True;
91 int ImplIsUnicodeIgnoreChar( sal_Unicode c, sal_uInt32 nFlags )
93 return
94 ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
95 && ImplIsZeroWidth(c))
96 || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
97 && ImplIsControlOrFormat(c))
98 || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
99 && ImplIsPrivateUse(c));
102 /* ======================================================================= */
104 sal_Unicode ImplGetUndefinedUnicodeChar(sal_uChar cChar, sal_uInt32 nFlags)
106 return ((nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK)
107 == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MAPTOPRIVATE) ?
108 RTL_TEXTCVT_BYTE_PRIVATE_START + cChar :
109 RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
112 /* ----------------------------------------------------------------------- */
114 sal_Bool
115 ImplHandleUndefinedUnicodeToTextChar(ImplTextConverterData const * pData,
116 sal_Unicode const ** ppSrcBuf,
117 sal_Unicode const * pEndSrcBuf,
118 sal_Char ** ppDestBuf,
119 sal_Char const * pEndDestBuf,
120 sal_uInt32 nFlags,
121 sal_uInt32 * pInfo)
123 sal_Unicode c = **ppSrcBuf;
125 (void) pData; /* unused */
127 /* Should the private character map to one byte */
128 if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
130 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
132 **ppDestBuf = (sal_Char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
133 (*ppDestBuf)++;
134 (*ppSrcBuf)++;
135 return sal_True;
139 /* Should this character ignored (Private, Non Spacing, Control) */
140 if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
142 (*ppSrcBuf)++;
143 return sal_True;
146 /* Surrogates Characters should result in */
147 /* one replacement character */
148 if (ImplIsHighSurrogate(c))
150 if ( *ppSrcBuf == pEndSrcBuf )
152 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
153 return sal_False;
156 c = *((*ppSrcBuf)+1);
157 if (ImplIsLowSurrogate(c))
158 (*ppSrcBuf)++;
159 else
161 *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
162 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
164 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
165 return sal_False;
167 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
169 (*ppSrcBuf)++;
170 return sal_True;
172 else if (ImplGetInvalidAsciiMultiByte(nFlags,
173 *ppDestBuf,
174 pEndDestBuf - *ppDestBuf))
176 ++*ppSrcBuf;
177 ++*ppDestBuf;
178 return sal_True;
180 else
182 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
183 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
184 return sal_False;
189 *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
190 if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
192 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
193 return sal_False;
195 else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
196 (*ppSrcBuf)++;
197 else if (ImplGetUndefinedAsciiMultiByte(nFlags,
198 *ppDestBuf,
199 pEndDestBuf - *ppDestBuf))
201 ++*ppSrcBuf;
202 ++*ppDestBuf;
204 else
206 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
207 | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
208 return sal_False;
211 return sal_True;