build fix
[LibreOffice.git] / filter / source / msfilter / rtfutil.cxx
blob6b1dd95ed9f60fa057ad4bee755632d52a2bc02e
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <filter/msfilter/rtfutil.hxx>
11 #include <rtl/strbuf.hxx>
12 #include <osl/diagnose.h>
13 #include <svtools/rtfkeywd.hxx>
15 namespace msfilter
17 namespace rtfutil
20 OString OutHex(sal_uLong nHex, sal_uInt8 nLen)
22 sal_Char aNToABuf[] = "0000000000000000";
24 OSL_ENSURE(nLen < sizeof(aNToABuf), "nLen is too big");
25 if (nLen >= sizeof(aNToABuf))
26 nLen = (sizeof(aNToABuf)-1);
28 // Set pointer to the buffer end
29 sal_Char* pStr = aNToABuf + (sizeof(aNToABuf)-1);
30 for (sal_uInt8 n = 0; n < nLen; ++n)
32 *(--pStr) = (sal_Char)(nHex & 0xf) + 48;
33 if (*pStr > '9')
34 *pStr += 39;
35 nHex >>= 4;
37 return OString(pStr);
40 // Ideally, this function should work on (sal_uInt32) Unicode scalar values
41 // instead of (sal_Unicode) UTF-16 code units. However, at least "Rich Text
42 // Format (RTF) Specification Version 1.9.1" available at
43 // <https://www.microsoft.com/en-us/download/details.aspx?id=10725> does not
44 // look like it allows non-BMP Unicode characters >= 0x10000 in the \uN notation
45 // (it only talks about "Unicode character", but then explains how values of N
46 // greater than 32767 will be expressed as negative signed 16-bit numbers, so
47 // that smells like \uN is limited to BMP).
48 // However the "Mathematics" section has an example that shows the code point
49 // U+1D44E being encoded as UTF-16 surrogate pair "\u-10187?\u-9138?", so
50 // sal_Unicode actually works fine here.
51 OString OutChar(sal_Unicode c, int* pUCMode, rtl_TextEncoding eDestEnc, bool* pSuccess, bool bUnicode)
53 if (pSuccess)
54 *pSuccess = true;
55 OStringBuffer aBuf;
56 const sal_Char* pStr = nullptr;
57 // 0x0b instead of \n, etc because of the replacements in SwWW8AttrIter::GetSnippet()
58 switch (c)
60 case 0x0b:
61 // hard line break
62 pStr = OOO_STRING_SVTOOLS_RTF_LINE;
63 break;
64 case '\t':
65 pStr = OOO_STRING_SVTOOLS_RTF_TAB;
66 break;
67 case '\\':
68 case '}':
69 case '{':
70 aBuf.append('\\');
71 aBuf.append((sal_Char)c);
72 break;
73 case 0xa0:
74 // non-breaking space
75 pStr = "\\~";
76 break;
77 case 0x1e:
78 // non-breaking hyphen
79 pStr = "\\_";
80 break;
81 case 0x1f:
82 // optional hyphen
83 pStr = "\\-";
84 break;
85 default:
86 if (c >= ' ' && c <= '~')
87 aBuf.append((sal_Char)c);
88 else
90 OUString sBuf(&c, 1);
91 OString sConverted;
92 if (pSuccess)
93 *pSuccess &= sBuf.convertToString(&sConverted, eDestEnc, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR);
94 else
95 sBuf.convertToString(&sConverted, eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
96 const sal_Int32 nLen = sConverted.getLength();
98 if (pUCMode && bUnicode)
100 if (*pUCMode != nLen)
102 aBuf.append("\\uc");
103 aBuf.append((sal_Int32)nLen);
104 // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.
105 aBuf.append(' ');
106 *pUCMode = nLen;
108 aBuf.append("\\u");
109 aBuf.append((sal_Int32)c);
112 for (sal_Int32 nI = 0; nI < nLen; ++nI)
114 aBuf.append("\\'");
115 aBuf.append(OutHex(sConverted[nI], 2));
119 if (pStr)
121 aBuf.append(pStr);
122 switch (c)
124 case 0xa0:
125 case 0x1e:
126 case 0x1f:
127 break;
128 default:
129 aBuf.append(' ');
132 return aBuf.makeStringAndClear();
135 OString OutString(const OUString& rStr, rtl_TextEncoding eDestEnc, bool bUnicode)
137 SAL_INFO("filter.ms", OSL_THIS_FUNC << ", rStr = '" << OUString(rStr) << "'");
138 OStringBuffer aBuf;
139 int nUCMode = 1;
140 for (sal_Int32 n = 0; n < rStr.getLength(); ++n)
141 aBuf.append(OutChar(rStr[n], &nUCMode, eDestEnc, nullptr, bUnicode));
142 if (nUCMode != 1)
144 aBuf.append(OOO_STRING_SVTOOLS_RTF_UC);
145 aBuf.append((sal_Int32)1);
146 aBuf.append(" "); // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.;
148 return aBuf.makeStringAndClear();
151 /// Checks if lossless conversion of the string to eDestEnc is possible or not.
152 static bool TryOutString(const OUString& rStr, rtl_TextEncoding eDestEnc)
154 int nUCMode = 1;
155 for (sal_Int32 n = 0; n < rStr.getLength(); ++n)
157 bool bRet;
158 OutChar(rStr[n], &nUCMode, eDestEnc, &bRet);
159 if (!bRet)
160 return false;
162 return true;
165 OString OutStringUpr(const sal_Char* pToken, const OUString& rStr, rtl_TextEncoding eDestEnc)
167 if (TryOutString(rStr, eDestEnc))
168 return OString("{") + pToken + " " + OutString(rStr, eDestEnc) + "}";
170 OStringBuffer aRet;
171 aRet.append("{" OOO_STRING_SVTOOLS_RTF_UPR "{");
172 aRet.append(pToken);
173 aRet.append(" ");
174 aRet.append(OutString(rStr, eDestEnc, /*bUnicode =*/ false));
175 aRet.append("}{" OOO_STRING_SVTOOLS_RTF_IGNORE OOO_STRING_SVTOOLS_RTF_UD "{");
176 aRet.append(pToken);
177 aRet.append(" ");
178 aRet.append(OutString(rStr, eDestEnc));
179 aRet.append("}}}");
180 return aRet.makeStringAndClear();
185 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */