/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
10 #include <filter/msfilter/rtfutil.hxx>
11 #include <rtl/strbuf.hxx>
12 #include <osl/diagnose.h>
13 #include <svtools/rtfkeywd.hxx>
20 OString
OutHex(sal_uLong nHex
, sal_uInt8 nLen
)
22 sal_Char aNToABuf
[] = "0000000000000000";
24 OSL_ENSURE(nLen
< sizeof(aNToABuf
), "nLen is too big");
25 if (nLen
>= sizeof(aNToABuf
))
26 nLen
= (sizeof(aNToABuf
)-1);
28 // Set pointer to the buffer end
29 sal_Char
* pStr
= aNToABuf
+ (sizeof(aNToABuf
)-1);
30 for (sal_uInt8 n
= 0; n
< nLen
; ++n
)
32 *(--pStr
) = (sal_Char
)(nHex
& 0xf) + 48;
40 // Ideally, this function should work on (sal_uInt32) Unicode scalar values
41 // instead of (sal_Unicode) UTF-16 code units. However, at least "Rich Text
42 // Format (RTF) Specification Version 1.9.1" available at
43 // <https://www.microsoft.com/en-us/download/details.aspx?id=10725> does not
44 // look like it allows non-BMP Unicode characters >= 0x10000 in the \uN notation
45 // (it only talks about "Unicode character", but then explains how values of N
46 // greater than 32767 will be expressed as negative signed 16-bit numbers, so
47 // that smells like \uN is limited to BMP).
48 // However the "Mathematics" section has an example that shows the code point
49 // U+1D44E being encoded as UTF-16 surrogate pair "\u-10187?\u-9138?", so
50 // sal_Unicode actually works fine here.
51 OString
OutChar(sal_Unicode c
, int* pUCMode
, rtl_TextEncoding eDestEnc
, bool* pSuccess
, bool bUnicode
)
56 const sal_Char
* pStr
= nullptr;
57 // 0x0b instead of \n, etc because of the replacements in SwWW8AttrIter::GetSnippet()
62 pStr
= OOO_STRING_SVTOOLS_RTF_LINE
;
65 pStr
= OOO_STRING_SVTOOLS_RTF_TAB
;
71 aBuf
.append((sal_Char
)c
);
78 // non-breaking hyphen
86 if (c
>= ' ' && c
<= '~')
87 aBuf
.append((sal_Char
)c
);
93 *pSuccess
&= sBuf
.convertToString(&sConverted
, eDestEnc
, RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
| RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
);
95 sBuf
.convertToString(&sConverted
, eDestEnc
, OUSTRING_TO_OSTRING_CVTFLAGS
);
96 const sal_Int32 nLen
= sConverted
.getLength();
98 if (pUCMode
&& bUnicode
)
100 if (*pUCMode
!= nLen
)
103 aBuf
.append((sal_Int32
)nLen
);
104 // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.
109 aBuf
.append((sal_Int32
)c
);
112 for (sal_Int32 nI
= 0; nI
< nLen
; ++nI
)
115 aBuf
.append(OutHex(sConverted
[nI
], 2));
132 return aBuf
.makeStringAndClear();
135 OString
OutString(const OUString
& rStr
, rtl_TextEncoding eDestEnc
, bool bUnicode
)
137 SAL_INFO("filter.ms", OSL_THIS_FUNC
<< ", rStr = '" << OUString(rStr
) << "'");
140 for (sal_Int32 n
= 0; n
< rStr
.getLength(); ++n
)
141 aBuf
.append(OutChar(rStr
[n
], &nUCMode
, eDestEnc
, nullptr, bUnicode
));
144 aBuf
.append(OOO_STRING_SVTOOLS_RTF_UC
);
145 aBuf
.append((sal_Int32
)1);
146 aBuf
.append(" "); // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.;
148 return aBuf
.makeStringAndClear();
151 /// Checks if lossless conversion of the string to eDestEnc is possible or not.
152 static bool TryOutString(const OUString
& rStr
, rtl_TextEncoding eDestEnc
)
155 for (sal_Int32 n
= 0; n
< rStr
.getLength(); ++n
)
158 OutChar(rStr
[n
], &nUCMode
, eDestEnc
, &bRet
);
165 OString
OutStringUpr(const sal_Char
* pToken
, const OUString
& rStr
, rtl_TextEncoding eDestEnc
)
167 if (TryOutString(rStr
, eDestEnc
))
168 return OString("{") + pToken
+ " " + OutString(rStr
, eDestEnc
) + "}";
171 aRet
.append("{" OOO_STRING_SVTOOLS_RTF_UPR
"{");
174 aRet
.append(OutString(rStr
, eDestEnc
, /*bUnicode =*/ false));
175 aRet
.append("}{" OOO_STRING_SVTOOLS_RTF_IGNORE OOO_STRING_SVTOOLS_RTF_UD
"{");
178 aRet
.append(OutString(rStr
, eDestEnc
));
180 return aRet
.makeStringAndClear();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */