2 * Copyright 2002-2009, Haiku, Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
7 * Axel Dörfler, axeld@pinc-software.de
11 #include "STXTTranslator.h"
15 #include <CharacterSet.h>
16 #include <CharacterSetRoster.h>
19 #include <TextEncoding.h>
29 using namespace BPrivate
;
// Translation context for the B_TRANSLATE() strings used in this file
// (presumably consumed by the Locale Kit macros — confirm).
32 #undef B_TRANSLATION_CONTEXT
33 #define B_TRANSLATION_CONTEXT "STXTTranslator"
// Size of the stack buffers used to copy stream data in
// translate_from_stxt() and translate_from_text(); the decode buffer in
// translate_from_text() is allocated at four times this size.
35 #define READ_BUFFER_SIZE 32768
// Size of the probe buffer that Identify() and Translate() pass on to
// identify_text().
36 #define DATA_BUFFER_SIZE 2048
38 // The input formats that this translator supports.
39 static const translation_format sInputFormats
[] = {
58 // The output formats that this translator supports.
59 static const translation_format sOutputFormats
[] = {
78 // Default settings for the Translator
// Both extension flags are boolean settings that default to false.
79 static const TranSetting sDefaultSettings
[] = {
80 {B_TRANSLATOR_EXT_HEADER_ONLY
, TRAN_SETTING_BOOL
, false},
81 {B_TRANSLATOR_EXT_DATA_ONLY
, TRAN_SETTING_BOOL
, false}
// Element counts of the three tables above; they are handed to the
// BaseTranslator constructor together with the tables themselves.
84 const uint32 kNumInputFormats
= sizeof(sInputFormats
) / sizeof(translation_format
);
85 const uint32 kNumOutputFormats
= sizeof(sOutputFormats
) / sizeof(translation_format
);
86 const uint32 kNumDefaultSettings
= sizeof(sDefaultSettings
) / sizeof(TranSetting
);
88 // ---------------------------------------------------------------
89 // make_nth_translator
91 // Creates a STXTTranslator object to be used by BTranslatorRoster
95 // Parameters: n, The translator to return. Since
96 // STXTTranslator only publishes one
97 // translator, it only returns a
98 // STXTTranslator if n == 0
100 // you, The image_id of the add-on that
101 // contains code (not used).
103 // flags, Has no meaning yet, should be 0.
107 // Returns: NULL if n is not zero,
108 // a new STXTTranslator if n is zero
109 // ---------------------------------------------------------------
111 make_nth_translator(int32 n
, image_id you
, uint32 flags
, ...)
// std::nothrow makes a failed allocation yield a null pointer instead of
// throwing, so the caller receives NULL in that case as well.
114 return new (std::nothrow
) STXTTranslator();
124 Determines if the data in inSource is of the STXT format.
126 \param header the STXT stream header read in by Identify() or Translate()
127 \param inSource the stream with the STXT data
128 \param outInfo information about the type of data from inSource is stored here
129 \param outType the desired output type for the data in inSource
130 \param ptxtheader if this is not NULL, the TEXT header from
131 inSource is copied to it
// Summary of the visible logic: reads and validates the big-endian TEXT
// header from inSource, seeks past the text data, validates the STYL header
// when one follows, copies the TEXT header fields to ptxtheader and fills in
// outInfo for a styled text stream. Every failed check that is visible here
// answers B_NO_TRANSLATOR.
134 identify_stxt_header(const TranslatorStyledTextStreamHeader
&header
,
135 BPositionIO
*inSource
, translator_info
*outInfo
, uint32 outType
,
136 TranslatorStyledTextTextHeader
*ptxtheader
= NULL
)
138 const ssize_t ktxtsize
= sizeof(TranslatorStyledTextTextHeader
);
139 const ssize_t kstylsize
= sizeof(TranslatorStyledTextStyleHeader
);
// One scratch buffer, sized for the larger of the two headers, is reused
// for both the TEXT and the STYL header reads.
141 uint8 buffer
[max(ktxtsize
, kstylsize
)];
143 // Check the TEXT header
144 TranslatorStyledTextTextHeader txtheader
;
// A short read means the stream cannot hold a complete TEXT header.
145 if (inSource
->Read(buffer
, ktxtsize
) != ktxtsize
)
146 return B_NO_TRANSLATOR
;
148 memcpy(&txtheader
, buffer
, ktxtsize
);
// The header is swapped as a run of uint32 values from big-endian to host
// byte order before its fields are inspected.
149 if (swap_data(B_UINT32_TYPE
, &txtheader
, ktxtsize
,
150 B_SWAP_BENDIAN_TO_HOST
) != B_OK
)
// Only a 'TEXT' block with the expected header size and UTF-8 charset is
// accepted.
153 if (txtheader
.header
.magic
!= 'TEXT'
154 || txtheader
.header
.header_size
!= sizeof(TranslatorStyledTextTextHeader
)
155 || txtheader
.charset
!= B_UNICODE_UTF8
)
156 return B_NO_TRANSLATOR
;
158 // skip the text data
159 off_t seekresult
, pos
;
// pos is the offset expected after the text data: stream header size plus
// TEXT header size plus the announced text length.
160 pos
= header
.header
.header_size
+ txtheader
.header
.header_size
161 + txtheader
.header
.data_size
;
162 seekresult
= inSource
->Seek(txtheader
.header
.data_size
,
// Ending up before the expected offset rejects the stream; the handling of
// the "beyond the expected offset" case is not visible here.
164 if (seekresult
< pos
)
165 return B_NO_TRANSLATOR
;
166 if (seekresult
> pos
)
169 // check the STYL header (not all STXT files have this)
171 TranslatorStyledTextStyleHeader stylheader
;
172 read
= inSource
->Read(buffer
, kstylsize
);
// Either a complete STYL header or nothing at all may follow the text; any
// other read result rejects the stream.
175 if (read
!= kstylsize
&& read
!= 0)
176 return B_NO_TRANSLATOR
;
178 // If there is a STYL header
179 if (read
== kstylsize
) {
180 memcpy(&stylheader
, buffer
, kstylsize
);
181 if (swap_data(B_UINT32_TYPE
, &stylheader
, kstylsize
,
182 B_SWAP_BENDIAN_TO_HOST
) != B_OK
)
185 if (stylheader
.header
.magic
!= 'STYL'
186 || stylheader
.header
.header_size
!=
187 sizeof(TranslatorStyledTextStyleHeader
))
188 return B_NO_TRANSLATOR
;
191 // if output TEXT header is supplied, fill it with data
// NOTE(review): ptxtheader defaults to NULL; confirm the assignments below
// are guarded by a null check.
193 ptxtheader
->header
.magic
= txtheader
.header
.magic
;
194 ptxtheader
->header
.header_size
= txtheader
.header
.header_size
;
195 ptxtheader
->header
.data_size
= txtheader
.header
.data_size
;
196 ptxtheader
->charset
= txtheader
.charset
;
199 // return information about the data in the stream
200 outInfo
->type
= B_STYLED_TEXT_FORMAT
;
201 outInfo
->group
= B_TRANSLATOR_TEXT
;
202 outInfo
->quality
= STXT_IN_QUALITY
;
203 outInfo
->capability
= STXT_IN_CAPABILITY
;
204 strlcpy(outInfo
->name
, B_TRANSLATE("Be styled text file"),
205 sizeof(outInfo
->name
));
// NOTE(review): unbounded strcpy here while the name above uses strlcpy;
// this is only safe as long as the literal fits into outInfo->MIME.
206 strcpy(outInfo
->MIME
, "text/x-vnd.Be-stxt");
213 Determines if the data in \a source is of the UTF8 plain
215 \param data buffer containing data already read (must be at
216 least DATA_BUFFER_SIZE bytes large)
217 \param bytesRead number of bytes that have already been read from the stream
218 \param source the stream the remaining probe data is read from
219 \param outInfo information about the type of data from source is stored here
220 \param outType the desired output type for the data in source
221 \param encoding set to the name of the text encoding detected in the data
// Summary of the visible logic: tops up the probe buffer from the stream,
// lets BTextEncoding guess the encoding of the sampled bytes, reports the
// encoding name through the encoding reference and fills in outInfo for a
// plain text stream.
224 identify_text(uint8
* data
, int32 bytesRead
, BPositionIO
* source
,
225 translator_info
* outInfo
, uint32 outType
, const char*& encoding
)
// Append to the bytes already in data so that at most DATA_BUFFER_SIZE
// bytes are sampled in total.
227 ssize_t readLater
= source
->Read(data
+ bytesRead
, DATA_BUFFER_SIZE
- bytesRead
);
// A read result below B_OK is treated as "not our format".
228 if (readLater
< B_OK
)
229 return B_NO_TRANSLATOR
;
231 bytesRead
+= readLater
;
// The encoding is detected from the sampled bytes only.
// NOTE(review): encoding is handed back to the caller as a raw pointer
// obtained from the local textEncoding object; confirm that the string it
// points to outlives this function.
233 BPrivate::BTextEncoding
textEncoding((char*)data
, (size_t)bytesRead
);
234 encoding
= textEncoding
.GetName();
235 if (strlen(encoding
) == 0) {
236 /* No valid character encoding found! */
237 return B_NO_TRANSLATOR
;
240 float capability
= TEXT_IN_CAPABILITY
;
244 // return information about the data in the stream
245 outInfo
->type
= B_TRANSLATOR_TEXT
;
246 outInfo
->group
= B_TRANSLATOR_TEXT
;
247 outInfo
->quality
= TEXT_IN_QUALITY
;
248 outInfo
->capability
= capability
;
250 strlcpy(outInfo
->name
, B_TRANSLATE("Plain text file"),
251 sizeof(outInfo
->name
));
253 //strlcpy(outInfo->MIME, type.Type(), sizeof(outInfo->MIME));
// The MIME type is always reported as the fixed string below.
254 strcpy(outInfo
->MIME
, "text/plain");
259 // ---------------------------------------------------------------
260 // translate_from_stxt
262 // Translates the data in inSource to the type outType and stores
263 // the translated data in outDestination.
267 // Parameters: inSource, the data to be translated
269 // outDestination, where the translated data is
272 // outType, the type to convert inSource to
274 // txtheader, the TEXT header from inSource
279 // Returns: B_BAD_VALUE, if outType is invalid
281 // B_NO_TRANSLATOR, if this translator doesn't understand the data
283 // B_ERROR, if there was an error allocating memory or converting
286 // B_OK, if all went well
287 // ---------------------------------------------------------------
289 translate_from_stxt(BPositionIO
*inSource
, BPositionIO
*outDestination
,
290 uint32 outType
, const TranslatorStyledTextTextHeader
&txtheader
)
// Start over from the beginning of the stream; the copy loop below reads
// it sequentially.
292 if (inSource
->Seek(0, SEEK_SET
) != 0)
295 const ssize_t kstxtsize
= sizeof(TranslatorStyledTextStreamHeader
);
296 const ssize_t ktxtsize
= sizeof(TranslatorStyledTextTextHeader
);
// outType selects between plain text output and a styled text copy.
299 if (outType
== B_TRANSLATOR_TEXT
)
301 else if (outType
== B_STYLED_TEXT_FORMAT
)
306 uint8 buffer
[READ_BUFFER_SIZE
];
// nreed: bytes requested from the next read; nread: bytes it returned;
// nwritten: bytes the last write accepted; ntotalread: used below to bound
// reads to the announced text length.
307 ssize_t nread
= 0, nwritten
= 0, nreed
= 0, ntotalread
= 0;
309 // skip to the actual text data when outputting a
// The seek is relative to the stream start set above, so the resulting
// position must equal the combined size of the two headers.
312 if (inSource
->Seek(kstxtsize
+ ktxtsize
, SEEK_CUR
) !=
313 kstxtsize
+ ktxtsize
)
317 // Read data from inSource
318 // When outputting B_TRANSLATOR_TEXT, the loop stops when all of
319 // the text data has been read and written.
320 // When outputting B_STYLED_TEXT_FORMAT, the loop stops when all
321 // of the data from inSource has been read and written.
// Two request sizes are visible: one capped at the text bytes still
// outstanding, one that is always a full buffer.
323 nreed
= min((size_t)READ_BUFFER_SIZE
,
324 (size_t)txtheader
.header
.data_size
- ntotalread
);
326 nreed
= READ_BUFFER_SIZE
;
327 nread
= inSource
->Read(buffer
, nreed
);
// Whatever was read is written out unchanged; the write result is compared
// against the number of bytes read.
329 nwritten
= outDestination
->Write(buffer
, nread
);
330 if (nwritten
!= nread
)
335 nreed
= min((size_t)READ_BUFFER_SIZE
,
336 (size_t)txtheader
.header
.data_size
- ntotalread
);
338 nreed
= READ_BUFFER_SIZE
;
339 nread
= inSource
->Read(buffer
, nreed
);
// For plain text output the announced text length is checked once more
// after the loop.
342 if (btoplain
&& static_cast<ssize_t
>(txtheader
.header
.data_size
) !=
344 // If not all of the text data was able to be read...
345 return B_NO_TRANSLATOR
;
350 // ---------------------------------------------------------------
353 // Outputs the Stream and Text headers from the B_STYLED_TEXT_FORMAT
354 // to outDestination, setting the data_size member of the text header
359 // Parameters: outDestination, where the translated data is
362 // text_data_size, number of bytes in data section
363 // of the TEXT header
370 // B_ERROR, if there was an error writing to outDestination or
371 // an error with converting the byte order
373 // B_OK, if all went well
374 // ---------------------------------------------------------------
376 output_headers(BPositionIO
*outDestination
, uint32 text_data_size
)
378 const int32 kHeadersSize
= sizeof(TranslatorStyledTextStreamHeader
) +
379 sizeof(TranslatorStyledTextTextHeader
);
381 TranslatorStyledTextStreamHeader stxtheader
;
382 TranslatorStyledTextTextHeader txtheader
;
// Both headers are assembled back to back in this buffer so that they can
// be byte-swapped and written with a single call each.
384 uint8 buffer
[kHeadersSize
];
// Stream header: data_size 0 and version 100 are the values Identify() and
// Translate() require when they recognise a styled text stream.
386 stxtheader
.header
.magic
= 'STXT';
387 stxtheader
.header
.header_size
= sizeof(TranslatorStyledTextStreamHeader
);
388 stxtheader
.header
.data_size
= 0;
389 stxtheader
.version
= 100;
390 memcpy(buffer
, &stxtheader
, stxtheader
.header
.header_size
);
// TEXT header: carries the length of the text that follows and always
// announces UTF-8.
392 txtheader
.header
.magic
= 'TEXT';
393 txtheader
.header
.header_size
= sizeof(TranslatorStyledTextTextHeader
);
394 txtheader
.header
.data_size
= text_data_size
;
395 txtheader
.charset
= B_UNICODE_UTF8
;
// The TEXT header is placed directly behind the stream header.
396 memcpy(buffer
+ stxtheader
.header
.header_size
, &txtheader
,
397 txtheader
.header
.header_size
);
399 // write out headers in Big Endian byte order
400 result
= swap_data(B_UINT32_TYPE
, buffer
, kHeadersSize
,
401 B_SWAP_HOST_TO_BENDIAN
);
// The headers are only written when the byte swap succeeded; the write
// result is compared against the full header size.
402 if (result
== B_OK
) {
403 ssize_t nwritten
= 0;
404 nwritten
= outDestination
->Write(buffer
, kHeadersSize
);
405 if (nwritten
!= kHeadersSize
)
414 // ---------------------------------------------------------------
417 // Writes out the actual style information into outDestination
418 // using the data from pflatRunArray
422 // Parameters: outDestination, where the translated data is
425 // text_size, size in bytes of the text in
428 // data_size, size of pflatRunArray
434 // B_ERROR, if there was an error writing to outDestination or
435 // an error with converting the byte order
437 // B_OK, if all went well
438 // ---------------------------------------------------------------
440 output_styles(BPositionIO
*outDestination
, uint32 text_size
,
441 uint8
*pflatRunArray
, ssize_t data_size
)
443 const ssize_t kstylsize
= sizeof(TranslatorStyledTextStyleHeader
);
445 uint8 buffer
[kstylsize
];
447 // output STYL header
448 TranslatorStyledTextStyleHeader stylheader
;
449 stylheader
.header
.magic
= 'STYL';
450 stylheader
.header
.header_size
=
451 sizeof(TranslatorStyledTextStyleHeader
);
452 stylheader
.header
.data_size
= data_size
;
// The styles are declared to apply from offset 0 over text_size bytes.
453 stylheader
.apply_offset
= 0;
454 stylheader
.apply_length
= text_size
;
// The header is copied to a scratch buffer and swapped to big-endian there,
// leaving stylheader itself in host order.
456 memcpy(buffer
, &stylheader
, kstylsize
);
457 if (swap_data(B_UINT32_TYPE
, buffer
, kstylsize
,
458 B_SWAP_HOST_TO_BENDIAN
) != B_OK
)
460 if (outDestination
->Write(buffer
, kstylsize
) != kstylsize
)
463 // output actual style information
// The run array is written exactly as passed in; no byte swapping of it is
// visible in this function.
464 if (outDestination
->Write(pflatRunArray
,
465 data_size
) != data_size
)
473 Convert the plain text from \a source (decoded to UTF-8 when an
474 encoding is in use) to plain or styled text in \a destination
// Summary of the visible logic: measures the source, writes the styled text
// headers when requested, resolves the text encoding (argument or
// "be:encoding" attribute), copies or decodes the text to the destination,
// rewrites the headers if decoding changed the size, and finally appends the
// "styles" attribute data as a STYL block.
477 translate_from_text(BPositionIO
* source
, const char* encoding
, bool forceEncoding
,
478 BPositionIO
* destination
, uint32 outType
)
// Only plain text and styled text output are handled here.
480 if (outType
!= B_TRANSLATOR_TEXT
&& outType
!= B_STYLED_TEXT_FORMAT
)
483 // find the length of the text
484 off_t size
= source
->Seek(0, SEEK_END
);
// A seek result that is returned as-is doubles as the error code.
486 return (status_t
)size
;
// output_headers() takes the text length as a uint32, so larger sources
// cannot be represented in the styled text format.
487 if (size
> UINT32_MAX
&& outType
== B_STYLED_TEXT_FORMAT
)
488 return B_NOT_SUPPORTED
;
490 status_t status
= source
->Seek(0, SEEK_SET
);
494 if (outType
== B_STYLED_TEXT_FORMAT
) {
495 // output styled text headers
// The headers are written up front with the source size; they are written
// again further down if decoding changes the text length.
496 status
= output_headers(destination
, (uint32
)size
);
// MallocBuffer: small local helper that owns a malloc()ed buffer and
// releases it with free() in its destructor. A Size() of 0 is used below
// as the "no decoding" indicator.
503 MallocBuffer() : fBuffer(NULL
), fSize(0) {}
504 ~MallocBuffer() { free(fBuffer
); }
506 void* Buffer() { return fBuffer
; }
507 size_t Size() const { return fSize
; }
510 Allocate(size_t size
)
512 fBuffer
= malloc(size
);
513 if (fBuffer
!= NULL
) {
524 BMallocIO encodingIO
;
// Attributes are only reachable when the source is a BNode; node is null
// otherwise.
// NOTE(review): node is dereferenced below; confirm a null check guards
// those uses.
526 BNode
* node
= dynamic_cast<BNode
*>(source
);
527 BString
name(encoding
);
529 // determine encoding, if available
530 bool hasAttribute
= false;
// An encoding forced by the caller takes precedence over the attribute.
531 if (encoding
!= NULL
&& !forceEncoding
) {
533 node
->GetAttrInfo("be:encoding", &info
);
// A string attribute holds the encoding name directly.
535 if ((info
.type
== B_STRING_TYPE
) && (node
->ReadAttrString(
536 "be:encoding", &name
) == B_OK
)) {
537 encoding
= name
.String();
539 } else if (info
.type
== B_INT32_TYPE
) {
540 // Try the BeOS version of the attribute, which used an int32
541 // and a well-known list of encodings.
543 ssize_t bytesRead
= node
->ReadAttr("be:encoding", B_INT32_TYPE
, 0,
544 &value
, sizeof(value
));
545 if (bytesRead
== (ssize_t
)sizeof(value
)) {
// 65535 is excluded from the lookup (presumably a "no conversion"
// marker — confirm); other values are looked up as conversion IDs.
547 if (value
!= 65535) {
548 const BCharacterSet
* characterSet
549 = BCharacterSetRoster::GetCharacterSetByConversionID(value
);
550 if (characterSet
!= NULL
)
551 encoding
= characterSet
->GetName();
557 // we don't write the encoding in this case
// The decode buffer is four times the read buffer and is only allocated
// when an encoding is known.
560 if (encoding
!= NULL
)
561 encodingBuffer
.Allocate(READ_BUFFER_SIZE
* 4);
563 if (!hasAttribute
&& encoding
!= NULL
) {
564 // add encoding attribute, so that someone opening the file can
565 // retrieve it for persistence
566 node
->WriteAttr("be:encoding", B_STRING_TYPE
, 0, encoding
,
// outputSize counts the bytes actually written as text.
571 off_t outputSize
= 0;
574 BPrivate::BTextEncoding
codec(encoding
);
576 // output the actual text part of the data
578 uint8 buffer
[READ_BUFFER_SIZE
];
579 bytesRead
= source
->Read(buffer
, READ_BUFFER_SIZE
);
580 if (bytesRead
< B_OK
)
585 if (encodingBuffer
.Size() == 0) {
586 // default, no encoding
587 ssize_t bytesWritten
= destination
->Write(buffer
, bytesRead
);
588 if (bytesWritten
!= bytesRead
) {
589 if (bytesWritten
< B_OK
)
595 outputSize
+= bytesRead
;
597 // decode text file to UTF-8
598 const char* pos
= (char*)buffer
;
599 size_t encodingLength
= encodingIO
.BufferLength();
600 int32 bytesLeft
= bytesRead
;
// encodingLength is passed to Decode() as the capacity of the decode
// buffer and is afterwards used as the number of bytes to write.
603 encodingLength
= READ_BUFFER_SIZE
* 4;
606 status
= codec
.Decode(pos
, bytes
,
607 (char*)encodingBuffer
.Buffer(), encodingLength
);
612 ssize_t bytesWritten
= destination
->Write(encodingBuffer
.Buffer(),
614 if (bytesWritten
< (ssize_t
)encodingLength
) {
615 if (bytesWritten
< B_OK
)
623 outputSize
+= encodingLength
;
// Keep decoding the current chunk while output is produced and input
// remains.
624 } while (encodingLength
> 0 && bytesLeft
> 0);
// The outer loop ends once a read returns no more data.
626 } while (bytesRead
> 0);
628 if (outType
!= B_STYLED_TEXT_FORMAT
)
631 if (encodingBuffer
.Size() != 0 && size
!= outputSize
) {
632 if (outputSize
> UINT32_MAX
)
633 return B_NOT_SUPPORTED
;
635 // we need to update the header as the decoded text size has changed
636 status
= destination
->Seek(0, SEEK_SET
);
638 status
= output_headers(destination
, (uint32
)outputSize
);
// Return to the end of the output so the style data is appended.
640 status
= destination
->Seek(0, SEEK_END
);
646 // Read file attributes if outputting styled data
647 // and source is a BNode object
652 // Try to read styles - we only propagate an error if the actual on-disk
653 // data is likely to be okay
655 const char *kAttrName
= "styles";
657 if (node
->GetAttrInfo(kAttrName
, &info
) != B_OK
)
// The attribute must be raw data of at least 160 bytes to be used.
660 if (info
.type
!= B_RAW_TYPE
|| info
.size
< 160) {
661 // styles seem to be broken, but since we got the text,
662 // we don't propagate the error
666 uint8
* flatRunArray
= new (std::nothrow
) uint8
[info
.size
];
667 if (flatRunArray
== NULL
)
670 bytesRead
= node
->ReadAttr(kAttrName
, B_RAW_TYPE
, 0, flatRunArray
, info
.size
);
// NOTE(review): flatRunArray is only released by the delete[] at the end;
// confirm the short-read path below does not return without freeing it.
671 if (bytesRead
!= info
.size
)
// NOTE(review): the source byte count (size) is passed as the styled text
// length even when the decoded outputSize differs, and the result of
// output_styles() is ignored — confirm both are intended.
674 output_styles(destination
, size
, flatRunArray
, info
.size
);
676 delete[] flatRunArray
;
// Constructor: forwards the translator name and description, the version,
// the format and default-setting tables with their element counts, the
// settings name and the two handled type codes to BaseTranslator.
684 STXTTranslator::STXTTranslator()
685 : BaseTranslator(B_TRANSLATE("StyledEdit files"),
686 B_TRANSLATE("StyledEdit file translator"),
687 STXT_TRANSLATOR_VERSION
,
688 sInputFormats
, kNumInputFormats
,
689 sOutputFormats
, kNumOutputFormats
,
690 "STXTTranslator_Settings",
691 sDefaultSettings
, kNumDefaultSettings
,
692 B_TRANSLATOR_TEXT
, B_STYLED_TEXT_FORMAT
)
// Destructor.
697 STXTTranslator::~STXTTranslator()
// Identifies the stream: a valid styled text stream header hands over to
// identify_stxt_header(), anything else is probed as plain text by
// identify_text().
703 STXTTranslator::Identify(BPositionIO
*inSource
,
704 const translation_format
*inFormat
, BMessage
*ioExtension
,
705 translator_info
*outInfo
, uint32 outType
)
// Plain text is the fallback output type; any other type than the two
// below is refused.
708 outType
= B_TRANSLATOR_TEXT
;
709 if (outType
!= B_TRANSLATOR_TEXT
&& outType
!= B_STYLED_TEXT_FORMAT
)
710 return B_NO_TRANSLATOR
;
712 const ssize_t kstxtsize
= sizeof(TranslatorStyledTextStreamHeader
);
// The buffer is DATA_BUFFER_SIZE bytes so identify_text() can keep filling
// it after the header bytes read here.
714 uint8 buffer
[DATA_BUFFER_SIZE
];
716 // Read in the header to determine
717 // if the data is supported
718 nread
= inSource
->Read(buffer
, kstxtsize
);
722 // read in enough data to fill the stream header
723 if (nread
== kstxtsize
) {
724 TranslatorStyledTextStreamHeader header
;
725 memcpy(&header
, buffer
, kstxtsize
);
// The copy is swapped from big-endian to host order; buffer keeps the raw
// bytes for the plain text probe.
726 if (swap_data(B_UINT32_TYPE
, &header
, kstxtsize
,
727 B_SWAP_BENDIAN_TO_HOST
) != B_OK
)
// A styled text stream header has the format's magic, the exact header
// size, no data of its own and version 100.
730 if (header
.header
.magic
== B_STYLED_TEXT_FORMAT
731 && header
.header
.header_size
== (int32
)kstxtsize
732 && header
.header
.data_size
== 0
733 && header
.version
== 100)
734 return identify_stxt_header(header
, inSource
, outInfo
, outType
);
737 // if the data is not styled text, check if it is plain text
// The detected encoding is not used any further in this method.
738 const char* encoding
;
739 return identify_text(buffer
, nread
, inSource
, outInfo
, outType
, encoding
);
// Translates the stream: a valid styled text stream header leads to
// translate_from_stxt(), anything else is probed as plain text and handed
// to translate_from_text(), optionally with an encoding supplied through
// ioExtension.
744 STXTTranslator::Translate(BPositionIO
* source
, const translator_info
* info
,
745 BMessage
* ioExtension
, uint32 outType
, BPositionIO
* outDestination
)
// Plain text is the fallback output type; any other type than the two
// below is refused.
748 outType
= B_TRANSLATOR_TEXT
;
749 if (outType
!= B_TRANSLATOR_TEXT
&& outType
!= B_STYLED_TEXT_FORMAT
)
750 return B_NO_TRANSLATOR
;
752 const ssize_t headerSize
= sizeof(TranslatorStyledTextStreamHeader
);
// The buffer is DATA_BUFFER_SIZE bytes so identify_text() can keep filling
// it after the header bytes read here.
753 uint8 buffer
[DATA_BUFFER_SIZE
];
// Local scratch info for the identify helpers; it is not returned to the
// caller.
755 translator_info outInfo
;
756 // Read in the header to determine
757 // if the data is supported
758 ssize_t bytesRead
= source
->Read(buffer
, headerSize
);
762 // read in enough data to fill the stream header
763 if (bytesRead
== headerSize
) {
764 TranslatorStyledTextStreamHeader header
;
765 memcpy(&header
, buffer
, headerSize
);
766 if (swap_data(B_UINT32_TYPE
, &header
, headerSize
,
767 B_SWAP_BENDIAN_TO_HOST
) != B_OK
)
// Same stream header check as in Identify().
770 if (header
.header
.magic
== B_STYLED_TEXT_FORMAT
771 && header
.header
.header_size
== sizeof(TranslatorStyledTextStreamHeader
)
772 && header
.header
.data_size
== 0
773 && header
.version
== 100) {
// textHeader is later passed on to translate_from_stxt().
774 TranslatorStyledTextTextHeader textHeader
;
775 result
= identify_stxt_header(header
, source
, &outInfo
, outType
,
780 return translate_from_stxt(source
, outDestination
, outType
, textHeader
);
784 // if the data is not styled text, check if it is ASCII text
785 bool forceEncoding
= false;
786 const char* encoding
= NULL
;
787 result
= identify_text(buffer
, bytesRead
, source
, &outInfo
, outType
, encoding
);
// A "be:encoding" string in ioExtension is looked up here; forceEncoding
// makes translate_from_text() skip its attribute lookup.
791 if (ioExtension
!= NULL
) {
793 if (ioExtension
->FindString("be:encoding", &value
) == B_OK
797 forceEncoding
= true;
801 return translate_from_text(source
, encoding
, forceEncoding
, outDestination
, outType
);
// Creates the settings view for this translator: a 225 x 175 STXTView
// bound to the given settings object.
// NOTE(review): unlike make_nth_translator() this uses a plain (throwing)
// new, and the returned view is presumably owned by the caller — confirm.
806 STXTTranslator::NewConfigView(TranslatorSettings
*settings
)
808 return new STXTView(BRect(0, 0, 225, 175),
809 B_TRANSLATE("STXTTranslator Settings"),
810 B_FOLLOW_ALL
, B_WILL_DRAW
, settings
);