Update to Scintilla 5.5.2
[TortoiseGit.git] / ext / scintilla / src / CellBuffer.cxx
blob78f291bf90deb1195592c9ec4de9499dee0d2d48
1 // Scintilla source code edit control
2 /** @file CellBuffer.cxx
3 ** Manages a buffer of cells.
4 **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <cstddef>
9 #include <cstdlib>
10 #include <cassert>
11 #include <cstring>
12 #include <cstdio>
13 #include <cstdarg>
14 #include <climits>
16 #include <stdexcept>
17 #include <string>
18 #include <string_view>
19 #include <vector>
20 #include <optional>
21 #include <algorithm>
22 #include <memory>
24 #include "ScintillaTypes.h"
26 #include "Debugging.h"
28 #include "Position.h"
29 #include "SplitVector.h"
30 #include "Partitioning.h"
31 #include "RunStyles.h"
32 #include "SparseVector.h"
33 #include "ChangeHistory.h"
34 #include "CellBuffer.h"
35 #include "UndoHistory.h"
36 #include "UniConversion.h"
38 namespace Scintilla::Internal {
40 struct CountWidths {
41 // Measures the number of characters in a string divided into those
42 // from the Base Multilingual Plane and those from other planes.
43 Sci::Position countBasePlane;
44 Sci::Position countOtherPlanes;
45 explicit CountWidths(Sci::Position countBasePlane_=0, Sci::Position countOtherPlanes_=0) noexcept :
46 countBasePlane(countBasePlane_),
47 countOtherPlanes(countOtherPlanes_) {
49 CountWidths operator-() const noexcept {
50 return CountWidths(-countBasePlane, -countOtherPlanes);
52 Sci::Position WidthUTF32() const noexcept {
53 // All code points take one code unit in UTF-32.
54 return countBasePlane + countOtherPlanes;
56 Sci::Position WidthUTF16() const noexcept {
57 // UTF-16 takes 2 code units for other planes
58 return countBasePlane + 2 * countOtherPlanes;
60 void CountChar(int lenChar) noexcept {
61 if (lenChar == 4) {
62 countOtherPlanes++;
63 } else {
64 countBasePlane++;
69 class ILineVector {
70 public:
71 virtual void Init() = 0;
72 virtual void SetPerLine(PerLine *pl) noexcept = 0;
73 virtual void InsertText(Sci::Line line, Sci::Position delta) noexcept = 0;
74 virtual void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) = 0;
75 virtual void InsertLines(Sci::Line line, const Sci::Position *positions, size_t lines, bool lineStart) = 0;
76 virtual void SetLineStart(Sci::Line line, Sci::Position position) noexcept = 0;
77 virtual void RemoveLine(Sci::Line line) = 0;
78 virtual Sci::Line Lines() const noexcept = 0;
79 virtual void AllocateLines(Sci::Line lines) = 0;
80 virtual Sci::Line LineFromPosition(Sci::Position pos) const noexcept = 0;
81 virtual Sci::Position LineStart(Sci::Line line) const noexcept = 0;
82 virtual void InsertCharacters(Sci::Line line, CountWidths delta) noexcept = 0;
83 virtual void SetLineCharactersWidth(Sci::Line line, CountWidths width) noexcept = 0;
84 virtual Scintilla::LineCharacterIndexType LineCharacterIndex() const noexcept = 0;
85 virtual bool AllocateLineCharacterIndex(Scintilla::LineCharacterIndexType lineCharacterIndex, Sci::Line lines) = 0;
86 virtual bool ReleaseLineCharacterIndex(Scintilla::LineCharacterIndexType lineCharacterIndex) = 0;
87 virtual Sci::Position IndexLineStart(Sci::Line line, Scintilla::LineCharacterIndexType lineCharacterIndex) const noexcept = 0;
88 virtual Sci::Line LineFromPositionIndex(Sci::Position pos, Scintilla::LineCharacterIndexType lineCharacterIndex) const noexcept = 0;
89 virtual ~ILineVector() {}
94 using namespace Scintilla;
95 using namespace Scintilla::Internal;
97 template <typename POS>
98 class LineStartIndex {
99 // line_cast(): cast Sci::Line to either 32-bit or 64-bit value
100 // This avoids warnings from Visual C++ Code Analysis and shortens code
101 static constexpr POS line_cast(Sci::Line pos) noexcept {
102 return static_cast<POS>(pos);
104 public:
105 int refCount;
106 Partitioning<POS> starts;
108 LineStartIndex() : refCount(0), starts(4) {
109 // Minimal initial allocation
111 bool Allocate(Sci::Line lines) {
112 refCount++;
113 Sci::Position length = starts.PositionFromPartition(starts.Partitions());
114 for (Sci::Line line = starts.Partitions(); line < lines; line++) {
115 // Produce an ascending sequence that will be filled in with correct widths later
116 length++;
117 starts.InsertPartition(line_cast(line), line_cast(length));
119 return refCount == 1;
121 bool Release() {
122 if (refCount == 1) {
123 starts.DeleteAll();
125 refCount--;
126 return refCount == 0;
128 bool Active() const noexcept {
129 return refCount > 0;
131 Sci::Position LineWidth(Sci::Line line) const noexcept {
132 return starts.PositionFromPartition(line_cast(line) + 1) -
133 starts.PositionFromPartition(line_cast(line));
135 void SetLineWidth(Sci::Line line, Sci::Position width) noexcept {
136 const Sci::Position widthCurrent = LineWidth(line);
137 starts.InsertText(line_cast(line), line_cast(width - widthCurrent));
139 void AllocateLines(Sci::Line lines) {
140 if (lines > starts.Partitions()) {
141 starts.ReAllocate(lines);
144 void InsertLines(Sci::Line line, Sci::Line lines) {
145 // Insert multiple lines with each temporarily 1 character wide.
146 // The line widths will be fixed up by later measuring code.
147 const POS lineAsPos = line_cast(line);
148 const POS lineStart = starts.PositionFromPartition(lineAsPos - 1) + 1;
149 for (POS l = 0; l < line_cast(lines); l++) {
150 starts.InsertPartition(lineAsPos + l, lineStart + l);
155 template <typename POS>
156 class LineVector : public ILineVector {
157 Partitioning<POS> starts;
158 PerLine *perLine;
159 LineStartIndex<POS> startsUTF16;
160 LineStartIndex<POS> startsUTF32;
161 LineCharacterIndexType activeIndices;
163 void SetActiveIndices() noexcept {
164 activeIndices =
165 (startsUTF32.Active() ? LineCharacterIndexType::Utf32 : LineCharacterIndexType::None)
166 | (startsUTF16.Active() ? LineCharacterIndexType::Utf16 : LineCharacterIndexType::None);
169 // pos_cast(): cast Sci::Line and Sci::Position to either 32-bit or 64-bit value
170 // This avoids warnings from Visual C++ Code Analysis and shortens code
171 static constexpr POS pos_cast(Sci::Position pos) noexcept {
172 return static_cast<POS>(pos);
175 // line_from_pos_cast(): return 32-bit or 64-bit value as Sci::Line
176 // This avoids warnings from Visual C++ Code Analysis and shortens code
177 static constexpr Sci::Line line_from_pos_cast(POS line) noexcept {
178 return static_cast<Sci::Line>(line);
181 public:
182 LineVector() : starts(256), perLine(nullptr), activeIndices(LineCharacterIndexType::None) {
184 void Init() override {
185 starts.DeleteAll();
186 if (perLine) {
187 perLine->Init();
189 startsUTF32.starts.DeleteAll();
190 startsUTF16.starts.DeleteAll();
192 void SetPerLine(PerLine *pl) noexcept override {
193 perLine = pl;
195 void InsertText(Sci::Line line, Sci::Position delta) noexcept override {
196 starts.InsertText(pos_cast(line), pos_cast(delta));
198 void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) override {
199 const POS lineAsPos = pos_cast(line);
200 starts.InsertPartition(lineAsPos, pos_cast(position));
201 if (activeIndices != LineCharacterIndexType::None) {
202 if (FlagSet(activeIndices, LineCharacterIndexType::Utf32)) {
203 startsUTF32.InsertLines(line, 1);
205 if (FlagSet(activeIndices, LineCharacterIndexType::Utf16)) {
206 startsUTF16.InsertLines(line, 1);
209 if (perLine) {
210 if ((line > 0) && lineStart)
211 line--;
212 perLine->InsertLine(line);
215 void InsertLines(Sci::Line line, const Sci::Position *positions, size_t lines, bool lineStart) override {
216 const POS lineAsPos = pos_cast(line);
217 if constexpr (sizeof(Sci::Position) == sizeof(POS)) {
218 starts.InsertPartitions(lineAsPos, positions, lines);
219 } else {
220 starts.InsertPartitionsWithCast(lineAsPos, positions, lines);
222 if (activeIndices != LineCharacterIndexType::None) {
223 if (FlagSet(activeIndices, LineCharacterIndexType::Utf32)) {
224 startsUTF32.InsertLines(line, lines);
226 if (FlagSet(activeIndices, LineCharacterIndexType::Utf16)) {
227 startsUTF16.InsertLines(line, lines);
230 if (perLine) {
231 if ((line > 0) && lineStart)
232 line--;
233 perLine->InsertLines(line, lines);
236 void SetLineStart(Sci::Line line, Sci::Position position) noexcept override {
237 starts.SetPartitionStartPosition(pos_cast(line), pos_cast(position));
239 void RemoveLine(Sci::Line line) override {
240 starts.RemovePartition(pos_cast(line));
241 if (FlagSet(activeIndices, LineCharacterIndexType::Utf32)) {
242 startsUTF32.starts.RemovePartition(pos_cast(line));
244 if (FlagSet(activeIndices, LineCharacterIndexType::Utf16)) {
245 startsUTF16.starts.RemovePartition(pos_cast(line));
247 if (perLine) {
248 perLine->RemoveLine(line);
251 Sci::Line Lines() const noexcept override {
252 return line_from_pos_cast(starts.Partitions());
254 void AllocateLines(Sci::Line lines) override {
255 if (lines > Lines()) {
256 starts.ReAllocate(lines);
257 if (FlagSet(activeIndices, LineCharacterIndexType::Utf32)) {
258 startsUTF32.AllocateLines(lines);
260 if (FlagSet(activeIndices, LineCharacterIndexType::Utf16)) {
261 startsUTF16.AllocateLines(lines);
265 Sci::Line LineFromPosition(Sci::Position pos) const noexcept override {
266 return line_from_pos_cast(starts.PartitionFromPosition(pos_cast(pos)));
268 Sci::Position LineStart(Sci::Line line) const noexcept override {
269 return starts.PositionFromPartition(pos_cast(line));
271 void InsertCharacters(Sci::Line line, CountWidths delta) noexcept override {
272 if (FlagSet(activeIndices, LineCharacterIndexType::Utf32)) {
273 startsUTF32.starts.InsertText(pos_cast(line), pos_cast(delta.WidthUTF32()));
275 if (FlagSet(activeIndices, LineCharacterIndexType::Utf16)) {
276 startsUTF16.starts.InsertText(pos_cast(line), pos_cast(delta.WidthUTF16()));
279 void SetLineCharactersWidth(Sci::Line line, CountWidths width) noexcept override {
280 if (FlagSet(activeIndices, LineCharacterIndexType::Utf32)) {
281 assert(startsUTF32.starts.Partitions() == starts.Partitions());
282 startsUTF32.SetLineWidth(line, width.WidthUTF32());
284 if (FlagSet(activeIndices, LineCharacterIndexType::Utf16)) {
285 assert(startsUTF16.starts.Partitions() == starts.Partitions());
286 startsUTF16.SetLineWidth(line, width.WidthUTF16());
290 LineCharacterIndexType LineCharacterIndex() const noexcept override {
291 return activeIndices;
293 bool AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex, Sci::Line lines) override {
294 const LineCharacterIndexType activeIndicesStart = activeIndices;
295 if (FlagSet(lineCharacterIndex, LineCharacterIndexType::Utf32)) {
296 startsUTF32.Allocate(lines);
297 assert(startsUTF32.starts.Partitions() == starts.Partitions());
299 if (FlagSet(lineCharacterIndex, LineCharacterIndexType::Utf16)) {
300 startsUTF16.Allocate(lines);
301 assert(startsUTF16.starts.Partitions() == starts.Partitions());
303 SetActiveIndices();
304 return activeIndicesStart != activeIndices;
306 bool ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) override {
307 const LineCharacterIndexType activeIndicesStart = activeIndices;
308 if (FlagSet(lineCharacterIndex, LineCharacterIndexType::Utf32)) {
309 startsUTF32.Release();
311 if (FlagSet(lineCharacterIndex, LineCharacterIndexType::Utf16)) {
312 startsUTF16.Release();
314 SetActiveIndices();
315 return activeIndicesStart != activeIndices;
317 Sci::Position IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept override {
318 if (lineCharacterIndex == LineCharacterIndexType::Utf32) {
319 return startsUTF32.starts.PositionFromPartition(pos_cast(line));
320 } else {
321 return startsUTF16.starts.PositionFromPartition(pos_cast(line));
324 Sci::Line LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept override {
325 if (lineCharacterIndex == LineCharacterIndexType::Utf32) {
326 return line_from_pos_cast(startsUTF32.starts.PartitionFromPosition(pos_cast(pos)));
327 } else {
328 return line_from_pos_cast(startsUTF16.starts.PartitionFromPosition(pos_cast(pos)));
333 CellBuffer::CellBuffer(bool hasStyles_, bool largeDocument_) :
334 hasStyles(hasStyles_), largeDocument(largeDocument_) {
335 readOnly = false;
336 utf8Substance = false;
337 utf8LineEnds = LineEndType::Default;
338 collectingUndo = true;
339 uh = std::make_unique<UndoHistory>();
340 if (largeDocument)
341 plv = std::make_unique<LineVector<Sci::Position>>();
342 else
343 plv = std::make_unique<LineVector<int>>();
346 CellBuffer::~CellBuffer() noexcept = default;
348 char CellBuffer::CharAt(Sci::Position position) const noexcept {
349 return substance.ValueAt(position);
352 unsigned char CellBuffer::UCharAt(Sci::Position position) const noexcept {
353 return substance.ValueAt(position);
356 void CellBuffer::GetCharRange(char *buffer, Sci::Position position, Sci::Position lengthRetrieve) const {
357 if (lengthRetrieve <= 0)
358 return;
359 if (position < 0)
360 return;
361 if ((position + lengthRetrieve) > substance.Length()) {
362 Platform::DebugPrintf("Bad GetCharRange %.0f for %.0f of %.0f\n",
363 static_cast<double>(position),
364 static_cast<double>(lengthRetrieve),
365 static_cast<double>(substance.Length()));
366 return;
368 substance.GetRange(buffer, position, lengthRetrieve);
371 char CellBuffer::StyleAt(Sci::Position position) const noexcept {
372 return hasStyles ? style.ValueAt(position) : '\0';
375 void CellBuffer::GetStyleRange(unsigned char *buffer, Sci::Position position, Sci::Position lengthRetrieve) const {
376 if (lengthRetrieve < 0)
377 return;
378 if (position < 0)
379 return;
380 if (!hasStyles) {
381 std::fill(buffer, buffer + lengthRetrieve, static_cast<unsigned char>(0));
382 return;
384 if ((position + lengthRetrieve) > style.Length()) {
385 Platform::DebugPrintf("Bad GetStyleRange %.0f for %.0f of %.0f\n",
386 static_cast<double>(position),
387 static_cast<double>(lengthRetrieve),
388 static_cast<double>(style.Length()));
389 return;
391 style.GetRange(reinterpret_cast<char *>(buffer), position, lengthRetrieve);
394 const char *CellBuffer::BufferPointer() {
395 return substance.BufferPointer();
398 const char *CellBuffer::RangePointer(Sci::Position position, Sci::Position rangeLength) noexcept {
399 return substance.RangePointer(position, rangeLength);
402 Sci::Position CellBuffer::GapPosition() const noexcept {
403 return substance.GapPosition();
406 SplitView CellBuffer::AllView() const noexcept {
407 const size_t length = substance.Length();
408 size_t length1 = substance.GapPosition();
409 if (length1 == 0) {
410 // Assign segment2 to segment1 / length1 to avoid useless test against 0 length1
411 length1 = length;
413 return SplitView {
414 substance.ElementPointer(0),
415 length1,
416 substance.ElementPointer(length1) - length1,
417 length
421 // The char* returned is to an allocation owned by the undo history
422 const char *CellBuffer::InsertString(Sci::Position position, const char *s, Sci::Position insertLength, bool &startSequence) {
423 // InsertString and DeleteChars are the bottleneck though which all changes occur
424 const char *data = s;
425 if (!readOnly) {
426 if (collectingUndo) {
427 // Save into the undo/redo stack, but only the characters - not the formatting
428 // This takes up about half load time
429 data = uh->AppendAction(ActionType::insert, position, s, insertLength, startSequence);
432 BasicInsertString(position, s, insertLength);
433 if (changeHistory) {
434 changeHistory->Insert(position, insertLength, collectingUndo, uh->BeforeReachableSavePoint());
437 return data;
440 bool CellBuffer::SetStyleAt(Sci::Position position, char styleValue) noexcept {
441 if (!hasStyles) {
442 return false;
444 const char curVal = style.ValueAt(position);
445 if (curVal != styleValue) {
446 style.SetValueAt(position, styleValue);
447 return true;
448 } else {
449 return false;
453 bool CellBuffer::SetStyleFor(Sci::Position position, Sci::Position lengthStyle, char styleValue) noexcept {
454 if (!hasStyles) {
455 return false;
457 bool changed = false;
458 PLATFORM_ASSERT(lengthStyle == 0 ||
459 (lengthStyle > 0 && lengthStyle + position <= style.Length()));
460 while (lengthStyle--) {
461 const char curVal = style.ValueAt(position);
462 if (curVal != styleValue) {
463 style.SetValueAt(position, styleValue);
464 changed = true;
466 position++;
468 return changed;
471 // The char* returned is to an allocation owned by the undo history
472 const char *CellBuffer::DeleteChars(Sci::Position position, Sci::Position deleteLength, bool &startSequence) {
473 // InsertString and DeleteChars are the bottleneck though which all changes occur
474 PLATFORM_ASSERT(deleteLength > 0);
475 const char *data = nullptr;
476 if (!readOnly) {
477 if (collectingUndo) {
478 // Save into the undo/redo stack, but only the characters - not the formatting
479 // The gap would be moved to position anyway for the deletion so this doesn't cost extra
480 data = substance.RangePointer(position, deleteLength);
481 data = uh->AppendAction(ActionType::remove, position, data, deleteLength, startSequence);
484 if (changeHistory) {
485 changeHistory->DeleteRangeSavingHistory(position, deleteLength,
486 uh->BeforeReachableSavePoint(), uh->AfterOrAtDetachPoint());
489 BasicDeleteChars(position, deleteLength);
491 return data;
494 Sci::Position CellBuffer::Length() const noexcept {
495 return substance.Length();
498 void CellBuffer::Allocate(Sci::Position newSize) {
499 if (!largeDocument && (newSize > INT32_MAX)) {
500 throw std::runtime_error("CellBuffer::Allocate: size of standard document limited to 2G.");
502 substance.ReAllocate(newSize);
503 if (hasStyles) {
504 style.ReAllocate(newSize);
508 void CellBuffer::SetUTF8Substance(bool utf8Substance_) noexcept {
509 utf8Substance = utf8Substance_;
512 void CellBuffer::SetLineEndTypes(LineEndType utf8LineEnds_) {
513 if (utf8LineEnds != utf8LineEnds_) {
514 const LineCharacterIndexType indexes = plv->LineCharacterIndex();
515 utf8LineEnds = utf8LineEnds_;
516 ResetLineEnds();
517 AllocateLineCharacterIndex(indexes);
521 bool CellBuffer::ContainsLineEnd(const char *s, Sci::Position length) const noexcept {
522 unsigned char chBeforePrev = 0;
523 unsigned char chPrev = 0;
524 for (Sci::Position i = 0; i < length; i++) {
525 const unsigned char ch = s[i];
526 if ((ch == '\r') || (ch == '\n')) {
527 return true;
528 } else if (utf8LineEnds == LineEndType::Unicode) {
529 if (UTF8IsMultibyteLineEnd(chBeforePrev, chPrev, ch)) {
530 return true;
533 chBeforePrev = chPrev;
534 chPrev = ch;
536 return false;
539 void CellBuffer::SetPerLine(PerLine *pl) noexcept {
540 plv->SetPerLine(pl);
543 LineCharacterIndexType CellBuffer::LineCharacterIndex() const noexcept {
544 return plv->LineCharacterIndex();
547 void CellBuffer::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
548 if (utf8Substance) {
549 if (plv->AllocateLineCharacterIndex(lineCharacterIndex, Lines())) {
550 // Changed so recalculate whole file
551 RecalculateIndexLineStarts(0, Lines() - 1);
556 void CellBuffer::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
557 plv->ReleaseLineCharacterIndex(lineCharacterIndex);
560 Sci::Line CellBuffer::Lines() const noexcept {
561 return plv->Lines();
564 void CellBuffer::AllocateLines(Sci::Line lines) {
565 plv->AllocateLines(lines);
568 Sci::Position CellBuffer::LineStart(Sci::Line line) const noexcept {
569 if (line < 0)
570 return 0;
571 else if (line >= Lines())
572 return Length();
573 else
574 return plv->LineStart(line);
577 Sci::Position CellBuffer::LineEnd(Sci::Line line) const noexcept {
578 if (line >= Lines() - 1) {
579 return LineStart(line + 1);
580 } else {
581 Sci::Position position = LineStart(line + 1);
582 if (LineEndType::Unicode == GetLineEndTypes()) {
583 const unsigned char bytes[] = {
584 UCharAt(position - 3),
585 UCharAt(position - 2),
586 UCharAt(position - 1),
588 if (UTF8IsSeparator(bytes)) {
589 return position - UTF8SeparatorLength;
591 if (UTF8IsNEL(bytes + 1)) {
592 return position - UTF8NELLength;
595 position--; // Back over CR or LF
596 // When line terminator is CR+LF, may need to go back one more
597 if ((position > LineStart(line)) && (CharAt(position - 1) == '\r')) {
598 position--;
600 return position;
604 Sci::Line CellBuffer::LineFromPosition(Sci::Position pos) const noexcept {
605 return plv->LineFromPosition(pos);
608 Sci::Position CellBuffer::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept {
609 return plv->IndexLineStart(line, lineCharacterIndex);
612 Sci::Line CellBuffer::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept {
613 return plv->LineFromPositionIndex(pos, lineCharacterIndex);
616 bool CellBuffer::IsReadOnly() const noexcept {
617 return readOnly;
620 void CellBuffer::SetReadOnly(bool set) noexcept {
621 readOnly = set;
624 bool CellBuffer::IsLarge() const noexcept {
625 return largeDocument;
628 bool CellBuffer::HasStyles() const noexcept {
629 return hasStyles;
632 void CellBuffer::SetSavePoint() {
633 uh->SetSavePoint();
634 if (changeHistory) {
635 changeHistory->SetSavePoint();
639 bool CellBuffer::IsSavePoint() const noexcept {
640 return uh->IsSavePoint();
643 void CellBuffer::TentativeStart() noexcept {
644 uh->TentativeStart();
647 void CellBuffer::TentativeCommit() noexcept {
648 uh->TentativeCommit();
651 int CellBuffer::TentativeSteps() noexcept {
652 return uh->TentativeSteps();
655 bool CellBuffer::TentativeActive() const noexcept {
656 return uh->TentativeActive();
659 // Without undo
661 void CellBuffer::InsertLine(Sci::Line line, Sci::Position position, bool lineStart) {
662 plv->InsertLine(line, position, lineStart);
665 void CellBuffer::RemoveLine(Sci::Line line) {
666 plv->RemoveLine(line);
669 bool CellBuffer::UTF8LineEndOverlaps(Sci::Position position) const noexcept {
670 const unsigned char bytes[] = {
671 static_cast<unsigned char>(substance.ValueAt(position-2)),
672 static_cast<unsigned char>(substance.ValueAt(position-1)),
673 static_cast<unsigned char>(substance.ValueAt(position)),
674 static_cast<unsigned char>(substance.ValueAt(position+1)),
676 return UTF8IsSeparator(bytes) || UTF8IsSeparator(bytes+1) || UTF8IsNEL(bytes+1);
679 bool CellBuffer::UTF8IsCharacterBoundary(Sci::Position position) const {
680 assert(position >= 0 && position <= Length());
681 if (position > 0) {
682 std::string back;
683 for (int i = 0; i < UTF8MaxBytes; i++) {
684 const Sci::Position posBack = position - i;
685 if (posBack < 0) {
686 return false;
688 back.insert(0, 1, substance.ValueAt(posBack));
689 if (!UTF8IsTrailByte(back.front())) {
690 if (i > 0) {
691 // Have reached a non-trail
692 const int cla = UTF8Classify(back);
693 if ((cla & UTF8MaskInvalid) || (cla != i)) {
694 return false;
697 break;
701 if (position < Length()) {
702 const unsigned char fore = substance.ValueAt(position);
703 if (UTF8IsTrailByte(fore)) {
704 return false;
707 return true;
710 void CellBuffer::ResetLineEnds() {
711 // Reinitialize line data -- too much work to preserve
712 const Sci::Line lines = plv->Lines();
713 plv->Init();
714 plv->AllocateLines(lines);
716 constexpr Sci::Position position = 0;
717 const Sci::Position length = Length();
718 plv->InsertText(0, length);
719 Sci::Line lineInsert = 1;
720 constexpr bool atLineStart = true;
721 unsigned char chBeforePrev = 0;
722 unsigned char chPrev = 0;
723 for (Sci::Position i = 0; i < length; i++) {
724 const unsigned char ch = substance.ValueAt(position + i);
725 if (ch == '\r') {
726 InsertLine(lineInsert, (position + i) + 1, atLineStart);
727 lineInsert++;
728 } else if (ch == '\n') {
729 if (chPrev == '\r') {
730 // Patch up what was end of line
731 plv->SetLineStart(lineInsert - 1, (position + i) + 1);
732 } else {
733 InsertLine(lineInsert, (position + i) + 1, atLineStart);
734 lineInsert++;
736 } else if (utf8LineEnds == LineEndType::Unicode) {
737 if (UTF8IsMultibyteLineEnd(chBeforePrev, chPrev, ch)) {
738 InsertLine(lineInsert, (position + i) + 1, atLineStart);
739 lineInsert++;
742 chBeforePrev = chPrev;
743 chPrev = ch;
747 namespace {
749 CountWidths CountCharacterWidthsUTF8(std::string_view sv) noexcept {
750 CountWidths cw;
751 size_t remaining = sv.length();
752 while (remaining > 0) {
753 const int utf8Status = UTF8Classify(sv);
754 const int lenChar = utf8Status & UTF8MaskWidth;
755 cw.CountChar(lenChar);
756 sv.remove_prefix(lenChar);
757 remaining -= lenChar;
759 return cw;
764 bool CellBuffer::MaintainingLineCharacterIndex() const noexcept {
765 return plv->LineCharacterIndex() != LineCharacterIndexType::None;
768 void CellBuffer::RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast) {
769 std::string text;
770 Sci::Position posLineEnd = LineStart(lineFirst);
771 for (Sci::Line line = lineFirst; line <= lineLast; line++) {
772 // Find line start and end, retrieve text of line, count characters and update line width
773 const Sci::Position posLineStart = posLineEnd;
774 posLineEnd = LineStart(line+1);
775 const Sci::Position width = posLineEnd - posLineStart;
776 text.resize(width);
777 GetCharRange(text.data(), posLineStart, width);
778 const CountWidths cw = CountCharacterWidthsUTF8(text);
779 plv->SetLineCharactersWidth(line, cw);
// Insert insertLength bytes from s into the text at position, insert zeroed style bytes
// when styles are kept, then update the line starts and, when maintained, the character
// indices. Does nothing for a zero length. The undo history is not touched here.
783 void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
784 if (insertLength == 0)
785 return;
786 PLATFORM_ASSERT(insertLength > 0);
// Remember the byte currently at the insertion point before the text changes
788 const unsigned char chAfter = substance.ValueAt(position);
789 bool breakingUTF8LineEnd = false;
790 if (utf8LineEnds == LineEndType::Unicode && UTF8IsTrailByte(chAfter)) {
791 breakingUTF8LineEnd = UTF8LineEndOverlaps(position);
794 const Sci::Line linePosition = plv->LineFromPosition(position);
795 Sci::Line lineInsert = linePosition + 1;
797 // A simple insertion is one that inserts valid text on a single line at a character boundary
798 bool simpleInsertion = false;
800 const bool maintainingIndex = MaintainingLineCharacterIndex();
802 // Check for breaking apart a UTF-8 sequence and inserting invalid UTF-8
803 if (utf8Substance && maintainingIndex) {
804 // Actually, don't need to check that whole insertion is valid just that there
805 // are no potential fragments at ends.
806 simpleInsertion = UTF8IsCharacterBoundary(position) &&
807 UTF8IsValid(std::string_view(s, insertLength));
// Modify the text, and the styles when kept, before fixing up the line data
810 substance.InsertFromArray(position, s, 0, insertLength);
811 if (hasStyles) {
812 style.InsertValue(position, insertLength, 0);
815 const bool atLineStart = plv->LineStart(lineInsert-1) == position;
816 // Point all the lines after the insertion point further along in the buffer
817 plv->InsertText(lineInsert-1, insertLength);
// The two bytes that precede the inserted text
818 unsigned char chBeforePrev = substance.ValueAt(position - 2);
819 unsigned char chPrev = substance.ValueAt(position - 1);
820 if (chPrev == '\r' && chAfter == '\n') {
821 // Splitting up a crlf pair at position
822 InsertLine(lineInsert, position, false);
823 lineInsert++;
825 if (breakingUTF8LineEnd) {
826 RemoveLine(lineInsert);
// Line starts found in the inserted text are gathered in positions and
// passed to the line index in blocks of up to PositionBlockSize
829 constexpr size_t PositionBlockSize = 128;
830 Sci::Position positions[PositionBlockSize]{};
831 size_t nPositions = 0;
// Value of lineInsert before scanning; compared against at the end to see whether lines were added
832 const Sci::Line lineStart = lineInsert;
834 // s may not be NUL-terminated, ensure *ptr == '\n' or *next == '\n' is valid.
// end points at the last inserted byte, which is handled after the main loop
835 const char *const end = s + insertLength - 1;
836 const char *ptr = s;
837 unsigned char ch = 0;
839 if (chPrev == '\r' && *ptr == '\n') {
840 ++ptr;
841 // Patch up what was end of line
842 plv->SetLineStart(lineInsert - 1, (position + ptr - s));
843 simpleInsertion = false;
846 if (ptr < end) {
// Classify bytes: 1 for LF, 2 for CR, 3 and 4 for the final bytes of multibyte line ends
847 uint8_t eolTable[256]{};
848 eolTable[static_cast<uint8_t>('\n')] = 1;
849 eolTable[static_cast<uint8_t>('\r')] = 2;
850 if (utf8LineEnds == LineEndType::Unicode) {
851 // see UniConversion.h for LS, PS and NEL
852 eolTable[0x85] = 4;
853 eolTable[0xa8] = 3;
854 eolTable[0xa9] = 3;
857 do {
858 // skip to line end
859 ch = *ptr++;
860 uint8_t type;
861 while ((type = eolTable[ch]) == 0 && ptr < end) {
862 chBeforePrev = chPrev;
863 chPrev = ch;
864 ch = *ptr++;
866 switch (type) {
867 case 2: // '\r'
// Treat a CR LF pair as a single line end
868 if (*ptr == '\n') {
869 ++ptr;
871 [[fallthrough]];
872 case 1: // '\n'
873 positions[nPositions++] = position + ptr - s;
874 if (nPositions == PositionBlockSize) {
875 plv->InsertLines(lineInsert, positions, nPositions, atLineStart);
876 lineInsert += nPositions;
877 nPositions = 0;
879 break;
880 case 3:
881 case 4:
882 // LS, PS and NEL
// The candidate final byte only counts when the preceding byte(s) match
883 if ((type == 3 && chPrev == 0x80 && chBeforePrev == 0xe2) || (type == 4 && chPrev == 0xc2)) {
884 positions[nPositions++] = position + ptr - s;
885 if (nPositions == PositionBlockSize) {
886 plv->InsertLines(lineInsert, positions, nPositions, atLineStart);
887 lineInsert += nPositions;
888 nPositions = 0;
891 break;
894 chBeforePrev = chPrev;
895 chPrev = ch;
896 } while (ptr < end);
// Flush line starts still waiting in the positions buffer
899 if (nPositions != 0) {
900 plv->InsertLines(lineInsert, positions, nPositions, atLineStart);
901 lineInsert += nPositions;
// Handle the last inserted byte when the loop above has not already consumed it
904 ch = *end;
905 if (ptr == end) {
906 ++ptr;
907 if (ch == '\r' || ch == '\n') {
908 InsertLine(lineInsert, (position + ptr - s), atLineStart);
909 lineInsert++;
910 } else if (utf8LineEnds == LineEndType::Unicode && !UTF8IsAscii(ch)) {
911 if (UTF8IsMultibyteLineEnd(chBeforePrev, chPrev, ch)) {
912 InsertLine(lineInsert, (position + ptr - s), atLineStart);
913 lineInsert++;
918 // Joining two lines where last insertion is cr and following substance starts with lf
919 if (chAfter == '\n') {
920 if (ch == '\r') {
921 // End of line already in buffer so drop the newly created one
922 RemoveLine(lineInsert - 1);
923 simpleInsertion = false;
925 } else if (utf8LineEnds == LineEndType::Unicode && !UTF8IsAscii(chAfter)) {
926 chBeforePrev = chPrev;
927 chPrev = ch;
928 // May have end of UTF-8 line end in buffer and start in insertion
929 for (int j = 0; j < UTF8SeparatorLength-1; j++) {
930 const unsigned char chAt = substance.ValueAt(position + insertLength + j);
931 const unsigned char back3[3] = {chBeforePrev, chPrev, chAt};
932 if (UTF8IsSeparator(back3)) {
933 InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart);
934 lineInsert++;
936 if ((j == 0) && UTF8IsNEL(back3+1)) {
937 InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart);
938 lineInsert++;
940 chBeforePrev = chPrev;
941 chPrev = chAt;
// Update the character indices: adjust the one line in place for a simple insertion
// that added no lines, otherwise measure every affected line again
944 if (maintainingIndex) {
945 if (simpleInsertion && (lineInsert == lineStart)) {
946 const CountWidths cw = CountCharacterWidthsUTF8(std::string_view(s, insertLength));
947 plv->InsertCharacters(linePosition, cw);
948 } else {
949 RecalculateIndexLineStarts(linePosition, lineInsert - 1);
// Remove deleteLength bytes at position from the text, and from the styles when kept,
// updating the line starts and, when maintained, the character indices.
// Does nothing for a zero length. The undo history is not touched here.
954 void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLength) {
955 if (deleteLength == 0)
956 return;
// Line whose character width must be measured again after the deletion; negative when none
958 Sci::Line lineRecalculateStart = Sci::invalidPosition;
960 if ((position == 0) && (deleteLength == substance.Length())) {
961 // If whole buffer is being deleted, faster to reinitialise lines data
962 // than to delete each line.
963 plv->Init();
964 } else {
965 // Have to fix up line positions before doing deletion as looking at text in buffer
966 // to work out which lines have been removed
968 const Sci::Line linePosition = plv->LineFromPosition(position);
969 Sci::Line lineRemove = linePosition + 1;
// Pull all later line starts back by the deleted length
971 plv->InsertText(lineRemove-1, - (deleteLength));
972 const unsigned char chPrev = substance.ValueAt(position - 1);
973 const unsigned char chBefore = chPrev;
974 unsigned char chNext = substance.ValueAt(position);
976 // Check for breaking apart a UTF-8 sequence
977 // Needs further checks that text is UTF-8 or that some other break apart is occurring
978 if (utf8Substance && MaintainingLineCharacterIndex()) {
979 const Sci::Position posEnd = position + deleteLength;
980 const Sci::Line lineEndRemove = plv->LineFromPosition(posEnd);
// A simple deletion stays on one line and both of its ends are character boundaries
981 const bool simpleDeletion =
982 (linePosition == lineEndRemove) &&
983 UTF8IsCharacterBoundary(position) && UTF8IsCharacterBoundary(posEnd);
984 if (simpleDeletion) {
985 std::string text(deleteLength, '\0');
986 GetCharRange(text.data(), position, deleteLength);
987 if (UTF8IsValid(text)) {
988 // Everything is good
// Subtract the character counts of the removed text from the line
989 const CountWidths cw = CountCharacterWidthsUTF8(text);
990 plv->InsertCharacters(linePosition, -cw);
991 } else {
992 lineRecalculateStart = linePosition;
994 } else {
995 lineRecalculateStart = linePosition;
999 bool ignoreNL = false;
1000 if (chPrev == '\r' && chNext == '\n') {
1001 // Move back one
1002 plv->SetLineStart(lineRemove, position);
1003 lineRemove++;
1004 ignoreNL = true; // First \n is not real deletion
1006 if (utf8LineEnds == LineEndType::Unicode && UTF8IsTrailByte(chNext)) {
1007 if (UTF8LineEndOverlaps(position)) {
1008 RemoveLine(lineRemove);
// Scan the bytes being deleted, removing one line for each line end found
1012 unsigned char ch = chNext;
1013 for (Sci::Position i = 0; i < deleteLength; i++) {
1014 chNext = substance.ValueAt(position + i + 1);
1015 if (ch == '\r') {
// A CR followed by LF is left for the LF branch on the next iteration
1016 if (chNext != '\n') {
1017 RemoveLine(lineRemove);
1019 } else if (ch == '\n') {
1020 if (ignoreNL) {
1021 ignoreNL = false; // Further \n are real deletions
1022 } else {
1023 RemoveLine(lineRemove);
1025 } else if (utf8LineEnds == LineEndType::Unicode) {
1026 if (!UTF8IsAscii(ch)) {
1027 const unsigned char next3[3] = {ch, chNext,
1028 static_cast<unsigned char>(substance.ValueAt(position + i + 2))};
1029 if (UTF8IsSeparator(next3) || UTF8IsNEL(next3)) {
1030 RemoveLine(lineRemove);
1035 ch = chNext;
1037 // May have to fix up end if last deletion causes cr to be next to lf
1038 // or removes one of a crlf pair
1039 const char chAfter = substance.ValueAt(position + deleteLength);
1040 if (chBefore == '\r' && chAfter == '\n') {
1041 // Using lineRemove-1 as cr ended line before start of deletion
1042 RemoveLine(lineRemove - 1);
1043 plv->SetLineStart(lineRemove - 1, position + 1);
// Line data has been updated so the bytes can now be removed
1046 substance.DeleteRange(position, deleteLength);
1047 if (lineRecalculateStart >= 0) {
1048 RecalculateIndexLineStarts(lineRecalculateStart, lineRecalculateStart);
1050 if (hasStyles) {
1051 style.DeleteRange(position, deleteLength);
1055 bool CellBuffer::SetUndoCollection(bool collectUndo) noexcept {
1056 collectingUndo = collectUndo;
1057 uh->DropUndoSequence();
1058 return collectingUndo;
1061 bool CellBuffer::IsCollectingUndo() const noexcept {
1062 return collectingUndo;
1065 void CellBuffer::BeginUndoAction(bool mayCoalesce) noexcept {
1066 uh->BeginUndoAction(mayCoalesce);
1069 void CellBuffer::EndUndoAction() noexcept {
1070 uh->EndUndoAction();
1073 int CellBuffer::UndoSequenceDepth() const noexcept {
1074 return uh->UndoSequenceDepth();
1077 void CellBuffer::AddUndoAction(Sci::Position token, bool mayCoalesce) {
1078 bool startSequence = false;
1079 uh->AppendAction(ActionType::container, token, nullptr, 0, startSequence, mayCoalesce);
1082 void CellBuffer::DeleteUndoHistory() noexcept {
1083 uh->DeleteUndoHistory();
1086 bool CellBuffer::CanUndo() const noexcept {
1087 return uh->CanUndo();
1090 int CellBuffer::StartUndo() noexcept {
1091 return uh->StartUndo();
1094 Action CellBuffer::GetUndoStep() const noexcept {
1095 return uh->GetUndoStep();
1098 void CellBuffer::PerformUndoStep() {
1099 const Action previousStep = uh->GetUndoStep();
1100 // PreviousBeforeSavePoint and AfterDetachPoint are called since acting on the previous action,
1101 // that is currentAction-1
1102 if (changeHistory && uh->PreviousBeforeSavePoint()) {
1103 changeHistory->StartReversion();
1105 if (previousStep.at == ActionType::insert) {
1106 if (substance.Length() < previousStep.lenData) {
1107 throw std::runtime_error(
1108 "CellBuffer::PerformUndoStep: deletion must be less than document length.");
1110 if (changeHistory) {
1111 changeHistory->DeleteRange(previousStep.position, previousStep.lenData,
1112 uh->PreviousBeforeSavePoint() && !uh->AfterDetachPoint());
1114 BasicDeleteChars(previousStep.position, previousStep.lenData);
1115 } else if (previousStep.at == ActionType::remove) {
1116 BasicInsertString(previousStep.position, previousStep.data, previousStep.lenData);
1117 if (changeHistory) {
1118 changeHistory->UndoDeleteStep(previousStep.position, previousStep.lenData, uh->AfterDetachPoint());
1121 uh->CompletedUndoStep();
1124 bool CellBuffer::CanRedo() const noexcept {
1125 return uh->CanRedo();
1128 int CellBuffer::StartRedo() noexcept {
1129 return uh->StartRedo();
1132 Action CellBuffer::GetRedoStep() const noexcept {
1133 return uh->GetRedoStep();
1136 void CellBuffer::PerformRedoStep() {
1137 const Action actionStep = uh->GetRedoStep();
1138 if (actionStep.at == ActionType::insert) {
1139 BasicInsertString(actionStep.position, actionStep.data, actionStep.lenData);
1140 if (changeHistory) {
1141 changeHistory->Insert(actionStep.position, actionStep.lenData, collectingUndo,
1142 uh->BeforeSavePoint() && !uh->AfterOrAtDetachPoint());
1144 } else if (actionStep.at == ActionType::remove) {
1145 if (changeHistory) {
1146 changeHistory->DeleteRangeSavingHistory(actionStep.position, actionStep.lenData,
1147 uh->BeforeReachableSavePoint(), uh->AfterOrAtDetachPoint());
1149 BasicDeleteChars(actionStep.position, actionStep.lenData);
1151 if (changeHistory && uh->AfterSavePoint()) {
1152 changeHistory->EndReversion();
1154 uh->CompletedRedoStep();
1157 int CellBuffer::UndoActions() const noexcept {
1158 return uh->Actions();
1161 void CellBuffer::SetUndoSavePoint(int action) noexcept {
1162 uh->SetSavePoint(action);
1165 int CellBuffer::UndoSavePoint() const noexcept {
1166 return uh->SavePoint();
1169 void CellBuffer::SetUndoDetach(int action) noexcept {
1170 uh->SetDetachPoint(action);
1173 int CellBuffer::UndoDetach() const noexcept {
1174 return uh->DetachPoint();
1177 void CellBuffer::SetUndoTentative(int action) noexcept {
1178 uh->SetTentative(action);
1181 int CellBuffer::UndoTentative() const noexcept {
1182 return uh->TentativePoint();
1185 namespace {
1187 void RestoreChangeHistory(const UndoHistory *uh, ChangeHistory *changeHistory) {
1188 // Replay all undo actions into changeHistory
1189 const int savePoint = uh->SavePoint();
1190 const int detachPoint = uh->DetachPoint();
1191 const int currentPoint = uh->Current();
1192 for (int act = 0; act < uh->Actions(); act++) {
1193 const ActionType type = static_cast<ActionType>(uh->Type(act) & ~coalesceFlag);
1194 const Sci::Position position = uh->Position(act);
1195 const Sci::Position length = uh->Length(act);
1196 const bool beforeSave = act < savePoint || ((detachPoint >= 0) && (detachPoint > act));
1197 const bool afterDetach = (detachPoint >= 0) && (detachPoint < act);
1198 switch (type) {
1199 case ActionType::insert:
1200 changeHistory->Insert(position, length, true, beforeSave);
1201 break;
1202 case ActionType::remove:
1203 changeHistory->DeleteRangeSavingHistory(position, length, beforeSave, afterDetach);
1204 break;
1205 default:
1206 // Only insertions and deletions go into change history
1207 break;
1209 changeHistory->Check();
1211 // Undo back to currentPoint, updating change history
1212 for (int act = uh->Actions() - 1; act >= currentPoint; act--) {
1213 const ActionType type = static_cast<ActionType>(uh->Type(act) & ~coalesceFlag);
1214 const Sci::Position position = uh->Position(act);
1215 const Sci::Position length = uh->Length(act);
1216 const bool beforeSave = act < savePoint;
1217 const bool afterDetach = (detachPoint >= 0) && (detachPoint < act);
1218 if (beforeSave) {
1219 changeHistory->StartReversion();
1221 switch (type) {
1222 case ActionType::insert:
1223 changeHistory->DeleteRange(position, length, beforeSave && !afterDetach);
1224 break;
1225 case ActionType::remove:
1226 changeHistory->UndoDeleteStep(position, length, afterDetach);
1227 break;
1228 default:
1229 // Only insertions and deletions go into change history
1230 break;
1232 changeHistory->Check();
1238 void CellBuffer::SetUndoCurrent(int action) {
1239 uh->SetCurrent(action, Length());
1240 if (changeHistory) {
1241 if ((uh->DetachPoint() >= 0) && (uh->SavePoint() >= 0)) {
1242 // Can't have a valid save point and a valid detach point at same time
1243 uh->DeleteUndoHistory();
1244 changeHistory.reset();
1245 throw std::runtime_error("UndoHistory::SetCurrent: invalid undo history.");
1247 const intptr_t sizeChange = uh->Delta(action);
1248 const intptr_t lengthOriginal = Length() - sizeChange;
1249 // Recreate empty change history
1250 changeHistory = std::make_unique<ChangeHistory>(lengthOriginal);
1251 RestoreChangeHistory(uh.get(), changeHistory.get());
1252 if (Length() != changeHistory->Length()) {
1253 uh->DeleteUndoHistory();
1254 changeHistory.reset();
1255 throw std::runtime_error("UndoHistory::SetCurrent: invalid undo history.");
1260 int CellBuffer::UndoCurrent() const noexcept {
1261 return uh->Current();
1264 int CellBuffer::UndoActionType(int action) const noexcept {
1265 return uh->Type(action);
1268 Sci::Position CellBuffer::UndoActionPosition(int action) const noexcept {
1269 return uh->Position(action);
1272 std::string_view CellBuffer::UndoActionText(int action) const noexcept {
1273 return uh->Text(action);
1276 void CellBuffer::PushUndoActionType(int type, Sci::Position position) {
1277 uh->PushUndoActionType(type, position);
1280 void CellBuffer::ChangeLastUndoActionText(size_t length, const char *text) {
1281 uh->ChangeLastUndoActionText(length, text);
1284 void CellBuffer::ChangeHistorySet(bool set) {
1285 if (set) {
1286 if (!changeHistory && !uh->CanUndo()) {
1287 changeHistory = std::make_unique<ChangeHistory>(Length());
1289 } else {
1290 changeHistory.reset();
1294 int CellBuffer::EditionAt(Sci::Position pos) const noexcept {
1295 if (changeHistory) {
1296 return changeHistory->EditionAt(pos);
1298 return 0;
1301 Sci::Position CellBuffer::EditionEndRun(Sci::Position pos) const noexcept {
1302 if (changeHistory) {
1303 return changeHistory->EditionEndRun(pos);
1305 return Length();
1308 unsigned int CellBuffer::EditionDeletesAt(Sci::Position pos) const noexcept {
1309 if (changeHistory) {
1310 return changeHistory->EditionDeletesAt(pos);
1312 return 0;
1315 Sci::Position CellBuffer::EditionNextDelete(Sci::Position pos) const noexcept {
1316 if (changeHistory) {
1317 return changeHistory->EditionNextDelete(pos);
1319 return Length() + 1;