Add some files and code from Gnulib as preparation for TAR updates.
[midnight-commander.git] / src / editor / editbuffer.c
blobcc6ae6eff7b6a16c6e1bdfd21d9d94ca681b3caf
1 /*
2 Editor text keep buffer.
4 Copyright (C) 2013-2024
5 Free Software Foundation, Inc.
7 Written by:
8 Andrew Borodin <aborodin@vmail.ru> 2013
10 This file is part of the Midnight Commander.
12 The Midnight Commander is free software: you can redistribute it
13 and/or modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation, either version 3 of the License,
15 or (at your option) any later version.
17 The Midnight Commander is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program. If not, see <http://www.gnu.org/licenses/>.
26 /** \file
27 * \brief Source: editor text keep buffer.
28 * \author Andrew Borodin
29 * \date 2013
32 #include <config.h>
34 #include <ctype.h> /* isdigit() */
35 #include <stdlib.h>
36 #include <string.h>
37 #include <sys/types.h>
39 #include "lib/global.h"
41 #include "lib/vfs/vfs.h"
43 #include "edit-impl.h"
44 #include "editbuffer.h"
46 /* --------------------------------------------------------------------------------------------- */
47 /*-
49 * here's a quick sketch of the layout: (don't run this through indent.)
51 * |
52 * \0\0\0\0\0m e _ f i l e . \nf i n . \n|T h i s _ i s _ s o\0\0\0\0\0\0\0\0\0
53 * ______________________________________|______________________________________
54 * |
55 * ... | b2[2] | b2[1] | b2[0] | b1[0] | b1[1] | b1[2] | ...
56 * |-> |-> |-> |-> |-> |-> |
57 * |
58 * _<------------------------->|<----------------->_
59 * curs2 | curs1
60 * ^ | ^
61 * | ^|^ |
62 * cursor ||| cursor
63 * |||
64 * file end|||file beginning
65 * |
66 * |
68 * _
69 * This_is_some_file
70 * fin.
73 * This is called a "gap buffer".
74 * See also:
75 * http://en.wikipedia.org/wiki/Gap_buffer
76 * http://stackoverflow.com/questions/4199694/data-structure-for-text-editor
79 /*** global variables ****************************************************************************/
81 /*** file scope macro definitions ****************************************************************/
/**
 * The editor stores its text in two arrays of fixed-size chunks.
 * Every chunk has the same size, and that size must be a power of 2.
 */

/* Tunable: base-2 logarithm of the chunk size in bytes */
#ifndef S_EDIT_BUF_SIZE
#define S_EDIT_BUF_SIZE 16
#endif

/* Chunk size in bytes */
#define EDIT_BUF_SIZE ((off_t) 1 << S_EDIT_BUF_SIZE)

/* Chunk mask: extracts the position inside a chunk from an absolute position */
#define M_EDIT_BUF_SIZE (EDIT_BUF_SIZE - 1)
99 /*** file scope type declarations ****************************************************************/
101 /*** forward declarations (file scope functions) *************************************************/
103 /*** file scope variables ************************************************************************/
105 /* --------------------------------------------------------------------------------------------- */
106 /*** file scope functions ************************************************************************/
107 /* --------------------------------------------------------------------------------------------- */
109 * Get pointer to byte at specified index
111 * @param buf pointer to editor buffer
112 * @param byte_index byte index
114 * @return NULL if byte_index is negative or larger than file size; pointer to byte otherwise.
116 static char *
117 edit_buffer_get_byte_ptr (const edit_buffer_t *buf, off_t byte_index)
119 void *b;
121 if (byte_index >= (buf->curs1 + buf->curs2) || byte_index < 0)
122 return NULL;
124 if (byte_index >= buf->curs1)
126 off_t p;
128 p = buf->curs1 + buf->curs2 - byte_index - 1;
129 b = g_ptr_array_index (buf->b2, p >> S_EDIT_BUF_SIZE);
130 return (char *) b + EDIT_BUF_SIZE - 1 - (p & M_EDIT_BUF_SIZE);
133 b = g_ptr_array_index (buf->b1, byte_index >> S_EDIT_BUF_SIZE);
134 return (char *) b + (byte_index & M_EDIT_BUF_SIZE);
137 /* --------------------------------------------------------------------------------------------- */
138 /*** public functions ****************************************************************************/
139 /* --------------------------------------------------------------------------------------------- */
141 * Initialize editor buffers.
143 * @param buf pointer to editor buffer
146 void
147 edit_buffer_init (edit_buffer_t *buf, off_t size)
149 buf->b1 = g_ptr_array_new_full (32, g_free);
150 buf->b2 = g_ptr_array_new_full (32, g_free);
152 buf->curs1 = 0;
153 buf->curs2 = 0;
155 buf->size = size;
156 buf->lines = 0;
159 /* --------------------------------------------------------------------------------------------- */
161 * Clean editor buffers.
163 * @param buf pointer to editor buffer
166 void
167 edit_buffer_clean (edit_buffer_t *buf)
169 if (buf->b1 != NULL)
170 g_ptr_array_free (buf->b1, TRUE);
172 if (buf->b2 != NULL)
173 g_ptr_array_free (buf->b2, TRUE);
176 /* --------------------------------------------------------------------------------------------- */
178 * Get byte at specified index
180 * @param buf pointer to editor buffer
181 * @param byte_index byte index
183 * @return '\n' if byte_index is negative or larger than file size; byte at byte_index otherwise.
187 edit_buffer_get_byte (const edit_buffer_t *buf, off_t byte_index)
189 char *p;
191 p = edit_buffer_get_byte_ptr (buf, byte_index);
193 return (p != NULL) ? *(unsigned char *) p : '\n';
196 /* --------------------------------------------------------------------------------------------- */
198 #ifdef HAVE_CHARSET
200 * Get utf-8 symbol at specified index
202 * @param buf pointer to editor buffer
203 * @param byte_index byte index
204 * @param char_length length of returned symbol
206 * @return '\n' if byte_index is negative or larger than file size;
207 * 0 if utf-8 symbol at specified index is invalid;
208 * utf-8 symbol otherwise
212 edit_buffer_get_utf (const edit_buffer_t *buf, off_t byte_index, int *char_length)
214 gchar *str = NULL;
215 gunichar res;
216 gunichar ch;
217 gchar *next_ch = NULL;
219 if (byte_index >= (buf->curs1 + buf->curs2) || byte_index < 0)
221 *char_length = 0;
222 return '\n';
225 str = edit_buffer_get_byte_ptr (buf, byte_index);
226 if (str == NULL)
228 *char_length = 0;
229 return 0;
232 res = g_utf8_get_char_validated (str, -1);
233 if (res == (gunichar) (-2) || res == (gunichar) (-1))
235 /* Retry with explicit bytes to make sure it's not a buffer boundary */
236 size_t i;
237 gchar utf8_buf[UTF8_CHAR_LEN + 1];
239 for (i = 0; i < UTF8_CHAR_LEN; i++)
240 utf8_buf[i] = edit_buffer_get_byte (buf, byte_index + i);
241 utf8_buf[i] = '\0';
242 res = g_utf8_get_char_validated (utf8_buf, -1);
245 if (res == (gunichar) (-2) || res == (gunichar) (-1))
247 ch = *str;
248 *char_length = 0;
250 else
252 ch = res;
253 /* Calculate UTF-8 char length */
254 next_ch = g_utf8_next_char (str);
255 *char_length = next_ch - str;
258 return (int) ch;
261 /* --------------------------------------------------------------------------------------------- */
263 * Get utf-8 symbol before specified index
265 * @param buf pointer to editor buffer
266 * @param byte_index byte index
267 * @param char_length length of returned symbol
269 * @return 0 if byte_index is negative or larger than file size;
270 * 1-byte value before specified index if utf-8 symbol before specified index is invalid;
271 * utf-8 symbol otherwise
275 edit_buffer_get_prev_utf (const edit_buffer_t *buf, off_t byte_index, int *char_length)
277 size_t i;
278 gchar utf8_buf[3 * UTF8_CHAR_LEN + 1];
279 gchar *str;
280 gchar *cursor_buf_ptr;
281 gunichar res;
283 if (byte_index > (buf->curs1 + buf->curs2) || byte_index <= 0)
285 *char_length = 0;
286 return 0;
289 for (i = 0; i < (3 * UTF8_CHAR_LEN); i++)
290 utf8_buf[i] = edit_buffer_get_byte (buf, byte_index + i - (2 * UTF8_CHAR_LEN));
291 utf8_buf[i] = '\0';
293 cursor_buf_ptr = utf8_buf + (2 * UTF8_CHAR_LEN);
294 str = g_utf8_find_prev_char (utf8_buf, cursor_buf_ptr);
296 if (str == NULL || g_utf8_next_char (str) != cursor_buf_ptr)
298 *char_length = 1;
299 return *(cursor_buf_ptr - 1);
302 res = g_utf8_get_char_validated (str, -1);
303 if (res == (gunichar) (-2) || res == (gunichar) (-1))
305 *char_length = 1;
306 return *(cursor_buf_ptr - 1);
309 *char_length = cursor_buf_ptr - str;
310 return (int) res;
312 #endif /* HAVE_CHARSET */
314 /* --------------------------------------------------------------------------------------------- */
316 * Count lines in editor buffer.
318 * @param buf editor buffer
319 * @param first start byte offset
320 * @param last finish byte offset
322 * @return line numbers between "first" and "last" bytes
325 long
326 edit_buffer_count_lines (const edit_buffer_t *buf, off_t first, off_t last)
328 long lines = 0;
330 first = MAX (first, 0);
331 last = MIN (last, buf->size);
333 while (first < last)
334 if (edit_buffer_get_byte (buf, first++) == '\n')
335 lines++;
337 return lines;
340 /* --------------------------------------------------------------------------------------------- */
342 * Get "begin-of-line" offset of line contained specified byte offset
344 * @param buf editor buffer
345 * @param current byte offset
347 * @return index of first char of line
350 off_t
351 edit_buffer_get_bol (const edit_buffer_t *buf, off_t current)
353 if (current <= 0)
354 return 0;
356 for (; edit_buffer_get_byte (buf, current - 1) != '\n'; current--)
359 return current;
362 /* --------------------------------------------------------------------------------------------- */
364 * Get "end-of-line" offset of line contained specified byte offset
366 * @param buf editor buffer
367 * @param current byte offset
369 * @return index of last char of line + 1
372 off_t
373 edit_buffer_get_eol (const edit_buffer_t *buf, off_t current)
375 if (current >= buf->size)
376 return buf->size;
378 for (; edit_buffer_get_byte (buf, current) != '\n'; current++)
381 return current;
384 /* --------------------------------------------------------------------------------------------- */
386 * Get word from specified offset.
388 * @param buf editor buffer
389 * @param current start_pos offset
390 * @param start actual start word ofset
391 * @param cut
393 * @return word as newly allocated object
396 GString *
397 edit_buffer_get_word_from_pos (const edit_buffer_t *buf, off_t start_pos, off_t *start, gsize *cut)
399 off_t word_start;
400 gsize cut_len = 0;
401 GString *match_expr;
402 int c1, c2;
404 for (word_start = start_pos; word_start != 0; word_start--, cut_len++)
406 c1 = edit_buffer_get_byte (buf, word_start);
407 c2 = edit_buffer_get_byte (buf, word_start - 1);
409 if (is_break_char (c1) != is_break_char (c2) || c1 == '\n' || c2 == '\n')
410 break;
413 match_expr = g_string_sized_new (16);
417 c1 = edit_buffer_get_byte (buf, word_start + match_expr->len);
418 c2 = edit_buffer_get_byte (buf, word_start + match_expr->len + 1);
419 g_string_append_c (match_expr, c1);
421 while (!(is_break_char (c1) != is_break_char (c2) || c1 == '\n' || c2 == '\n'));
423 *start = word_start;
424 *cut = cut_len;
426 return match_expr;
429 /* --------------------------------------------------------------------------------------------- */
431 * Find first character of current word
433 * @param buf editor buffer
434 * @param word_start position of first character of current word
435 * @param word_len length of current word
437 * @return TRUE if first character of word is found and this character is not 1) a digit and
438 * 2) a begin of file, FALSE otherwise
441 gboolean
442 edit_buffer_find_word_start (const edit_buffer_t *buf, off_t *word_start, gsize *word_len)
444 int c;
445 off_t i;
447 /* return if at begin of file */
448 if (buf->curs1 <= 0)
449 return FALSE;
451 c = edit_buffer_get_previous_byte (buf);
452 /* return if not at end or in word */
453 if (is_break_char (c))
454 return FALSE;
456 /* search start of word */
457 for (i = 1;; i++)
459 int last;
461 last = c;
462 c = edit_buffer_get_byte (buf, buf->curs1 - i - 1);
464 if (is_break_char (c))
466 /* return if word starts with digit */
467 if (isdigit (last))
468 return FALSE;
470 break;
474 /* success */
475 *word_start = buf->curs1 - i; /* start found */
476 *word_len = (gsize) i;
478 return TRUE;
481 /* --------------------------------------------------------------------------------------------- */
483 * Basic low level single character buffer alterations and movements at the cursor: insert character
484 * at the cursor position and move right.
486 * @param buf pointer to editor buffer
487 * @param c character to insert
490 void
491 edit_buffer_insert (edit_buffer_t *buf, int c)
493 void *b;
494 off_t i;
496 i = buf->curs1 & M_EDIT_BUF_SIZE;
498 /* add a new buffer if we've reached the end of the last one */
499 if (i == 0)
500 g_ptr_array_add (buf->b1, g_malloc0 (EDIT_BUF_SIZE));
502 /* perform the insertion */
503 b = g_ptr_array_index (buf->b1, buf->curs1 >> S_EDIT_BUF_SIZE);
504 *((unsigned char *) b + i) = (unsigned char) c;
506 /* update cursor position */
507 buf->curs1++;
509 /* update file length */
510 buf->size++;
513 /* --------------------------------------------------------------------------------------------- */
515 * Basic low level single character buffer alterations and movements at the cursor: insert character
516 * at the cursor position and move left.
518 * @param buf pointer to editor buffer
519 * @param c character to insert
522 void
523 edit_buffer_insert_ahead (edit_buffer_t *buf, int c)
525 void *b;
526 off_t i;
528 i = buf->curs2 & M_EDIT_BUF_SIZE;
530 /* add a new buffer if we've reached the end of the last one */
531 if (i == 0)
532 g_ptr_array_add (buf->b2, g_malloc0 (EDIT_BUF_SIZE));
534 /* perform the insertion */
535 b = g_ptr_array_index (buf->b2, buf->curs2 >> S_EDIT_BUF_SIZE);
536 *((unsigned char *) b + EDIT_BUF_SIZE - 1 - i) = (unsigned char) c;
538 /* update cursor position */
539 buf->curs2++;
541 /* update file length */
542 buf->size++;
545 /* --------------------------------------------------------------------------------------------- */
547 * Basic low level single character buffer alterations and movements at the cursor: delete character
548 * at the cursor position.
550 * @param buf pointer to editor buffer
551 * @param c character to insert
555 edit_buffer_delete (edit_buffer_t *buf)
557 void *b;
558 unsigned char c;
559 off_t prev;
560 off_t i;
562 prev = buf->curs2 - 1;
564 b = g_ptr_array_index (buf->b2, prev >> S_EDIT_BUF_SIZE);
565 i = prev & M_EDIT_BUF_SIZE;
566 c = *((unsigned char *) b + EDIT_BUF_SIZE - 1 - i);
568 if (i == 0)
570 guint j;
572 j = buf->b2->len - 1;
573 b = g_ptr_array_index (buf->b2, j);
574 g_ptr_array_remove_index (buf->b2, j);
577 buf->curs2 = prev;
579 /* update file length */
580 buf->size--;
582 return c;
585 /* --------------------------------------------------------------------------------------------- */
587 * Basic low level single character buffer alterations and movements at the cursor: delete character
588 * before the cursor position and move left.
590 * @param buf pointer to editor buffer
591 * @param c character to insert
595 edit_buffer_backspace (edit_buffer_t *buf)
597 void *b;
598 unsigned char c;
599 off_t prev;
600 off_t i;
602 prev = buf->curs1 - 1;
604 b = g_ptr_array_index (buf->b1, prev >> S_EDIT_BUF_SIZE);
605 i = prev & M_EDIT_BUF_SIZE;
606 c = *((unsigned char *) b + i);
608 if (i == 0)
610 guint j;
612 j = buf->b1->len - 1;
613 b = g_ptr_array_index (buf->b1, j);
614 g_ptr_array_remove_index (buf->b1, j);
617 buf->curs1 = prev;
619 /* update file length */
620 buf->size--;
622 return c;
625 /* --------------------------------------------------------------------------------------------- */
627 * Calculate forward offset with specified number of lines.
629 * @param buf editor buffer
630 * @param current current offset
631 * @param lines number of lines to move forward
632 * @param upto offset to count lines between current and upto.
634 * @return If lines is zero returns the count of lines from current to upto.
635 * If upto is zero returns offset of lines forward current.
636 * Else returns forward offset with specified number of lines
639 off_t
640 edit_buffer_get_forward_offset (const edit_buffer_t *buf, off_t current, long lines, off_t upto)
642 if (upto != 0)
643 return (off_t) edit_buffer_count_lines (buf, current, upto);
645 lines = MAX (lines, 0);
647 while (lines-- != 0)
649 long next;
651 next = edit_buffer_get_eol (buf, current) + 1;
652 if (next > buf->size)
653 break;
654 current = next;
657 return current;
660 /* --------------------------------------------------------------------------------------------- */
662 * Calculate backward offset with specified number of lines.
664 * @param buf editor buffer
665 * @param current current offset
666 * @param lines number of lines to move backward
668 * @return backward offset with specified number of lines.
671 off_t
672 edit_buffer_get_backward_offset (const edit_buffer_t *buf, off_t current, long lines)
674 lines = MAX (lines, 0);
675 current = edit_buffer_get_bol (buf, current);
677 while (lines-- != 0 && current != 0)
678 current = edit_buffer_get_bol (buf, current - 1);
680 return current;
683 /* --------------------------------------------------------------------------------------------- */
685 * Load file into editor buffer
687 * @param buf pointer to editor buffer
688 * @param fd file descriptor
689 * @param size file size
691 * @return number of read bytes
694 off_t
695 edit_buffer_read_file (edit_buffer_t *buf, int fd, off_t size,
696 edit_buffer_read_file_status_msg_t *sm, gboolean *aborted)
698 off_t ret = 0;
699 off_t i, j;
700 off_t data_size;
701 void *b;
702 status_msg_t *s = STATUS_MSG (sm);
703 unsigned short update_cnt = 0;
705 *aborted = FALSE;
707 buf->lines = 0;
708 buf->curs2 = size;
709 i = buf->curs2 >> S_EDIT_BUF_SIZE;
711 /* fill last part of b2 */
712 data_size = buf->curs2 & M_EDIT_BUF_SIZE;
713 if (data_size != 0)
715 b = g_malloc0 (EDIT_BUF_SIZE);
716 g_ptr_array_add (buf->b2, b);
717 b = (char *) b + EDIT_BUF_SIZE - data_size;
718 ret = mc_read (fd, b, data_size);
720 /* count lines */
721 for (j = 0; j < ret; j++)
722 if (*((char *) b + j) == '\n')
723 buf->lines++;
725 if (ret < 0 || ret != data_size)
726 return ret;
729 /* fulfill other parts of b2 from end to begin */
730 data_size = EDIT_BUF_SIZE;
731 for (--i; i >= 0; i--)
733 off_t sz;
735 b = g_malloc0 (data_size);
736 g_ptr_array_add (buf->b2, b);
737 sz = mc_read (fd, b, data_size);
738 if (sz >= 0)
739 ret += sz;
741 /* count lines */
742 for (j = 0; j < sz; j++)
743 if (*((char *) b + j) == '\n')
744 buf->lines++;
746 if (s != NULL && s->update != NULL)
748 update_cnt = (update_cnt + 1) & 0xf;
749 if (update_cnt == 0)
751 /* FIXME: overcare */
752 if (sm->buf == NULL)
753 sm->buf = buf;
755 sm->loaded = ret;
756 if (s->update (s) == B_CANCEL)
758 *aborted = TRUE;
759 return (-1);
764 if (sz != data_size)
765 break;
768 /* reverse buffer */
769 for (i = 0; i < (off_t) buf->b2->len / 2; i++)
771 void **b1, **b2;
773 b1 = &g_ptr_array_index (buf->b2, i);
774 b2 = &g_ptr_array_index (buf->b2, buf->b2->len - 1 - i);
776 b = *b1;
777 *b1 = *b2;
778 *b2 = b;
780 if (s != NULL && s->update != NULL)
782 update_cnt = (update_cnt + 1) & 0xf;
783 if (update_cnt == 0)
785 sm->loaded = ret;
786 if (s->update (s) == B_CANCEL)
788 *aborted = TRUE;
789 return (-1);
795 return ret;
798 /* --------------------------------------------------------------------------------------------- */
800 * Write editor buffer content to file
802 * @param buf pointer to editor buffer
803 * @param fd file descriptor
805 * @return number of written bytes
808 off_t
809 edit_buffer_write_file (edit_buffer_t *buf, int fd)
811 off_t ret = 0;
812 off_t i;
813 off_t data_size, sz;
814 void *b;
816 /* write all fulfilled parts of b1 from begin to end */
817 if (buf->b1->len != 0)
819 data_size = EDIT_BUF_SIZE;
820 for (i = 0; i < (off_t) buf->b1->len - 1; i++)
822 b = g_ptr_array_index (buf->b1, i);
823 sz = mc_write (fd, b, data_size);
824 if (sz >= 0)
825 ret += sz;
826 else if (i == 0)
827 ret = sz;
828 if (sz != data_size)
829 return ret;
832 /* write last partially filled part of b1 */
833 data_size = ((buf->curs1 - 1) & M_EDIT_BUF_SIZE) + 1;
834 b = g_ptr_array_index (buf->b1, i);
835 sz = mc_write (fd, b, data_size);
836 if (sz >= 0)
837 ret += sz;
838 if (sz != data_size)
839 return ret;
842 /* write b2 from end to begin, if b2 contains some data */
843 if (buf->b2->len != 0)
845 /* write last partially filled part of b2 */
846 i = buf->b2->len - 1;
847 b = g_ptr_array_index (buf->b2, i);
848 data_size = ((buf->curs2 - 1) & M_EDIT_BUF_SIZE) + 1;
849 sz = mc_write (fd, (char *) b + EDIT_BUF_SIZE - data_size, data_size);
850 if (sz >= 0)
851 ret += sz;
853 if (sz == data_size)
855 /* write other fulfilled parts of b2 from end to begin */
856 data_size = EDIT_BUF_SIZE;
857 while (--i >= 0)
859 b = g_ptr_array_index (buf->b2, i);
860 sz = mc_write (fd, b, data_size);
861 if (sz >= 0)
862 ret += sz;
863 if (sz != data_size)
864 break;
869 return ret;
872 /* --------------------------------------------------------------------------------------------- */
874 * Calculate percentage of specified character offset
876 * @param buf pointer to editor buffer
877 * @param p character offset
879 * @return percentage of specified character offset
883 edit_buffer_calc_percent (const edit_buffer_t *buf, off_t offset)
885 int percent;
887 if (buf->size == 0)
888 percent = 0;
889 else if (offset >= buf->size)
890 percent = 100;
891 else if (offset > (INT_MAX / 100))
892 percent = offset / (buf->size / 100);
893 else
894 percent = offset * 100 / buf->size;
896 return percent;
899 /* --------------------------------------------------------------------------------------------- */