respect https prefix
[k8lowj.git] / src / smartquotes.c
blob682cf0d690960b4f7ba4255e37578c5b50dd410a
1 /* logjam - a GTK client for LiveJournal.
2 * Copyright (C) 2000-2003 Evan Martin <evan@livejournal.com>
4 * vim: tabstop=4 shiftwidth=4 noexpandtab :
5 */
6 #include <stdlib.h>
8 #include "gtk-all.h"
/* Cleanup handler used through the AUTOFREE attribute macro.
 *
 * `p` is the address of a pointer variable (that is what the compiler's
 * cleanup attribute passes in); the memory the variable points to is
 * released and the variable is reset to NULL.
 */
static void xfree (void *p) {
  void **slot = (void **)p;
  free(*slot); /* free(NULL) is a no-op, so no guard is needed */
  *slot = NULL; /* never leave a dangling pointer behind */
}
15 #define AUTOFREE __attribute__((cleanup(xfree)))
18 #define ASCII_BACKTICK '`'
19 #define ASCII_SINGLEQUOTE '\''
20 #define ASCII_DOUBLEQUOTE '"'
21 #define UNICODE_LEFTSINGLEQUOTE 0x2018
22 #define UNICODE_RIGHTSINGLEQUOTE 0x2019
23 #define UNICODE_LEFTDOUBLEQUOTE 0x201C
24 #define UNICODE_RIGHTDOUBLEQUOTE 0x201D
25 #define UNICODE_LEFTDOUBLEQUOTE_RU 0x00AB
26 #define UNICODE_RIGHTDOUBLEQUOTE_RU 0x00BB
29 static gboolean rusmode = FALSE;
32 /* return quote type:
33 * 0: none
34 * 1: uni-single
35 * 2: uni-double
37 static int is_quote (gunichar c) {
38 if (!rusmode) {
39 switch (c) {
40 case ASCII_BACKTICK:
41 case ASCII_SINGLEQUOTE:
42 case UNICODE_LEFTSINGLEQUOTE:
43 case UNICODE_RIGHTSINGLEQUOTE:
44 return 1;
45 case ASCII_DOUBLEQUOTE:
46 case UNICODE_LEFTDOUBLEQUOTE:
47 case UNICODE_RIGHTDOUBLEQUOTE:
48 return 2;
49 default:
50 return 0;
52 } else {
53 switch (c) {
54 case ASCII_DOUBLEQUOTE:
55 case UNICODE_LEFTDOUBLEQUOTE_RU:
56 case UNICODE_RIGHTDOUBLEQUOTE_RU:
57 return 2;
58 default:
59 return 0;
65 /* return quote type plus:
66 * 3: hot char
68 static int is_hotchar (gunichar c) {
69 int qq = is_quote(c);
70 if (qq) return qq;
71 switch (c) {
72 case '!': case '.': case '-': case ' ': case '\t': case '<': case '>': case '`': case '&': case '/':
73 return 3;
74 default:
75 return 0;
80 static void buf_replace_chars (GtkTextBuffer *buffer, GtkTextIter *pos, GtkTextIter *nextpos, gunichar oldc, gunichar newc) {
81 if (oldc != newc) {
82 char buf[8];
83 int len;
84 gtk_text_buffer_delete(buffer, pos, nextpos);
85 len = g_unichar_to_utf8(newc, buf);
86 buf[len] = 0;
87 gtk_text_buffer_insert(buffer, pos, buf, len);
92 /* only ASCII or valid UTF-8 */
93 static void buf_replace_char_with_string (GtkTextBuffer *buffer, GtkTextIter *pos, const char *str) {
94 GtkTextIter npos = *pos;
95 gtk_text_iter_forward_char(&npos);
96 gtk_text_buffer_delete(buffer, pos, &npos);
97 if (str != NULL && str[0]) {
98 int len = strlen(str);
99 gtk_text_buffer_insert(buffer, pos, str, len);
100 gtk_text_iter_forward_chars(pos, len);
105 /* return:
106 * -2: </code>
107 * -1: </pre>
108 * 0: none
109 * 1: <pre>
110 * 2: <code>
112 static int check_pre_tag (GtkTextIter pos) {
113 static const char *tag_names[2] = {"pre", "code"}; /* engrish: no first char can be the same */
114 int closing = 1;
115 int sidx;
116 gunichar c = gtk_text_iter_get_char(&pos);
117 if (c != '<') return 0;
118 gtk_text_iter_forward_char(&pos);
119 if ((c = gtk_text_iter_get_char(&pos)) == '/') {
120 closing = -1;
121 gtk_text_iter_forward_char(&pos);
122 c = gtk_text_iter_get_char(&pos);
124 gtk_text_iter_forward_char(&pos);
125 if (c >= 'A' && c <= 'Z') c += 32; // convert to lower case
126 for (sidx = 0; sidx < sizeof(tag_names)/sizeof(tag_names[0]); ++sidx) if (tag_names[sidx][0] == c) break;
127 if (sidx < sizeof(tag_names)/sizeof(tag_names[0])) {
128 // ok, try to match
129 const char *s = tag_names[sidx]+1;
130 while (*s) {
131 c = gtk_text_iter_get_char(&pos);
132 gtk_text_iter_forward_char(&pos);
133 if (c >= 'A' && c <= 'Z') c += 32; // convert to lower case
134 if (*s != c) return 0;
135 ++s;
137 c = gtk_text_iter_get_char(&pos);
138 //if (c == '/') return 0;
139 if (c < 'A') return (sidx+1)*closing;
141 return 0;
145 static void run_smartquotes (GtkTextBuffer *buffer) {
146 GtkTextIter pos;
147 gunichar c = 0, prevca[4];
148 int curnesting = -1;
149 AUTOFREE int *balance = NULL; /* array */
150 int balanceMax = 10;
151 gboolean insidetag = FALSE, closing;
152 int inside_pre_tag = 0; /* 0: none; -1: 'pre'; -2: 'code' */
153 int quotes;
155 /* this runs as the user is typing, so undo doesn't make much sense.
156 gtk_text_buffer_begin_user_action(buffer);
159 if (!rusmode) {
160 if ((balance = calloc(balanceMax, sizeof(int))) == NULL) return; /* shit happens... */
163 gtk_text_buffer_get_start_iter(buffer, &pos);
164 while ((c = gtk_text_iter_get_char(&pos))) {
165 /* [0],[1],[2],c,[3] */
166 int tag = check_pre_tag(pos);
167 GtkTextIter nextpos = pos;
168 for (int f = 2; f >= 0; --f) {
169 if (gtk_text_iter_get_offset(&nextpos) <= 0) {
170 prevca[f] = 0;
171 } else {
172 gtk_text_iter_backward_char(&nextpos);
173 prevca[f] = gtk_text_iter_get_char(&nextpos);
176 /*g_printf("ofs: %d; char=%i\n", gtk_text_iter_get_offset(&pos), c);*/
177 nextpos = pos;
178 gtk_text_iter_forward_char(&nextpos);
179 prevca[3] = gtk_text_iter_get_char(&nextpos);
181 if (inside_pre_tag) {
182 gtk_text_iter_forward_char(&pos);
183 if (tag == inside_pre_tag) {
184 inside_pre_tag = 0;
185 } else {
186 if (c == '<' && prevca[3] && prevca[3] != '/') {
187 gtk_text_iter_backward_char(&pos);
188 buf_replace_char_with_string(buffer, &pos, "&lt;");
189 } else if (c == '>') {
190 gtk_text_iter_backward_char(&pos);
191 buf_replace_char_with_string(buffer, &pos, "&gt;");
192 } else if (c == '&' && prevca[3]) {
193 const char *s = NULL;
194 gtk_text_iter_backward_char(&pos);
195 switch (prevca[3]) {
196 case 'a': s = "&amp;"; break;
197 case 'l': s = "&lt;"; break;
198 case 'g': s = "&gt;"; break;
200 if (s != NULL) {
201 nextpos = pos;
202 while (*s && (c = gtk_text_iter_get_char(&nextpos))) {
203 gtk_text_iter_forward_char(&nextpos);
204 if (*s != c) break;
205 ++s;
207 if (!c) break; // done
208 if (*s) {
209 // should replace
210 buf_replace_char_with_string(buffer, &pos, "&amp;");
211 } else {
212 // don't replace, just skip
213 pos = nextpos;
215 } else {
216 // should replace
217 buf_replace_char_with_string(buffer, &pos, "&amp;");
221 continue;
222 } else if (tag > 0) {
223 inside_pre_tag = -tag;
224 // skip tag
225 gtk_text_iter_forward_char(&pos);
226 while ((c = gtk_text_iter_get_char(&pos))) {
227 gtk_text_iter_forward_char(&pos);
228 if (c == '>') break;
230 continue;
233 /* --> */
234 if (!insidetag && prevca[1] == '-' && prevca[2] == '-' && c == '>') {
235 gtk_text_iter_backward_char(&pos);
236 gtk_text_iter_backward_char(&pos);
237 buf_replace_chars(buffer, &pos, &nextpos, c, 0x2192);
238 gtk_text_iter_forward_char(&pos);
239 continue;
242 if (c == '<') insidetag = TRUE;
243 else if (c == '>') insidetag = FALSE;
245 if (!insidetag) {
246 /* long dash */
247 if (prevca[0] == ' ' && prevca[1] == '-' && prevca[2] == '-' && c == ' ') {
248 gtk_text_iter_backward_char(&pos);
249 gtk_text_iter_backward_char(&pos);
250 gtk_text_iter_backward_char(&nextpos);
251 buf_replace_chars(buffer, &pos, &nextpos, c, 0x2014);
252 gtk_text_iter_forward_char(&pos);
253 continue;
255 /* ellipsis */
256 if (prevca[1] == '.' && prevca[2] == '.' && c == '.') {
257 gtk_text_iter_backward_char(&pos);
258 gtk_text_iter_backward_char(&pos);
259 buf_replace_chars(buffer, &pos, &nextpos, c, 0x2026);
260 gtk_text_iter_forward_char(&pos);
261 continue;
263 /* accent */
264 if (prevca[2] == '`' && c == '`') {
265 gtk_text_iter_backward_char(&pos);
266 buf_replace_chars(buffer, &pos, &nextpos, c, 0x0301);
267 gtk_text_iter_forward_char(&pos);
268 continue;
272 quotes = is_quote(c);
274 if (insidetag || quotes == 0 || curnesting < -1) {
275 gtk_text_iter_forward_char(&pos);
276 continue;
279 if (rusmode) {
280 closing = (curnesting >= 0);
281 if (closing) {
282 /*g_print("n %d right %c\n", curnesting, (char)c);*/
283 buf_replace_chars(buffer, &pos, &nextpos, c, UNICODE_RIGHTDOUBLEQUOTE_RU);
284 --curnesting;
285 } else {
286 /*g_print("n %d left %c\n", curnesting, (char)c);*/
287 buf_replace_chars(buffer, &pos, &nextpos, c, UNICODE_LEFTDOUBLEQUOTE_RU);
288 ++curnesting;
290 } else {
291 closing = (curnesting >= 0 && balance[curnesting] == quotes);
292 if (quotes == 1 && g_unichar_isalnum(prevca[2]) && (!closing || g_unichar_isalnum(prevca[3]))) {
293 /* an apostrophe. fix it up, but don't change nesting. */
294 buf_replace_chars(buffer, &pos, &nextpos, c, UNICODE_RIGHTSINGLEQUOTE);
295 } else if (closing) {
296 buf_replace_chars(buffer, &pos, &nextpos, c, (quotes == 1 ? UNICODE_RIGHTSINGLEQUOTE : UNICODE_RIGHTDOUBLEQUOTE));
297 --curnesting;
298 } else {
299 buf_replace_chars(buffer, &pos, &nextpos, c, (quotes == 1 ? UNICODE_LEFTSINGLEQUOTE : UNICODE_LEFTDOUBLEQUOTE));
300 ++curnesting;
301 balance[curnesting] = quotes;
303 if (curnesting >= balanceMax-1) {
304 /*g_warning("too many nested quotes.");*/
305 int newsz = balanceMax+16;
306 int *newb = realloc(balance, newsz*sizeof(int));
307 if (!newb) break; /* shit happens... */
308 balance = newb;
309 balanceMax = newsz;
310 for (int f = curnesting+1; f < balanceMax; ++f) balance[f] = 0;
313 gtk_text_iter_forward_char(&pos);
315 /* gtk_text_buffer_end_user_action(buffer); */
319 #define SMARTQUOTES_KEY "logjam-smartquotes-id"
320 static void smartquotes_begin (GtkTextBuffer *buffer);
321 static void smartquotes_insert_cb (GtkTextBuffer *buffer, GtkTextIter *iter, gchar *text, gint len, gpointer user_data);
322 static void smartquotes_delete_cb (GtkTextBuffer *buffer, GtkTextIter *i1, GtkTextIter *i2);
325 static gboolean smartquotes_idle_cb (GtkTextBuffer *buffer) {
326 g_signal_handlers_block_by_func(buffer, smartquotes_insert_cb, SMARTQUOTES_KEY);
327 g_signal_handlers_block_by_func(buffer, smartquotes_delete_cb, SMARTQUOTES_KEY);
328 run_smartquotes(buffer);
329 g_signal_handlers_unblock_by_func(buffer, smartquotes_insert_cb, SMARTQUOTES_KEY);
330 g_signal_handlers_unblock_by_func(buffer, smartquotes_delete_cb, SMARTQUOTES_KEY);
331 return FALSE;
335 static void smartquotes_begin (GtkTextBuffer *buffer) {
336 GObject *obj = G_OBJECT(buffer);
337 guint idleid;
338 idleid = GPOINTER_TO_INT(g_object_get_data(obj, SMARTQUOTES_KEY));
339 if (idleid) g_source_remove(idleid);
340 idleid = g_idle_add((GSourceFunc)smartquotes_idle_cb, buffer);
341 g_object_set_data(obj, SMARTQUOTES_KEY, GINT_TO_POINTER(idleid));
345 static void smartquotes_insert_cb (GtkTextBuffer *buffer, GtkTextIter *iter, gchar *text, gint len, gpointer user_data) {
346 for (int i = 0; i < len; ++i) {
347 if (is_hotchar(text[i])) {
348 smartquotes_begin(buffer);
349 break;
354 static void smartquotes_delete_cb(GtkTextBuffer *buffer, GtkTextIter *i1, GtkTextIter *i2) {
355 gunichar c;
356 GtkTextIter i = *i1;
357 while (gtk_text_iter_in_range(&i, i1, i2)) {
358 c = gtk_text_iter_get_char(&i);
359 if (is_hotchar(c)) {
360 smartquotes_begin(buffer);
361 break;
363 gtk_text_iter_forward_char(&i);
368 void smartquotes_attach (GtkTextBuffer *buffer, gboolean russian_mode) {
369 rusmode = russian_mode;
370 g_signal_connect(buffer, "insert-text", G_CALLBACK(smartquotes_insert_cb), SMARTQUOTES_KEY);
371 g_signal_connect(buffer, "delete-range", G_CALLBACK(smartquotes_delete_cb), SMARTQUOTES_KEY);
375 void smartquotes_detach (GtkTextBuffer *buffer) {
376 g_signal_handlers_disconnect_matched(G_OBJECT(buffer), G_SIGNAL_MATCH_DATA, 0, 0, NULL, NULL, SMARTQUOTES_KEY);