firmware/bidi.c

   1 /***************************************************************************
   2  *             __________               __   ___.
   3  *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
   4  *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
   5  *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
   6  *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
   7  *                     \/            \/     \/    \/            \/
   8  * $Id$
   9  *
  10  * Copyright (C) 2005 by Gadi Cohen
  11  *
  12  * Largely based on php_hebrev by Zeev Suraski <zeev@php.net>
  13  * Heavily modified by Gadi Cohen aka Kinslayer <dragon@wastelands.net>
  14  *
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
  21  * KIND, either express or implied.
  22  *
  23  ****************************************************************************/
  24 #include <stdio.h>
  25 #include <string.h>
  26 #include <ctype.h>
  27 #include "file.h"
  28 #include "lcd.h"
  29 #include "rbunicode.h"
  30 #include "arabjoin.h"
  31 #include "scroll_engine.h"
  32 #include "bidi.h"
  33
  34 /* #define _HEB_BUFFER_LENGTH (MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2) * 2 */
  35 #define _HEB_BLOCK_TYPE_ENG 1
  36 #define _HEB_BLOCK_TYPE_HEB 0
  37 #define _HEB_ORIENTATION_LTR 1
  38 #define _HEB_ORIENTATION_RTL 0
  39
  40 #define ischar(c) ((c > 0x0589 && c < 0x0700) || \
  41                    (c >= 0xfb50 && c <= 0xfefc) ? 1 : 0)
  42 #define _isblank(c) ((c==' ' || c=='\t') ? 1 : 0)
  43 #define _isnewline(c) ((c=='\n' || c=='\r') ? 1 : 0)
  44 #define XOR(a,b) ((a||b) && !(a&&b))
  45
  46 #ifndef BOOTLOADER
  47 static const arab_t * arab_lookup(unsigned short uchar)
  48 {
  49     if (uchar >= 0x621 && uchar <= 0x63a)
  50         return &(jointable[uchar - 0x621]);
  51     if (uchar >= 0x640 && uchar <= 0x64a)
  52         return &(jointable[uchar - 0x621 - 5]);
  53     if (uchar >= 0x671 && uchar <= 0x6d5)
  54         return &(jointable[uchar - 0x621 - 5 - 38]);
  55     if (uchar == 0x200D) /* Support for the zero-width joiner */
  56         return &zwj;
  57     return 0;
  58 }
  59
  60 static void arabjoin(unsigned short * stringprt, int length)
  61 {
  62     bool connected = false;
  63     unsigned short * writeprt = stringprt;
  64
  65     const arab_t * prev = 0;
  66     const arab_t * cur;
  67     const arab_t * ligature = 0;
  68     short uchar;
  69
  70     int i;
  71     for (i = 0; i <= length; i++) {
  72         cur = arab_lookup(uchar = *stringprt++);
  73
  74         /* Skip non-arabic chars */
  75         if (cur == 0) {
  76             if (prev) {
  77                 /* Finish the last char */
  78                 if (connected) {
  79                     *writeprt++ = prev->final;
  80                     connected = false;
  81                 } else
  82                     *writeprt++ = prev->isolated;
  83                 prev = 0;
  84                 *writeprt++ = uchar;
  85             } else {
  86                 *writeprt++ = uchar;
  87             }
  88             continue;
  89         }
  90
  91         /* nothing to do for arabic char if the previous was non-arabic */
  92         if (prev == 0) {
  93             prev = cur;
  94             continue;
  95         }
  96
  97         /* if it's LAM, check for LAM+ALEPH ligatures */
  98         if (prev->isolated == 0xfedd) {
  99             switch (cur->isolated) {
 100                 case 0xfe8d:
 101                     ligature = &(lamaleph[0]);
 102                     break;
 103                 case 0xfe87:
 104                     ligature = &(lamaleph[1]);
 105                     break;
 106                 case 0xfe83:
 107                     ligature = &(lamaleph[2]);
 108                     break;
 109                 case 0xfe81:
 110                     ligature = &(lamaleph[3]);
 111             }
 112         }
 113
 114         if (ligature) { /* replace the 2 glyphs by their ligature */
 115             prev = ligature;
 116             ligature = 0;
 117         } else {
 118             if (connected) { /* previous char has something connected to it */
 119                 if (prev->medial && cur->final) /* Can we connect to it? */
 120                     *writeprt++ = prev->medial;
 121                 else {
 122                     *writeprt++ = prev->final;
 123                     connected = false;
 124                 }
 125             } else {
 126                 if (prev->initial && cur->final) { /* Can we connect to it? */
 127                     *writeprt++ = prev->initial;
 128                     connected = true;
 129                 } else
 130                     *writeprt++ = prev->isolated;
 131             }
 132             prev = cur;
 133         }
 134     }
 135 }
 136 #endif /* !BOOTLOADER */
 137
 138 unsigned short *bidi_l2v(const unsigned char *str, int orientation)
 139 {
 140     static unsigned short  utf16_buf[SCROLL_LINE_SIZE];
 141     unsigned short *target, *tmp;
 142 #ifndef BOOTLOADER
 143     static unsigned short  bidi_buf[SCROLL_LINE_SIZE];
 144     unsigned short *heb_str; /* *broken_str */
 145     int block_start, block_end, block_type, block_length, i;
 146     int length = utf8length(str);
 147 #endif
 148     /*
 149     long max_chars=0;
 150     int begin, end, char_count, orig_begin;
 151
 152     tmp = str;
 153     */
 154     target = tmp = utf16_buf;
 155     while (*str)
 156         str = utf8decode(str, target++);
 157     *target = 0;
 158
 159 #ifdef BOOTLOADER
 160     (void)orientation;
 161     return utf16_buf;
 162
 163 #else /* !BOOTLOADER */
 164     if (target == utf16_buf) /* empty string */
 165         return target;
 166
 167     /* properly join any arabic chars */
 168     arabjoin(utf16_buf, length);
 169
 170     block_start=block_end=block_length=0;
 171
 172     heb_str = bidi_buf;
 173     if (orientation) {
 174         target = heb_str;
 175     } else {
 176         target = heb_str + length;
 177         *target = 0;
 178         target--;
 179     }
 180
 181     if (ischar(*tmp))
 182         block_type = _HEB_BLOCK_TYPE_HEB;
 183     else
 184         block_type = _HEB_BLOCK_TYPE_ENG;
 185
 186     do {
 187         while((XOR(ischar(*(tmp+1)),block_type)
 188                || _isblank(*(tmp+1)) || ispunct((int)*(tmp+1))
 189                || *(tmp+1)=='\n')
 190               && block_end < length-1) {
 191                 tmp++;
 192                 block_end++;
 193                 block_length++;
 194         }
 195
 196         if (block_type != orientation) {
 197             while ((_isblank(*tmp) || ispunct((int)*tmp))
 198                    && *tmp!='/' && *tmp!='-' && block_end>block_start) {
 199                 tmp--;
 200                 block_end--;
 201             }
 202         }
 203
 204         for (i=block_start; i<=block_end; i++) {
 205             *target = (block_type == orientation) ?
 206                       *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start);
 207             if (block_type!=orientation) {
 208                 switch (*target) {
 209                 case '(':
 210                     *target = ')';
 211                     break;
 212                 case ')':
 213                     *target = '(';
 214                     break;
 215                 default:
 216                     break;
 217                 }
 218             }
 219             target += orientation ? 1 : -1;
 220         }
 221         block_type = !block_type;
 222         block_start=block_end+1;
 223     } while(block_end<length-1);
 224
 225     *target = 0;
 226
 227 #if 0 /* Is this code really necessary? */
 228     broken_str = utf16_buf;
 229     begin=end=length-1;
 230     target = broken_str;
 231
 232     while (1) {
 233         char_count=0;
 234         while ((!max_chars || char_count<max_chars) && begin>0) {
 235             char_count++;
 236             begin--;
 237             if (begin<=0 || _isnewline(heb_str[begin])) {
 238                 while(begin>0 && _isnewline(heb_str[begin-1])) {
 239                     begin--;
 240                     char_count++;
 241                 }
 242                 break;
 243             }
 244         }
 245         if (char_count==max_chars) { /* try to avoid breaking words */
 246             int new_char_count = char_count;
 247             int new_begin = begin;
 248
 249             while (new_char_count>0) {
 250                 if (_isblank(heb_str[new_begin]) ||
 251                     _isnewline(heb_str[new_begin])) {
 252                     break;
 253                 }
 254                 new_begin++;
 255                 new_char_count--;
 256             }
 257             if (new_char_count>0) {
 258                 char_count=new_char_count;
 259                 begin=new_begin;
 260             }
 261         }
 262         orig_begin=begin;
 263
 264         /* if (_isblank(heb_str[begin])) {
 265             heb_str[begin]='\n';
 266         } */
 267
 268         /* skip leading newlines */
 269         while (begin<=end && _isnewline(heb_str[begin])) {
 270             begin++;
 271         }
 272
 273         /* copy content */
 274         for (i=begin; i<=end; i++) {
 275             *target = heb_str[i];
 276             target++;
 277         }
 278
 279         for (i=orig_begin; i<=end && _isnewline(heb_str[i]); i++) {
 280             *target = heb_str[i];
 281             target++;
 282         }
 283         begin=orig_begin;
 284
 285         if (begin<=0) {
 286             *target = 0;
 287             break;
 288         }
 289         begin--;
 290         end=begin;
 291     }
 292     return broken_str;
 293 #endif
 294     return heb_str;
 295 #endif /* !BOOTLOADER */
 296 }
 297