aboutsummaryrefslogtreecommitdiff
path: root/lib/gdi/font_arabic.cpp
blob: 573f9fa2a8133c311361ceeb3a757cb00b2e8114 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/*
 * This was taken from pango. 
 * Modified for enigma by Felix Domke <tmbinc@gmx.net>.
 * I removed support for vowels and ligatures. Sorry.
 *
 * Original header:
 *
 * This is part of Pango - Arabic shaping module 
 *
 * (C) 2000 Karl Koehler <koehler@or.uni-bonn.de>
 * (C) 2001 Roozbeh Pournader <roozbeh@sharif.edu>
 *
 */

#include <vector>
#include <lib/base/eerror.h>

typedef struct
{
	unsigned long basechar;
	int count;
	unsigned long charshape[4];
} shapestruct;

typedef struct
{
	unsigned long basechar;
	char numshapes;
} charstruct;

static void
charstruct_init (charstruct * s)
{
  s->basechar = 0;
  s->numshapes = 1;
}

#define connects_to_left(a)	 ((a).numshapes > 2)

/* The Unicode order is always 'isolated, final, initial, medial'. */

/* *INDENT-OFF* */
static shapestruct chartable[] = {
	{0x0621, 1, {0xFE80}}, /* HAMZA */
	{0x0622, 2, {0xFE81, 0xFE82}}, /* ALEF WITH MADDA ABOVE */
	{0x0623, 2, {0xFE83, 0xFE84}}, /* ALEF WITH HAMZA ABOVE */
	{0x0624, 2, {0xFE85, 0xFE86}}, /* WAW WITH HAMZA ABOVE */
	{0x0625, 2, {0xFE87, 0xFE88}}, /* ALEF WITH HAMZA BELOW */
	{0x0626, 4, {0xFE89, 0xFE8A, 0xFE8B, 0xFE8C}}, /* YEH WITH HAMZA ABOVE */
	{0x0627, 2, {0xFE8D, 0xFE8E}}, /* ALEF */
	{0x0628, 4, {0xFE8F, 0xFE90, 0xFE91, 0xFE92}}, /* BEH */
	{0x0629, 2, {0xFE93, 0xFE94}}, /* TEH MARBUTA */
	{0x062A, 4, {0xFE95, 0xFE96, 0xFE97, 0xFE98}}, /* TEH */
	{0x062B, 4, {0xFE99, 0xFE9A, 0xFE9B, 0xFE9C}}, /* THEH */
	{0x062C, 4, {0xFE9D, 0xFE9E, 0xFE9F, 0xFEA0}}, /* JEEM */
	{0x062D, 4, {0xFEA1, 0xFEA2, 0xFEA3, 0xFEA4}}, /* HAH */
	{0x062E, 4, {0xFEA5, 0xFEA6, 0xFEA7, 0xFEA8}}, /* KHAH */
	{0x062F, 2, {0xFEA9, 0xFEAA}}, /* DAL */
	{0x0630, 2, {0xFEAB, 0xFEAC}}, /* THAL */
	{0x0631, 2, {0xFEAD, 0xFEAE}}, /* REH */
	{0x0632, 2, {0xFEAF, 0xFEB0}}, /* ZAIN */
	{0x0633, 4, {0xFEB1, 0xFEB2, 0xFEB3, 0xFEB4}}, /* SEEN */
	{0x0634, 4, {0xFEB5, 0xFEB6, 0xFEB7, 0xFEB8}}, /* SHEEN */
	{0x0635, 4, {0xFEB9, 0xFEBA, 0xFEBB, 0xFEBC}}, /* SAD */
	{0x0636, 4, {0xFEBD, 0xFEBE, 0xFEBF, 0xFEC0}}, /* DAD */
	{0x0637, 4, {0xFEC1, 0xFEC2, 0xFEC3, 0xFEC4}}, /* TAH */
	{0x0638, 4, {0xFEC5, 0xFEC6, 0xFEC7, 0xFEC8}}, /* ZAH */
	{0x0639, 4, {0xFEC9, 0xFECA, 0xFECB, 0xFECC}}, /* AIN */
	{0x063A, 4, {0xFECD, 0xFECE, 0xFECF, 0xFED0}}, /* GHAIN */
	{0x0640, 4, {0x0640, 0x0640, 0x0640, 0x0640}}, /* TATWEEL */
	{0x0641, 4, {0xFED1, 0xFED2, 0xFED3, 0xFED4}}, /* FEH */
	{0x0642, 4, {0xFED5, 0xFED6, 0xFED7, 0xFED8}}, /* QAF */
	{0x0643, 4, {0xFED9, 0xFEDA, 0xFEDB, 0xFEDC}}, /* KAF */
	{0x0644, 4, {0xFEDD, 0xFEDE, 0xFEDF, 0xFEE0}}, /* LAM */
	{0x0645, 4, {0xFEE1, 0xFEE2, 0xFEE3, 0xFEE4}}, /* MEEM */
	{0x0646, 4, {0xFEE5, 0xFEE6, 0xFEE7, 0xFEE8}}, /* NOON */
	{0x0647, 4, {0xFEE9, 0xFEEA, 0xFEEB, 0xFEEC}}, /* HEH */
	{0x0648, 2, {0xFEED, 0xFEEE}}, /* WAW */
	{0x0649, 4, {0xFEEF, 0xFEF0, 0xFBE8, 0xFBE9}}, /* ALEF MAKSURA */
	{0x064A, 4, {0xFEF1, 0xFEF2, 0xFEF3, 0xFEF4}}, /* YEH */
	{0x0671, 2, {0xFB50, 0xFB51}}, /* ALEF WASLA */
	{0x0679, 4, {0xFB66, 0xFB67, 0xFB68, 0xFB69}}, /* TTEH */
	{0x067A, 4, {0xFB5E, 0xFB5F, 0xFB60, 0xFB61}}, /* TTEHEH */
	{0x067B, 4, {0xFB52, 0xFB53, 0xFB54, 0xFB55}}, /* BEEH */
	{0x067E, 4, {0xFB56, 0xFB57, 0xFB58, 0xFB59}}, /* PEH */
	{0x067F, 4, {0xFB62, 0xFB63, 0xFB64, 0xFB65}}, /* TEHEH */
	{0x0680, 4, {0xFB5A, 0xFB5B, 0xFB5C, 0xFB5D}}, /* BEHEH */
	{0x0683, 4, {0xFB76, 0xFB77, 0xFB78, 0xFB79}}, /* NYEH */
	{0x0684, 4, {0xFB72, 0xFB73, 0xFB74, 0xFB75}}, /* DYEH */
	{0x0686, 4, {0xFB7A, 0xFB7B, 0xFB7C, 0xFB7D}}, /* TCHEH */
	{0x0687, 4, {0xFB7E, 0xFB7F, 0xFB80, 0xFB81}}, /* TCHEHEH */
	{0x0688, 2, {0xFB88, 0xFB89}}, /* DDAL */
	{0x068C, 2, {0xFB84, 0xFB85}}, /* DAHAL */
	{0x068D, 2, {0xFB82, 0xFB83}}, /* DDAHAL */
	{0x068E, 2, {0xFB86, 0xFB87}}, /* DUL */
	{0x0691, 2, {0xFB8C, 0xFB8D}}, /* RREH */
	{0x0698, 2, {0xFB8A, 0xFB8B}}, /* JEH */
	{0x06A4, 4, {0xFB6A, 0xFB6B, 0xFB6C, 0xFB6D}}, /* VEH */
	{0x06A6, 4, {0xFB6E, 0xFB6F, 0xFB70, 0xFB71}}, /* PEHEH */
	{0x06A9, 4, {0xFB8E, 0xFB8F, 0xFB90, 0xFB91}}, /* KEHEH */
	{0x06AD, 4, {0xFBD3, 0xFBD4, 0xFBD5, 0xFBD6}}, /* NG */
	{0x06AF, 4, {0xFB92, 0xFB93, 0xFB94, 0xFB95}}, /* GAF */
	{0x06B1, 4, {0xFB9A, 0xFB9B, 0xFB9C, 0xFB9D}}, /* NGOEH */
	{0x06B3, 4, {0xFB96, 0xFB97, 0xFB98, 0xFB99}}, /* GUEH */
	{0x06BB, 4, {0xFBA0, 0xFBA1, 0xFBA2, 0xFBA3}}, /* RNOON */
	{0x06BE, 4, {0xFBAA, 0xFBAB, 0xFBAC, 0xFBAD}}, /* HEH DOACHASHMEE */
	{0x06C0, 2, {0xFBA4, 0xFBA5}}, /* HEH WITH YEH ABOVE */
	{0x06C1, 4, {0xFBA6, 0xFBA7, 0xFBA8, 0xFBA9}}, /* HEH GOAL */
	{0x06C5, 2, {0xFBE0, 0xFBE1}}, /* KIRGHIZ OE */
	{0x06C6, 2, {0xFBD9, 0xFBDA}}, /* OE */
	{0x06C7, 2, {0xFBD7, 0xFBD8}}, /* U */
	{0x06C8, 2, {0xFBDB, 0xFBDC}}, /* YU */
	{0x06C9, 2, {0xFBE2, 0xFBE3}}, /* KIRGHIZ YU */
	{0x06CB, 2, {0xFBDE, 0xFBDF}}, /* VE */
	{0x06CC, 4, {0xFBFC, 0xFBFD, 0xFBFE, 0xFBFF}}, /* FARSI YEH */
	{0x06D0, 4, {0xFBE4, 0xFBE5, 0xFBE6, 0xFBE7}}, /* E */
	{0x06D2, 2, {0xFBAE, 0xFBAF}}, /* YEH BARREE */
	{0x06D3, 2, {0xFBB0, 0xFBB1}}, /* YEH BARREE WITH HAMZA ABOVE */
};

#define ALEF					 0x0627
#define ALEFHAMZA			0x0623
#define ALEFHAMZABELOW 0x0625
#define ALEFMADDA			0x0622
#define LAM						0x0644
#define HAMZA					0x0621
#define TATWEEL				0x0640
#define ZWJ						0x200D

#define HAMZAABOVE	0x0654
#define HAMZABELOW	0x0655

#define WAWHAMZA		0x0624
#define YEHHAMZA		0x0626
#define WAW				 0x0648
#define ALEFMAKSURA 0x0649
#define YEH				 0x064A
#define FARSIYEH		0x06CC

#define SHADDA			0x0651
#define KASRA			 0x0650
#define FATHA			 0x064E
#define DAMMA			 0x064F
#define MADDA			 0x0653

#define LAM_ALEF					 0xFEFB
#define LAM_ALEFHAMZA			0xFEF7
#define LAM_ALEFHAMZABELOW 0xFEF9
#define LAM_ALEFMADDA			0xFEF5

static short
shapecount (unsigned long s)
{
  int l, r, m;
  if ((s >= 0x0621) && (s <= 0x06D3))
    {
      l = 0;
      r = sizeof (chartable) / sizeof (shapestruct);
      while (l <= r)
        {
          m = (l + r) / 2;
          if (s == chartable[m].basechar)
            {
              return chartable[m].count;
            }
          else if (s < chartable[m].basechar)
            {
              r = m - 1;
            }
          else
            {
              l = m + 1;
            }
        }
    }
  else if (s == ZWJ)
    {
      return 4;
    }
  return 1;
}

static unsigned long
charshape (unsigned long s, int which)
/* which 0=isolated 1=final 2=initial 3=medial */
{
	int l, r, m;
	if ((s >= 0x0621) && (s <= 0x06D3))
		{
			l = 0;
			r = sizeof (chartable) / sizeof (shapestruct);
			while (l <= r)
				{
					m = (l + r) / 2;
					if (s == chartable[m].basechar)
						{
							return chartable[m].charshape[which];
						}
					else if (s < chartable[m].basechar)
						{
							r = m - 1;
						}
					else
						{
							l = m + 1;
						}
				}
		}
	else if ((s >= 0xFEF5) && (s <= 0xFEFB))
		{													 /* Lam+Alef */
			return s + which;
		}

	return s;
}

void
shape (std::vector<unsigned long> &string, const std::vector<unsigned long> &text)
{
	string.reserve(text.size());

	charstruct oldchar, curchar;
	int which;
	unsigned long nextletter;

	charstruct_init (&oldchar);
	charstruct_init (&curchar);
	
	for (std::vector<unsigned long>::const_iterator i(text.begin());
			i != text.end(); ++i)
	{
		nextletter = *i;
		int nc = shapecount (nextletter);
		
		if (nc == 1)
			which = 0;				/* final or isolated */
		else
			which = 2;				/* medial or initial */
		if (connects_to_left (oldchar))
			which++;
		which = which % (curchar.numshapes);
		curchar.basechar = charshape (curchar.basechar, which);
			/* get rid of oldchar */
		if (oldchar.basechar)
			string.push_back(oldchar.basechar);
		oldchar = curchar;		/* new values in oldchar */

			/* init new curchar */
		charstruct_init (&curchar);
		curchar.basechar = nextletter;
		curchar.numshapes = nc;
	}

	/* Handle last char */
	if (connects_to_left (oldchar))
		which = 1;
	else
		which = 0;
	which = which % (curchar.numshapes);
	curchar.basechar = charshape (curchar.basechar, which);

	if (oldchar.basechar != 0)
		string.push_back(oldchar.basechar);
	if (curchar.basechar != 0)
		string.push_back(curchar.basechar);
}