9268
|
1 /*
|
|
2 * nmrtf.c
|
|
3 *
|
|
4 * Copyright (c) 2004 Novell, Inc. All Rights Reserved.
|
|
5 *
|
|
6 * This program is free software; you can redistribute it and/or modify
|
|
7 * it under the terms of the GNU General Public License as published by
|
|
8 * the Free Software Foundation; version 2 of the License.
|
|
9 *
|
|
10 * This program is distributed in the hope that it will be useful,
|
|
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 * GNU General Public License for more details.
|
|
14 *
|
|
15 * You should have received a copy of the GNU General Public License
|
|
16 * along with this program; if not, write to the Free Software
|
|
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
18 *
|
|
19 */
|
|
20
|
|
21 /* This code was adapted from the sample RTF reader found here:
|
|
22 * http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp
|
|
23 */
|
|
24
|
|
25 #include <glib.h>
|
|
26 #include <stdlib.h>
|
|
27 #include <stdio.h>
|
|
28 #include <stddef.h>
|
|
29 #include <ctype.h>
|
|
30 #include <string.h>
|
|
31 #include "nmrtf.h"
|
|
32 #include "debug.h"
|
|
33
|
|
/*
 * Status codes returned by the internal RTF parser routines.
 * NMRTF_OK (zero) means success; every other value is a failure.
 */
#define NMRTF_OK               0	/* Everything's fine! */
#define NMRTF_STACK_UNDERFLOW  1	/* Unmatched '}' */
#define NMRTF_STACK_OVERFLOW   2	/* Too many '{' -- memory exhausted */
#define NMRTF_UNMATCHED_BRACE  3	/* RTF ended during an open group. */
#define NMRTF_INVALID_HEX      4	/* invalid hex character found in data */
#define NMRTF_BAD_TABLE        5	/* RTF table (sym or prop) invalid */
#define NMRTF_ASSERTION        6	/* Assertion failure */
#define NMRTF_EOF              7	/* End of file reached while reading RTF */
#define NMRTF_CONVERT_ERROR    8	/* Error converting text */

/* Group nesting limit checked by rtf_parse() before opening a new group */
#define NMRTF_MAX_DEPTH 256
|
|
46
|
|
/*
 * Parser states.  One enumeration serves both state variables kept in
 * the context: the destination state (rds) and the internal state (ris).
 */
typedef enum
{
	NMRTF_STATE_NORMAL,	/* plain text / no special handling */
	NMRTF_STATE_SKIP,	/* destination whose characters are discarded */
	NMRTF_STATE_FONTTABLE,	/* destination selected by \fonttbl */
	NMRTF_STATE_BIN,	/* internal: consuming raw bytes after \bin */
	NMRTF_STATE_HEX		/* internal: reading the hex digits after \' */
} NMRtfState; /* Rtf State */
|
|
55
|
|
/* Character properties the parser tracks (see rtf_apply_property) */
typedef enum
{
	NMRTF_PROP_FONT_IDX,		/* \f: current font index */
	NMRTF_PROP_FONT_CHARSET,	/* \fcharset: charset of the current font */
	NMRTF_PROP_MAX			/* number of properties; not a real property */
} NMRtfProperty;
|
|
63
|
|
/* Controls that need dedicated handling in rtf_dispatch_special() */
typedef enum
{
	NMRTF_SPECIAL_BIN,	/* \bin */
	NMRTF_SPECIAL_HEX,	/* \' */
	NMRTF_SPECIAL_UNICODE,	/* \u */
	NMRTF_SPECIAL_SKIP	/* \* */
} NMRtfSpecialKwd;
|
|
71
|
|
/* Destinations a keyword can switch the parser to */
typedef enum
{
	NMRTF_DEST_FONTTABLE,	/* collect font table entries */
	NMRTF_DEST_SKIP		/* discard the destination's contents */
} NMRtfDestinationType;
|
|
77
|
|
/* How the `action' field of a symbol table entry is interpreted */
typedef enum
{
	NMRTF_KWD_CHAR,	/* action is a character to emit */
	NMRTF_KWD_DEST,	/* action is an NMRtfDestinationType */
	NMRTF_KWD_PROP,	/* action is an NMRtfProperty */
	NMRTF_KWD_SPEC	/* action is an NMRtfSpecialKwd */
} NMRtfKeywordType;
|
|
85
|
|
/*
 * Current character properties.
 *
 * Only the font is tracked for now; bold, italic, underline, etc.
 * should be added here.
 */
typedef struct _NMRTFCharProp
{
	int font_idx;		/* value of the last \f control */
	int font_charset;	/* value of the last \fcharset control */
} NMRtfCharProp;
|
|
95
|
|
96 typedef struct _NMRtfStateSave
|
|
97 {
|
|
98 NMRtfCharProp chp;
|
|
99 NMRtfState rds;
|
|
100 NMRtfState ris;
|
|
101 } NMRtfStateSave;
|
|
102
|
|
103 typedef struct _NMRtfSymbol
|
|
104 {
|
|
105 char *keyword; /* RTF keyword */
|
|
106 int default_val; /* default value to use */
|
|
107 gboolean pass_default; /* true to use default value from this table */
|
|
108 NMRtfKeywordType kwd_type; /* the type of the keyword */
|
|
109 int action; /* property type if the keyword represents a property */
|
|
110 /* destination type if the keyword represents a destination */
|
|
111 /* character to print if the keyword represents a character */
|
|
112 } NMRtfSymbol;
|
|
113
|
|
114
|
|
/* One font table entry, created by rtf_add_font_entry() */
typedef struct _NMRtfFont
{
	int number;	/* font number the entry was registered with */
	char *name;	/* heap-allocated copy of the font name */
	int charset;	/* charset value used to pick a text encoding */
} NMRtfFont;
|
|
121
|
|
122 /* RTF Context */
|
|
123 struct _NMRtfContext
|
|
124 {
|
|
125 NMRtfState rds; /* destination state */
|
|
126 NMRtfState ris; /* internal state */
|
|
127 NMRtfCharProp chp; /* current character properties (ie. font, bold, italic, etc.) */
|
|
128 GSList *font_table; /* the font table */
|
|
129 GSList *saved; /* saved state stack */
|
|
130 int param; /* numeric parameter for the current keyword */
|
|
131 long bytes_to_skip; /* number of bytes to skip (after encountering \bin) */
|
|
132 int depth; /* how many groups deep are we */
|
|
133 gboolean skip_unknown; /* if true, skip any unknown destinations (this is set after encountering '\*') */
|
|
134 char *input; /* input string */
|
|
135 char nextch; /* next char in input */
|
|
136 GString *ansi; /* Temporary ansi text, will be convert/flushed to the output string */
|
|
137 GString *output; /* The plain text UTF8 string */
|
|
138 };
|
|
139
|
|
140 static int rtf_parse(NMRtfContext *ctx);
|
|
141 static int rtf_push_state(NMRtfContext *ctx);
|
|
142 static int rtf_pop_state(NMRtfContext *ctx);
|
|
143 static NMRtfFont *rtf_get_font(NMRtfContext *ctx, int index);
|
|
144 static int rtf_get_char(NMRtfContext *ctx, guchar *ch);
|
|
145 static int rtf_unget_char(NMRtfContext *ctx, guchar ch);
|
|
146 static int rtf_flush_data(NMRtfContext *ctx);
|
|
147 static int rtf_parse_keyword(NMRtfContext *ctx);
|
|
148 static int rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set);
|
|
149 static int rtf_dispatch_char(NMRtfContext *ctx, guchar ch);
|
|
150 static int rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch);
|
|
151 static int rtf_print_char(NMRtfContext *ctx, guchar ch);
|
|
152 static int rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch);
|
|
153 static int rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType dest);
|
|
154 static int rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd special);
|
|
155 static int rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val);
|
|
156
|
|
157 /* RTF parser tables */
|
|
158
|
|
159 /* Keyword descriptions */
|
|
160 NMRtfSymbol rtf_symbols[] = {
|
|
161 /* keyword, default, pass_default, keyword_type, action */
|
|
162 {"fonttbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_FONTTABLE},
|
|
163 {"f", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_IDX},
|
|
164 {"fcharset", 0, FALSE, NMRTF_KWD_PROP, NMRTF_PROP_FONT_CHARSET},
|
|
165 {"par", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
|
|
166 {"line", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
|
|
167 {"\0x0a", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
|
|
168 {"\0x0d", 0, FALSE, NMRTF_KWD_CHAR, 0x0a},
|
|
169 {"tab", 0, FALSE, NMRTF_KWD_CHAR, 0x09},
|
|
170 {"\r", 0, FALSE, NMRTF_KWD_CHAR, '\r'},
|
|
171 {"\n", 0, FALSE, NMRTF_KWD_CHAR, '\n'},
|
|
172 {"ldblquote",0, FALSE, NMRTF_KWD_CHAR, '"'},
|
|
173 {"rdblquote",0, FALSE, NMRTF_KWD_CHAR, '"'},
|
|
174 {"{", 0, FALSE, NMRTF_KWD_CHAR, '{'},
|
|
175 {"}", 0, FALSE, NMRTF_KWD_CHAR, '}'},
|
|
176 {"\\", 0, FALSE, NMRTF_KWD_CHAR, '\\'},
|
|
177 {"bin", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_BIN},
|
|
178 {"*", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_SKIP},
|
|
179 {"'", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_HEX},
|
|
180 {"u", 0, FALSE, NMRTF_KWD_SPEC, NMRTF_SPECIAL_UNICODE},
|
|
181 {"colortbl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
182 {"author", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
183 {"buptim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
184 {"comment", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
185 {"creatim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
186 {"doccomm", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
187 {"footer", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
188 {"footerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
189 {"footerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
190 {"footerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
191 {"footnote", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
192 {"ftncn", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
193 {"ftnsep", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
194 {"ftnsepc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
195 {"header", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
196 {"headerf", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
197 {"headerl", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
198 {"headerr", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
199 {"info", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
200 {"keywords", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
201 {"operator", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
202 {"pict", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
203 {"printim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
204 {"private1", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
205 {"revtim", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
206 {"rxe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
207 {"stylesheet", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
208 {"subject", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
209 {"tc", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
210 {"title", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
211 {"txe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP},
|
|
212 {"xe", 0, FALSE, NMRTF_KWD_DEST, NMRTF_DEST_SKIP}
|
|
213 };
|
|
214 int table_size = sizeof(rtf_symbols) / sizeof(NMRtfSymbol);
|
|
215
|
|
216 NMRtfContext *
|
|
217 nm_rtf_init()
|
|
218 {
|
|
219 NMRtfContext *ctx = g_new0(NMRtfContext, 1);
|
|
220 ctx->nextch = -1;
|
|
221 ctx->ansi = g_string_new("");
|
|
222 ctx->output = g_string_new("");
|
|
223 return ctx;
|
|
224 }
|
|
225
|
|
226 char *
|
|
227 nm_rtf_strip_formatting(NMRtfContext *ctx, const char *input)
|
|
228 {
|
|
229 int status;
|
|
230
|
|
231 ctx->input = (char *)input;
|
|
232 status = rtf_parse(ctx);
|
|
233 if (status == NMRTF_OK)
|
|
234 return g_strdup(ctx->output->str);
|
|
235
|
|
236 gaim_debug_info("novell", "RTF parser failed with error code %d", status);
|
|
237 return NULL;
|
|
238 }
|
|
239
|
|
240 void
|
|
241 nm_rtf_deinit(NMRtfContext *ctx)
|
|
242 {
|
|
243 GSList *node;
|
|
244 NMRtfFont *font;
|
|
245 NMRtfStateSave *save;
|
|
246
|
|
247 if (ctx) {
|
|
248 for (node = ctx->font_table; node; node = node->next) {
|
|
249 font = node->data;
|
|
250 g_free(font->name);
|
|
251 g_free(font);
|
|
252 node->data = NULL;
|
|
253 }
|
|
254 g_slist_free(ctx->font_table);
|
|
255 for (node = ctx->saved; node; node = node->next) {
|
|
256 save = node->data;
|
|
257 g_free(save);
|
|
258 node->data = NULL;
|
|
259 }
|
|
260 g_slist_free(ctx->saved);
|
|
261 g_string_free(ctx->ansi, TRUE);
|
|
262 g_string_free(ctx->output, TRUE);
|
|
263 g_free(ctx);
|
|
264 }
|
|
265 }
|
|
266
|
|
267 static const char *
|
|
268 get_current_encoding(NMRtfContext *ctx)
|
|
269 {
|
|
270 NMRtfFont *font;
|
|
271
|
|
272 font = rtf_get_font(ctx, ctx->chp.font_idx);
|
|
273
|
|
274 switch (font->charset) {
|
|
275 case 0:
|
|
276 return "CP1252";
|
|
277 case 77:
|
|
278 return "MACINTOSH";
|
|
279 case 78:
|
|
280 return "SJIS";
|
|
281 case 128:
|
|
282 return "CP932";
|
|
283 case 129:
|
|
284 return "CP949";
|
|
285 case 130:
|
|
286 return "CP1361";
|
|
287 case 134:
|
|
288 return "CP936";
|
|
289 case 136:
|
|
290 return "CP950";
|
|
291 case 161:
|
|
292 return "CP1253";
|
|
293 case 162:
|
|
294 return "CP1254";
|
|
295 case 163:
|
|
296 return "CP1258";
|
|
297 case 181:
|
|
298 case 177:
|
|
299 return "CP1255";
|
|
300 case 178:
|
|
301 case 179:
|
|
302 case 180:
|
|
303 return "CP1256";
|
|
304 case 186:
|
|
305 return "CP1257";
|
|
306 case 204:
|
|
307 return "CP1251";
|
|
308 case 222:
|
|
309 return "CP874";
|
|
310 case 238:
|
|
311 return "CP1250";
|
|
312 case 254:
|
|
313 return "CP437";
|
|
314 default:
|
|
315 gaim_debug_info("novell", "Unhandled font charset %d\n", font->charset);
|
|
316 return "CP1252";
|
|
317 }
|
|
318 return "CP1252";
|
|
319 }
|
|
320
|
|
321
|
|
322 /*
|
|
323 * Add an entry to the font table
|
|
324 */
|
|
325 static int
|
|
326 rtf_add_font_entry(NMRtfContext *ctx, int number, const char *name, int charset)
|
|
327 {
|
|
328 NMRtfFont *font = g_new0(NMRtfFont, 1);
|
|
329
|
|
330 font->number = number;
|
|
331 font->name = g_strdup(name);
|
|
332 font->charset = charset;
|
|
333
|
|
334 gaim_debug_info("novell", "Adding font to table: #%d\t%s\t%d\n",
|
|
335 font->number, font->name, font->charset);
|
|
336
|
|
337 ctx->font_table = g_slist_append(ctx->font_table, font);
|
|
338
|
|
339 return NMRTF_OK;
|
|
340 }
|
|
341
|
|
342 /*
|
|
343 * Return the nth entry in the font table
|
|
344 */
|
|
345 static NMRtfFont *
|
|
346 rtf_get_font(NMRtfContext *ctx, int nth)
|
|
347 {
|
|
348 NMRtfFont *font;
|
|
349
|
|
350 font = g_slist_nth_data(ctx->font_table, nth);
|
|
351
|
|
352 return font;
|
|
353 }
|
|
354
|
|
355 /*
|
|
356 * Step 1:
|
|
357 * Isolate RTF keywords and send them to rtf_parse_keyword;
|
|
358 * Push and pop state at the start and end of RTF groups;
|
|
359 * Send text to rtf_dispatch_char for further processing.
|
|
360 */
|
|
361 static int
|
|
362 rtf_parse(NMRtfContext *ctx)
|
|
363 {
|
|
364 int status;
|
|
365 guchar ch;
|
|
366 guchar hex_byte = 0;
|
|
367 int hex_count = 2;
|
|
368 int len;
|
|
369
|
|
370 if (ctx->input == NULL)
|
|
371 return NMRTF_OK;
|
|
372
|
|
373 while (rtf_get_char(ctx, &ch) == NMRTF_OK) {
|
|
374 if (ctx->depth < 0)
|
|
375 return NMRTF_STACK_UNDERFLOW;
|
|
376
|
|
377 /* if we're parsing binary data, handle it directly */
|
|
378 if (ctx->ris == NMRTF_STATE_BIN) {
|
|
379 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK)
|
|
380 return status;
|
|
381 } else {
|
|
382 switch (ch) {
|
|
383 case '{':
|
|
384 if (ctx->depth > NMRTF_MAX_DEPTH)
|
|
385 return NMRTF_STACK_OVERFLOW;
|
|
386 rtf_flush_data(ctx);
|
|
387 if ((status = rtf_push_state(ctx)) != NMRTF_OK)
|
|
388 return status;
|
|
389 break;
|
|
390 case '}':
|
|
391 rtf_flush_data(ctx);
|
|
392
|
|
393 /* for some reason there is always an unwanted '\par' at the end */
|
|
394 if (ctx->rds == NMRTF_STATE_NORMAL) {
|
|
395 len = ctx->output->len;
|
|
396 if (ctx->output->str[len-1] == '\n')
|
|
397 ctx->output = g_string_truncate(ctx->output, len-1);
|
|
398 }
|
|
399
|
|
400 if ((status = rtf_pop_state(ctx)) != NMRTF_OK)
|
|
401 return status;
|
|
402
|
|
403 if (ctx->depth < 0)
|
|
404 return NMRTF_STACK_OVERFLOW;
|
|
405 break;
|
|
406 case '\\':
|
|
407 if ((status = rtf_parse_keyword(ctx)) != NMRTF_OK)
|
|
408 return status;
|
|
409 break;
|
|
410 case 0x0d:
|
|
411 case 0x0a: /* cr and lf are noise characters... */
|
|
412 break;
|
|
413 default:
|
|
414 if (ctx->ris == NMRTF_STATE_NORMAL) {
|
|
415 if ((status = rtf_dispatch_char(ctx, ch)) != NMRTF_OK)
|
|
416 return status;
|
|
417 } else { /* parsing a hex encoded character */
|
|
418 if (ctx->ris != NMRTF_STATE_HEX)
|
|
419 return NMRTF_ASSERTION;
|
|
420
|
|
421 hex_byte = hex_byte << 4;
|
|
422 if (isdigit(ch))
|
|
423 hex_byte += (char) ch - '0';
|
|
424 else {
|
|
425 if (islower(ch)) {
|
|
426 if (ch < 'a' || ch > 'f')
|
|
427 return NMRTF_INVALID_HEX;
|
|
428 hex_byte += (char) ch - 'a' + 10;
|
|
429 } else {
|
|
430 if (ch < 'A' || ch > 'F')
|
|
431 return NMRTF_INVALID_HEX;
|
|
432 hex_byte += (char) ch - 'A' + 10;
|
|
433 }
|
|
434 }
|
|
435 hex_count--;
|
|
436 if (hex_count == 0) {
|
|
437 if ((status = rtf_dispatch_char(ctx, hex_byte)) != NMRTF_OK)
|
|
438 return status;
|
|
439 hex_count = 2;
|
|
440 hex_byte = 0;
|
|
441 ctx->ris = NMRTF_STATE_NORMAL;
|
|
442 }
|
|
443 }
|
|
444 break;
|
|
445 }
|
|
446 }
|
|
447 }
|
|
448 if (ctx->depth < 0)
|
|
449 return NMRTF_STACK_OVERFLOW;
|
|
450 if (ctx->depth > 0)
|
|
451 return NMRTF_UNMATCHED_BRACE;
|
|
452 return NMRTF_OK;
|
|
453 }
|
|
454
|
|
455 /*
|
|
456 * Push the current state onto stack
|
|
457 */
|
|
458 static int
|
|
459 rtf_push_state(NMRtfContext *ctx)
|
|
460 {
|
|
461 NMRtfStateSave *save = g_new0(NMRtfStateSave, 1);
|
|
462 save->chp = ctx->chp;
|
|
463 save->rds = ctx->rds;
|
|
464 save->ris = ctx->ris;
|
|
465 ctx->saved = g_slist_prepend(ctx->saved, save);
|
|
466 ctx->ris = NMRTF_STATE_NORMAL;
|
|
467 (ctx->depth)++;
|
|
468 return NMRTF_OK;
|
|
469 }
|
|
470
|
|
471 /*
|
|
472 * Restore the state at the top of the stack
|
|
473 */
|
|
474 static int
|
|
475 rtf_pop_state(NMRtfContext *ctx)
|
|
476 {
|
|
477 NMRtfStateSave *save_old;
|
|
478 GSList *link_old;
|
|
479
|
|
480 if (ctx->saved == NULL)
|
|
481 return NMRTF_STACK_UNDERFLOW;
|
|
482
|
|
483 save_old = ctx->saved->data;
|
|
484 ctx->chp = save_old->chp;
|
|
485 ctx->rds = save_old->rds;
|
|
486 ctx->ris = save_old->ris;
|
|
487 (ctx->depth)--;
|
|
488
|
|
489 g_free(save_old);
|
|
490 link_old = ctx->saved;
|
|
491 ctx->saved = g_slist_remove_link(ctx->saved, link_old);
|
|
492 g_slist_free_1(link_old);
|
|
493 return NMRTF_OK;
|
|
494 }
|
|
495
|
|
496 /*
|
|
497 * Step 2:
|
|
498 * Get a control word (and its associated value) and
|
|
499 * dispatch the control.
|
|
500 */
|
|
501 static int
|
|
502 rtf_parse_keyword(NMRtfContext *ctx)
|
|
503 {
|
|
504 int status = NMRTF_OK;
|
|
505 guchar ch;
|
|
506 gboolean param_set = FALSE;
|
|
507 gboolean is_neg = FALSE;
|
|
508 int param = 0;
|
|
509 char keyword[30];
|
|
510 char parameter[20];
|
9804
|
511 int i;
|
9268
|
512
|
|
513 keyword[0] = '\0';
|
|
514 parameter[0] = '\0';
|
|
515 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK)
|
|
516 return status;
|
|
517
|
|
518 if (!isalpha(ch)) {
|
|
519 /* a control symbol; no delimiter. */
|
|
520 keyword[0] = (char) ch;
|
|
521 keyword[1] = '\0';
|
|
522 return rtf_dispatch_control(ctx, keyword, 0, param_set);
|
|
523 }
|
|
524
|
|
525 /* parse keyword */
|
9804
|
526 for (i = 0; isalpha(ch) && (i < sizeof(keyword) - 1); rtf_get_char(ctx, &ch)) {
|
|
527 keyword[i] = (char) ch;
|
|
528 i++;
|
9268
|
529 }
|
9804
|
530 keyword[i] = '\0';
|
9268
|
531
|
|
532 /* check for '-' indicated a negative parameter value */
|
|
533 if (ch == '-') {
|
|
534 is_neg = TRUE;
|
|
535 if ((status = rtf_get_char(ctx, &ch)) != NMRTF_OK)
|
|
536 return status;
|
|
537 }
|
|
538
|
|
539 /* check for numerical param */
|
|
540 if (isdigit(ch)) {
|
|
541
|
|
542 param_set = TRUE;
|
9804
|
543 for (i = 0; isdigit(ch) && (i < sizeof(parameter) - 1); rtf_get_char(ctx, &ch)) {
|
|
544 parameter[i] = (char) ch;
|
|
545 i++;
|
9268
|
546 }
|
9804
|
547 parameter[i] = '\0';
|
9268
|
548
|
|
549 ctx->param = param = atoi(parameter);
|
|
550 if (is_neg)
|
|
551 ctx->param = param = -param;
|
|
552 }
|
|
553
|
|
554 /* space after control is optional, put character back if it is not a space */
|
|
555 if (ch != ' ')
|
|
556 rtf_unget_char(ctx, ch);
|
|
557
|
|
558 return rtf_dispatch_control(ctx, keyword, param, param_set);
|
|
559 }
|
|
560
|
|
561 /*
|
|
562 * Route the character to the appropriate destination
|
|
563 */
|
|
564 static int
|
|
565 rtf_dispatch_char(NMRtfContext *ctx, guchar ch)
|
|
566 {
|
|
567 if (ctx->ris == NMRTF_STATE_BIN && --(ctx->bytes_to_skip) <= 0)
|
|
568 ctx->ris = NMRTF_STATE_NORMAL;
|
|
569
|
|
570 switch (ctx->rds) {
|
|
571 case NMRTF_STATE_SKIP:
|
|
572 return NMRTF_OK;
|
|
573 case NMRTF_STATE_NORMAL:
|
|
574 return rtf_print_char(ctx, ch);
|
|
575 case NMRTF_STATE_FONTTABLE:
|
|
576 if (ch == ';') {
|
|
577 rtf_add_font_entry(ctx, ctx->chp.font_idx,
|
|
578 ctx->ansi->str, ctx->chp.font_charset);
|
|
579 g_string_truncate(ctx->ansi, 0);
|
|
580 }
|
|
581 else {
|
|
582 return rtf_print_char(ctx, ch);
|
|
583 }
|
|
584 return NMRTF_OK;
|
|
585 default:
|
|
586 return NMRTF_OK;
|
|
587 }
|
|
588 }
|
|
589
|
|
590 /* Handle a unicode character */
|
|
591 static int
|
|
592 rtf_dispatch_unicode_char(NMRtfContext *ctx, gunichar ch)
|
|
593 {
|
|
594 switch (ctx->rds) {
|
|
595 case NMRTF_STATE_SKIP:
|
|
596 return NMRTF_OK;
|
|
597 case NMRTF_STATE_NORMAL:
|
|
598 case NMRTF_STATE_FONTTABLE:
|
|
599 return rtf_print_unicode_char(ctx, ch);
|
|
600 default:
|
|
601 return NMRTF_OK;
|
|
602 }
|
|
603 }
|
|
604
|
|
605 /*
|
|
606 * Output a character
|
|
607 */
|
|
608 static int
|
|
609 rtf_print_char(NMRtfContext *ctx, guchar ch)
|
|
610 {
|
|
611
|
|
612 ctx->ansi = g_string_append_c(ctx->ansi, ch);
|
|
613
|
|
614 return NMRTF_OK;
|
|
615 }
|
|
616
|
|
617 /*
|
|
618 * Output a unicode character
|
|
619 */
|
|
620 static int
|
|
621 rtf_print_unicode_char(NMRtfContext *ctx, gunichar ch)
|
|
622 {
|
|
623 char buf[7];
|
|
624 int num;
|
|
625
|
|
626 /* convert and flush the ansi buffer to the utf8 buffer */
|
|
627 rtf_flush_data(ctx);
|
|
628
|
|
629 /* convert the unicode character to utf8 and add directly to the output buffer */
|
|
630 num = g_unichar_to_utf8((gunichar) ch, buf);
|
|
631 buf[num] = 0;
|
|
632 gaim_debug_info("novell", "converted unichar 0x%X to utf8 char %s\n", ch, buf);
|
|
633
|
|
634 ctx->output = g_string_append(ctx->output, buf);
|
|
635 return NMRTF_OK;
|
|
636 }
|
|
637
|
|
638 /*
|
|
639 * Flush the output text
|
|
640 */
|
|
641 static int
|
|
642 rtf_flush_data(NMRtfContext *ctx)
|
|
643 {
|
|
644 int status = NMRTF_OK;
|
|
645 char *conv_data = NULL;
|
|
646 const char *enc = NULL;
|
|
647 GError *gerror = NULL;
|
|
648
|
|
649 if (ctx->rds == NMRTF_STATE_NORMAL && ctx->ansi->len > 0) {
|
|
650 enc = get_current_encoding(ctx);
|
|
651 conv_data = g_convert(ctx->ansi->str, ctx->ansi->len, "UTF-8", enc,
|
|
652 NULL, NULL, &gerror);
|
|
653 if (conv_data) {
|
|
654 ctx->output = g_string_append(ctx->output, conv_data);
|
|
655 g_free(conv_data);
|
|
656 ctx->ansi = g_string_truncate(ctx->ansi, 0);
|
|
657 } else {
|
|
658 status = NMRTF_CONVERT_ERROR;
|
|
659 gaim_debug_info("novell", "failed to convert data! error code = %d msg = %s\n",
|
|
660 gerror->code, gerror->message);
|
|
661 g_free(gerror);
|
|
662 }
|
|
663 }
|
|
664
|
|
665 return status;
|
|
666 }
|
|
667
|
|
668 /*
|
|
669 * Handle a property change
|
|
670 */
|
|
671 static int
|
|
672 rtf_apply_property(NMRtfContext *ctx, NMRtfProperty prop, int val)
|
|
673 {
|
|
674 if (ctx->rds == NMRTF_STATE_SKIP) /* If we're skipping text, */
|
|
675 return NMRTF_OK; /* don't do anything. */
|
|
676
|
|
677 /* Need to flush any temporary data before a property change*/
|
|
678 rtf_flush_data(ctx);
|
|
679
|
|
680 switch (prop) {
|
|
681 case NMRTF_PROP_FONT_IDX:
|
|
682 ctx->chp.font_idx = val;
|
|
683 break;
|
|
684 case NMRTF_PROP_FONT_CHARSET:
|
|
685 ctx->chp.font_charset = val;
|
|
686 break;
|
|
687 default:
|
|
688 return NMRTF_BAD_TABLE;
|
|
689 }
|
|
690
|
|
691 return NMRTF_OK;
|
|
692 }
|
|
693
|
|
694 /*
|
|
695 * Step 3.
|
|
696 * Search the table for keyword and evaluate it appropriately.
|
|
697 *
|
|
698 * Inputs:
|
|
699 * keyword: The RTF control to evaluate.
|
|
700 * param: The parameter of the RTF control.
|
|
701 * param_set: TRUE if the control had a parameter; (that is, if param is valid)
|
|
702 * FALSE if it did not.
|
|
703 */
|
|
704 static int
|
|
705 rtf_dispatch_control(NMRtfContext *ctx, char *keyword, int param, gboolean param_set)
|
|
706 {
|
|
707 int idx;
|
|
708
|
|
709 for (idx = 0; idx < table_size; idx++) {
|
|
710 if (strcmp(keyword, rtf_symbols[idx].keyword) == 0)
|
|
711 break;
|
|
712 }
|
|
713
|
|
714 if (idx == table_size) {
|
|
715 if (ctx->skip_unknown)
|
|
716 ctx->rds = NMRTF_STATE_SKIP;
|
|
717 ctx->skip_unknown = FALSE;
|
|
718 return NMRTF_OK;
|
|
719 }
|
|
720
|
|
721 /* found it! use kwd_type and action to determine what to do with it. */
|
|
722 ctx->skip_unknown = FALSE;
|
|
723 switch (rtf_symbols[idx].kwd_type) {
|
|
724 case NMRTF_KWD_PROP:
|
|
725 if (rtf_symbols[idx].pass_default || !param_set)
|
|
726 param = rtf_symbols[idx].default_val;
|
|
727 return rtf_apply_property(ctx, rtf_symbols[idx].action, param);
|
|
728 case NMRTF_KWD_CHAR:
|
|
729 return rtf_dispatch_char(ctx, rtf_symbols[idx].action);
|
|
730 case NMRTF_KWD_DEST:
|
|
731 return rtf_change_destination(ctx, rtf_symbols[idx].action);
|
|
732 case NMRTF_KWD_SPEC:
|
|
733 return rtf_dispatch_special(ctx, rtf_symbols[idx].action);
|
|
734 default:
|
|
735 return NMRTF_BAD_TABLE;
|
|
736 }
|
|
737 return NMRTF_BAD_TABLE;
|
|
738 }
|
|
739
|
|
740 /*
|
|
741 * Change to the destination specified.
|
|
742 */
|
|
743 static int
|
|
744 rtf_change_destination(NMRtfContext *ctx, NMRtfDestinationType type)
|
|
745 {
|
|
746 /* if we're skipping text, don't do anything */
|
|
747 if (ctx->rds == NMRTF_STATE_SKIP)
|
|
748 return NMRTF_OK;
|
|
749
|
|
750 switch (type) {
|
|
751 case NMRTF_DEST_FONTTABLE:
|
|
752 ctx->rds = NMRTF_STATE_FONTTABLE;
|
|
753 g_string_truncate(ctx->ansi, 0);
|
|
754 break;
|
|
755 default:
|
|
756 ctx->rds = NMRTF_STATE_SKIP; /* when in doubt, skip it... */
|
|
757 break;
|
|
758 }
|
|
759 return NMRTF_OK;
|
|
760 }
|
|
761
|
|
762 /*
|
|
763 * Dispatch an RTF control that needs special processing
|
|
764 */
|
|
765 static int
|
|
766 rtf_dispatch_special(NMRtfContext *ctx, NMRtfSpecialKwd type)
|
|
767 {
|
|
768 int status = NMRTF_OK;
|
|
769 guchar ch;
|
|
770
|
|
771 if (ctx->rds == NMRTF_STATE_SKIP && type != NMRTF_SPECIAL_BIN) /* if we're skipping, and it's not */
|
|
772 return NMRTF_OK; /* the \bin keyword, ignore it. */
|
|
773
|
|
774 switch (type) {
|
|
775 case NMRTF_SPECIAL_BIN:
|
|
776 ctx->ris = NMRTF_STATE_BIN;
|
|
777 ctx->bytes_to_skip = ctx->param;
|
|
778 break;
|
|
779 case NMRTF_SPECIAL_SKIP:
|
|
780 ctx->skip_unknown = TRUE;
|
|
781 break;
|
|
782 case NMRTF_SPECIAL_HEX:
|
|
783 ctx->ris = NMRTF_STATE_HEX;
|
|
784 break;
|
|
785 case NMRTF_SPECIAL_UNICODE:
|
|
786 gaim_debug_info("novell", "parsing unichar\n");
|
|
787 status = rtf_dispatch_unicode_char(ctx, ctx->param);
|
|
788 /* Skip next char */
|
|
789 if (status == NMRTF_OK)
|
|
790 status = rtf_get_char(ctx, &ch);
|
|
791 break;
|
|
792 default:
|
|
793 status = NMRTF_BAD_TABLE;
|
|
794 break;
|
|
795 }
|
|
796
|
|
797 return status;
|
|
798 }
|
|
799
|
|
800 /*
|
|
801 * Get the next character from the input stream
|
|
802 */
|
|
803 static int
|
|
804 rtf_get_char(NMRtfContext *ctx, guchar *ch)
|
|
805 {
|
|
806 if (ctx->nextch >= 0) {
|
|
807 *ch = ctx->nextch;
|
|
808 ctx->nextch = -1;
|
|
809 }
|
|
810 else {
|
|
811 *ch = *(ctx->input);
|
|
812 ctx->input++;
|
|
813 }
|
|
814
|
|
815 if (*ch)
|
|
816 return NMRTF_OK;
|
|
817 else
|
|
818 return NMRTF_EOF;
|
|
819 }
|
|
820
|
|
821 /*
|
|
822 * Move a character back into the input stream
|
|
823 */
|
|
824 static int
|
|
825 rtf_unget_char(NMRtfContext *ctx, guchar ch)
|
|
826 {
|
|
827 ctx->nextch = ch;
|
|
828 return NMRTF_OK;
|
|
829 }
|