95
|
1 /* the original code was taken from wget-1.10.2 */
|
|
2
|
|
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <glib.h>

#include "urlencode.h"
|
|
8
|
|
/* Classification bits stored in urlchr_table.  Each constant is a
   distinct bit so that a table entry can carry both at once. */
enum
{
  /* Characters reserved by RFC 1738, plus "$" and ",". */
  urlchr_reserved = 1 << 0,

  /* Characters RFC 1738 calls unsafe, plus non-printable bytes. */
  urlchr_unsafe = 1 << 1
};
|
|
16
|
|
/* Table lookups: non-zero when the urlchr_table entry for the byte CH
   has at least one bit of MASK set.  CH is converted to unsigned char
   first so that bytes >= 0x80 index the table correctly. */
#define urlchr_test(ch, mask) (urlchr_table[(unsigned char)(ch)] & (mask))
#define URL_RESERVED_CHAR(ch) urlchr_test(ch, urlchr_reserved)
#define URL_UNSAFE_CHAR(ch) urlchr_test(ch, urlchr_unsafe)

/* Hexadecimal helpers.

   ISXDIGIT      - isxdigit() that is safe to call with a plain char.
   XNUM_TO_DIGIT - upper-case hex digit for a value in 0..15; the
                   trailing "+ 0" turns the result into an int rvalue.
   XDIGIT_TO_NUM - value of one hex digit, either case.  The argument
                   is evaluated more than once and must already be a
                   valid hex digit.
   X2DIGITS_TO_NUM - value of the two-digit hex number HI LO. */
#define ISXDIGIT(ch) (isxdigit((unsigned char)(ch)))
#define XNUM_TO_DIGIT(n) ("0123456789ABCDEF"[n] + 0)
#define XDIGIT_TO_NUM(d) ((d) >= 'A' ? toupper (d) - 'A' + 10 : (d) - '0')
#define X2DIGITS_TO_NUM(hi, lo) ((XDIGIT_TO_NUM (hi) << 4) + XDIGIT_TO_NUM (lo))
|
|
24
|
|
25 /* Shorthands for the table: */
|
|
26 #define R urlchr_reserved
|
|
27 #define U urlchr_unsafe
|
|
28 #define RU R|U
|
|
29
|
|
30 static const unsigned char urlchr_table[256] =
|
|
31 {
|
|
32 U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
|
|
33 U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
|
|
34 U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
|
|
35 U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */
|
|
36 U, 0, U, RU, R, U, R, 0, /* SP ! " # $ % & ' */
|
|
37 0, 0, 0, R, R, 0, 0, R, /* ( ) * + , - . / */
|
|
38 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
|
|
39 0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */
|
|
40 RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
|
|
41 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
|
|
42 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
|
|
43 0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */
|
|
44 U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
|
|
45 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
|
|
46 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
|
|
47 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */
|
|
48
|
|
49 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
50 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
51 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
52 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
53
|
|
54 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
55 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
56 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
57 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
58 };
|
|
59 #undef R
|
|
60 #undef U
|
|
61 #undef RU
|
|
62
|
|
/* Return the value (0..15) of the hexadecimal digit C.  C must already
   have been validated with isxdigit(). */
static int
xspf_hex_digit_value (unsigned char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  return toupper (c) - 'A' + 10;
}

/* URL-unescape the string S into a newly allocated string.

   Every sequence "%HH", where HH are two hexadecimal digits, is
   replaced by the byte with that hexadecimal value.  A '%' that is not
   followed by two hexadecimal digits is copied literally, and so is
   "%00", because a NUL byte cannot be stored in a C string without
   truncating it.

   S itself is never modified.  The result is allocated with malloc()
   and must be released by the caller with free().  Returns NULL when S
   is NULL or when memory cannot be allocated. */

char *
xspf_url_decode (const char *s)
{
  char *decoded;
  char *out;

  if (s == NULL)
    return NULL;

  /* Decoding never lengthens the string, so the input length is a
     sufficient bound for the output buffer. */
  decoded = malloc (strlen (s) + 1);
  if (decoded == NULL)
    return NULL;

  out = decoded;
  while (*s != '\0')
    {
      /* isxdigit ('\0') is false, so the short-circuit evaluation below
         never reads past the terminating NUL. */
      if (s[0] == '%'
          && isxdigit ((unsigned char) s[1])
          && isxdigit ((unsigned char) s[2]))
        {
          int value = (xspf_hex_digit_value ((unsigned char) s[1]) << 4)
                      + xspf_hex_digit_value ((unsigned char) s[2]);

          if (value != 0)
            {
              *out++ = (char) value;
              s += 3;
              continue;
            }
          /* "%00": fall through and copy the '%' literally. */
        }

      *out++ = *s++;
    }
  *out = '\0';

  return decoded;
}
|
|
104
|
|
/* The core of the url_escape_* functions.  Replaces every character of
   S whose urlchr_table entry matches MASK with "%HH", where HH is the
   upper-case hexadecimal value of the byte.

   If ALLOW_PASSTHROUGH is non-zero and S contains no matching
   character, S itself is returned (with its const qualifier cast
   away) and nothing is allocated.  In every other case the result is
   a string allocated with malloc() that the caller releases with
   free().

   Returns NULL when S is NULL, when the escaped length would not fit
   in a size_t, or when memory cannot be allocated. */

static char *
url_escape_1 (const char *s, unsigned char mask, int allow_passthrough)
{
  static const char hex_digits[] = "0123456789ABCDEF";
  const char *src;
  char *escaped;
  char *dst;
  size_t length = 0;
  size_t to_escape = 0;
  size_t escaped_length;

  if (s == NULL)
    return NULL;

  /* First pass: measure the input and count what needs quoting. */
  for (src = s; *src != '\0'; src++)
    {
      length++;
      if (urlchr_test (*src, mask))
        to_escape++;
    }

  if (to_escape == 0 && allow_passthrough)
    return (char *) s;

  /* Each quoted character grows by two bytes (the hex digits); refuse
     inputs whose escaped size, plus the terminator, would wrap. */
  if (to_escape > (SIZE_MAX - 1 - length) / 2)
    return NULL;

  escaped_length = length + 2 * to_escape;
  escaped = malloc (escaped_length + 1);
  if (escaped == NULL)
    return NULL;

  /* Second pass: copy, quoting the characters that match the mask. */
  dst = escaped;
  for (src = s; *src != '\0'; src++)
    {
      unsigned char c = (unsigned char) *src;

      if (urlchr_test (c, mask))
        {
          *dst++ = '%';
          *dst++ = hex_digits[c >> 4];
          *dst++ = hex_digits[c & 0xf];
        }
      else
        *dst++ = *src;
    }

  /* Internal consistency check: both passes must agree on the size. */
  assert ((size_t) (dst - escaped) == escaped_length);
  *dst = '\0';

  return escaped;
}
|
|
150
|
|
151 /* URL-escape the unsafe characters (see urlchr_table) in a given
|
|
152 string, returning a freshly allocated string. */
|
|
153
|
|
154 char *
|
|
155 xspf_url_encode (const char *s)
|
|
156 {
|
|
157 return url_escape_1 (s, urlchr_unsafe, 0);
|
|
158 }
|