95
|
1 /* the original code was taken from wget-1.10.2 */
|
|
2
|
|
3 #include <stdio.h>
|
279
|
4 #include <stdlib.h>
|
95
|
5 #include <string.h>
|
|
6 #include <glib.h>
|
|
7 #include <ctype.h>
|
|
8 #include "urlencode.h"
|
|
9
|
|
/* Per-character classification bits stored in urlchr_table.  The two
   values are distinct bits so they can be OR-ed together and tested
   with a mask.  */
enum {
  /* Characters reserved by RFC 1738, plus "$" and ",".  */
  urlchr_reserved = 1 << 0,

  /* Characters unsafe per RFC 1738, plus all non-printable ones.  */
  urlchr_unsafe = 1 << 1
};
|
|
17
|
|
/* Non-zero when character C has any of the MASK bits set in
   urlchr_table.  C is converted to unsigned char so that bytes >= 0x80
   index the table correctly even where plain char is signed.  */
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
#define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
#define URL_UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)

/* Upper-case hexadecimal digit for the value X, which must be 0..15.  */
#define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[x] + 0)

/* isxdigit() wrapper that is safe to call with a plain (signed) char.  */
#define ISXDIGIT(x) (isxdigit((unsigned char)(x)))

/* Numeric value of the hexadecimal digit H.  The result is only
   meaningful when ISXDIGIT (H) holds.  The argument of toupper() is
   converted to unsigned char because passing a negative value other
   than EOF to a <ctype.h> function is undefined behavior.  H is
   evaluated more than once, so it must not have side effects.  */
#define XDIGIT_TO_NUM(h) \
  ((h) < 'A' ? (h) - '0' : toupper ((unsigned char)(h)) - 'A' + 10)

/* Value of the byte written as the two hexadecimal digits H1 H2.  */
#define X2DIGITS_TO_NUM(h1, h2) \
  ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2))
|
|
25
|
|
26 /* Shorthands for the table: */
|
|
27 #define R urlchr_reserved
|
|
28 #define U urlchr_unsafe
|
|
29 #define RU R|U
|
|
30
|
|
31 static const unsigned char urlchr_table[256] =
|
|
32 {
|
|
33 U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
|
|
34 U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
|
|
35 U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
|
|
36 U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */
|
|
37 U, 0, U, RU, R, U, R, 0, /* SP ! " # $ % & ' */
|
|
38 0, 0, 0, R, R, 0, 0, R, /* ( ) * + , - . / */
|
|
39 0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
|
|
40 0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */
|
|
41 RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
|
|
42 0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
|
|
43 0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
|
|
44 0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */
|
|
45 U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
|
|
46 0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
|
|
47 0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
|
|
48 0, 0, 0, U, U, U, 0, U, /* x y z { | } ~ DEL */
|
|
49
|
|
50 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
51 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
52 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
53 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
54
|
|
55 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
56 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
57 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
58 U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
|
59 };
|
|
60 #undef R
|
|
61 #undef U
|
|
62 #undef RU
|
|
63
|
|
/* Numeric value (0..15) of the hexadecimal digit CH, or -1 if CH is not
   a hexadecimal digit.  The NUL terminator is therefore reported as -1,
   which lets callers probe one character past a valid digit safely.  */
static int
url_hex_value (char ch)
{
  if (ch >= '0' && ch <= '9')
    return ch - '0';
  if (ch >= 'a' && ch <= 'f')
    return ch - 'a' + 10;
  if (ch >= 'A' && ch <= 'F')
    return ch - 'A' + 10;
  return -1;
}

/* URL-unescape the string S.

   Every sequence "%HH", where HH are two hexadecimal digits, is
   replaced by the byte those digits encode.  A '%' that is not followed
   by two hexadecimal digits is kept literally.  "%00" is kept literally
   as well, because a NUL byte cannot be stored in a C string without
   effectively truncating it.

   S itself is never modified.  The result is a newly allocated string
   that the caller must release with free().  Returns NULL if S is NULL
   or if memory cannot be allocated.  */

char *
xspf_url_decode (const char *s)
{
  const char *in;
  char *decoded, *out;

  if (s == NULL)
    return NULL;

  /* Decoding never lengthens the string, so strlen (s) + 1 bytes are
     always enough for the result.  */
  decoded = malloc (strlen (s) + 1);
  if (decoded == NULL)
    return NULL;

  out = decoded;
  for (in = s; *in != '\0'; in++)
    {
      if (*in == '%')
        {
          /* in[2] is inspected only when in[1] is a hex digit, i.e. not
             the terminator, so the read stays inside the string.  */
          int hi = url_hex_value (in[1]);
          int lo = (hi >= 0) ? url_hex_value (in[2]) : -1;

          if (hi >= 0 && lo >= 0 && (hi != 0 || lo != 0))
            {
              *out++ = (char) ((hi << 4) | lo);
              in += 2;          /* skip the two digits just consumed */
              continue;
            }
        }

      /* Ordinary character, malformed escape, or "%00": copy verbatim.  */
      *out++ = *in;
    }
  *out = '\0';

  return decoded;
}
|
|
105
|
|
/* The core of the url_escape_* functions.  Returns a copy of S in which
   every character whose urlchr_table entry matches MASK is replaced by
   "%XX", XX being the character's value as two upper-case hexadecimal
   digits.

   If ALLOW_PASSTHROUGH is non-zero and no character of S needs
   escaping, S itself is returned (cast to non-const) and nothing is
   allocated; the caller must then not free the result independently of
   S.  If ALLOW_PASSTHROUGH is zero, a freshly allocated string is
   returned in all cases and must be released with free().

   Returns NULL if S is NULL, if the escaped length would not fit in a
   size_t, or if memory cannot be allocated.  */

static char *
url_escape_1 (const char *s, unsigned char mask, int allow_passthrough)
{
  const char *src;
  char *dst, *escaped;
  size_t len;
  size_t n_escaped = 0;

  if (s == NULL)
    return NULL;

  /* First pass: measure S and count the characters to be quoted.  */
  for (src = s; *src; src++)
    if (urlchr_test (*src, mask))
      n_escaped++;
  len = (size_t) (src - s);

  if (n_escaped == 0 && allow_passthrough)
    return (char *) s;

  /* Each quoted character grows by two bytes ("c" -> "%XX").  Refuse
     inputs for which len + 2 * n_escaped + 1 would wrap around.  */
  if (n_escaped > ((size_t) -1 - len - 1) / 2)
    return NULL;

  escaped = malloc (len + 2 * n_escaped + 1);
  if (escaped == NULL)
    return NULL;

  /* Second pass: copy S, expanding the characters that match MASK.
     It visits exactly the characters counted above, so it writes
     exactly len + 2 * n_escaped bytes before the terminator.  */
  dst = escaped;
  for (src = s; *src; src++)
    {
      unsigned char c = (unsigned char) *src;

      if (urlchr_test (c, mask))
        {
          *dst++ = '%';
          *dst++ = XNUM_TO_DIGIT (c >> 4);
          *dst++ = XNUM_TO_DIGIT (c & 0xf);
        }
      else
        *dst++ = *src;
    }
  *dst = '\0';

  return escaped;
}
|
|
151
|
|
152 /* URL-escape the unsafe characters (see urlchr_table) in a given
|
|
153 string, returning a freshly allocated string. */
|
|
154
|
|
155 char *
|
|
156 xspf_url_encode (const char *s)
|
|
157 {
|
|
158 return url_escape_1 (s, urlchr_unsafe, 0);
|
|
159 }
|