comparison src/format_canon.c @ 59:57f6da2510d9

Sun Jun 12 19:25:26 2005 John Ellis <johne@verizon.net> * format_canon.[ch]: Reimplement canon raw parser to use convenience functions from exif.c, also separated parsers into one per file type. For the cr2 format also verify compression type in tiff field 0x0103. * format_raw.c: Add FIXME comment noting current shortcomings.
author gqview
date Sun, 12 Jun 2005 23:45:45 +0000
parents a8c9992320f4
children 71e1ebee420e
comparison
equal deleted inserted replaced
58:df73b94154e4 59:57f6da2510d9
38 *----------------------------------------------------------------------------- 38 *-----------------------------------------------------------------------------
39 * Raw (CR2, CRW) embedded jpeg extraction for Canon 39 * Raw (CR2, CRW) embedded jpeg extraction for Canon
40 *----------------------------------------------------------------------------- 40 *-----------------------------------------------------------------------------
41 */ 41 */
42 42
43 43 static gint canon_cr2_tiff_entry(unsigned char *data, const guint len, guint offset, ExifByteOrder bo,
44 #if 0 44 guint *image_offset, gint *jpeg_encoding)
45 #define CANON_DEBUG
46 #endif
47
48 #ifdef CANON_DEBUG
49 int canonEnableDebug = 0;
50 /* This should be really a stack, but I am too lazy to implement */
51 #define DEBUG_ENABLE (canonEnableDebug = 0)
52 #define DEBUG_DISABLE (canonEnableDebug = 1)
53 /* It would be nice if these functions indented according to depth in the stack, but I am too lazy to implement */
54
55 #define DEBUG_ENTRY(a) (canonEnableDebug || fprintf(stderr, "Entering function: %s [%s:%d]\n", a, __FILE__, __LINE__))
56 #define DEBUG_EXIT(a) (canonEnableDebug || fprintf(stderr, "Exiting function: %s [%s:%d]\n", a, __FILE__, __LINE__))
57 #define DEBUG_1(a) (canonEnableDebug || fprintf(stderr, a " [%s:%d]\n", __FILE__, __LINE__))
58 #define DEBUG_2(a,b) (canonEnableDebug || fprintf(stderr, a " [%s:%d]\n",b, __FILE__, __LINE__))
59 #define DEBUG_3(a,b,c) (canonEnableDebug || fprintf(stderr, a " [%s:%d]\n",b, c, __FILE__, __LINE__))
60
61 #else
62 #define DEBUG_ENABLE
63 #define DEBUG_DISABLE
64 #define DEBUG_ENTRY(a)
65 #define DEBUG_EXIT(a)
66
67 #define DEBUG_1(a)
68 #define DEBUG_2(a,b)
69 #define DEBUG_3(a,b,c)
70 #endif
71
72
73 /* canon_read_int4
74
75
76 The problem with gqview is that sometimes the data is to be read from
77 a file, and sometimes it is in memory. This function tries to isolate
78 the rest of the code from having to deal with both cases
79
80 This function reads a 2 or 4 byte unsigned integer, and fixes its endianness.
81
82 If fd >= 0 then the value is read from the corresponding file descriptor
83
84 in that case, if offset is > 0, then the value is read from that offset
85
86 otherwise it is read from the current file pointer
87
88 if fd < 0 then the value is read from the memory pointed by data + offset
89
90
91 offset is a pointer to the actual offset of the file.
92
93 sizeInt can be 2 or 4 (it is the number of bytes to read)
94
95 RETURNS true if no error, false if it can't read the value 67 *jpeg_encoding = TRUE;
96
97
98 */
99 static int canon_read_int(unsigned int *offset, const void *data, int sizeInt, unsigned int *value )
100 { 45 {
101 DEBUG_DISABLE; 46 guint tag;
102 47 guint type;
103 DEBUG_ENTRY("canon_read_int"); 48 guint count;
104 /* Verify values before we do anything */ 49 guint jpeg_start;
105 if (sizeInt != 2 && sizeInt != 4) return FALSE; 50
106 if (offset == NULL) return FALSE; 51 /* the two (tiff compliant) tags we want are:
107 if (*offset <= 0) return FALSE; 52 * 0x0103 image compression type (must be type 6 for jpeg)
108 if (data == NULL) return FALSE; 53 * 0x0111 jpeg start offset
109 if (value == NULL) return FALSE; 54 * only use the first segment that contains an actual jpeg - as there
110 55 * is another that contains the raw data.
111 if (sizeInt == 4) { 56 */
112 *value = GUINT32_FROM_LE(*(guint32*)(data + *offset)); 57 tag = exif_byte_get_int16(data + offset + EXIF_TIFD_OFFSET_TAG, bo);
113 *offset +=4; 58 type = exif_byte_get_int16(data + offset + EXIF_TIFD_OFFSET_FORMAT, bo);
114 DEBUG_3("Read 4 bytes %d %x", *value, *value); 59 count = exif_byte_get_int32(data + offset + EXIF_TIFD_OFFSET_COUNT, bo);
115 } else { 60
116 *value = GUINT16_FROM_LE(*(guint16*)(data + *offset)); 61 /* tag 0x0103 contains the compression type for this segment's image data */
117 *offset +=2; 62 if (tag == 0x0103)
118 DEBUG_3("Read 2 bytes %d %x", *value, *value); 63 {
119 } 64 if (ExifFormatList[type].size * count == 2 &&
120 65 exif_byte_get_int16(data + offset + EXIF_TIFD_OFFSET_DATA, bo) == 6)
121 DEBUG_EXIT("canon_read_int"); 66 {
122 67 *jpeg_encoding = TRUE;
123 DEBUG_ENABLE; 68 }
124 return TRUE; 69 return FALSE;
70 }
71
72 /* find and verify jpeg offset */
73 if (tag != 0x0111 ||
74 !jpeg_encoding) return FALSE;
75
76 /* make sure data segment contains 4 bytes */
77 if (ExifFormatList[type].size * count != 4) return FALSE;
78
79 jpeg_start = exif_byte_get_int32(data + offset + EXIF_TIFD_OFFSET_DATA, bo);
80
81 /* verify this is jpeg data */
82 if (len < jpeg_start + 4 ||
83 memcmp(data + jpeg_start, "\xff\xd8", 2) != 0)
84 {
85 return FALSE;
86 }
87
88 *image_offset = jpeg_start;
89 return TRUE;
125 } 90 }
126 91
127 #define CANON_HEADER_SIZE 26 92 static gint canon_cr2_tiff_table(unsigned char *data, const guint len, guint offset, ExifByteOrder bo,
128 93 guint *image_offset)
129 /*
130
131 The CR2 format is really a TIFF format. It is nicely documented in the TIFF V 6.0 document available from adobe.
132
133 The CR2 file contains two thumbnails, one tiny and one decent sized. The record Id of the latter is 0x0111.
134
135 The photo info is also available, in EXIF, and it looks like I don't need to do anything! Yeah!
136
137 */
138
139 static int canon_cr2_process_directory(void *data, int offsetIFD, guint *jpegLocation, guint *exifLocation)
140 { 94 {
141 unsigned int offset; 95 gint jpeg_encoding = FALSE;
142 int returnValue = FALSE; 96 guint count;
143 97 guint i;
144 DEBUG_ENTRY("canon_cr2_process_directory"); 98
145 99 if (len < offset + 2) return 0;
146 /* The directory is a linked list, after an array of records, the next 4 bytes point to the offset of the next directory. 100
147 101 count = exif_byte_get_int16(data + offset, bo);
148 All offsets are absolute within the file (in CRWs the offsets are relative). 102 offset += 2;
149 103 if (len < offset + count * EXIF_TIFD_SIZE + 4) return 0;
150 */ 104
151 105 for (i = 0; i < count; i++)
152 while (offsetIFD != 0 && offsetIFD != 0xFFFF) { 106 {
153 int countEntries=0; 107 if (canon_cr2_tiff_entry(data, len, offset + i * EXIF_TIFD_SIZE, bo,
154 int i; 108 image_offset, &jpeg_encoding))
155 /* Read directory, we start by reading number of entries in the directory */ 109 {
156 110 return 0;
157 offset = offsetIFD; 111 }
158 if (!canon_read_int(&offset, data, 2, &countEntries)) { 112 }
159 goto return_only; 113
160 } 114 return exif_byte_get_int32(data + offset + count * EXIF_TIFD_SIZE, bo);
161 DEBUG_2("Number of entries: %d\n", countEntries);
162
163 for (i=0;i<countEntries;i++) {
164 /* read each entry */
165
166 int recordId;
167 #if 0
168 int format;
169 int size;
170 #endif
171
172 /* read record type */
173 if (!canon_read_int(&offset, data, 2, &recordId)) {
174 goto return_only;
175 }
176
177 /* Did we find the JPEG */
178 if (recordId == 0x0111) {
179 DEBUG_1("This is the record to find**********************\n");
180 offset +=6;
181 if (!canon_read_int(&offset, data, 4, jpegLocation)) {
182 goto return_only;
183 }
184 DEBUG_3("JPEG Location %d 0x%x\n", *jpegLocation, *jpegLocation);
185 /* We don't want to keep reading, because there is another
186 0x0111 record at the end that contains the raw data */
187 returnValue = TRUE;
188 goto return_only;
189 } else {
190 /* advance pointer by skipping rest of record */
191 offset += 10;
192 }
193 }
194 /* The next 4 bytes are the offset of next directory, if zero we are done
195
196 */
197 if (!canon_read_int(&offset, data, 4, &offsetIFD)) {
198 goto return_only;
199 }
200 DEBUG_3("Value of NEXT offsetIFD: %d 0x%x\n", offsetIFD, offsetIFD);
201 }
202
203 returnValue = TRUE;
204 DEBUG_1("Going to return true");
205
206 return_only:
207 DEBUG_EXIT("canon_cr2_process_directory");
208
209 return TRUE;
210
211
212 } 115 }
213 116
214 117 gint format_canon_raw_cr2(unsigned char *data, const guint len,
215 static int format_raw_test_canon_cr2(void *data, const guint len, 118 guint *image_offset, guint *exif_offset)
216 guint *image_offset, guint *exif_offset)
217 { 119 {
218 #if 0 120 guint jpeg_offset = 0;
219 char signature[4]; 121 ExifByteOrder bo;
220 unsigned int offset = 4; 122 guint offset;
221 #endif 123 gint level;
222 int offsetIFD; 124
223 int returnValue = FALSE; 125 /* cr2 files are tiff files with a few canon specific directory tags
224 void *jpgInDataOffset; 126 * they are (always ?) in little endian format
225 127 */
226 DEBUG_ENTRY("format_raw_test_canon_cr2"); 128 if (!exif_tiff_directory_offset(data, len, &offset, &bo)) return FALSE;
227 129
228 /* Verify signature */ 130 level = 0;
229 if (memcmp(data, "\x49\x49\x2a\00", 4) != 0) { 131 while (offset && level < EXIF_TIFF_MAX_LEVELS)
230 DEBUG_1("This is not a CR2"); 132 {
231 goto return_only; 133 offset = canon_cr2_tiff_table(data, len, offset, bo, &jpeg_offset);
232 } 134 level++;
233 135
234 /* Get address of first directory */ 136 if (jpeg_offset != 0)
235 offsetIFD = GUINT32_FROM_LE(*(guint32*)(data + 4)); 137 {
236 138 if (image_offset) *image_offset = jpeg_offset;
237 139 return TRUE;
238 DEBUG_2("Value of offsetIFD: %d\n", offsetIFD); 140 }
239 141 }
240 returnValue = canon_cr2_process_directory(data, offsetIFD, image_offset, exif_offset); 142
241 143 return FALSE;
242 if (returnValue) {
243 jpgInDataOffset = data + *image_offset;
244
245 /* Make sure we really got a JPEG */
246
247 if (memcmp(jpgInDataOffset, "\xff\xd8",2) != 0) {
248 /* It is not at the JPEG! */
249 DEBUG_2("THis is not a jpeg after all: there are the first 4 bytes 0x%x ", (int)jpgInDataOffset);
250 returnValue = FALSE;
251 }
252 }
253
254 return_only:
255 DEBUG_EXIT("format_raw_test_canon_cr2");
256
257 return returnValue;
258 } 144 }
259 145
260 146 #define CRW_BYTE_ORDER EXIF_BYTE_ORDER_INTEL
261 gint format_canon_raw(unsigned char *data, const guint len, 147 #define CRW_HEADER_SIZE 26
262 guint *image_offset, guint *exif_offset) 148 #define CRW_DIR_ENTRY_SIZE 10
149
150 gint format_canon_raw_crw(unsigned char *data, const guint len,
151 guint *image_offset, guint *exif_offset)
263 { 152 {
264 153 guint block_offset;
265 154 guint data_length;
266 /* There are at least 2 types of Canon raw files. CRW and CR2 155 guint offset;
267 156 guint count;
268 CRW files have a proprietary format. 157 guint i;
269 158
270 HEADER 159 /* CRW header starts with 2 bytes for byte order (always "II", little endian),
271 Heap 160 * 4 bytes for start of root block,
272 RAW data 161 * and 8 bytes of magic for file type and format "HEAPCCDR"
273 JPEG data 162 * (also 4 bytes for file version, and 8 bytes reserved)
274 PHoto data 163 *
275 164 * CIFF specification in pdf format is available on some websites,
276 HEADER_LENGTH 32 bytes 165 * search for "CIFFspecV1R03.pdf" or "CIFFspecV1R04.pdf"
277 int2 byteOrder; Always II (MM Motorola ---big endian, II Intel --little endian) 166 */
278 int4 length; Should be 26 167 if (len < CRW_HEADER_SIZE ||
279 char identifier[8];type HEAP, subtype heap CCDR 168 memcmp(data, "II", 2) != 0 ||
280 int2 version; 169 memcmp(data + 6, "HEAPCCDR", 8) != 0)
281 int2 subversion; 170 {
282 char unused[14]; 171 return FALSE;
283 */ 172 }
284 173
285 int returnValue = FALSE; 174 block_offset = exif_byte_get_int32(data + 2, CRW_BYTE_ORDER);
286 int heapHeaderOffset = 0; 175
287 int heapRecordsCount = 0; 176 /* the end of the root block equals end of file,
288 #if 0 177 * the last 4 bytes of the root block contain the block's data size
289 guint32 rawInt4; 178 */
290 guint16 rawInt2; 179 offset = len - 4;
291 #endif 180 data_length = exif_byte_get_int32(data + offset, CRW_BYTE_ORDER);
292 int i; 181
293 unsigned int currentOffset; 182 offset = block_offset + data_length;
294 /* File has to be little endian, first two bytes II */ 183 if (len < offset + 2) return FALSE;
295 184
296 if (len < 100) 185 /* number of directory entries for this block is in
297 return FALSE; 186 * the next two bytes after the data for this block.
298 187 */
299 if (format_raw_test_canon_cr2((void *)data, len, image_offset, exif_offset)) { 188 count = exif_byte_get_int16(data + offset, CRW_BYTE_ORDER);
300 return TRUE; 189 offset += 2;
301 } 190 if (len < offset + count * CRW_DIR_ENTRY_SIZE + 4) return FALSE;
302 191
303 if (memcmp("II", data, 2) != 0) { 192 /* walk the directory entries looking for type jpeg (tag 0x2007),
304 return FALSE; 193 * for reference, other tags are 0x2005 for raw and 0x300a for photo info:
305 } 194 */
306 /* NO DEBUG BEFORE THIS POINT, we want to debug only Canon */ 195 for (i = 0; i < count ; i++)
307 196 {
308 DEBUG_ENTRY("format_raw_test_canon"); 197 guint entry_offset;
309 198 guint record_type;
310 DEBUG_2("Length of buffer read %u", len); 199 guint record_offset;
311 200 guint record_length;
312 DEBUG_2("CRW header length Data %d", GUINT32_FROM_LE(*(guint32*)(data + 2))); 201
313 202 entry_offset = offset + i * CRW_DIR_ENTRY_SIZE;
314 /* the length has to be CANON_HEADER_SIZE */ 203
315 if (GUINT32_FROM_LE(*(guint32*)(data + 2)) != CANON_HEADER_SIZE) { 204 /* entry is 10 bytes (in order):
316 DEBUG_1("It is not the right size"); 205 * 2 for type
317 goto return_only; 206 * 4 for length of data
318 } 207 * 4 for offset into data segment of this block
319 208 */
320 if (!memcmp("HEAPCCDR", data+6, 8) == 0) { 209 record_type = exif_byte_get_int16(data + entry_offset, CRW_BYTE_ORDER);
321 DEBUG_1("This file is not a Canon CRW raw photo"); 210 record_length = exif_byte_get_int32(data + entry_offset + 2, CRW_BYTE_ORDER);
322 goto return_only; 211 record_offset = exif_byte_get_int32(data + entry_offset + 6, CRW_BYTE_ORDER);
323 212
324 } 213 /* tag we want for jpeg data */
325 214 if (record_type == 0x2007)
326 /* Ok, so now we know that this is a CRW file */ 215 {
327 216 guint jpeg_offset;
328 /* The heap is a strange data structure. It is recursive, so a record 217
329 can contain a heap itself. That is indeed the case for the photo information 218 jpeg_offset = block_offset + record_offset;
330 record. Luckily the first heap contains the jpeg, so we don't need to do 219 if (len < jpeg_offset + record_length ||
331 any recursive processing. 220 record_length < 4 ||
332 221 memcmp(data + jpeg_offset, "\xff\xd8\xff\xdb", 4) != 0)
333 Its "header" is at the end. The header is a sequence of records, 222 {
334 and the data of each record is at the beginning of the heap 223 return FALSE;
335 224 }
336 +-----------------+ 225
337 | data raw | 226 /* we now know offset and verified jpeg */
338 +-----------------+ 227 *image_offset = jpeg_offset;
339 | data jpeg | 228 return TRUE;
340 +-----------------+ 229 }
341 | data photo info | 230 }
342 +-----------------+ 231
343 |header of heap | 232 return FALSE;
344 | # records | it should be 3
345 | raw info |
346 | jpeg info |
347 | photo info |
348 +-----------------+
349
350 The header contains
351 number of records: 2 bytes
352 for each record (10 bytes long)
353 type: 2 bytes
354 length: 4 bytes
355 offset: 4 bytes
356
357 In some records the length and offset are actually data,
358 but none for the ones in the first heap.
359
360 the offset is with respect to the beginning of the heap, not the
361 beginning of the file. That allows heaps to be "movable"
362
363 For the purpose of finding the JPEG, all we need is to scan the first heap, 232 return FALSE;
364 which contains the following record types:
365
366 0x2005 Record RAW data
367 0x2007 Record JPEG data
368 0x300a Record with photo info
369
370 */
371
372
373 if (len < 0x10000) {
374 DEBUG_2("We have a problem, the length is too small %d ", len);
375 goto return_only;
376 }
377 currentOffset = len-4;
378
379
380 /* The last 4 bytes have the offset of the header of the heap */
381 if (!canon_read_int(&currentOffset, data, 4, &heapHeaderOffset))
382 goto return_only;
383
384 /* The heapoffset has to be adjusted to the actual file size, the header is CANON_HEADER_SIZE bytes long */
385 heapHeaderOffset += CANON_HEADER_SIZE;
386 DEBUG_2("heap header Offset %d ", heapHeaderOffset);
387
388 /* Just check, it does not hurt, we don't want to crash */
389 if (heapHeaderOffset > len)
390 goto return_only;
391
392 currentOffset = heapHeaderOffset;
393 /* Let us read the number of records in the heap */
394 if (!canon_read_int(&currentOffset, data, 2, &heapRecordsCount))
395 goto return_only;
396
397 DEBUG_2("heap record count %d ", heapRecordsCount);
398
399 if (heapRecordsCount != 3) {
400 /* In all the cameras I have seen, this is always 3
401 if not, something is wrong, so just quit */
402 goto return_only;
403 }
404
405 for (i=0;i<3;i++) {
406 int recordType;
407 int recordOffset;
408 int recordLength;
409 const void *jpgInDataOffset;
410 /* Read each record, to find jpg, it should be second */
411
412 if (!canon_read_int(&currentOffset, data, 2, &recordType))
413 goto return_only;
414
415 DEBUG_2("record type 0x%x ", recordType);
416
417 if (recordType != 0x2007) {
418 /* Go to the next record, don't waste time,
419 but first, eat 8 bytes from header */
420 currentOffset += 8;
421 continue; /* Nah, wrong record, go to next */
422 }
423 /* Bingo, we are at the JPEG record */
424
425 /* Read length */
426 if (!canon_read_int(&currentOffset, data, 4, &recordLength))
427 goto return_only;
428
429 DEBUG_2("record length %d ", recordLength);
430
431 /* Read offset */
432
433 if (!canon_read_int(&currentOffset, data, 4, &recordOffset))
434 goto return_only;
435
436 DEBUG_2("record offset 0x%d ", recordOffset);
437
438 /* Great, we now know where the JPEG is!
439 it is CANON_HEADER_SIZE (size of CRW header) + recordOffset
440 */
441
442 *image_offset = CANON_HEADER_SIZE + recordOffset;
443 DEBUG_2("image offset %d ", *image_offset);
444
445 /* keep checking for potential errors */
446 if (*image_offset > len) {
447 goto return_only;
448 }
449 /* Get the JPEG is */
450
451 jpgInDataOffset = data + *image_offset;
452
453 if (memcmp(jpgInDataOffset, "\xff\xd8\xff\xdb",4) != 0) {
454 /* It is not at the JPEG! */
455 DEBUG_2("THis is not a jpeg after all: there are the first 4 bytes 0x%x ", (int)jpgInDataOffset);
456 goto return_only;
457 }
458 returnValue = TRUE;
459 goto return_only;
460 }
461 /* undo whatever we need in case of an error*/
462 DEBUG_1("We scan all records, but nothing was found!!!!!!!!!!!!!!!!!!");
463
464
465 /* At this point we are returning */
466 return_only:
467 if (returnValue) {
468 DEBUG_1("****We got an embedded JPEG for a canon CRW");
469
470 }
471
472 DEBUG_EXIT("format_raw_test_canon");
473 return returnValue;
474
475 #undef DEBUG_2
476 #undef DEBUG
477 #undef DEBUG_ENTRY
478 #undef DEBUG_EXIT
479
480 } 233 }
234
481 235
482 /* 236 /*
483 *----------------------------------------------------------------------------- 237 *-----------------------------------------------------------------------------
484 * EXIF Makernote for Canon 238 * EXIF Makernote for Canon
485 *----------------------------------------------------------------------------- 239 *-----------------------------------------------------------------------------