changeset 46122:17067a24204d

New file.
author Kim F. Storm <storm@cua.dk>
date Mon, 01 Jul 2002 21:59:30 +0000
parents 2198a6632b2b
children 90cae21cf871
files lisp/emacs-lisp/bindat.el
diffstat 1 files changed, 622 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/emacs-lisp/bindat.el	Mon Jul 01 21:59:30 2002 +0000
@@ -0,0 +1,622 @@
+;;; bindat.el --- binary data structure packing and unpacking.
+
+;; Copyright (C) 2002 Free Software Foundation, Inc.
+
+;; Author: Kim F. Storm <storm@cua.dk>
+;; Assignment name: struct.el
+;; Keywords: comm data processes
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;;  Packing and unpacking of (binary) data structures.
+;;
+;;  The data formats used in binary files and network protocols are
+;;  often structed data which can be described by a C-style structure
+;;  such as the one shown below.  Using the bindat package, decoding
+;;  and encoding binary data formats like these is made simple using a
+;;  structure specification which closely resembles the C style
+;;  structure declarations.
+;;  
+;;  Encoded (binary) data is stored in a unibyte string or vector,
+;;  while the decoded data is stored in an alist with (FIELD . VALUE) 
+;;  pairs.
+
+;; Example:
+  
+;;  Consider the following C structures:
+;;  
+;;  struct header {
+;;	unsigned long	dest_ip;
+;;	unsigned long	src_ip;
+;;	unsigned short	dest_port;
+;;	unsigned short	src_port;
+;;  };
+;;  
+;;  struct data {
+;;	unsigned char	type;
+;;	unsigned char	opcode;
+;;	unsigned long	length;  /* In little endian order */
+;;	unsigned char	id[8];   /* nul-terminated string  */
+;;	unsigned char	data[/* (length + 3) & ~3 */];
+;;  };
+;;  
+;;  struct packet {
+;;	struct header	header;
+;;	unsigned char	items;
+;;	unsigned char   filler[3];
+;;	struct data	item[/* items */];
+;;  };
+;;  
+;;  The corresponding Lisp bindat specification looks like this:
+;;  
+;;  (setq header-spec
+;;    '((dest-ip   ip)
+;;	(src-ip    ip)
+;;	(dest-port u16)
+;;	(src-port  u16)))
+;;  
+;;  (setq data-spec
+;;    '((type      u8)
+;;	(opcode	   u8)
+;;	(length	   u16r)  ;; little endian order
+;;	(id	   strz 8)
+;;	(data	   vec (length))
+;;	(align     4)))
+;;  
+;;  (setq packet-spec
+;;    '((header    struct header-spec)
+;;	(items     u8)
+;;	(fill      3)
+;;	(item	   repeat (items)
+;;		   ((struct data-spec)))))
+;;  
+;;
+;;  A binary data representation may look like
+;;   [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0  
+;;     2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0
+;;     1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ]
+;;  
+;;  The corresponding decoded structure looks like
+;;
+;;      ((header
+;;        (dest-ip   . [192 168 1 100])
+;;        (src-ip    . [192 168 1 101])
+;;        (dest-port . 284)
+;;        (src-port  . 5408))
+;;       (items . 2)
+;;       (item ((data . [1 2 3 4 5])
+;;      	(id . "ABCDEF")
+;;      	(length . 5)
+;;      	(opcode . 3)
+;;      	(type . 2))
+;;             ((data . [6 7 8 9 10 11 12])
+;;      	(id . "BCDEFG")
+;;      	(length . 7)
+;;      	(opcode . 4)
+;;      	(type . 1))))
+;;
+;;  To access a specific value in this structure, use the function
+;;  bindat-get-field with the structure as first arg followed by a list
+;;  of field names and array indexes, e.g. using the data above,
+;;    (bindat-get-field decoded-structure 'item 1 'id)
+;;  returns "BCDEFG".
+
+;; Binary Data Structure Specification Format
+;; ------------------------------------------
+
+;; The data specification is formatted as follows:
+
+;; SPEC    ::= ( ITEM... )
+
+;; ITEM    ::= ( [FIELD] TYPE )
+;;          |  ( [FIELD] eval FORM )    -- eval FORM for side-effect only
+;;          |  ( [FIELD] fill LEN )     -- skip LEN bytes
+;;          |  ( [FIELD] align LEN )    -- skip to next multiple of LEN bytes
+;;          |  ( [FIELD] struct SPEC_NAME )
+;;          |  ( [FIELD] union TAG_VAL (TAG SPEC)... [(t SPEC)] )
+;;          |  ( [FIELD] repeat COUNT SPEC )
+
+;;          -- In (eval EXPR), the value of the last field is available in
+;;             the dynamically bound variable `last'.
+
+;; TYPE    ::= ( eval EXPR )		-- interpret result as TYPE
+;;	    |  u8   | byte		-- length 1
+;;          |  u16  | word | short      -- length 2, network byte order
+;;          |  u24                      -- 3-byte value
+;;          |  u32  | dword | long      -- length 4, network byte order
+;;          |  u16r | u24r | u32r       -- little endian byte order.
+;;	    |  str LEN                  -- LEN byte string
+;;          |  strz LEN                 -- LEN byte (zero-terminated) string
+;;          |  vec LEN                  -- LEN byte vector
+;;          |  ip                       -- 4 byte vector
+;;          |  bits LEN                 -- List with bits set in LEN bytes.
+;;
+;;          -- Note: 32 bit values may be limited by emacs' INTEGER
+;;             implementation limits.
+;;
+;;          -- Example: bits 2 will map bytes 0x1c 0x28 to list (2 3 7 11 13)
+
+;; FIELD   ::= ( eval EXPR )		-- use result as NAME
+;;          |  NAME
+
+;; LEN     ::= ARG
+;;          |  <omitted> | nil		-- LEN = 1
+
+
+;; TAG_VAL ::= ARG
+
+;; TAG     ::= LISP_CONSTANT
+;;          |  ( eval EXPR )		-- return non-nil if tag match;
+;;					   current TAG_VAL in `tag'.
+
+;; ARG     ::= ( eval EXPR )		-- interpret result as ARG
+;;          |  INTEGER_CONSTANT
+;;          |  DEREF
+
+;; DEREF   ::= ( [NAME | INTEGER]... )	-- Field NAME or Array index relative to
+;;                                         current structure spec.
+;;                                      -- see bindat-get-field
+
+;; A `union' specification
+;;    ([FIELD] union TAG_VAL (TAG SPEC) ... [(t SPEC)])
+;; is interpreted by evalling TAG_VAL and then comparing that to 
+;; each TAG using equal; if a match is found, the corresponding SPEC
+;; is used.
+;; If TAG is a form (eval EXPR), EXPR is evalled with `tag' bound to the
+;; value of TAG_VAL; the corresponding SPEC is used if the result is non-nil.
+;; Finally, if TAG is t, the corresponding SPEC is used unconditionally.
+;;
+;; An `eval' specification
+;;  ([FIELD] eval FORM)
+;; is interpreted by evalling FORM for its side effects only.
+;; If FIELD is specified, the value is bound to that field.
+;; The FORM may access and update `raw-data' and `pos' (see `bindat-unpack'),
+;; as well as the lisp data structure in `struct'.
+
+;;; Code:
+
+;; Helper functions for structure unpacking.
+;; Relies on dynamic binding of RAW-DATA and POS
+
+(defvar raw-data)
+(defvar pos)
+
+(defun bindat--unpack-u8 ()
+  (prog1
+      (if (stringp raw-data)
+	  (string-to-char (substring raw-data pos (1+ pos)))
+	(aref raw-data pos))
+    (setq pos (1+ pos))))
+    
+(defun bindat--unpack-u16 ()
+  (let* ((a (bindat--unpack-u8)) (b (bindat--unpack-u8)))
+    (logior (lsh a 8) b)))
+
+(defun bindat--unpack-u24 ()
+  (let* ((a (bindat--unpack-u16)) (b (bindat--unpack-u8)))
+    (logior (lsh a 8) b)))
+
+(defun bindat--unpack-u32 ()
+  (let* ((a (bindat--unpack-u16)) (b (bindat--unpack-u16)))
+    (logior (lsh a 16) b)))
+
+(defun bindat--unpack-u16r ()
+  (let* ((a (bindat--unpack-u8)) (b (bindat--unpack-u8)))
+    (logior a (lsh b 8))))
+
+(defun bindat--unpack-u24r ()
+  (let* ((a (bindat--unpack-u16r)) (b (bindat--unpack-u8)))
+    (logior a (lsh b 16))))
+
+(defun bindat--unpack-u32r ()
+  (let* ((a (bindat--unpack-u16r)) (b (bindat--unpack-u16r)))
+    (logior a (lsh b 16))))
+
+(defun bindat--unpack-item (type len)
+  (if (eq type 'ip)
+      (setq type 'vec len 4))
+  (cond
+   ((memq type '(u8 byte))
+    (bindat--unpack-u8))
+   ((memq type '(u16 word short))
+    (bindat--unpack-u16))
+   ((eq type 'u24)
+    (bindat--unpack-u24))
+   ((memq type '(u32 dword long))
+    (bindat--unpack-u32))
+   ((eq type 'u16r)
+    (bindat--unpack-u16r))
+   ((eq type 'u24r)
+    (bindat--unpack-u24r))
+   ((eq type 'u32r)
+    (bindat--unpack-u32r))
+   ((eq type 'bits)
+    (let ((bits nil) (bnum (1- (* 8 len))) j m)
+      (while (>= bnum 0)
+	(if (= (setq m (bindat--unpack-u8)) 0)
+	    (setq bnum (- bnum 8))
+	  (setq j 128)
+	  (while (> j 0)
+	    (if (/= 0 (logand m j))
+		(setq bits (cons bnum bits)))
+	    (setq bnum (1- bnum)
+		  j (lsh j -1)))))
+      bits))
+   ((eq type 'str)
+    (let ((s (substring raw-data pos (+ pos len))))
+      (setq pos (+ pos len))
+      (if (stringp s) s
+	(string-make-unibyte (concat s)))))
+   ((eq type 'strz)
+    (let ((i 0) s)
+      (while (and (< i len) (/= (aref raw-data (+ pos i)) 0))
+	(setq i (1+ i)))
+      (setq s (substring raw-data pos (+ pos i)))
+      (setq pos (+ pos len))
+      (if (stringp s) s
+	(string-make-unibyte (concat s)))))
+   ((eq type 'vec)
+    (let ((v (make-vector len 0)) (i 0))
+      (while (< i len)
+	(aset v i (bindat--unpack-u8))
+	(setq i (1+ i)))
+      v))
+   (t nil)))
+
+(defun bindat--unpack-group (spec)
+  (let (struct last)
+    (while spec
+      (let* ((item (car spec))
+	     (field (car item))
+	     (type (nth 1 item))
+	     (len (nth 2 item))
+	     (tail 3)
+	     data)
+	(setq spec (cdr spec))
+	(if (and (consp field) (eq (car field) 'eval))
+	    (setq field (eval (car (cdr field)))))
+	(if (and type (consp type) (eq (car type) 'eval))
+	    (setq type (eval (car (cdr type)))))
+	(if (and len (consp len) (eq (car len) 'eval))
+	    (setq len (eval (car (cdr len)))))
+	(if (memq field '(eval fill align struct union))
+	    (setq tail 2
+		  len type
+		  type field
+		  field nil))
+	(if (and (consp len) (not (eq type 'eval)))
+	    (setq len (apply 'bindat-get-field struct len)))
+	(if (not len)
+	    (setq len 1))
+	(cond
+	 ((eq type 'eval)
+	  (if field
+	      (setq data (eval len))
+	    (eval len)))
+	 ((eq type 'fill)
+	  (setq pos (+ pos len)))
+	 ((eq type 'align)
+	  (while (/= (% pos len) 0)
+	    (setq pos (1+ pos))))
+	 ((eq type 'struct)
+	  (setq data (bindat--unpack-group (eval len))))
+	 ((eq type 'repeat)
+	  (let ((index 0))
+	    (while (< index len)
+	      (setq data (cons (bindat--unpack-group (nthcdr tail item)) data))
+	      (setq index (1+ index)))
+	    (setq data (nreverse data))))
+	 ((eq type 'union)
+	  (let ((tag len) (cases (nthcdr tail item)) case cc)
+	    (while cases
+	      (setq case (car cases)
+		    cases (cdr cases)
+		    cc (car case))
+	      (if (or (equal cc tag) (equal cc t)
+		      (and (consp cc) (eval cc)))
+		  (setq data (bindat--unpack-group (cdr case))
+			cases nil)))))
+	 (t
+	  (setq data (bindat--unpack-item type len)
+		last data)))
+	(if data
+	    (if field
+		(setq struct (cons (cons field data) struct))
+	      (setq struct (append data struct))))))
+    struct))
+  
+(defun bindat-unpack (spec raw-data &optional pos)
+  "Return structured data according to SPEC for binary data in RAW-DATA.
+RAW-DATA is a string or vector.  Optional third arg POS specifies the
+starting offset in RAW-DATA."
+  (unless pos (setq pos 0))
+  (bindat--unpack-group spec))
+
+(defun bindat-get-field (struct &rest field)
+  "In structured data STRUCT, return value of field named FIELD.
+If multiple field names are specified, use the field names to
+lookup nested sub-structures in STRUCT, corresponding to the
+C-language syntax STRUCT.FIELD1.FIELD2.FIELD3...
+An integer value in the field list is taken as an array index,
+e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..."
+  (while (and struct field)
+    (setq struct (if (integerp (car field))
+		     (nth (car field) struct)
+		   (let ((val (assq (car field) struct)))
+		     (if (consp val) (cdr val)))))
+    (setq field (cdr field)))
+  struct)
+
+
+;; Calculate raw-data length of structured data 
+
+(defvar bindat--fixed-length-alist
+  '((u8 . 1) (byte . 1)
+    (u16 . 2) (u16r . 2) (word . 2) (short . 2)
+    (u24 . 3) (u24r . 3)
+    (u32 . 4) (u32r . 4) (dword . 4) (long . 4)
+    (ip . 4)))
+
+(defun bindat--length-group (struct spec)
+  (let (last)
+    (while spec
+      (let* ((item (car spec))
+	     (field (car item))
+	     (type (nth 1 item))
+	     (len (nth 2 item))
+	     (tail 3))
+	(setq spec (cdr spec))
+	(if (and (consp field) (eq (car field) 'eval))
+	    (setq field (eval (car (cdr field)))))
+	(if (and type (consp type) (eq (car type) 'eval))
+	    (setq type (eval (car (cdr type)))))
+	(if (and len (consp len) (eq (car len) 'eval))
+	    (setq len (eval (car (cdr len)))))
+	(if (memq field '(eval fill align struct union))
+	    (setq tail 2
+		  len type
+		  type field
+		  field nil))
+	(if (and (consp len) (not (eq type 'eval)))
+	    (setq len (apply 'bindat-get-field struct len)))
+	(if (not len)
+	    (setq len 1))
+	(cond 
+	 ((eq type 'eval)
+	  (if field
+	      (setq struct (cons (cons field (eval len)) struct))
+	    (eval len)))
+	 ((eq type 'fill)
+	  (setq pos (+ pos len)))
+	 ((eq type 'align)
+	  (while (/= (% pos len) 0)
+	    (setq pos (1+ pos))))
+	 ((eq type 'struct)
+	  (bindat--length-group
+	   (if field (bindat-get-field struct field) struct) (eval len)))
+	 ((eq type 'repeat)
+	  (let ((index 0))
+	    (while (< index len)
+	      (bindat--length-group (nth index (bindat-get-field struct field)) (nthcdr tail item))
+	      (setq index (1+ index)))))
+	 ((eq type 'union)
+	  (let ((tag len) (cases (nthcdr tail item)) case cc)
+	    (while cases
+	      (setq case (car cases)
+		    cases (cdr cases)
+		    cc (car case))
+	      (if (or (equal cc tag) (equal cc t)
+		      (and (consp cc) (eval cc)))
+		  (progn
+		    (bindat--length-group struct (cdr case))
+		    (setq cases nil))))))
+	 (t
+	  (if (setq type (assq type bindat--fixed-length-alist))
+	      (setq len (cdr type)))
+	  (if field
+	      (setq last (bindat-get-field struct field)))
+	  (setq pos (+ pos len))))))))
+
+(defun bindat-length (spec struct)
+  "Calculate raw-data length for STRUCT according to bindat specification SPEC."
+  (let ((pos 0))
+    (bindat--length-group struct spec)
+    pos))
+
+
+;; Pack structured data into raw-data
+
+(defun bindat--pack-u8 (v)
+  (aset raw-data pos (logand v 255))
+  (setq pos (1+ pos)))
+    
+(defun bindat--pack-u16 (v)
+  (aset raw-data pos (logand (lsh v -8) 255))
+  (aset raw-data (1+ pos) (logand v 255))
+  (setq pos (+ pos 2)))
+
+(defun bindat--pack-u24 (v)
+  (bindat--pack-u8 (lsh v -16))
+  (bindat--pack-u16 v))
+
+(defun bindat--pack-u32 (v)
+  (bindat--pack-u16 (lsh v -16))
+  (bindat--pack-u16 v))
+
+(defun bindat--pack-u16r (v)
+  (aset raw-data (1+ pos) (logand (lsh v -8) 255))
+  (aset raw-data pos (logand v 255))
+  (setq pos (+ pos 2)))
+
+(defun bindat--pack-u24r (v)
+  (bindat--pack-u16r v)
+  (bindat--pack-u8 (lsh v -16)))
+
+(defun bindat--pack-u32r (v)
+  (bindat--pack-u16r v)
+  (bindat--pack-u16r (lsh v -16)))
+
+(defun bindat--pack-item (v type len)
+  (if (eq type 'ip)
+      (setq type 'vec len 4))
+  (cond
+   ((null v)
+    (setq pos (+ pos len)))
+   ((memq type '(u8 byte))
+    (bindat--pack-u8 v))
+   ((memq type '(u16 word short))
+    (bindat--pack-u16 v))
+   ((eq type 'u24)
+    (bindat--pack-u24 v))
+   ((memq type '(u32 dword long))
+    (bindat--pack-u32 v))
+   ((eq type 'u16r)
+    (bindat--pack-u16r v))
+   ((eq type 'u24r)
+    (bindat--pack-u24r v))
+   ((eq type 'u32r)
+    (bindat--pack-u32r v))
+   ((eq type 'bits)
+    (let ((bnum (1- (* 8 len))) j m)
+      (while (>= bnum 0)
+	(setq m 0)
+	(if (null v)
+	    (setq bnum (- bnum 8))
+	  (setq j 128)
+	  (while (> j 0)
+	    (if (memq bnum v)
+		(setq m (logior m j)))
+	    (setq bnum (1- bnum)
+		  j (lsh j -1))))
+	(bindat--pack-u8 m))))
+   ((memq type '(str strz vec))
+    (let ((l (length v)) (i 0))
+      (if (> l len) (setq l len))
+      (while (< i l)
+	(aset raw-data (+ pos i) (aref v i))
+	(setq i (1+ i)))
+      (setq pos (+ pos len))))
+   (t 
+    (setq pos (+ pos len)))))
+
+(defun bindat--pack-group (struct spec)
+  (let (last)
+    (while spec
+      (let* ((item (car spec))
+	     (field (car item))
+	     (type (nth 1 item))
+	     (len (nth 2 item))
+	     (tail 3))
+	(setq spec (cdr spec))
+	(if (and (consp field) (eq (car field) 'eval))
+	    (setq field (eval (car (cdr field)))))
+	(if (and type (consp type) (eq (car type) 'eval))
+	    (setq type (eval (car (cdr type)))))
+	(if (and len (consp len) (eq (car len) 'eval))
+	    (setq len (eval (car (cdr len)))))
+	(if (memq field '(eval fill align struct union))
+	    (setq tail 2
+		  len type
+		  type field
+		  field nil))
+	(if (and (consp len) (not (eq type 'eval)))
+	    (setq len (apply 'bindat-get-field struct len)))
+	(if (not len)
+	    (setq len 1))
+	(cond 
+	 ((eq type 'eval)
+	  (if field
+	      (setq struct (cons (cons field (eval len)) struct))
+	    (eval len)))
+	 ((eq type 'fill)
+	  (setq pos (+ pos len)))
+	 ((eq type 'align)
+	  (while (/= (% pos len) 0)
+	    (setq pos (1+ pos))))
+	 ((eq type 'struct)
+	  (bindat--pack-group
+	   (if field (bindat-get-field struct field) struct) (eval len)))
+	 ((eq type 'repeat)
+	  (let ((index 0))
+	    (while (< index len)
+	      (bindat--pack-group (nth index (bindat-get-field struct field)) (nthcdr tail item))
+	      (setq index (1+ index)))))
+	 ((eq type 'union)
+	  (let ((tag len) (cases (nthcdr tail item)) case cc)
+	    (while cases
+	      (setq case (car cases)
+		    cases (cdr cases)
+		    cc (car case))
+	      (if (or (equal cc tag) (equal cc t)
+		      (and (consp cc) (eval cc)))
+		  (progn
+		    (bindat--pack-group struct (cdr case))
+		    (setq cases nil))))))
+	 (t
+	  (setq last (bindat-get-field struct field))
+	  (bindat--pack-item last type len)
+	  ))))))
+
+(defun bindat-pack (spec struct &optional raw-data pos)
+  "Return binary data packed accoring to SPEC for structured data STRUCT.
+Optional third arg RAW-DATA is a pre-allocated string or vector to unpack into.
+Optional fourth arg POS is the starting offset into RAW-DATA.
+Note: The result is a multibyte string; use `string-make-unibyte' on it
+to make it unibyte if necessary."
+  (let ((no-return raw-data))
+    (unless pos (setq pos 0))
+    (unless raw-data (setq raw-data (make-vector (+ pos (bindat-length spec struct)) 0)))
+    (bindat--pack-group struct spec)
+    (if no-return nil (concat raw-data))))
+
+
+;; Misc. format conversions
+
+(defun bindat-format-vector (vect fmt sep &optional len)
+  "Format vector VECT using element format FMT and separator SEP.
+Result is a string with each element of VECT formatted using FMT and
+separated by the string SEP.  If optional fourth arg LEN is given, use
+only that many elements from VECT."
+  (unless len
+    (setq len (length vect)))
+  (let ((i len) (fmt2 (concat sep fmt)) (s nil))
+    (while (> i 0)
+      (setq i (1- i)
+	    s (cons (format (if (= i 0) fmt fmt2) (aref vect i)) s)))
+    (apply 'concat s)))
+  
+(defun bindat-vector-to-dec (vect &optional sep)
+  "Format vector VECT in decimal format separated by dots.
+If optional second arg SEP is a string, use that as separator."
+  (bindat-format-vector vect "%d" (if (stringp sep) sep ".")))
+
+(defun bindat-vector-to-hex (vect &optional sep)
+  "Format vector VECT in hex format separated by dots.
+If optional second arg SEP is a string, use that as separator."
+  (bindat-format-vector vect "%02x" (if (stringp sep) sep ":")))
+
+(defun bindat-ip-to-string (ip)
+  "Format vector IP as an ip address in dotted notation."
+  (format "%d.%d.%d.%d"
+	  (aref ip 0) (aref ip 1) (aref ip 2) (aref ip 3)))
+
+(provide 'bindat)
+
+;;; bindat.el ends here