Mercurial > emacs
changeset 46122:17067a24204d
New file.
author | Kim F. Storm <storm@cua.dk> |
---|---|
date | Mon, 01 Jul 2002 21:59:30 +0000 |
parents | 2198a6632b2b |
children | 90cae21cf871 |
files | lisp/emacs-lisp/bindat.el |
diffstat | 1 files changed, 622 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/emacs-lisp/bindat.el Mon Jul 01 21:59:30 2002 +0000 @@ -0,0 +1,622 @@ +;;; bindat.el --- binary data structure packing and unpacking. + +;; Copyright (C) 2002 Free Software Foundation, Inc. + +;; Author: Kim F. Storm <storm@cua.dk> +;; Assignment name: struct.el +;; Keywords: comm data processes + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; Packing and unpacking of (binary) data structures. +;; +;; The data formats used in binary files and network protocols are +;; often structed data which can be described by a C-style structure +;; such as the one shown below. Using the bindat package, decoding +;; and encoding binary data formats like these is made simple using a +;; structure specification which closely resembles the C style +;; structure declarations. +;; +;; Encoded (binary) data is stored in a unibyte string or vector, +;; while the decoded data is stored in an alist with (FIELD . VALUE) +;; pairs. + +;; Example: + +;; Consider the following C structures: +;; +;; struct header { +;; unsigned long dest_ip; +;; unsigned long src_ip; +;; unsigned short dest_port; +;; unsigned short src_port; +;; }; +;; +;; struct data { +;; unsigned char type; +;; unsigned char opcode; +;; unsigned long length; /* In little endian order */ +;; unsigned char id[8]; /* nul-terminated string */ +;; unsigned char data[/* (length + 3) & ~3 */]; +;; }; +;; +;; struct packet { +;; struct header header; +;; unsigned char items; +;; unsigned char filler[3]; +;; struct data item[/* items */]; +;; }; +;; +;; The corresponding Lisp bindat specification looks like this: +;; +;; (setq header-spec +;; '((dest-ip ip) +;; (src-ip ip) +;; (dest-port u16) +;; (src-port u16))) +;; +;; (setq data-spec +;; '((type u8) +;; (opcode u8) +;; (length u16r) ;; little endian order +;; (id strz 8) +;; (data vec (length)) +;; (align 4))) +;; +;; (setq packet-spec +;; '((header struct header-spec) +;; (items u8) +;; (fill 3) +;; (item repeat (items) +;; ((struct data-spec))))) +;; +;; +;; A binary data representation may look like +;; [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0 +;; 2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0 +;; 1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ] +;; +;; The corresponding decoded structure looks like +;; +;; ((header +;; (dest-ip . [192 168 1 100]) +;; (src-ip . [192 168 1 101]) +;; (dest-port . 284) +;; (src-port . 5408)) +;; (items . 2) +;; (item ((data . [1 2 3 4 5]) +;; (id . "ABCDEF") +;; (length . 5) +;; (opcode . 3) +;; (type . 2)) +;; ((data . [6 7 8 9 10 11 12]) +;; (id . "BCDEFG") +;; (length . 7) +;; (opcode . 4) +;; (type . 1)))) +;; +;; To access a specific value in this structure, use the function +;; bindat-get-field with the structure as first arg followed by a list +;; of field names and array indexes, e.g. using the data above, +;; (bindat-get-field decoded-structure 'item 1 'id) +;; returns "BCDEFG". + +;; Binary Data Structure Specification Format +;; ------------------------------------------ + +;; The data specification is formatted as follows: + +;; SPEC ::= ( ITEM... ) + +;; ITEM ::= ( [FIELD] TYPE ) +;; | ( [FIELD] eval FORM ) -- eval FORM for side-effect only +;; | ( [FIELD] fill LEN ) -- skip LEN bytes +;; | ( [FIELD] align LEN ) -- skip to next multiple of LEN bytes +;; | ( [FIELD] struct SPEC_NAME ) +;; | ( [FIELD] union TAG_VAL (TAG SPEC)... [(t SPEC)] ) +;; | ( [FIELD] repeat COUNT SPEC ) + +;; -- In (eval EXPR), the value of the last field is available in +;; the dynamically bound variable `last'. + +;; TYPE ::= ( eval EXPR ) -- interpret result as TYPE +;; | u8 | byte -- length 1 +;; | u16 | word | short -- length 2, network byte order +;; | u24 -- 3-byte value +;; | u32 | dword | long -- length 4, network byte order +;; | u16r | u24r | u32r -- little endian byte order. +;; | str LEN -- LEN byte string +;; | strz LEN -- LEN byte (zero-terminated) string +;; | vec LEN -- LEN byte vector +;; | ip -- 4 byte vector +;; | bits LEN -- List with bits set in LEN bytes. +;; +;; -- Note: 32 bit values may be limited by emacs' INTEGER +;; implementation limits. +;; +;; -- Example: bits 2 will map bytes 0x1c 0x28 to list (2 3 7 11 13) + +;; FIELD ::= ( eval EXPR ) -- use result as NAME +;; | NAME + +;; LEN ::= ARG +;; | <omitted> | nil -- LEN = 1 + + +;; TAG_VAL ::= ARG + +;; TAG ::= LISP_CONSTANT +;; | ( eval EXPR ) -- return non-nil if tag match; +;; current TAG_VAL in `tag'. + +;; ARG ::= ( eval EXPR ) -- interpret result as ARG +;; | INTEGER_CONSTANT +;; | DEREF + +;; DEREF ::= ( [NAME | INTEGER]... ) -- Field NAME or Array index relative to +;; current structure spec. +;; -- see bindat-get-field + +;; A `union' specification +;; ([FIELD] union TAG_VAL (TAG SPEC) ... [(t SPEC)]) +;; is interpreted by evalling TAG_VAL and then comparing that to +;; each TAG using equal; if a match is found, the corresponding SPEC +;; is used. +;; If TAG is a form (eval EXPR), EXPR is evalled with `tag' bound to the +;; value of TAG_VAL; the corresponding SPEC is used if the result is non-nil. +;; Finally, if TAG is t, the corresponding SPEC is used unconditionally. +;; +;; An `eval' specification +;; ([FIELD] eval FORM) +;; is interpreted by evalling FORM for its side effects only. +;; If FIELD is specified, the value is bound to that field. +;; The FORM may access and update `raw-data' and `pos' (see `bindat-unpack'), +;; as well as the lisp data structure in `struct'. + +;;; Code: + +;; Helper functions for structure unpacking. +;; Relies on dynamic binding of RAW-DATA and POS + +(defvar raw-data) +(defvar pos) + +(defun bindat--unpack-u8 () + (prog1 + (if (stringp raw-data) + (string-to-char (substring raw-data pos (1+ pos))) + (aref raw-data pos)) + (setq pos (1+ pos)))) + +(defun bindat--unpack-u16 () + (let* ((a (bindat--unpack-u8)) (b (bindat--unpack-u8))) + (logior (lsh a 8) b))) + +(defun bindat--unpack-u24 () + (let* ((a (bindat--unpack-u16)) (b (bindat--unpack-u8))) + (logior (lsh a 8) b))) + +(defun bindat--unpack-u32 () + (let* ((a (bindat--unpack-u16)) (b (bindat--unpack-u16))) + (logior (lsh a 16) b))) + +(defun bindat--unpack-u16r () + (let* ((a (bindat--unpack-u8)) (b (bindat--unpack-u8))) + (logior a (lsh b 8)))) + +(defun bindat--unpack-u24r () + (let* ((a (bindat--unpack-u16r)) (b (bindat--unpack-u8))) + (logior a (lsh b 16)))) + +(defun bindat--unpack-u32r () + (let* ((a (bindat--unpack-u16r)) (b (bindat--unpack-u16r))) + (logior a (lsh b 16)))) + +(defun bindat--unpack-item (type len) + (if (eq type 'ip) + (setq type 'vec len 4)) + (cond + ((memq type '(u8 byte)) + (bindat--unpack-u8)) + ((memq type '(u16 word short)) + (bindat--unpack-u16)) + ((eq type 'u24) + (bindat--unpack-u24)) + ((memq type '(u32 dword long)) + (bindat--unpack-u32)) + ((eq type 'u16r) + (bindat--unpack-u16r)) + ((eq type 'u24r) + (bindat--unpack-u24r)) + ((eq type 'u32r) + (bindat--unpack-u32r)) + ((eq type 'bits) + (let ((bits nil) (bnum (1- (* 8 len))) j m) + (while (>= bnum 0) + (if (= (setq m (bindat--unpack-u8)) 0) + (setq bnum (- bnum 8)) + (setq j 128) + (while (> j 0) + (if (/= 0 (logand m j)) + (setq bits (cons bnum bits))) + (setq bnum (1- bnum) + j (lsh j -1))))) + bits)) + ((eq type 'str) + (let ((s (substring raw-data pos (+ pos len)))) + (setq pos (+ pos len)) + (if (stringp s) s + (string-make-unibyte (concat s))))) + ((eq type 'strz) + (let ((i 0) s) + (while (and (< i len) (/= (aref raw-data (+ pos i)) 0)) + (setq i (1+ i))) + (setq s (substring raw-data pos (+ pos i))) + (setq pos (+ pos len)) + (if (stringp s) s + (string-make-unibyte (concat s))))) + ((eq type 'vec) + (let ((v (make-vector len 0)) (i 0)) + (while (< i len) + (aset v i (bindat--unpack-u8)) + (setq i (1+ i))) + v)) + (t nil))) + +(defun bindat--unpack-group (spec) + (let (struct last) + (while spec + (let* ((item (car spec)) + (field (car item)) + (type (nth 1 item)) + (len (nth 2 item)) + (tail 3) + data) + (setq spec (cdr spec)) + (if (and (consp field) (eq (car field) 'eval)) + (setq field (eval (car (cdr field))))) + (if (and type (consp type) (eq (car type) 'eval)) + (setq type (eval (car (cdr type))))) + (if (and len (consp len) (eq (car len) 'eval)) + (setq len (eval (car (cdr len))))) + (if (memq field '(eval fill align struct union)) + (setq tail 2 + len type + type field + field nil)) + (if (and (consp len) (not (eq type 'eval))) + (setq len (apply 'bindat-get-field struct len))) + (if (not len) + (setq len 1)) + (cond + ((eq type 'eval) + (if field + (setq data (eval len)) + (eval len))) + ((eq type 'fill) + (setq pos (+ pos len))) + ((eq type 'align) + (while (/= (% pos len) 0) + (setq pos (1+ pos)))) + ((eq type 'struct) + (setq data (bindat--unpack-group (eval len)))) + ((eq type 'repeat) + (let ((index 0)) + (while (< index len) + (setq data (cons (bindat--unpack-group (nthcdr tail item)) data)) + (setq index (1+ index))) + (setq data (nreverse data)))) + ((eq type 'union) + (let ((tag len) (cases (nthcdr tail item)) case cc) + (while cases + (setq case (car cases) + cases (cdr cases) + cc (car case)) + (if (or (equal cc tag) (equal cc t) + (and (consp cc) (eval cc))) + (setq data (bindat--unpack-group (cdr case)) + cases nil))))) + (t + (setq data (bindat--unpack-item type len) + last data))) + (if data + (if field + (setq struct (cons (cons field data) struct)) + (setq struct (append data struct)))))) + struct)) + +(defun bindat-unpack (spec raw-data &optional pos) + "Return structured data according to SPEC for binary data in RAW-DATA. +RAW-DATA is a string or vector. Optional third arg POS specifies the +starting offset in RAW-DATA." + (unless pos (setq pos 0)) + (bindat--unpack-group spec)) + +(defun bindat-get-field (struct &rest field) + "In structured data STRUCT, return value of field named FIELD. +If multiple field names are specified, use the field names to +lookup nested sub-structures in STRUCT, corresponding to the +C-language syntax STRUCT.FIELD1.FIELD2.FIELD3... +An integer value in the field list is taken as an array index, +e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..." + (while (and struct field) + (setq struct (if (integerp (car field)) + (nth (car field) struct) + (let ((val (assq (car field) struct))) + (if (consp val) (cdr val))))) + (setq field (cdr field))) + struct) + + +;; Calculate raw-data length of structured data + +(defvar bindat--fixed-length-alist + '((u8 . 1) (byte . 1) + (u16 . 2) (u16r . 2) (word . 2) (short . 2) + (u24 . 3) (u24r . 3) + (u32 . 4) (u32r . 4) (dword . 4) (long . 4) + (ip . 4))) + +(defun bindat--length-group (struct spec) + (let (last) + (while spec + (let* ((item (car spec)) + (field (car item)) + (type (nth 1 item)) + (len (nth 2 item)) + (tail 3)) + (setq spec (cdr spec)) + (if (and (consp field) (eq (car field) 'eval)) + (setq field (eval (car (cdr field))))) + (if (and type (consp type) (eq (car type) 'eval)) + (setq type (eval (car (cdr type))))) + (if (and len (consp len) (eq (car len) 'eval)) + (setq len (eval (car (cdr len))))) + (if (memq field '(eval fill align struct union)) + (setq tail 2 + len type + type field + field nil)) + (if (and (consp len) (not (eq type 'eval))) + (setq len (apply 'bindat-get-field struct len))) + (if (not len) + (setq len 1)) + (cond + ((eq type 'eval) + (if field + (setq struct (cons (cons field (eval len)) struct)) + (eval len))) + ((eq type 'fill) + (setq pos (+ pos len))) + ((eq type 'align) + (while (/= (% pos len) 0) + (setq pos (1+ pos)))) + ((eq type 'struct) + (bindat--length-group + (if field (bindat-get-field struct field) struct) (eval len))) + ((eq type 'repeat) + (let ((index 0)) + (while (< index len) + (bindat--length-group (nth index (bindat-get-field struct field)) (nthcdr tail item)) + (setq index (1+ index))))) + ((eq type 'union) + (let ((tag len) (cases (nthcdr tail item)) case cc) + (while cases + (setq case (car cases) + cases (cdr cases) + cc (car case)) + (if (or (equal cc tag) (equal cc t) + (and (consp cc) (eval cc))) + (progn + (bindat--length-group struct (cdr case)) + (setq cases nil)))))) + (t + (if (setq type (assq type bindat--fixed-length-alist)) + (setq len (cdr type))) + (if field + (setq last (bindat-get-field struct field))) + (setq pos (+ pos len)))))))) + +(defun bindat-length (spec struct) + "Calculate raw-data length for STRUCT according to bindat specification SPEC." + (let ((pos 0)) + (bindat--length-group struct spec) + pos)) + + +;; Pack structured data into raw-data + +(defun bindat--pack-u8 (v) + (aset raw-data pos (logand v 255)) + (setq pos (1+ pos))) + +(defun bindat--pack-u16 (v) + (aset raw-data pos (logand (lsh v -8) 255)) + (aset raw-data (1+ pos) (logand v 255)) + (setq pos (+ pos 2))) + +(defun bindat--pack-u24 (v) + (bindat--pack-u8 (lsh v -16)) + (bindat--pack-u16 v)) + +(defun bindat--pack-u32 (v) + (bindat--pack-u16 (lsh v -16)) + (bindat--pack-u16 v)) + +(defun bindat--pack-u16r (v) + (aset raw-data (1+ pos) (logand (lsh v -8) 255)) + (aset raw-data pos (logand v 255)) + (setq pos (+ pos 2))) + +(defun bindat--pack-u24r (v) + (bindat--pack-u16r v) + (bindat--pack-u8 (lsh v -16))) + +(defun bindat--pack-u32r (v) + (bindat--pack-u16r v) + (bindat--pack-u16r (lsh v -16))) + +(defun bindat--pack-item (v type len) + (if (eq type 'ip) + (setq type 'vec len 4)) + (cond + ((null v) + (setq pos (+ pos len))) + ((memq type '(u8 byte)) + (bindat--pack-u8 v)) + ((memq type '(u16 word short)) + (bindat--pack-u16 v)) + ((eq type 'u24) + (bindat--pack-u24 v)) + ((memq type '(u32 dword long)) + (bindat--pack-u32 v)) + ((eq type 'u16r) + (bindat--pack-u16r v)) + ((eq type 'u24r) + (bindat--pack-u24r v)) + ((eq type 'u32r) + (bindat--pack-u32r v)) + ((eq type 'bits) + (let ((bnum (1- (* 8 len))) j m) + (while (>= bnum 0) + (setq m 0) + (if (null v) + (setq bnum (- bnum 8)) + (setq j 128) + (while (> j 0) + (if (memq bnum v) + (setq m (logior m j))) + (setq bnum (1- bnum) + j (lsh j -1)))) + (bindat--pack-u8 m)))) + ((memq type '(str strz vec)) + (let ((l (length v)) (i 0)) + (if (> l len) (setq l len)) + (while (< i l) + (aset raw-data (+ pos i) (aref v i)) + (setq i (1+ i))) + (setq pos (+ pos len)))) + (t + (setq pos (+ pos len))))) + +(defun bindat--pack-group (struct spec) + (let (last) + (while spec + (let* ((item (car spec)) + (field (car item)) + (type (nth 1 item)) + (len (nth 2 item)) + (tail 3)) + (setq spec (cdr spec)) + (if (and (consp field) (eq (car field) 'eval)) + (setq field (eval (car (cdr field))))) + (if (and type (consp type) (eq (car type) 'eval)) + (setq type (eval (car (cdr type))))) + (if (and len (consp len) (eq (car len) 'eval)) + (setq len (eval (car (cdr len))))) + (if (memq field '(eval fill align struct union)) + (setq tail 2 + len type + type field + field nil)) + (if (and (consp len) (not (eq type 'eval))) + (setq len (apply 'bindat-get-field struct len))) + (if (not len) + (setq len 1)) + (cond + ((eq type 'eval) + (if field + (setq struct (cons (cons field (eval len)) struct)) + (eval len))) + ((eq type 'fill) + (setq pos (+ pos len))) + ((eq type 'align) + (while (/= (% pos len) 0) + (setq pos (1+ pos)))) + ((eq type 'struct) + (bindat--pack-group + (if field (bindat-get-field struct field) struct) (eval len))) + ((eq type 'repeat) + (let ((index 0)) + (while (< index len) + (bindat--pack-group (nth index (bindat-get-field struct field)) (nthcdr tail item)) + (setq index (1+ index))))) + ((eq type 'union) + (let ((tag len) (cases (nthcdr tail item)) case cc) + (while cases + (setq case (car cases) + cases (cdr cases) + cc (car case)) + (if (or (equal cc tag) (equal cc t) + (and (consp cc) (eval cc))) + (progn + (bindat--pack-group struct (cdr case)) + (setq cases nil)))))) + (t + (setq last (bindat-get-field struct field)) + (bindat--pack-item last type len) + )))))) + +(defun bindat-pack (spec struct &optional raw-data pos) + "Return binary data packed accoring to SPEC for structured data STRUCT. +Optional third arg RAW-DATA is a pre-allocated string or vector to unpack into. +Optional fourth arg POS is the starting offset into RAW-DATA. +Note: The result is a multibyte string; use `string-make-unibyte' on it +to make it unibyte if necessary." + (let ((no-return raw-data)) + (unless pos (setq pos 0)) + (unless raw-data (setq raw-data (make-vector (+ pos (bindat-length spec struct)) 0))) + (bindat--pack-group struct spec) + (if no-return nil (concat raw-data)))) + + +;; Misc. format conversions + +(defun bindat-format-vector (vect fmt sep &optional len) + "Format vector VECT using element format FMT and separator SEP. +Result is a string with each element of VECT formatted using FMT and +separated by the string SEP. If optional fourth arg LEN is given, use +only that many elements from VECT." + (unless len + (setq len (length vect))) + (let ((i len) (fmt2 (concat sep fmt)) (s nil)) + (while (> i 0) + (setq i (1- i) + s (cons (format (if (= i 0) fmt fmt2) (aref vect i)) s))) + (apply 'concat s))) + +(defun bindat-vector-to-dec (vect &optional sep) + "Format vector VECT in decimal format separated by dots. +If optional second arg SEP is a string, use that as separator." + (bindat-format-vector vect "%d" (if (stringp sep) sep "."))) + +(defun bindat-vector-to-hex (vect &optional sep) + "Format vector VECT in hex format separated by dots. +If optional second arg SEP is a string, use that as separator." + (bindat-format-vector vect "%02x" (if (stringp sep) sep ":"))) + +(defun bindat-ip-to-string (ip) + "Format vector IP as an ip address in dotted notation." + (format "%d.%d.%d.%d" + (aref ip 0) (aref ip 1) (aref ip 2) (aref ip 3))) + +(provide 'bindat) + +;;; bindat.el ends here