#!/usr/bin/env python # -*- coding: windows-1251 -*- # Copyright (C) 2005 Roman V. Kiseliov # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgment: # "This product includes software developed by # Roman V. Kiseliov <roman@kiseliov.ru>." # # 4. Redistributions of any form whatsoever must retain the following # acknowledgment: # "This product includes software developed by # Roman V. Kiseliov <roman@kiseliov.ru>." # # THIS SOFTWARE IS PROVIDED BY Roman V. Kiseliov ``AS IS'' AND ANY # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Roman V. Kiseliov OR # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED # OF THE POSSIBILITY OF SUCH DAMAGE. 
"""Dump every BIFF record of an Excel workbook stream to stdout.

Usage: biff-dumper.py <file.xls>

For each record the script prints its stream offset, record id and data
size, and, when the id is listed in Analyzer.all_records, a hex dump, a
character dump and the output of the matching analyzer function.
"""

__rev_id__ = """$Id: biff-dumper.py,v 1.5 2005/10/26 07:44:24 rvk Exp $"""


import Analyzer
from pyExcelerator import *
# Kept after the wildcard import, exactly as in the original ordering, so
# that these two bindings win over anything the wildcard may have exported.
import sys
from struct import unpack

# NOTE: print is always used with exactly ONE parenthesised argument, so the
# print statement emits just that string followed by a newline.  Output that
# must not end with a newline goes through sys.stdout.write().

_BYTES_PER_ROW = 16       # bytes shown on one line of a dump
_HEADER_SIZE = 4          # record id (2 bytes) + data size (2 bytes)
_REC_EOF = 0x000A
_REC_BOUNDSHEET = 0x0085


def print_bin_data(data):
    """Write *data* to stdout as hex bytes, sixteen per line.

    A newline is written first (it terminates whatever label the caller left
    on the current line).  Every byte is rendered as ``0xNN`` followed by a
    single space.  For empty input the line ``<NO DATA>`` is printed instead.
    """
    print('')
    if not data:
        print('<NO DATA>')
        return
    for start in range(0, len(data), _BYTES_PER_ROW):
        row = data[start:start + _BYTES_PER_ROW]
        sys.stdout.write(''.join(['0x%02X ' % ord(ch) for ch in row]))
        print('')


def print_ASCII_data(data):
    """Write *data* to stdout as characters, sixteen per line.

    A newline is written first.  Characters that sort below the space
    character are shown as ``.``; every other character, including those at
    or above 0x7F, is written through unchanged.  For empty input the line
    ``<NO DATA>`` is printed instead.
    """
    print('')
    if not data:
        print('<NO DATA>')
        return
    for start in range(0, len(data), _BYTES_PER_ROW):
        shown = []
        for ch in data[start:start + _BYTES_PER_ROW]:
            if ch < ' ':
                ch = '.'
            shown.append(ch)
        sys.stdout.write(''.join(shown))
        print('')


def _find_workbook_stream(ole_streams):
    """Return the 'Workbook' stream, or else the 'Book' stream.

    Raises Exception when *ole_streams* contains neither name.
    """
    for stream_name in ('Workbook', 'Book'):
        if stream_name in ole_streams:
            return ole_streams[stream_name]
    raise Exception('No workbook stream in file.')


def _dump_records(workbook_stream):
    """Walk *workbook_stream* record by record and print each one.

    The stream is a sequence of records, each a 4-byte header (two
    little-endian unsigned 16-bit values: record id, data size) followed by
    that many data bytes.  As the original author notes, this is plain TLV
    (Type, Length, Value) storage, and Excel writes CONTINUE records once a
    record grows past some size limit.
    """
    stream_size = len(workbook_stream)
    print('workbook stream size 0x%X bytes ' % stream_size)

    # Dumping goes on until the EOF records seen outnumber the BOUNDSHEET
    # records seen (presumably one EOF ends the workbook globals and one
    # ends each sheet -- confirm against the file format documentation).
    ws_num = 0
    EOFs = 0
    stream_pos = 0
    while stream_pos < stream_size and EOFs <= ws_num:
        print('stream position: 0x%08X' % stream_pos)
        header = workbook_stream[stream_pos:stream_pos + _HEADER_SIZE]
        if len(header) < _HEADER_SIZE:
            # unpack() would raise struct.error on a short header; report
            # the damaged tail and stop instead of crashing.
            print('<TRUNCATED RECORD HEADER>: %d byte(s) left in stream'
                  % len(header))
            break
        rec_id, data_size = unpack('<2H', header)
        print('rec id: 0x%04X' % rec_id)
        print('rec data size: 0x%04X' % data_size)
        stream_pos += _HEADER_SIZE

        rec_data = workbook_stream[stream_pos:stream_pos + data_size]
        stream_pos += data_size
        if len(rec_data) < data_size:
            # The slice silently stops at the end of the stream; say so
            # before the short data is dumped and analysed.
            print('<TRUNCATED RECORD DATA>: only 0x%04X of 0x%04X bytes present'
                  % (len(rec_data), data_size))

        if rec_id == _REC_EOF:
            EOFs += 1
        elif rec_id == _REC_BOUNDSHEET:
            ws_num += 1

        if rec_id in Analyzer.all_records:
            rec_name, analyzer_func = Analyzer.all_records[rec_id]
            print('rec name: %s' % (rec_name,))
            # The dump helpers start with a newline, which ends these labels.
            sys.stdout.write('rec data:')
            print_bin_data(rec_data)
            sys.stdout.write('ASCII data:')
            print_ASCII_data(rec_data)
            print('analyzing...')
            analyzer_func(rec_data)
        else:
            print('<UNKNOWN RECORD>: rec_id == 0x%04X, size 0x%04X bytes'
                  % (rec_id, data_size))
        print('---------------')


def main():
    """Dump the workbook stream of the file named by sys.argv[1].

    Prints 'no input files.' and exits with status 1 when no file name was
    given.  Raises Exception when the file has no 'Workbook' or 'Book'
    stream.
    """
    if len(sys.argv) < 2:
        print('no input files.')
        sys.exit(1)

    # Inside, an MS Office document looks like a small filesystem; the
    # records live in the stream named 'Workbook' or 'Book'.
    ole_streams = CompoundDoc.Reader(sys.argv[1], True).STREAMS
    _dump_records(_find_workbook_stream(ole_streams))


if __name__ == '__main__':
    main()