#!/usr/bin/env python
# -*- coding: windows-1251 -*-
 
#  Copyright (C) 2005 Roman V. Kiseliov
#  All rights reserved.
# 
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions
#  are met:
# 
#  1. Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
# 
#  2. Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in
#     the documentation and/or other materials provided with the
#     distribution.
# 
#  3. All advertising materials mentioning features or use of this
#     software must display the following acknowledgment:
#     "This product includes software developed by
#      Roman V. Kiseliov <roman@kiseliov.ru>."
# 
#  4. Redistributions of any form whatsoever must retain the following
#     acknowledgment:
#     "This product includes software developed by
#      Roman V. Kiseliov <roman@kiseliov.ru>."
# 
#  THIS SOFTWARE IS PROVIDED BY Roman V. Kiseliov ``AS IS'' AND ANY
#  EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#  PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL Roman V. Kiseliov OR
#  ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
#  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
#  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
#  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
#  OF THE POSSIBILITY OF SUCH DAMAGE.
 
# Revision identifier of this script; the "$Id: ... $" form looks like a
# CVS/RCS keyword expansion (NOTE(review): confirm before editing by hand).
__rev_id__ = """$Id: biff-dumper.py,v 1.5 2005/10/26 07:44:24 rvk Exp $"""
 
 
import Analyzer
from pyExcelerator import *
import sys
from struct import unpack
 
 
def print_bin_data(data):
    """Write a hex dump of *data* to stdout, 16 bytes per row.

    The output starts with a newline, so it can follow a label the caller
    left on the current line.  Every byte is rendered as ``0xNN`` followed
    by a single space and every row ends with a newline.  Empty input
    produces the line ``<NO DATA>`` instead of any rows.

    data -- a byte string.  Sequences whose items are integers
            (``bytearray``, Python 3 ``bytes``) are accepted as well.
    """
    write = sys.stdout.write
    write('\n')
    if not data:
        write('<NO DATA>\n')
        return

    for start in range(0, len(data), 16):
        cells = []
        for item in data[start:start + 16]:
            # A str yields 1-character strings, a bytearray yields integers;
            # normalise both to the numeric byte value.
            if not isinstance(item, int):
                item = ord(item)
            cells.append('0x%02X ' % item)
        # One write per row instead of two writes per byte.
        write(''.join(cells) + '\n')
 
 
def print_ASCII_data(data):
    """Write *data* to stdout as text, 16 characters per row.

    The output starts with a newline, so it can follow a label the caller
    left on the current line.  Characters that sort below the space
    character are shown as ``.``; every other character is written
    unchanged.  Empty input produces the line ``<NO DATA>`` instead of
    any rows.

    data -- a byte string.  Sequences whose items are integers
            (``bytearray``, Python 3 ``bytes``) are accepted as well; each
            integer is converted with ``chr`` before it is examined.
    """
    write = sys.stdout.write
    write('\n')
    if not data:
        write('<NO DATA>\n')
        return

    for start in range(0, len(data), 16):
        chars = []
        for item in data[start:start + 16]:
            # A str yields 1-character strings, a bytearray yields integers;
            # normalise both to a 1-character string.
            if isinstance(item, int):
                item = chr(item)
            if item < ' ':
                chars.append('.')
            else:
                chars.append(item)
        # One write per row instead of one write per character.
        write(''.join(chars) + '\n')
 
 
def main():
    """Dump the records of the workbook stream of the file in ``sys.argv[1]``.

    The file is opened with ``CompoundDoc.Reader`` and the stream named
    'Workbook' (or, failing that, 'Book') is walked record by record.  For
    every record the stream position, record id and data size are printed.
    Records listed in ``Analyzer.all_records`` additionally get a hex dump,
    a character dump and the output of their analyzer function; all other
    records are reported as unknown.

    A stream that ends in the middle of a record header is reported and
    ends the dump; record data shorter than the declared size is reported
    and then processed as far as it goes.

    Exits with status 1 when no file name is given.  Raises ``Exception``
    when the file contains neither a 'Workbook' nor a 'Book' stream.
    """
    if len(sys.argv) < 2:
        print('no input files.')
        sys.exit(1)

    # Inside, an MS Office document looks like a file system.
    # We need to extract the stream named 'Workbook' or 'Book'.
    ole_streams = CompoundDoc.Reader(sys.argv[1], True).STREAMS

    if 'Workbook' in ole_streams:
        workbook_stream = ole_streams['Workbook']
    elif 'Book' in ole_streams:
        workbook_stream = ole_streams['Book']
    else:
        raise Exception('No workbook stream in file.')

    wb_bin_data_len = len(workbook_stream)
    stream_pos = 0

    print('workbook stream size 0x%X bytes ' % wb_bin_data_len)

    # Excel stores its data as "TLV" (Type, Length, Value) records.
    # In addition, when a record grows beyond some size limit,
    # Excel writes CONTINUE records.
    #
    # Every BOUNDSHEET record raises the number of EOF records we expect to
    # see; the walk stops once more EOFs than BOUNDSHEETs have been read
    # (presumably one EOF for the workbook globals plus one per sheet --
    # TODO confirm against the file format description).
    ws_num = 0
    EOFs = 0
    while stream_pos < wb_bin_data_len and EOFs <= ws_num:
        print('stream position: 0x%08X' % stream_pos)

        # The record header is 4 bytes: two little-endian unsigned shorts.
        header = workbook_stream[stream_pos:stream_pos + 4]
        if len(header) < 4:
            # unpack() would raise struct.error on a short header.
            print('<TRUNCATED RECORD HEADER>: only 0x%X byte(s) left'
                  % len(header))
            break
        rec_id, data_size = unpack('<2H', header)
        print('rec id: 0x%04X' % rec_id)
        print('rec data size: 0x%04X' % data_size)
        stream_pos += 4

        rec_data = workbook_stream[stream_pos:stream_pos + data_size]
        if len(rec_data) < data_size:
            # Slicing silently shortens the data at the end of the stream.
            print('<TRUNCATED RECORD DATA>: 0x%04X of 0x%04X bytes present'
                  % (len(rec_data), data_size))
        stream_pos += data_size

        if rec_id == 0x000A:    # EOF
            EOFs += 1
        elif rec_id == 0x0085:  # BOUNDSHEET
            ws_num += 1

        if rec_id in Analyzer.all_records:
            rec_name, analyzer_func = Analyzer.all_records[rec_id]

            print('rec name: %s' % (rec_name,))
            # The dump helpers start their output with a newline, so the
            # labels are written without one.
            sys.stdout.write('rec data:')
            print_bin_data(rec_data)
            sys.stdout.write('ASCII data:')
            print_ASCII_data(rec_data)
            print('analyzing...')
            analyzer_func(rec_data)
        else:
            print('<UNKNOWN RECORD>: rec_id == 0x%04X, size 0x%04X bytes' % (rec_id, data_size))
        print('---------------')
 
# Run the dump only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()