#!/usr/bin/env python2.3
# -*- coding: utf-8 -*-
# Copyright 2010 British Broadcasting Corporation and Kamaelia Contributors(1)
# (1) Kamaelia Contributors are listed in the AUTHORS file and at
#     http://www.kamaelia.org/AUTHORS - please extend this file,
#     not this notice.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------
Mime Request Component.
Takes a request of the form:
Key: value
Key: value
Content-Length: value
Key: value
>>blank line<<
And converts it into a python object that contains:
   requestMethod : string
   url : string
   Protocol : string
   Protocol Version : string (not parsed into a number)
   KeyValues : dict
   body : raw data
Has a default inbox, and a default outbox. Requests data comes in the
inbox. MimeRequest objects come out the outbox.
from Axon.Component import component, scheduler,linkage,newComponent
from Axon.Ipc import errorInformation
from Kamaelia.File.ReadFileAdaptor import ReadFileAdaptor
from Kamaelia.Support.Data.MimeObject import mimeObject
import Kamaelia.Support.Data.requestLine
class MimeRequestComponent(component):
   """Component that accepts raw data, parses it into consituent
   parts of a MIME request. Attempts no interpretation of the request
   def __init__(self):
      super(MimeRequestComponent, self).__init__()
      self.header = {}
      self.requestLineRead = 0
      self.currentLineRead = 0
      self.seenEndHeader = 0
      self.currentLine = ''
      self.currentBytes = ''
      self.requestLine = ''
      self.stillReading = 1
      self.needData = 0
      self.gotRequest = 0
      self.body = ''
      self.step = 0
   def initialiseComponent(self): pass
   def nextLine(self):
      if self.dataReady("inbox"):
         theData = self.recv("inbox")
            self.currentBytes = self.currentBytes + theData
         except TypeError: # theData isn't a string/stringable object, junk
            return (0,"")
         [newline,self.currentBytes] = self.currentBytes.split("\n",1) # This is independent, unless line is null!
         if newline !="" and newline[-1] == "\r":
            newline = newline[0:-1]
         got = 1
      except ValueError: # Still waiting for enough data
         return (0,"")
      return (got, newline)
   # This code structure would be a lot cleaner in a single threaded environment,
   # or if "yield" could be nested. Unfortunately "yield" can't be nested and we're
   # not single threaded. It's not _too_ bad though.
   def getRequestLine(self):
      "Sets the *REQUEST* line arguments"
      (self.requestLineRead,self.requestLine) = self.nextLine()
   def getALine(self):
      "Sets the *CURRENT* line arguments"
      (self.currentLineRead, self.currentLine) = self.nextLine()
   def readHeader(self):
         [ origkey,value ] = self.currentLine.split(": ",1)
         key = origkey.lower()
         if not ( key in self.header ):
            self.header[key] = [origkey, value]
            self.header[key][1] = self.header[key][1]+ ", " + value
         self.currentLineRead = 0 # We've processed the line, and therefore don't have a current line
         self.currentLine = ""    # So we also set the current line to ""
   def getData(self):
      if self.dataReady("inbox"):
         theData = self.recv("inbox")
         self.currentBytes = self.currentBytes + theData
         self.needData = 0
   def checkEndOfHeader(self):
      if self.currentLine == '' and not self.seenEndHeader:
         self.seenEndHeader = 1
            self.headerlength = int(self.header['content-length'][1])
         except KeyError:
            self.headerlength = 0
            self.needData = 0
   def handleDataAquisition(self):
      """This is currently clunky and effectively implements a state machine.
      Should consider rewriting as a generator"""
      if not self.requestLineRead:
         return 1
      if not self.currentLineRead:
         self.getALine()      # Make sure we have a line(headers)
         return 2
      # This section collates the body if we've reached the end of the header
      # Exit condition is when we've read the content length specified in the header
      # The exit value is a None value.
      if not self.seenEndHeader: 	# !!!! We're in a loop, hence "if" rather than "while"
         return 3
      self.body =self.body + self.currentBytes
      if not (len(self.body) >= self.headerlength):
         if self.needData:
            self.getData()       # Grab raw, unparsed data. (body)
            return 4
            self.currentBytes = ""
            self.needData = 1
            return 5
   def mainBody(self):
      # This is running inside a while() loop remember.
      # Optimisation would be reassignment of self.mainBody when we skip through
      # the different phases of execution.
      if not self.gotRequest:
         if self.handleDataAquisition():
            return 1
      self.gotRequest = 1
         self.request = Kamaelia.Support.Data.requestLine.requestLine(self.requestLine)
      except Kamaelia.Support.Data.requestLine.BadRequest:
         br = sys.exc_info()[1]
         errinf = errorInformation(self, br)
         self.send(errinf, "signal")
         return 0
      assert self.debugger.note("MimeRequestComponent.mainBody",5, self.request,"\n")
      assert self.debugger.note("MimeRequestComponent.mainBody",10, "HEADER  \t:", self.header)
      assert self.debugger.note("MimeRequestComponent.mainBody",10, "BODY    \t:", self.body)
      self.mimeRequest = mimeObject(self.header, self.body, self.request)
      assert self.debugger.note("MimeRequestComponent.mainBody",5, self.mimeRequest)
      self.send(self.mimeRequest, "outbox")
__kamaelia_components__  = ( MimeRequestComponent, )
if __name__ =="__main__":
   class TestHarness(component):
      def main(self):
         reader = ReadFileAdaptor(filename="Support/SampleMIMERequest.txt")
         decoder = MimeRequestComponent()
         self.link((reader,"outbox"), (decoder, "inbox"))
         self.link((decoder, "outbox"), (self, "inbox"))
         yield newComponent(*(self.children))
         while 1:
            if self.dataReady("inbox"):
               message = self.recv("inbox")
               print ("MIME decoded:", repr(message))
            yield 1
   # We use nextLine reads into our buffer enough to extract a line, and
   # extracts it returning the line & the buffer
   # We store the header key/value fields here in 'header'
   # currentBytes forms an input buffer that we extract lines from.
   # * We then grab the first line, which *SHOULD* be the request line.
   # * We then grab the next line after that - this will either be empty,
   #   ending the request header, or be a request header line.
   # Assuming we're still processing the header, we then start looping, the
   # exit condition is we hit the end of the header.
   # Then for each line, we split it into key & value fields, use these to populate
   # the 'header' dict. If we have a collision, values are comma separated.
   # (comma is considered a special char by RFC822/2822 in headers, which
   # HTTP/RTSP are at least partially based on and tend to follow by convention)
   # Finally any remaining request data is mopped up into a body value.