'''
Represents the interface between the user and the location, addition,
and querying of documents to and from the database.
'''
import sys
import datetime
#from listers import *
#from fetchers import *
#from parsers import *
import listers
import fetchers
import parsers
import db
class CorpusController(object):
  '''Base controller that ties listers, fetchers, and parsers to a database.

  Subclasses are expected to provide real implementations of batch() and
  target(); here they are placeholders.
  '''

  def __init__(self, db=None):
    '''Store the database handle used by this controller.

    db: Database object to use. When None, a new ``db.DB()`` instance is
      created from the project's ``db`` module.
    '''
    if db is None:
      # The ``db`` parameter shadows the imported ``db`` module inside this
      # method, so ``db.DB()`` would be evaluated on None. Re-import the
      # module under a different local name to reach its DB class.
      import db as db_module
      db = db_module.DB()
    self.db = db

  # obtains, constructs, and adds a batch of documents from a source
  def batch(self, source):
    '''Placeholder: retrieve and add a batch of documents from a source.'''
    pass #TODO

  # obtains, constructs, and adds a target document from a source
  def target(self, article):
    '''Placeholder: retrieve and add a single target document.'''
    pass #TODO

  def hybrid(self, source):
    '''Fetch and parse articles listed by a source but absent from the DB.

    source: Name prefix used to look up the ``<source>Lister``,
      ``<source>Fetcher`` and ``<source>Parser`` classes in the ``listers``,
      ``fetchers`` and ``parsers`` modules respectively.

    Prints the number of pmids found in the source, found in the database,
    and left to process. Parsed results are not yet stored (see TODO below).
    '''
    # Resolve the utility classes from the modules this file imports; the
    # classes are no longer star-imported into the current module, so looking
    # them up on ``sys.modules[__name__]`` cannot find them.
    lister = getattr(listers, '%sLister' % (source,))
    fetcher = getattr(fetchers, '%sFetcher' % (source,))
    parser = getattr(parsers, '%sParser' % (source,))

    source_pmids = lister().get_article_list()

    # NOTE(review): ``db`` here is the module-level name (the imported ``db``
    # module), not ``self.db``. Whether that module exposes ``citations`` is
    # not visible from this file -- confirm which handle is intended.
    db_pmids = [
      entry['pmid']
      for entry in db.citations.documents.find({}, {'pmid' : 1})
    ]

    # Only pmids the source lists and the database lacks need fetching
    pmids_to_fetch = list(set(source_pmids) - set(db_pmids))

    # Single-argument parenthesised form prints identically on Python 2 and 3
    print('%s pmids from source...' % (len(source_pmids)))
    print('%s pmids from db...' % (len(db_pmids)))
    print('%s pmids to parse...' % (len(pmids_to_fetch)))

    for pmid in pmids_to_fetch:
      doc = fetcher(pmid).fetch()
      refs = parser(doc).parse_head_ref()
      # TODO: send parsed references (refs) to the database
# CorpusController designed for the OAI database
class OAIController(CorpusController):
  '''CorpusController specialised for batch retrieval from the OAI database.'''

  # Default start date for batch retrieval
  DATE_FROM = datetime.date(2010, 1, 1)

  # Default time window for batch retrieval
  DATE_INCR = datetime.timedelta(days=1)

  def batch(self, date_from=None, date_incr=DATE_INCR, **kwargs):
    '''Retrieve, parse, and push a batch of documents.

    Args:
      date_from (datetime.date): Start date. When falsy, the start date is
        derived from the last 'oai' date range recorded in the database,
        falling back to DATE_FROM when no range is available.
      date_incr (datetime.timedelta): Time window added to the start date to
        get the end date. When falsy, the end date is None.
      kwargs: Optional OAI parameters, forwarded to the fetcher.
    '''
    #TODO: save kwargs to query metadata

    # Get start date
    if not date_from:
      try:
        last_range = self.db.last_date_range('oai')[0]
      except IndexError:
        # Presumably no date range has been recorded yet; start from the
        # class default -- TODO confirm last_date_range() returns an empty
        # sequence in that case.
        date_from = self.DATE_FROM
      else:
        # NOTE(review): the right-hand operand of this addition was missing
        # from the file (dangling line continuation). One day is assumed,
        # i.e. resume on the day after the last recorded 'until' -- confirm.
        date_from = last_range['until'] + datetime.timedelta(days=1)

    # Get end date
    if date_incr:
      date_until = date_from + date_incr
    else:
      date_until = None

    # Fetch batch of articles
    fetcher = fetchers.OAIFetcher()
    doc_batches = fetcher.fetch_batch(date_from, date_until, **kwargs)

    # Initialize parser
    parser = parsers.OAIParser()

    # Loop over batches of articles
    for doc_batch in doc_batches:
      # Loop over articles in batch
      for doc in doc_batch:
        # Parse document
        parsed_docs = parser.parse_document(doc)
        # Send parsed articles to DB (distinct name avoids shadowing ``doc``)
        for parsed_doc in parsed_docs:
          self._store_document(parsed_doc)

    # Update date range only after every document has been handled
    self.db.add_date_range('oai', date_from, date_until)

  def _store_document(self, parsed_doc):
    '''Send one parsed document to the database.

    NOTE(review): the statement that performed this write was missing from
    the loop in batch(), and the DB method it called is not visible in this
    file. This hook fails loudly instead of silently discarding documents
    and then recording the date range as retrieved. Replace the body with
    the appropriate ``self.db`` call.
    '''
    raise NotImplementedError(
      'OAIController._store_document: database write is not implemented')
# CorpusController designed for the PLOS database
class PLOSController(CorpusController):
    '''Placeholder controller for the PLOS corpus; adds nothing to its base yet.'''
    #@TODO: implement