# -*- coding: utf-8 -*-
# This module (which must have the name queryfunc.py) is responsible
# for converting incoming queries to a database query understood by
# this particular node's database schema.
# This module must contain a function setupResults, taking a sql object
# as its only argument. 
# library imports 
import sys
from django.conf import settings
from vamdctap.sqlparse import *
from vamdctap.sqlparse import sql2Q
from math import sqrt
import dictionaries
import models # this imports models.py from the same directory as this file
from django.db.models import Q
import re
from inchivalidation import inchikey2chemicalformula
import chemlib
#in order to deal with the Last Modified header
from email.Utils import formatdate
import time
import datetime
def LOG(s, logfile='/var/log/vamdc_josi.log'):
    """Append the string form of *s* as a single line to the log file.

    Debugging aid; will be removed for the final version.

    Parameters:
        s: any object; it is converted with str() before being written.
        logfile: path of the file to append to. Defaults to the log file
            this function has always written to.
    """
    # the with-block closes the handle on every call instead of leaking it
    with open(logfile, 'a') as logfilejosi:
        logfilejosi.write(str(s) + '\n')
#generic empty class
class GenericClass:
    """Empty placeholder class.

    It defines nothing itself; attributes are attached to its instances by
    the code that creates them.
    """
    pass
#create class for datasets
class DataSet:
    """Build the tabulated sub-object of a data set from its x and y values.

    After construction, ``SourceRef`` holds the given source reference and
    ``TabData`` is a list with one tabulated-data object describing the
    x/y series (units, accuracy information, data lists and y errors).
    """

    def __init__(self, sourceref, xs, ys, productiondate, y_units):
        """Create the data set.

        Parameters:
            sourceref: reference to the source; stored on the data set and
                on the x axis.
            xs: sequence of numeric x values (must support len()).
            ys: sequence of numeric y values (must support len()).
            productiondate: stored unchanged as the production date.
            y_units: stored unchanged as the unit of the y axis.
        """
        # put reference to source first, so we always know what it is
        self.SourceRef = sourceref

        tabdata = GenericClass()
        tabdata.Xunits = 'eV'
        tabdata.Yunits = y_units
        tabdata.ProductionDate = productiondate

        tabdata.X = GenericClass()
        tabdata.Y = GenericClass()
        tabdata.X.AccuracyType = 'estimated'
        tabdata.Y.AccuracyType = 'statistical'
        tabdata.X.Relative = 'false'
        tabdata.Y.Relative = 'false'
        tabdata.X.ErrorValue = 0.1
        tabdata.X.SourceRef = sourceref

        # The data lists are stored as strings, not floats; according to the
        # original author the standard wants it this way. The caller is
        # expected to have converted the raw values to numbers already.
        # Comprehensions (not map) so these are real lists on Python 2 and 3.
        tabdata.X.DataList = [str(value) for value in xs]
        tabdata.Y.DataList = [str(value) for value in ys]
        tabdata.Xlength = len(xs)
        tabdata.Ylength = len(ys)

        # y errors are sqrt(|y|) with two decimals; abs() is needed because
        # the data can contain negative values
        tabdata.Y.ErrorList = [
            "%.2f" % round(sqrt(abs(value)), 2) for value in ys
        ]

        # store the finished object; without this TabData would stay empty
        self.TabData = [tabdata]
# create electron statically, as it is always involved and always the same
class Particle:
    """Simple attribute holder for a particle. Only electrons are handled so far."""

    def __init__(self, type):
        # any other particle type leaves the instance without attributes
        if not type == 'electron':
            return
        self.speciesid = 'electron'
        self.name = 'electron'
        self.charge = -1
        self.comment = 'low energy electrons'
# Main function 
def setupResults(sql, limit=1000):
    """Run the incoming query against this node's database and collect results.

    This function is always called by the node software.

    Parameters:
        sql: the query object handed over by the node software. It is passed
            to sql2Q(); its string form and its ``request['QUERY']`` entry
            are used for the InChIKey fallback search.
        limit: accepted for interface compatibility; not used in this
            function.

    Returns:
        A dict with the energy scans under 'CollTrans' plus the related
        sources, atoms, molecules, particles and header information when at
        least one energy scan matched; an empty dict otherwise.
    """
    # query-wide result lists; they are extended per energy scan below and
    # kept free of duplicates
    molecules = []
    atoms = []
    sources = []
    particles = []
    electron_particle = Particle('electron')
    inchiconvertedsearch = False

    # start the last-modified value at an old date; every scan's timestamp is
    # compared against it and the most recent one wins
    lastmodifiedheader = datetime.datetime(1970, 1, 1, 1, 1)

    # use the function sql2Q provided by vamdctap to translate the query
    # into a Q-object, then build the queryset and count the matches
    q = sql2Q(sql)
    energyscans = models.Energyscan.objects.filter(q)
    nenergyscans = energyscans.count()

    # In case somebody searched for an InChIKey and got no results: convert
    # each InChIKey to its chemical formula and search again by formula.
    if nenergyscans == 0 and re.search('InchiKey', str(sql)) is not None:
        strsql = str(sql)
        match = re.findall('[A-Z]{14}-[A-Z]{10}-[A-Z]', strsql)
        for matchitem in match:
            chemical_formula = inchikey2chemicalformula(matchitem)
            if chemical_formula is not None:
                strsql = strsql.replace(matchitem, chemical_formula)

        # re.findall returns a list (never None), so test for emptiness
        if match:
            strsql = strsql.replace('InchiKey', 'MoleculeStoichiometricFormula')
            # inject the rewritten query into the request and parse it again
            # NOTE(review): validate() and the renewed sql2Q() call follow the
            # original comment ("call validate() to parse the SQL"); without
            # them the retry would reuse the old Q-object. Confirm that the
            # query object provides validate().
            sql.request['QUERY'] = strsql
            sql.validate()
            q = sql2Q(sql)

            # try again as usual
            energyscans = models.Energyscan.objects.filter(q)
            nenergyscans = energyscans.count()
            inchiconvertedsearch = True

    # the electron is always involved, so list it whenever there are results
    if nenergyscans != 0:
        particles.append(electron_particle)

    for energyscan in energyscans:
        # keep the newest lastmodified timestamp seen so far
        if energyscan.lastmodified > lastmodifiedheader:
            lastmodifiedheader = energyscan.lastmodified

        # Our reactants are always molecules; only the product may be an
        # atom. The querysets are turned into lists so that the attributes
        # set below stay on the very objects that are returned.
        if energyscan.species.molecule:
            molecules_internal = list(models.Species.objects.filter(
                Q(id__exact=energyscan.species.id)
                | Q(id__exact=energyscan.origin_species.id)))
            atoms_internal = []
        else:
            atoms_internal = list(models.Species.objects.filter(
                Q(id__exact=energyscan.species.id)))
            molecules_internal = list(models.Species.objects.filter(
                Q(id__exact=energyscan.origin_species.id)))

        energyscan.Products = models.Species.objects.filter(
            id__exact=energyscan.species.id)

        # The reactants are the origin species plus an otherwise empty
        # species object with the ID 'electron'. The matching particle is
        # the electron_particle created at the top of this function.
        electron = models.Species('electron', '', '', '', '')
        energyscan.Reactants = list(models.Species.objects.filter(
            id__exact=energyscan.origin_species.id))
        energyscan.Reactants.append(electron)

        # make products negative, everything else neutral
        for product in energyscan.Products:
            for molecule in molecules_internal:
                if product.id == molecule.id:
                    molecule.ioncharge = -1
                else:
                    molecule.ioncharge = 0
            for atom in atoms_internal:
                if product.id == atom.id:
                    atom.ioncharge = -1
                else:
                    atom.ioncharge = 0

        # calculate exact masses for isotope species
        # NOTE(review): the original comment mentions "exact / nominal
        # masses"; only the exact-mass case is visible here - confirm whether
        # a nominal-mass branch is missing.
        for atom in atoms_internal:
            # test the atom itself, not a leftover molecule loop variable
            if atom.isotope is True:
                atom.exactmass = chemlib.chemicalformula2exactmass(atom.chemical_formula)
        for molecule in molecules_internal:
            if molecule.isotope is True:
                molecule.mass = chemlib.chemicalformula2exactmass(molecule.chemical_formula)

        # treat sources: attach the author names as "lastname, firstname"
        sources_internal = list(models.Source.objects.filter(
            id__exact=energyscan.source.id))
        for source in sources_internal:
            source.author = [
                u'%s, %s' % (author.lastname, author.firstname)
                for author in source.authors.all()
            ]

        # insert the standard comment in addition to a possibly existing
        # user-specific comment
        standardcomment = 'X-Values are measured with an energy resolution of %s eV. Therefore every shown peak is the original peak shape convoluted with our resolution. Energy scans are calibrated. Therefore we estimate an error of 0.1 eV' % energyscan.energyresolution
        if energyscan.comment != '':
            usercomment = energyscan.comment
            energyscan.comment = 'Comment of the Producer: ' + usercomment + ' Additional Comment: ' + standardcomment
        else:
            energyscan.comment = standardcomment

        # warn when an InChIKey was converted to a chemical formula
        if inchiconvertedsearch:
            inchiwarning = 'WARNING: For this query, an InChI-Key was converted to a stoichiometric formula, because otherwise no results were obtained. '
            energyscan.comment = inchiwarning + energyscan.comment

        # prepare the origin data: whitespace-separated numbers, possibly
        # with a decimal comma; even positions are x values, odd ones y values
        values = [
            float(datapoint.replace(',', '.'))
            for datapoint in energyscan.energyscan_data.split()
        ]
        x = values[0::2]
        y = values[1::2]
        if len(x) != len(y):
            LOG('WARNING - number of x and y values is not equal')

        # create datasets
        dataset = DataSet(energyscan.source.id, x, y, energyscan.productiondate, energyscan.y_units)
        dataset.description = 'crossSection'
        dataset.accuracytype = 'systematic'
        energyscan.DataSets = [dataset]

        # merge this scan's molecules, atoms and sources into the query-wide
        # lists without adding duplicates
        molecules = molecules + list(set(molecules_internal) - set(molecules))
        atoms = atoms + list(set(atoms_internal) - set(atoms))
        sources = sources + list(set(sources_internal) - set(sources))

    # count species and sources for the header information
    nsources = len(sources)
    nmolecules = len(molecules)
    natoms = len(atoms)
    nspecies = natoms + nmolecules

    # the last-modified value must not be newer than now
    now = datetime.datetime.now()
    if lastmodifiedheader > now:
        lastmodifiedheader = now

    # Create the header with some useful info. The key names here are
    # standardized and shouldn't be changed.
    # NOTE(review): the key list is reconstructed from the counters computed
    # above and the VAMDC header naming scheme - confirm against the node
    # software documentation.
    headerinfo = {
        'COUNT-SOURCES': nsources,
        'COUNT-SPECIES': nspecies,
        'COUNT-ATOMS': natoms,
        'COUNT-MOLECULES': nmolecules,
        'COUNT-COLLISIONS': nenergyscans,
        'LAST-MODIFIED': lastmodifiedheader,
    }

    # Return the data if it is not empty. The key names are standardized.
    # NOTE(review): all keys except 'CollTrans' are reconstructed - confirm.
    if nenergyscans > 0:
        return {
            'CollTrans': energyscans,
            'Sources': sources,
            'Atoms': atoms,
            'Molecules': molecules,
            'Particles': particles,
            'HeaderInfo': headerinfo,
        }
    return {}