# This file is: Copyright (c) 2011-2012 by Frank Vanden berghen. All rights reserved.
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET
import base64
import array
import operator
from math import isnan

class Model:
    """Scoring model read from an XML export and applied row by row to a DataFrame.

    The XML file must contain a ``UnivModels`` element (one ``Model`` child
    per input column) and a ``RegressionModel`` element holding base64 encoded
    ``betas`` and ``indexes``.  ``MetaData/Target`` and ``ProbabilityMapping``
    are optional.

    Attributes set by the constructor and read by :meth:`apply`:

    * ``targetCol`` -- prefix of the two output column names ("" if no target).
    * ``betas`` -- regression coefficients; the last entry is the intercept.
    * ``modelColumns`` -- input column name of each univariate model.
    * ``l`` -- 1 for a binned (continuous) recoding, 0 for a value mapping.
    * ``vxl`` / ``vyl`` -- bin bounds and recoded values of binned models.
      ``vyl[i]`` is laid out as ``[missing, below range, bin values...,
      above range]``.
    * ``dictList`` / ``vyListM`` -- value -> recoded value dictionary and
      fallback value of mapping models.
    * ``vx2`` / ``vyp`` / ``vym`` -- score -> probability mapping (see
      ``_build_probability_mapping``); ``vx2`` is empty when the file has no
      usable mapping.
    """

    def __init__(self, filePath):
        """Load the model stored in the XML file *filePath*.

        Raises whatever ``xml.etree.ElementTree.parse`` raises for an
        unreadable file, and ``AttributeError`` when the mandatory
        ``UnivModels`` or ``RegressionModel`` parts are missing.
        """
        root = ET.parse(filePath).getroot()

        # Output column prefix: "<target>_" when MetaData/Target has text.
        meta = root.find('MetaData')
        target = meta.find('Target') if meta is not None else None
        if target is not None and target.text is not None:
            self.targetCol = processString(target.text) + "_"
        else:
            self.targetCol = ""

        records = root.find("UnivModels").findall("Model")
        # A model id is its 'name' attribute without the first character.
        model_ids = [int(rec.get('name')[1:]) for rec in records]

        self.betas = self._parse_betas(root.find('RegressionModel'), model_ids)

        count = len(model_ids)
        self.modelColumns = np.empty(count, dtype=object)
        self.l = np.empty(count, dtype=np.int8)
        self.vyListM = np.zeros(count)
        self.vyl = np.empty(count, dtype=object)
        self.vxl = np.empty(count, dtype=object)
        self.dictList = np.empty(count, dtype=object)

        for i, rec in enumerate(records):
            self.modelColumns[i] = rec.get('field')
            if rec.get('type') == "MapValues":
                self.dictList[i], self.vyListM[i] = self._parse_map_values(rec)
                self.l[i] = 0
            else:
                self.vxl[i], self.vyl[i] = self._parse_bins(rec)
                self.l[i] = 1

        # Optional probability mapping; an empty vx2 means "proba = score".
        self.vx2 = []
        mapping = root.find('ProbabilityMapping')
        if mapping is not None:
            x_node = mapping.find('x')
            y_node = mapping.find('y')
            if x_node is not None and y_node is not None:
                self.vx2, self.vyp, self.vym = self._build_probability_mapping(
                    b64decode(x_node.text, 'd'), b64decode(y_node.text, 'd'))

    @staticmethod
    def _parse_betas(regression, model_ids):
        """Decode the regression coefficients of the ``RegressionModel`` node."""
        raw_betas = b64decode(regression.find('betas').text, 'd')
        raw_idx = b64decode(regression.find('indexes').text, 'i')
        # zeros (not empty) so that entries never assigned below are defined
        betas = np.zeros(len(raw_betas))
        betas[-1] = raw_betas[-1]  # intercept
        for j, wanted in enumerate(raw_idx):
            # Search the model ids from the end; falls back to position 0.
            pos = len(model_ids) - 1
            while pos > 0 and model_ids[pos] != wanted:
                pos -= 1
            # NOTE(review): the value is read at the model position and stored
            # at the regression position, while apply() indexes betas by model
            # position -- confirm against the export format that both
            # orderings coincide.
            betas[j] = raw_betas[pos]
        return betas

    @staticmethod
    def _parse_map_values(rec):
        """Return ``(value -> recoded value dict, fallback value)`` for a MapValues model."""
        vy = b64decode(rec.find('y').text, 'd')
        mods = rec.find('Modalities').findall('Mod')
        if rec.get('tname') == 'bb64':
            # Binary recoding: only three values are stored; positions 1-5
            # share the second one and positions 6-14 the third one.
            values = [vy[0]] + [vy[1]] * 5 + [vy[2]] * 9
        else:
            values = vy
        mapping = {"": values[0]}
        for j, mod in enumerate(mods, 1):
            mapping[mod.get('val')] = values[j]
        return mapping, values[0]

    @staticmethod
    def _parse_bins(rec):
        """Return ``(bounds, values)`` arrays for a binned model record."""
        vx = b64decode(rec.find('x').text, 'd')
        vy = b64decode(rec.find('y').text, 'd')
        return Model._compress_bins(vx, vy)

    @staticmethod
    def _compress_bins(vx, vy):
        """Merge neighbouring bins that share the same recoded value.

        *vx* holds the bin bounds and *vy* the recoded values, with ``vy[0]``
        the value for missing data and ``vy[1]`` the value below the range.
        The result keeps ``len(values) == len(bounds) + 2``.
        """
        bounds = [vx[0]]
        values = [vy[0], vy[1]]  # missing, out-of-range left
        # Index of the last bound, computed explicitly: relying on the loop
        # variable breaks when the loop below does not run (len(vx) <= 2).
        last = len(vx) - 1
        for j in range(1, last):
            if vy[j + 1] == vy[j + 2]:
                continue  # same value as the next bin: drop this bound
            bounds.append(vx[j])
            values.append(vy[j + 1])
        bounds.append(vx[last])      # last modality X
        values.append(vy[last + 1])  # last modality Y
        values.append(vy[last + 1])  # out-of-range right
        return np.array(bounds), np.array(values)

    @staticmethod
    def _build_probability_mapping(vx, vy):
        """Build the piecewise-linear score -> probability tables.

        Returns ``(vx2, vyp, vym)`` where ``vx2`` are the knot scores with
        consecutive duplicates removed, ``vym[i]`` (``i >= 1``) is the slope of
        the segment between knots ``i-1`` and ``i``, and ``vyp[i]`` is the
        intercept of that segment for the inner knots.  ``vyp[0]`` and
        ``vyp[-1]`` hold the probabilities of the first and last knot; they
        are the values used outside the knot range.
        """
        xs = []
        ys = []
        for j in range(len(vy)):
            # The first point is always kept; vx[j-1] would otherwise wrap
            # around to the last element.
            if j == 0 or vx[j] != vx[j - 1]:
                xs.append(vx[j])
                ys.append(vy[j])
        if not xs:
            return [], np.zeros(0), np.zeros(0)

        vyp = np.zeros(len(xs))
        vym = np.zeros(len(xs))
        for j in range(1, len(xs)):
            slope = (ys[j] - ys[j - 1]) / (xs[j] - xs[j - 1])
            vym[j] = slope
            vyp[j] = ys[j] - xs[j] * slope
        vyp[0] = ys[0]
        vyp[-1] = ys[-1]
        return np.array(xs), vyp, vym

    def _recode_continuous(self, i, data):
        """Return the recoded value of float *data* for binned model *i*."""
        bounds = self.vxl[i]
        values = self.vyl[i]
        if isnan(data):
            return values[0]
        if data < bounds[0]:
            return values[1]
        if data > bounds[-1]:
            return values[-1]
        j = 1
        while data > bounds[j]:
            j += 1
        return values[j + 1]

    def _recode_nominal(self, i, data):
        """Return the recoded value of string *data* for mapping model *i*."""
        return self.dictList[i].get(data.strip(), self.vyListM[i])

    def _score_to_proba(self, res):
        """Map score *res* to a probability with the piecewise-linear mapping."""
        knots = self.vx2
        if res > knots[0]:
            return self.vyp[0]
        # '<=' so that a score sitting exactly on the last knot is clamped too
        if res <= knots[-1]:
            return self.vyp[-1]
        last = len(knots) - 1
        for i in range(1, last + 1):
            if res > knots[i]:
                if i == last:
                    # vyp[last] is the knot probability, not an intercept, so
                    # the last segment is anchored on its knot.
                    return self.vyp[i] + (res - knots[i]) * self.vym[i]
                return self.vyp[i] + res * self.vym[i]
        return 0.0  # only reached when res is NaN

    def apply(self, df):
        """Score every row of *df*.

        *df* must contain every column listed in ``modelColumns`` (a missing
        column raises ``ValueError``).  Returns a new DataFrame with two
        columns, ``targetCol + 'score'`` and ``targetCol + 'proba'``, one row
        per input row, using a default integer index.  Without a probability
        mapping the 'proba' column repeats the score.
        """
        n_rows = len(df.index)
        dfOutScore = np.zeros(n_rows)
        dfOutProba = np.zeros(n_rows)

        colNames = list(df)
        idxColInDF = [colNames.index(name) for name in self.modelColumns]
        has_mapping = len(self.vx2) > 0

        for line in range(n_rows):
            res = self.betas[-1]  # start from the intercept
            for i, is_continuous in enumerate(self.l):
                cell = df.iloc[line, idxColInDF[i]]
                if is_continuous:
                    recoded = self._recode_continuous(i, float(cell))
                else:
                    recoded = self._recode_nominal(i, str(cell))
                res += recoded * self.betas[i]

            dfOutScore[line] = res
            dfOutProba[line] = self._score_to_proba(res) if has_mapping else res

        return pd.DataFrame(data={self.targetCol+'score':dfOutScore,self.targetCol+'proba':dfOutProba})

def processString(s):
    """Return *s* with every space character turned into an underscore."""
    pieces = s.split(' ')
    return '_'.join(pieces)

def b64decode(s,t):
    """Decode base64 data *s* into an ``array.array`` with type code *t*.

    The decoded bytes are interpreted as machine values of type *t*
    (for example 'd' for doubles or 'i' for ints).
    """
    decoded = array.array(t)
    decoded.frombytes(base64.b64decode(s))
    return decoded
