# Code for Monte Carlo class, Jonathan Goodman
# http://www.math.nyu.edu/faculty/goodman/teaching/MonteCarlo20/
# Posterior.py

# The author gives permission for anyone to use this publicly posted
# code for any purpose.  The code was written for teaching, not research
# or commercial use.  It has not been tested thoroughly and probably has
# serious bugs.  Results may be inaccurate, incorrect, or just wrong.

# The three modules are used for the Week 2 demo of a Gaussian approximation
# to a posterior distribution.  See Week 2 notes.
#     PDFapproxDemo.py
#     Posterior.py
#     MakeFakeData.py

# This module defines the class Posterior.
# It has code to read a data file and store the data in an instance of the
# Posterior class.
# That class also has functions that return the potential function for ...
# ... the PDF, along with other information needed for the demo.

import numpy as np
import csv            # package for reading and writing .csv files


class Posterior:
    """A class to represent a Bayesian posterior PDF based on data read
       from a data file.  The constructor reads and remembers the data.
       MAP()    returns the MAP point -- maximum a-posteriori point
       phi(x)   returns the potential at x (minus the log of the PDF)
       grad(x)  returns the gradient of phi(x) at x, as a numpy array
       Hess(x)  returns the Hessian of phi(x) at x, as a two index numpy array
    """

    def __init__(self):
        FakeDataFileName = "SimpleFakeData"             # name the input .csv file ...
        FakeDataFileName = FakeDataFileName + ".csv"    # ... filename.csv
        csvfile = open(FakeDataFileName, 'r', newline='')   # open the file for reading
        cfr = csv.reader(csvfile)    # instantiate the reader object
        X_list = []      # Never use a list for a data array, except here.
        n = 0            # number of data items read so far
        for row in cfr:
            xis = row[0]        # returns a character string,
            xi = float(xis)     # xi, for X[i], the floating point number
            X_list.append(xi)
            n = n + 1           # count data points
        csvfile.close()         # close the data file
        if (n == 0):
            print("Posterior instantiation read no data")
        self.X = np.array(X_list)       # remember the data
        self.n = n
        self.mu_s = np.mean(self.X)     # mu_s is for mu_*
        self.var_s = np.var(self.X)     # the numpy empirical variance

    def MAP(self):
        MAP_point = np.zeros(2)
        MAP_point[0] = self.mu_s
        MAP_point[1] = np.sqrt(self.var_s)
        return MAP_point

    def phi(self, theta):
        mu = theta[0]          # unpack independent variables
        sig = theta[1]         # give parameters their problem specific names
        mu_s = self.mu_s       # simplify formulas involving the sample mean ...
        var_s = self.var_s     # ... and variance ...
        n = self.n             # ... and number of data points
        phi = n*np.log(sig) \
            + (1./(2*sig**2))*n*(var_s + (mu_s - mu)**2)   # formula from Week 2 notes
        return phi

    def grad(self, theta):
        mu = theta[0]          # unpack independent variables
        sig = theta[1]         # give parameters their problem specific names
        mu_s = self.mu_s       # simplify formulas involving the sample mean ...
        var_s = self.var_s     # ... and variance ...
        n = self.n             # ... and number of data points
        grad = np.zeros(2)     # allocate the numpy array for the answer
        grad[0] = (-n/sig**2)*(mu_s - mu)
        grad[1] = n/sig \
                - (n/sig**3)*(var_s + (mu_s - mu)**2)
        return grad

    def Hess(self, theta):
        mu = theta[0]          # unpack independent variables
        sig = theta[1]         # give parameters their problem specific names
        mu_s = self.mu_s       # simplify formulas involving the sample mean ...
        var_s = self.var_s     # ... and variance ...
        n = self.n             # ... and number of data points
        Hess = np.zeros([2, 2])    # allocate the numpy array for the answer
        Hess[0, 0] = n/(sig*sig)
        Hess[0, 1] = (2./sig**3)*n*(mu_s - mu)
        Hess[1, 0] = Hess[0, 1]
        Hess[1, 1] = (-n)/(sig**2) \
                   + ((3.*n)/(sig**4))*(var_s + (mu_s - mu)**2)
        return Hess
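

# A minimal usage sketch, assuming a SimpleFakeData.csv file (one number
# per row) sits in the working directory, as MakeFakeData.py would produce.
# It builds a Posterior, finds the MAP point, and evaluates phi and its
# derivatives there.  At the MAP point the gradient should be (near) zero
# and the Hessian should be positive definite.

if __name__ == "__main__":
    post = Posterior()          # reads SimpleFakeData.csv
    t = post.MAP()              # t = (mu_*, sqrt(var_*))
    print("MAP point:  ", t)
    print("phi at MAP: ", post.phi(t))
    print("grad at MAP:", post.grad(t))    # should be (near) zero
    print("Hess at MAP:", post.Hess(t))    # should be positive definite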