# Code for Monte Carlo class, Jonathan Goodman
# http://www.math.nyu.edu/faculty/goodman/teaching/MonteCarlo20/
# Posterior.py

# The author gives permission for anyone to use this publicly posted
# code for any purpose.  The code was written for teaching, not research
# or commercial use.  It has not been tested thoroughly and probably has
# serious bugs.  Results may be inaccurate, incorrect, or just wrong.

# The three modules are used for the Week 2 demo of a Gaussian approximation
# to a posterior distribution.  See Week 2 notes.
#     PDFapproxDemo.py
#     Posterior.py
#     MakeFakeData.py

# This module defines the class Posterior.
# It has code to read a data file and store the data in an instance of the
# Posterior class.
# That class also has functions that return the potential function for ...
# ... the PDF, along with other information needed for the demo.

import numpy as np
import csv            # package for reading and writing .csv files


class Posterior:
    """A class to represent a Bayesian posterior PDF based on data read
       from a data file.  The constructor reads and remembers the data.
       MAP()    returns the MAP point -- maximum a-posteriori point
       phi(x)   returns the potential at x (minus the log of the PDF)
       grad(x)  returns the gradient of phi(x) at x, as a numpy array
       Hess(x)  returns the Hessian of phi(x) at x, as a two index numpy array
    """

    def __init__(self):
        FakeDataFileName = "SimpleFakeData"             # name the input .csv file ...
        FakeDataFileName = FakeDataFileName + ".csv"    # ... filename.csv
        csvfile = open(FakeDataFileName, 'r', newline='')   # open the file for reading
        cfr = csv.reader(csvfile)    # instantiate the reader object
        X_list = []      # Never use a list for a data array, except here.
        n = 0            # number of data items read so far
        for row in cfr:
            xis = row[0]        # returns a character string,
            xi = float(xis)     # xi, for X[i], the floating point number
            X_list.append(xi)
            n = n + 1           # count data points
        csvfile.close()         # close the data file
        if (n == 0):
            print("Posterior instantiation read no data")
        self.X = np.array(X_list)       # remember the data
        self.n = n
        self.mu_s = np.mean(self.X)     # mu_s is for mu_*
        self.var_s = np.var(self.X)     # the numpy empirical variance

    def MAP(self):
        MAP_point = np.zeros(2)
        MAP_point[0] = self.mu_s
        MAP_point[1] = np.sqrt(self.var_s)
        return MAP_point

    def phi(self, theta):
        mu = theta[0]          # unpack independent variables
        sig = theta[1]         # give parameters their problem specific names
        mu_s = self.mu_s       # simplify formulas involving the sample mean ...
        var_s = self.var_s     # ... and variance ...
        n = self.n             # ... and number of data points
        phi = n*np.log(sig) \
            + (1./(2*sig**2))*n*(var_s + (mu_s - mu)**2)   # formula from Week 2 notes
        return phi

    def grad(self, theta):
        mu = theta[0]          # unpack independent variables
        sig = theta[1]         # give parameters their problem specific names
        mu_s = self.mu_s       # simplify formulas involving the sample mean ...
        var_s = self.var_s     # ... and variance ...
        n = self.n             # ... and number of data points
        grad = np.zeros(2)     # allocate the numpy array for the answer
        grad[0] = (-n/sig**2)*(mu_s - mu)
        grad[1] = n/sig \
                - (n/sig**3)*(var_s + (mu_s - mu)**2)
        return grad

    def Hess(self, theta):
        mu = theta[0]          # unpack independent variables
        sig = theta[1]         # give parameters their problem specific names
        mu_s = self.mu_s       # simplify formulas involving the sample mean ...
        var_s = self.var_s     # ... and variance ...
        n = self.n             # ... and number of data points
        Hess = np.zeros([2, 2])    # allocate the numpy array for the answer
        Hess[0, 0] = n/(sig*sig)
        Hess[0, 1] = (2./sig**3)*n*(mu_s - mu)
        Hess[1, 0] = Hess[0, 1]
        Hess[1, 1] = (-n)/(sig**2) \
                   + ((3.*n)/(sig**4))*(var_s + (mu_s - mu)**2)
        return Hess
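

# A minimal usage sketch, assuming a SimpleFakeData.csv file (one number
# per row) sits in the working directory, as MakeFakeData.py would produce.
# It builds a Posterior, finds the MAP point, and evaluates phi and its
# derivatives there.  At the MAP point the gradient should be (near) zero
# and the Hessian should be positive definite.

if __name__ == "__main__":
    post = Posterior()          # reads SimpleFakeData.csv
    t = post.MAP()              # t = (mu_*, sqrt(var_*))
    print("MAP point:  ", t)
    print("phi at MAP: ", post.phi(t))
    print("grad at MAP:", post.grad(t))    # should be (near) zero
    print("Hess at MAP:", post.Hess(t))    # should be positive definite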