#  Python 3.7
#  For the course Stochastic Calculus, Fall semester 2018, Courant Institute, NYU
#  Author: course instructor, Jonathan Goodman
#  https://www.math.nyu.edu/faculty/goodman/teaching/StochCalc2018/StochCalc.html

#  filename: HistogramDemo1.py
#  See Lesson6.pdf (on class web site above) for details and notation.

#  Create and plot a histogram of independent Gaussian random variables

import numpy             as np      # numerical computing library
import matplotlib.pyplot as plt     # library of plot routines
import numpy.random      as ran     # random number generators

print("Python file GaussianMean.py")
print("Show that the mean of independent Gaussians is Gaussian")


n = 300000    # number of random samples
L = 41      # number of bins
a = -2.5      # left end of the histogram region
b =  2.5      # right end of the histogram region

#    Set up the bins, the bin centers, and the bin count array

dx = (b-a)/L   # dx = bin size = (length being binned)/(number of bins)

N  = np.zeros( shape = [L], dtype =  int )  # bin counts, initialized to zero

#    xc = array of bin centers, uniformly spaced (using linspace).
#         The smallest one is a*.5*dx (half a bin size away from a)

xc = np.linspace( start = a+.5*dx, stop = b-.5*dx, num = L)

X1a = ran.randn(n)    # two arrays of n independent standard normals each.
X2a = ran.randn(n)

#    Generate the array of means

Xm  = .5*( X1a + X2a)

#   Find the bin counts

for X in Xm:                          # Put the means into bins
    k = int(np.floor( ( X-a )/dx ) )  # the bin X goes into
    if ( ( k >= 0 ) and ( k < L ) ):  # check that k is in the bin range
        N[k] += 1                     # increment the count for bin k

uh = N/(n*dx)                         # uh = u hat = estimate of PDF
Neb = np.ndarray( [L], dtype=float)   # N (bin count) error bar
for k in range(L):
    p = dx*uh[k]                      # estimated probability of a sample in B[k]
    Neb[k] = np.sqrt(n*p*(1.-p))      # square root of the Bernoulli variance
ueb = Neb/(n*dx)                      # standard dev of u is s.dev of N, scaled

#     Evaluate the true PDF using the normal density formula with appropriate variance

s2  = .5                              # variance is supposed to be sigma^2 = .5
pf  = 1./np.sqrt( 2.*np.pi*s2)        # the pre-factor
c   = 1./(2.*s2)                      # u = pf*e^{-cx^2)
pdf = lambda x: pf*np.exp( -c*x*x)   # Gaussian pdf formula (1/sqrt(2*pi))e^{-x^2/2}
ue  = np.ndarray([L], dtype = float)  # ue is for "u exact" (the actual pdf)
for k in range(L):
    ue[k] = pdf(xc[k])                # evaluate the density formula at bin centers

plt.errorbar( xc, uh, yerr=ueb, linestyle = '', marker = 'o', label = 'simulation data')
plt.plot( xc, ue, 'b--', label = 'theory')
fn    = float(n)      # convert for printing.  It can be a big number.
title = "Sum of Gaussians, " + "{:8.2e}".format(fn) + " samples, " + str(L) + " bins"
plt.title(title)
plt.xlabel('x')
plt.ylabel('u')

plt.legend()
plt.grid()
plt.savefig('GaussianMean.pdf')
plt.show()