# Python 3.7 # For the course Stochastic Calculus, Fall semester 2018, Courant Institute, NYU # Author: course instructor, Jonathan Goodman # https://www.math.nyu.edu/faculty/goodman/teaching/StochCalc2018/StochCalc.html # filename: HistogramDemo1.py # See Lesson6.pdf (on class web site above) for details and notation. # Create and plot a histogram of independent Gaussian random variables import numpy as np # numerical computing library import matplotlib.pyplot as plt # library of plot routines import numpy.random as ran # random number generators print("Python file HistogramDemo1.py") print("How to make and display a histogram") n = 1000 # number of random samples L = 21 # number of bins a = -3 # left end of the histogram region b = 3 # right end of the histogram region # Set up the bins, the bin centers, and the bin count array dx = (b-a)/L # dx = bin size = (length being binned)/(number of bins) N = np.zeros( shape = [L], dtype = int ) # bin counts, initialized to zero # xc = array of bin centers, uniformly spaced (using linspace). # The smallest one is a*.5*dx (half a bin size away from a) xc = np.linspace( start = a+.5*dx, stop = b-.5*dx, num = L) Xa = ran.randn(n) # an array of n independent standard normals. # Find the bin counts for X in Xa: # look at each X in the X array k = int(np.floor( ( X-a )/dx ) ) # the bin X goes into if ( ( k >= 0 ) and ( k < L ) ): # check that k is in the bin range N[k] += 1 # increment the count for bin k uh = N/(n*dx) # uh = u hat = estimate of PDF Neb = np.ndarray( [L], dtype=float) # N (bin count) error bar for k in range(L): p = dx*uh[k] # estimated probability of a sample in B[k] Neb[k] = np.sqrt(n*p*(1.-p)) # square root of the Bernoulli variance ueb = Neb/(n*dx) # standard dev of u is s.dev of N, scaled # Evaluate the true PDF using the standard normal density formula pf = 1./np.sqrt( 2.*np.pi) # the pre-factor pdf = lambda x: pf*np.exp( -.5*x*x) # Gaussian pdf formula (1/sqrt(2*pi))e^{-x^2/2} ue = np.ndarray([L], dtype = float) # ue is for "u exact" (the actual pdf) for k in range(L): ue[k] = pdf(xc[k]) # evaluate the density formula at bin centers plt.errorbar( xc, uh, yerr=ueb, linestyle = '', marker = 'o', label = 'histogram') plt.plot( xc, ue, 'b--', label = 'gaussian') title = "gaussian density estimation, " + str(n) + " samples, " + str(L) + " bins" plt.title(title) plt.xlabel('x') plt.ylabel('u') plt.legend() plt.savefig('HistogramDemo1.pdf') plt.show()