#  Python 3.7
#  For the course Stochastic Calculus, Fall semester 2018, Courant Institute, NYU
#  Author: course instructor, Jonathan Goodman
#  https://www.math.nyu.edu/faculty/goodman/teaching/StochCalc2018/StochCalc.html

#  filename: CauchyDistribution.py
#  The code is an edited version of HistogramDemo1.py, on the resources page
#  See assignment7.pdf (on class web site above) for details and notation.

#  Create and plot a histogram of the Cauchy distribution

import numpy             as np      # numerical computing library
import matplotlib.pyplot as plt     # library of plot routines
import numpy.random      as ran     # random number generators

print("Python file CauchyDistribution.py")
print("How to generate Cauchy random variables")


n = 400000   # number of random samples
L = 41       # number of bins
a = -8       # left end of the histogram region
b =  8       # right end of the histogram region

#    Set up the bins, the bin centers, and the bin count array

dx = (b-a)/L   # dx = bin size = (length being binned)/(number of bins)

N  = np.zeros( shape = [L], dtype =  int )  # bin counts, initialized to zero

#    xc = array of bin centers, uniformly spaced (using linspace).
#         The smallest one is a*.5*dx (half a bin size away from a)

xc = np.linspace( start = a+.5*dx, stop = b-.5*dx, num = L)

Ua = np.pi*(ran.rand(n) - .5)
Xa = np.tan(Ua)
#Xa = ran.randn(n)    # an array of n independent Cauchy random variables.

#   Find the bin counts

for X in Xa:                          # look at each X in the X array
    k = int(np.floor( ( X-a )/dx ) )  # the bin X goes into
    if ( ( k >= 0 ) and ( k < L ) ):  # check that k is in the bin range
        N[k] += 1                     # increment the count for bin k

uh = N/(n*dx)                         # uh = u hat = estimate of PDF
Neb = np.ndarray( [L], dtype=float)   # N (bin count) error bar
for k in range(L):
    p = dx*uh[k]                      # estimated probability of a sample in B[k]
    Neb[k] = np.sqrt(n*p*(1.-p))      # square root of the Bernoulli variance
ueb = Neb/(n*dx)                      # standard dev of u is s.dev of N, scaled

#     Evaluate the true PDF using the Cauchy density formula

pf  = 1./np.sqrt( 2.*np.pi)           # the pre-factor
pdf = lambda x: 1./((1.+x*x)*np.pi)   # Cauchy pdf formula (1/pi)*(1/(1*x^2))
ue  = np.ndarray([L], dtype = float)  # ue is for "u exact" (the actual pdf)
for k in range(L):
    ue[k] = pdf(xc[k])                # evaluate the density formula at bin centers

#    Spaces added so the numbers line up under the headings
#    dataLine a character string that is one line of numberical output.
#    It is assembled one number at a time, to fix the formats and number of blanks.

print(" Cauchy random variable simulation\n")
print("    bin center    bin count   expected bin count  error bar   error in standard deviations\n")
for k in range(L):
    dataLine =            "  "      + "{:10.3f}".format(xc[k])       # bin center
    dataLine = dataLine + "    "    + "{:7d}".format(N[k])           # actual bin count
    dataLine = dataLine + "       " + "{:10.3f}".format(n*ue[k]*dx)  # expected bin count
    dataLine = dataLine + "       " + "{:10.3f}".format(Neb[k])      # error bar for bin count
    nsd      = (N[k]-(n*ue[k]*dx))/Neb[k]                            # number of standard deviations ..
    dataLine = dataLine + "    "    + "{:10.3f}".format(nsd)         # .. from the mean
    print(dataLine)

plt.errorbar( xc, uh, yerr=ueb, linestyle = '', marker = 'o', label = 'histogram')
plt.plot( xc, ue, 'b--', label = 'Cauchy density')
fn    = float(n)      # convert for printing.  It can be a big number.
title = "Cauchy density estimation, " + "{:8.2e}".format(fn) + " samples, " + str(L) + " bins"
plt.title(title)
plt.xlabel('x')
plt.ylabel('u')

plt.legend()
plt.grid()
plt.savefig('CauchyDensityCheck.pdf')
print("\nClose the figure to end the program")
plt.show()