% This is a LaTeX file.
% Homework for the course "Scientific Computing",
% Spring semester, 1996, Jonathan Goodman.
% A latex format for making homework assignments.
\documentstyle[12pt]{article}
% The page format, somewhat wider and taller page than in art12.sty.
\topmargin -0.1in \headsep 0in \textheight 8.9in \footskip 0.6in
\oddsidemargin 0in \evensidemargin 0in \textwidth 6.5in
\begin{document}
% Definitions of commonly used symbols.
% The title and header.
\noindent
{\scriptsize Scientific Computing, Spring 1996} \hfill
\begin{center}
\large
Assignment 8.
\normalsize
\end{center}
\noindent
Given May 6, due May 15.
\vspace{.3in}
% The questions!
\noindent
{\bf Objective:} To explore Gaussian random variables and variance reduction.
\noindent
{\bf Warning:} Monte--Carlo burns computer time. This assignment has been
scaled to fit into a recent model Pentium computer or better. If it takes
too long on your computer, scale it down a bit.
\vspace{.5cm}
\begin{description}
\item[(1)]
The Box--Muller algorithm takes two uniform random variables and produces
two independent standard normals, $X$ and $Y$. We will test whether
$(X,Y)$ produced have the correct density function,
\begin{equation}
f(x,y) = \frac{1}{2\pi} e^{-(x^2+y^2)/2} \;\; ,
\end{equation}
using bins in two dimensions. Choose $h = \Delta x = \Delta y$, take
$x_j = j\cdot h$ and $y_k = k \cdot h$, and define bins in the plane by:
\begin{displaymath}
B_{jk} = \left\{ (x,y) \mid \left| x-x_j \right| \leq \frac{h}{2}
\;\; \mbox{and} \;\;
\left| y-y_k \right| \leq \frac{h}{2} \right\} \;\; .
\end{displaymath}
If we take $n$ pairs, $(X_t,Y_t)$, $t=1$, $\ldots$, $n$, then the bin
counts are
\begin{displaymath}
N_{jk} = \# \left\{ (X_t,Y_t) \in B_{jk} \mid 1 \leq t \leq n \right \} \;\; .
\end{displaymath}
The expected counts are
\begin{equation}
< N_{jk} > = n \int \!\!\!\! \int_{B_{jk}} f(x,y)dxdy \;\; ,
\end{equation}
where $f$ is given by (1).
The midpoint rule for the integral in (2) is second order accurate.
In terms of local truncation error, this means that
\begin{displaymath}
\int \!\!\!\! \int_{B_{jk}} f(x,y)dxdy = h^2 f(x_j,y_k) + O(h^4) \;\; .
\end{displaymath}
(This is different from the one dimensional case.) In two dimensions
we do not want $h$ so small because that would create too many bins.
Therefore we want a more accurate integration formula. Show that
\begin{equation}
\int \!\!\!\! \int_{B_{jk}} f(x,y)dxdy = h^2 f(x_j,y_k)
+ C h^4 \left( \frac{\partial^2 f}{\partial x^2}
+ \frac{\partial^2 f}{\partial y^2} \right) (x_j,y_k)
+ O(h^6) \;\; .
\end{equation}
Find the constant, $C$, to make this true.
Choose $h$ so that for $n=10^6$, the central bin, $B_{00}$, has on the
order of a thousand points in it. Do the bin counts agree with the
theoretical prediction from (2) and (3)?
\item[(2)]
Now call the independent Gaussians given by Box--Muller $X_t$.
Box--Muller gives
$X_t$, $X_{t+1}$ from $T_t$, $T_{t+1}$. This is a change of notation from
problem 1, where $X_{2t-1}$ and $X_{2t}$ were called $X_t$ and $Y_t$.
Use the standard Monte--Carlo estimates
\begin{displaymath}
< X^{2p} > = \frac{1}{\sqrt{2\pi}} \int_{-\infty}^{\infty} x^{2p}e^{-x^2/2} dx
\approx \frac{1}{n} \sum_{t=1}^n X_t^{2p} \;\; .
\end{displaymath}
For $p=1$, $2$, and $5$, how many samples are required to get the answer
to within $1\%$? Why are more samples required for large $p$?
It is not simply because $< X^{10} > = 945$ is larger than $< X^2 > = 1$; we
are looking for relative accuracy.
\item[(3)]
A standard Brownian motion can be simulated by taking
\begin{displaymath}
X_{k+1} = X_k + \sqrt{\Delta t} Z_k \;\; ,
\end{displaymath}
where the $Z_k$ are i.i.d.\ standard normals, $X_k \approx X(k\Delta t)$,
and $X_0 = 0$. Here, take $\Delta t = .1$. We want to estimate
\begin{displaymath}
{\cal P} = \mbox{Pr}\left( X_k \geq 3 \;\; \mbox{for some} \;\; t_k \leq 1
\right) \;\; .
\end{displaymath}
\begin{description}
\item[(a)]
What accuracy do you get with $10^6$ samples? Note that this uses
$10^7$ i.i.d.\ standard normals.
\item[(b)]
Express $\cal P$ as
\begin{displaymath}
{\cal P} = \int \cdots \int
\phi(z_1,\ldots,z_{10}) f(z_1,\ldots,z_{10}) dz_1 \cdots dz_{10} \;\; ,
\end{displaymath}
where
\begin{displaymath}
\phi = \left\{ \begin{array}{ll}
1 & \mbox{if $X_k \geq 3$ for some $t_k \leq 1$} \\
0 & \mbox{otherwise.} \end{array} \right.
\end{displaymath}
and
\begin{displaymath}
f(z_1,\ldots,z_{10}) = \frac{1}{\left(2\pi\right)^{10/2}}
\exp\left\{-(z_1^2 + \cdots + z_{10}^2)/2\right\} \;\; .
\end{displaymath}
Suppose instead we take $Z_k$ to be i.i.d.\ Gaussians with mean $a$ and
variance $1$. Show that this is equivalent to writing $\cal P$ in the
form
\begin{displaymath}
{\cal P} = \int \cdots \int
\phi(z_1,\ldots,z_{10}) \frac{f(z)}{g(z)} \cdot g(z_1,\ldots,z_{10})
dz_1 \cdots dz_{10} \;\; ,
\end{displaymath}
where $g$ represents the joint density for ten independent biased
Gaussian random variables. Show that
\begin{equation}
{\cal P} \approx \frac{1}{n}
\sum_{t=1}^n \phi(Z^{(t)}) \frac{f(Z^{(t)})}{g(Z^{(t)})} \;\; ,
\end{equation}
where the $Z^{(t)}$ are each ten dimensional vectors.
\item[(c)]
On the computer, show that (4) can estimate $\cal P$ to $1\%$ accuracy
with far smaller $n$ if $a$ is chosen well.
\end{description}
\end{description}
\end{document}