Python, MATLAB, Julia, R code: Chapter 8

Acknowledgement: The Julia code is written by the contributors listed here.

Chapter 8.1 Maximum-likelihood Estimation

Visualizing the likelihood function

Visualize the function

log mathcal{L}(theta;|;S) = Slog theta + (N-S) log (1-theta)

% MATLAB: Visualize the likelihood function
N = 50;
S = 1:N;
theta = linspace(0.1,0.9,100);
[S_grid, theta_grid] = meshgrid(S, theta);
L = S_grid.*log(theta_grid) + (N-S_grid).*log(1-theta_grid);
s = surf(S,theta,L);
s.LineStyle = '-';
colormap jet
view(65,15)
set(gcf, 'Position', [100, 100, 800, 800]);
set(gca,'FontWeight','bold','fontsize',14);

figure;
N = 50;
S = 1:0.1:N;
theta = linspace(0.1,0.9,1000);
[S_grid, theta_grid] = meshgrid(S, theta);
L = S_grid.*log(theta_grid) + (N-S_grid).*log(1-theta_grid);
imagesc(S,theta,L);
set(gcf, 'Position', [100, 100, 800, 800]);
set(gca,'FontWeight','bold','fontsize',14);
colormap jet
# Julia: Visualizing the likelihood function
using Plots
N = 50
S = range(1., N, step=0.1)
theta = range(0.1, 0.9,length=100)
L(S, theta) = S*log(theta) + (N-S)*log(1. - theta)
# the surface plot
plotlyjs() # this backend allows rotating with mouse
p1 = surface(S, theta, (S, theta) -> L(S, theta), color=:jet, xlabel="S", ylabel="theta",
    title="L(theta|S)")
# the bird's eye view
gr()
p2 = heatmap(S, theta, (S, theta) -> L(S, theta), color=:jet, xlabel="S", ylabel="theta",
    title="bird's eye view")
SS = 25 # set the value for the slice here
vline!([SS], label=false, color=:black)
# slice through the log likelihood
p3 = plot(theta, theta -> L(SS, theta),label=false, xlabel="theta", title="L(theta|S=$SS)")
plot(p2, p3)
# R: Visualize the likelood function
library(pracma)
library(plot3D)

N = 50
S = 1:N
theta = seq(0.1, 0.9, (0.9+0.1)/100)
mesh = meshgrid(S, theta)
S_grid = mesh$X
theta_grid = mesh$Y
L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid)
L = t(L)
persp3D(S, theta, L, theta=65, phi=15, border="black", lwd=0.3, bty="b2", xlab="S", ylab="θ", zlab="", ticktype="detailed")

# Slice through the log likelihood
N = 50
S = seq(from=1, to=N, by=0.1)
theta = seq(0.1, 0.9, (0.1+0.9)/1000)
mesh = meshgrid(S, theta)
S_grid = mesh$X
theta_grid = mesh$Y
L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid)
L = t(L)
image(S, theta, L, col=rainbow(256), ylim=c(0.9, 0.1))

Visualizing the likelihood function

% MATLAB code
N = 50;
S = 1:N;
theta = linspace(0.1,0.9,100);
[S_grid, theta_grid] = meshgrid(S, theta);
L = S_grid.*log(theta_grid) + (N-S_grid).*log(1-theta_grid);

figure;
plot(theta, L(:,12), 'LineWidth', 6, 'Color', [0,0,0]);
xlabel('theta');
axis([0.1 0.9 min(ylim) max(ylim)]);
grid on;
set(gcf, 'Position', [100, 100, 600, 400]);
set(gca,'FontWeight','bold','fontsize',14);
# R code
library(pracma)

N = 50
S = 1:N
theta = seq(0.1, 0.9, (0.1+0.9)/100)
mesh = meshgrid(S, theta)
S_grid = mesh$X
theta_grid = mesh$Y
L = S_grid * log(theta_grid) + (N-S_grid) * log(1-theta_grid)
L_df = data.frame(L)
colnames(L_df) = S

plot(theta, L_df$"12", type="n")
grid()
lines(theta, L_df$"12", lwd=6)
title(expression(paste("L(", theta, " | S = 12)")))

ML estimation for single-photon imaging

Image download: cameraman.tif (65KB)

% MATLAB code
lambda = im2double(imread('cameraman.tif'));
T = 100;
x = poissrnd( repmat(lambda, [1,1,T]) );
y = (x>=1);
lambdahat = -log(1-mean(y,3));

figure(1);
imshow(x(:,:,1));

figure(2);
imshow(lambdahat,[]);
# Python code
import cv2
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
lambd = cv2.imread('./cameraman.tif')               # read image
lambd = cv2.cvtColor(lambd, cv2.COLOR_BGR2GRAY)/255 # gray scale
T = 100
lambdT = np.repeat(lambd[:, :, np.newaxis], T, axis=2)  # repeat image
x = stats.poisson.rvs(lambdT)                       # Poisson statistics
y = (x>=1).astype(float)                            # binary truncation
lambdhat = -np.log(1-np.mean(y,axis=2))             # ML estimation
plt.imshow(lambdhat,cmap='gray')
# Julia: ML estimation for single-photon imaging
using Images, Distributions
using ImageView:imshow
download("https://probability4datascience.com/data/cameraman.tif", "./cameraman.tif")
lambda  = Float64.(load("cameraman.tif"))
T = 100
x = [rand.(Poisson.(lambda)) for _=1:T]
y = [x[i] .>= 1. for i=1:T]
lambdahat = -log.(1. .- mean(y))
imshow(x[1]) # a single sample image
imshow(lambdahat) # the ML recovered image
# R code
library(imager)
lambda = as.data.frame(load.image("cameraman.tif"))
lambda = xtabs(value ~ x+y, data=lambda)
T = 100
x = c()
for (i in 1:T) {
    x = append(x, rpois(length(lambda), lambda))
}
x = array(x, c(256, 256, T))
y = (x>=1)
mu = apply(y, c(1,2), mean)
lambdahat = -log(1-mu)
fig1 = x[,,1]

# Flip matrix since `image()` reads the matrix bottom up
flip_matrix = function(m) m[,nrow(m):1]

# Figure 1: A single sample image
image(flip_matrix(fig1), col=gray.colors(255))

# Figure 2: ML Recovered Image
image(flip_matrix(lambdahat), col=gray.colors(255))

Chapter 8.2 Properties of the ML estimation

Visualizing the invariance principle

% MATLAB code
figure;
N = 50;
S = 20;
theta = linspace(0.1,0.9,1000);
L = S.*log(theta) + (N-S).*log(1-theta);
plot(theta, L, 'LineWidth', 6, 'Color', [0.5,0.5,0.75]);
set(gcf, 'Position', [100, 100, 600, 300]);
set(gca,'FontWeight','bold','fontsize',14);
grid on;


h_theta = -log(1-theta);
figure;
plot( theta, h_theta, 'LineWidth', 6, 'Color', [0,0,0]);
set(gcf, 'Position', [100, 100, 600, 400]);
set(gca,'FontWeight','bold','fontsize',14);
grid on;

theta = linspace(0.1,2.5,1000);
L = S.*log(1-exp(-theta)) - theta.*(N-S);
figure;
plot(theta, L, 'LineWidth', 6, 'Color', [0,0,0.75]);
set(gcf, 'Position', [100, 100, 600, 400]);
set(gca,'FontWeight','bold','fontsize',14);
grid on;
# Julia: Visualizing the invariance principle
using Plots
N = 50
S = 20
theta = range(0.1,0.9,length=1000)
eta  = -log.(1. .- theta) # the reparameterization transformation
L(theta) = S*log(theta) + (N-S)*log(1. - theta) # log likelihood in terms of theta
L(eta) = S*log(1. - exp(-eta)) - (N-S)*eta # log likelihood in terms of eta
# plot the log likelihood with theta parameterization
p4 = plot(theta, theta -> L(theta), linewidth=5, color=:blue, label=false)
xlabel!("theta")
title!("L(theta|S=$S)")
vline!([0.4], label=false, color=:red)
# plot the transformation
p2 = plot(theta, eta, linewidth=5, color=:black, label=false)
xlabel!("theta")
ylabel!("eta")
title!("eta(theta)")
vline!([0.4], label=false, color=:red)
hline!([0.5], label=false, color=:green)
# plot the log likelihood with eta parameterization
p1 = plot(L.(eta), eta,  linewidth=5, color=:blue, label=false, xflip=true)
ylabel!("eta")
hline!([0.5], label=false, color=:green)
title!("L(eta|S=$S)")
# blank place holder plot
p3 = plot(grid=false, xaxis=false, yaxis=false, xticks=false, yticks=false)
# plot the whole set in the final image
plot(p1, p2, p3, p4)
# R code
N = 50
S = 20
theta = seq(0.1, 0.9, (0.1+0.9)/1000)
L = S * log(theta) + (N-S) * log(1-theta)
plot(theta, L, type="n", xlab=expression(theta), ylab=expression(paste("Log L(", theta, "|S = 20)")))
title("Bernoulli")
grid()
lines(theta, L, lwd=6, col="#8080BF")

h_theta = -log(1-theta)
plot(theta, h_theta, type="n", xlab=expression(theta), ylab=expression(paste(eta, " = h(", theta, ")")))
grid()
lines(theta, h_theta, lwd=6)

theta = seq(0.1, 2.5, (0.1+2.5)/1000)
L = S * log(1-exp(-theta)) - theta * (N-S)
plot(theta, L, type="n")
title("Truncated Poisson")
grid()
lines(theta, L, lwd=6, col="#0000BF")

Chapter 8.3 Maximum-a-Posteriori Estimation

Influence of the priors

% MATLAB code
N = 1;
sigma0 = 1;
mu0    = 0.0;
x = 5;%*rand(N,1);
t = linspace(-3,7,1000);

q = NaN(1,1000);
for i=1:N
    [val,idx] = min(abs(t-x(i)));
    q(idx) = 0.1;
end

p0 = normpdf(t,0,1);
theta = (mean(x)*sigma0^2+mu0/N)/(sigma0^2+1/N);
p1 = normpdf(t,theta,1);
prior = normpdf(t,mu0,sigma0)/10;

figure;
h1 = plot(t,normpdf(t,mean(x),1),'LineWidth',4,'Color',[0.8,0.8,0.8]); hold on;
h2 = plot(t,p1,'LineWidth',4,'Color',[0,0.0,0.0]);
h3 = stem(t,q,'LineWidth',4,'Color',[0.5,0.5,0.5],'MarkerSize',10);
h5 = plot(t,prior,'LineWidth',4,'Color',[1,0.5,0.0]);

grid on;
axis([-3 7 0 0.5]);
xticks(-5:1:10);
yticks(0:0.05:0.5);
set(gcf, 'Position', [100, 100, 600, 300]);
set(gca,'FontWeight','bold','fontsize',14);
# Julia: Influence of the priors
# likelihood is N(theta,1)
using Plots, Distributions
N = 5       # sample size
mu0  = 0.0   # prior mean
sigma0  = 1.    # prior std. dev.
theta = 5.      # true mean of sample data
x = rand(Normal(theta,1), N)    # the sample data
xbar  = mean(x) # mean of sample data
t = range(-3.,7.,length=1000)
thetahat = (sigma0^2. * xbar + mu0/N)/(sigma0^2. + 1. /N) # MAP
sigmahat = sqrt(1. / (1. / sigma0^2. + N))  # s.d. posterior
p0 = pdf(Normal(xbar,1.0), t)       # likelihood
p1 = pdf(Normal(thetahat, sigmahat), t)  # posterior
prior = pdf(Normal(mu0,sigma0),t)     # prior, divided by 10 to
plot(t, [p0, p1, prior], label=["Likelihood" "Posterior" "prior"], linewidth=3, legend=:topleft)
plot!([x], [0.1*ones(N)], line=:stem, label="data", marker=:circle, linewidth=3, title="N = $N")
# R code: Influence of the priors
N = 5
mu0 = 0.0
sigma0 = 1
theta = 5
x = rnorm(N, 5, 1)
xbar = mean(x)
t = seq(-3, 7, (10)/1000)
q = numeric(1000)
for (i in 1:N) {
    x = abs(t-x[i])
    a = min(x)
    q[match(a, x)] = 0.1
}
thetahat = (sigma0^2. * xbar + mu0/N)/(sigma0^2. + 1. /N)
sigmahat = sqrt(1. / (1. / sigma0^2 + N))
p0 = dnorm(t, xbar, 1.)
p1 = dnorm(t, thetahat, sigmahat)
prior = dnorm(t, mu0, sigma0)/10
plot(t, p1, type="n")
title("N = 5")
legend(-2, 0.9, legend=c("Likelihood", "Posterior", "Prior", "Data"), col=c("blue", "orange", "green", "purple"), lty=1:1)
grid()
lines(t, p0, lwd = 4, col="blue")
lines(t, p1, lwd = 4, col="orange")
lines(t, prior, lwd = 4, col="green")
lines(t, q, lwd = 4, col="purple")
grid()

Conjugate priors

% MATLAB code
sigma0 = 0.25;
mu0    = 0.0;

mu     = 1;
sigma  = 0.25;

Nset = [0 1 2 5 8 12 20];
x0 = sigma*randn(100,1);

for i=1:7
    N = Nset(i);
    x = x0(1:N);
    t = linspace(-1,1.5,1000);

    p0     = normpdf(t,0,1);
    theta  = mu*(N*sigma0^2)/(N*sigma0^2+sigma^2) + mu0*(sigma^2)/(N*sigma0^2+sigma^2);
    sigmaN = sqrt(1/(1/sigma0^2+N/sigma^2));
    posterior(:,i) = normpdf(t,theta,sigmaN);
end

figure;
plot(t,posterior(:,1)', 'LineWidth',2,'Color',[0.9,0.0,0.0]);  hold on;
plot(t,posterior(:,2)', 'LineWidth',2,'Color',[1,0.6,0.0]);
plot(t,posterior(:,3)', 'LineWidth',2,'Color',[1,0.9,0.3]);
plot(t,posterior(:,4)', 'LineWidth',2,'Color',[0.0,0.8,0.1]);
plot(t,posterior(:,5)', 'LineWidth',2,'Color',[0.0,0.6,0.6]);
plot(t,posterior(:,6)', 'LineWidth',2,'Color',[0.0,0.2,0.8]);
plot(t,posterior(:,7)', 'LineWidth',2,'Color',[0.5,0.2,0.5]);

legend('N = 0', 'N = 1', 'N = 2', 'N = 5', 'N = 8', 'N = 12', 'N = 20', 'Location', 'NW');
grid on;
axis([-1 1.5 0 8]);
set(gcf, 'Position', [100, 100, 600, 300]);
set(gca,'FontWeight','bold','fontsize',14);
% Julia
using Plots, Distributions
sigma0 = 0.25
mu0 = 0.0
mu  = 1.
sigma  = 0.25
Nset = [0 1 2 5 8 12 20]
x0 = sigma .* randn(100)
posterior = zeros(1000,7)
t = range(-1.,1.5,length=1000)
p0 = pdf(Normal(0.,1.), t)
for i=1:7
    N = Nset[i]
    x = x0[1:N]
    theta  = mu * (N*sigma0^2.)/(N*sigma0^2. + sigma ^2.) + mu0*(sigma^2.)/(N*sigma0^2. + sigma^2.)
    sigmaN= sqrt(1. /(1. /sigma0^2. + N/sigma^2.))
    posterior[:,i] = pdf(Normal(theta, sigmaN), t)
end
plot(t, posterior, linewidth=2.,
    label=["N = 0" "N = 1" "N = 2" "N = 5" "N = 8" "N = 12" "N = 20"],
    legend=:topleft)
# R code
sigma0 = 0.25
mu0 = 0.0
mu = 1
sigma = 0.25
Nset = c(0, 1, 2, 5, 8, 12, 20)
x0 = sigma * rnorm(100)
posterior = list()
for (i in 1:7) {
    N = Nset[i]
    x = x0[1:N]
    t = seq(-1, 1.5, 2.5/1000)
    p0 = dnorm(t, 0, 1)
    theta = mu*(N*sigma0^2)/(N*sigma0^2+sigma^2) + mu0*(sigma^2)/(N*sigma0^2+sigma^2)
    sigmaN = sqrt(1/(1/sigma0^2+N/sigma^2));
    posterior[[i]] = dnorm(t, theta, sigmaN)
}

plot(t, posterior[[7]], type="n", xlab="", ylab="")
grid()
lines(t, posterior[[1]], lwd=3, col="red")
lines(t, posterior[[2]], lwd=3, col="orange")
lines(t, posterior[[3]], lwd=3, col="yellow")
lines(t, posterior[[4]], lwd=3, col="green")
lines(t, posterior[[5]], lwd=3, col="turquoise")
lines(t, posterior[[6]], lwd=3, col="blue")
lines(t, posterior[[7]], lwd=3, col="purple")

legend(-0.9, 7, legend=c("N = 0", "N = 1", "N = 2", "N = 5", "N = 8", "N = 12", "N = 20"), col=c("red", "orange", "yellow", "green", "turquoise", "blue", "purple"), lty=1:1, lwd=3)
grid()