%pylab inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
Datapoints $(x,t)$ are believed to come from a straight line with Gaussian noise $\varepsilon \sim N(0, \sigma^2)$:
$$ t = w_0 + w_1 x + \varepsilon. $$
Now we want to infer the parameters $w_0, w_1$ and sample from their posterior.
# Simulate
np.random.seed(1)
w1 = 0.5
w0 = 3
sigma2 = 3
N = 100
err = np.random.normal(0, np.sqrt(sigma2), N)  # np.random.normal takes the standard deviation, not the variance
X = np.array(sorted(np.random.uniform(-10,10,N)))
T = w0 + X*w1 + err
plt.plot(X,T,".")
plt.xlabel('x')
plt.ylabel('t')
plt.show()
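For reference, with Gaussian noise the maximum-likelihood point estimate coincides with ordinary least squares; a quick sketch (not part of the pipeline below, `np.polyfit` returns the highest-degree coefficient first):
w1_ml, w0_ml = np.polyfit(X, T, deg=1)  # least-squares line fit
print("OLS estimate: w0 =", w0_ml, " w1 =", w1_ml)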
Assuming a uniform prior for $w_0$, $w_1$ from $-2$ to $5$ (the grid range used below):
$$
\begin{aligned}
P(D \mid w_0, w_1)\, P(w_0, w_1) &\propto
\begin{cases}
\prod_i \sqrt{\frac 1 {2 \pi \sigma^2}}\, e^{\frac {-(t_i-w_0-w_1 x_i)^2} {2 \sigma^2}} & \text{if } w_0, w_1 \in [-2, 5]\\
0 & \text{otherwise}
\end{cases}\\
P(D) &= \int_{-2}^{5} \int_{-2}^{5} \prod_i \sqrt{\frac 1 {2 \pi \sigma^2}}\, e^{\frac {-(t_i-w_0-w_1 x_i)^2} {2 \sigma^2}} \, dw_0\, dw_1
\end{aligned}
$$
If we wanted to pursue Bayesian hypothesis comparison, assuming $p(H) = \text{constant}$:
$$
\begin{aligned}
P(H_\text{linear} \mid D) &\propto \int_{-2}^{5} \int_{-2}^{5} \prod_i \sqrt{\frac 1 {2 \pi \sigma^2}}\, e^{\frac {-(t_i-w_0-w_1 x_i)^2} {2 \sigma^2}} \, dw_0\, dw_1\\
P(H_\text{linear} \mid D) &\propto \int_{-2}^{5} \int_{-2}^{5} e^{\sum_i \left( -\frac 1 2 \ln 2\pi \sigma^2 - \frac {(t_i-w_0-w_1 x_i)^2} {2\sigma^2} \right)} \, dw_0\, dw_1\\
\log P(H_\text{linear} \mid D) &\propto \log \int_{-2}^{5} \int_{-2}^{5} e^{\sum_i \left( -\frac 1 2 \ln 2\pi \sigma^2 - \frac {(t_i-w_0-w_1 x_i)^2} {2\sigma^2} \right)} \, dw_0\, dw_1
\end{aligned}
$$
etc.
def posterior(x, t, w0, w1, sigma2):
    logconst = -0.5*np.log(2*np.pi*sigma2)  # per-point log normalization constant of the Gaussian
    logprob = 0  # flat prior: contributes only a constant in log space
    logprob += (-(t - w0 - w1*x)*(t - w0 - w1*x)/2/sigma2 + logconst).sum()
    return np.exp(logprob)
nbin = 500
w0_min = -2
w0_max = +5
w1_min = -2
w1_max = +5
x0 = np.linspace(w0_min, w0_max, nbin)
x1 = np.linspace(w1_min, w1_max, nbin)
def posterior_pdf(X, T):
    h2 = []
    bsize_w0 = (w0_max - w0_min)/nbin  # x0[1]-x0[0]
    bsize_w1 = (w1_max - w1_min)/nbin
    for b0 in range(nbin):
        w0 = w0_min + b0*bsize_w0
        row = []
        for b1 in range(nbin):
            w1 = w1_min + b1*bsize_w1
            ev = posterior(X, T, w0, w1, sigma2) * bsize_w0 * bsize_w1
            row.append(ev)
        h2.append(row)
    # normalization: the grid now sums to 1
    h2 = np.array(h2)/np.sum(np.array(h2))
    return h2
pdf = posterior_pdf(X, T)
pdf.sum()
1.0
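The double loop above is slow, and the product of many small densities can underflow for larger $N$. A vectorized sketch (not part of the original pipeline, equivalent up to bin-edge conventions) that expands the squared residual into sufficient statistics and works in log space:
def posterior_pdf_fast(X, T):
    # evaluate the log-posterior on the whole grid at once
    W0, W1 = np.meshgrid(x0, x1, indexing='ij')
    Sx, St = X.sum(), T.sum()
    Sxx, Sxt, Stt = (X*X).sum(), (X*T).sum(), (T*T).sum()
    # sum_i (t_i - w0 - w1 x_i)^2 expanded via sufficient statistics
    sq = Stt - 2*W0*St - 2*W1*Sxt + 2*W0*W1*Sx + len(X)*W0**2 + W1**2*Sxx
    logp = -sq / (2*sigma2)
    logp -= logp.max()  # subtract the max before exp to avoid underflow
    p = np.exp(logp)
    return p / p.sum()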
Finding the grid cell with maximal probability is equivalent to $\arg\max_{(w_0,w_1)} P(w_0,w_1 \mid D)$, i.e. the MAP estimate.
w0, w1 = np.unravel_index(pdf.argmax(), pdf.shape)  # grid indices of the MAP estimate
w0,w1
(370, 182)
x0[w0], x1[w1]
(3.1903807615230457, 0.5531062124248498)
fig, axs = plt.subplots(ncols=2, nrows=1)
fig.set_figwidth(20)
fig.set_figheight(10)
pcm0 = axs[0].imshow(pdf, extent=[w1_min, w1_max, w0_max, w0_min])
pcm1 = axs[1].imshow(pdf[int(w0-30):int(w0+30), int(w1-30):int(w1+30)],
                     extent=[x1[w1-30], x1[w1+30], x0[w0+30], x0[w0-30]])
axs[0].axvline(x1[w1], c='r', ls='--')
axs[0].axhline(x0[w0], c='r', ls='--')
fig.colorbar(pcm0, ax=axs[0], shrink=0.35)  # each colorbar gets its own panel's mappable
fig.colorbar(pcm1, ax=axs[1], shrink=0.35)
axs[0].set_title('joint posterior')
axs[0].set_ylabel(r'$w_0$')
axs[0].set_xlabel(r'$w_1$')
axs[1].set_xlabel(r'$w_1$')
axs[1].set_title('joint posterior (zoomed in)')
Let's marginalize the posterior to get a distribution for each parameter.
Sampling $w_0$ and $w_1$ independently from these marginals amounts to a mean-field (independence) approximation of the joint posterior: it introduces some bias when the parameters are correlated, but it is a common simplifying assumption used to keep complex models tractable. A quick sanity check of this approximation follows the marginalization below.
# from scipy.stats.contingency import margins  # would compute the same marginals
def marginalization(pdf):
    # sum out the other parameter to obtain each marginal, then its CDF
    pdf_w0, pdf_w1 = pdf.sum(axis=1), pdf.sum(axis=0)
    cdf_w0 = np.cumsum(pdf_w0)
    cdf_w1 = np.cumsum(pdf_w1)
    return cdf_w0, cdf_w1, pdf_w0, pdf_w1
cdf_w0,cdf_w1, pdf_w0,pdf_w1 = marginalization(pdf)
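As a quick sanity check on the mean-field assumption, a sketch comparing the joint grid posterior with the outer product of its marginals (names and the check itself are additions, not part of the original):
mean_field = np.outer(pdf_w0, pdf_w1)  # independent product q(w0)q(w1)
tv = 0.5 * np.abs(pdf - mean_field).sum()  # total variation distance
print("TV distance, joint vs product of marginals:", tv)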
fig, axs = plt.subplots(ncols=2, nrows=2)
fig.set_figwidth(20)
fig.set_figheight(10)
axs[0, 0].plot(x0, pdf_w0)
axs[0, 1].plot(x1, pdf_w1)
axs[1, 0].plot(x0, cdf_w0)
axs[1, 1].plot(x1, cdf_w1)
axs[0, 0].set_title('$w_0$', fontsize=18)
axs[0, 1].set_title('$w_1$', fontsize=18)
def parameters_sample(cdf_w0, cdf_w1):
    # inverse-transform sampling: draw u ~ U(0,1) and invert the empirical CDF
    r = np.random.uniform(low=0.0, high=1.0, size=2)
    i = int(np.argwhere(cdf_w0 > r[0])[0] - 1)
    i = max(i, 0)
    w0_sampled = x0[i]
    i = int(np.argwhere(cdf_w1 > r[1])[0] - 1)
    i = max(i, 0)
    w1_sampled = x1[i]  # was x0[i]; only worked because both grids happen to coincide
    return w0_sampled, w1_sampled
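Equivalently (a sketch, up to off-by-one bin handling; not in the original), NumPy's searchsorted performs the same inverse-CDF lookup and vectorizes over many draws:
def parameters_sample_vec(cdf_w0, cdf_w1, n=5):
    r = np.random.uniform(size=(n, 2))
    i0 = np.clip(np.searchsorted(cdf_w0, r[:, 0]), 0, nbin - 1)
    i1 = np.clip(np.searchsorted(cdf_w1, r[:, 1]), 0, nbin - 1)
    return x0[i0], x1[i1]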
def plot_sample(X, T, cdf_w0, cdf_w1):
    plt.plot(X, T, ".")
    for _ in range(5):
        w0_sampled, w1_sampled = parameters_sample(cdf_w0, cdf_w1)
        print("w0: ", w0_sampled, " w1: ", w1_sampled)
        Y = [w0_sampled + w1_sampled * x for x in X]
        plt.plot(X, Y)
    plt.show()
plot_sample(X, T, cdf_w0, cdf_w1)
w0: 2.797595190380761 w1: 0.5390781563126255
w0: 3.148296593186373 w1: 0.5671342685370742
w0: 3.092184368737475 w1: 0.5811623246492985
w0: 3.2044088176352705 w1: 0.5110220440881763
w0: 3.3166332665330662 w1: 0.5531062124248498
plot_sample(X,T,cdf_w0,cdf_w1)
w0: 2.895791583166333 w1: 0.5390781563126255
w0: 3.260521042084169 w1: 0.5811623246492985
w0: 2.5871743486973946 w1: 0.5951903807615229
w0: 3.1202404809619235 w1: 0.5951903807615229
w0: 3.2184368737474953 w1: 0.5671342685370742
def sub_sampling(sub_idx):
    # rerun the grid-posterior + sampling pipeline on a subset of the data
    X1 = X[sub_idx]
    T1 = T[sub_idx]
    pdf1 = posterior_pdf(X1, T1)
    cdf_w0_1, cdf_w1_1, _, _ = marginalization(pdf1)
    plot_sample(X1, T1, cdf_w0_1, cdf_w1_1)
sub_idx = np.random.choice(N, 10)  # 10 random indices (drawn with replacement)
sub_sampling(sub_idx)
w0: 2.7835671342685373 w1: 0.7074148296593186
w0: 3.0360721442885774 w1: 0.6092184368737477
w0: 2.643286573146293 w1: 0.6372745490981964
w0: 2.19438877755511 w1: 0.623246492985972
w0: 2.222444889779559 w1: 0.6092184368737477
sub_idx = range(17,32)
sub_sampling(sub_idx)
w0: 2.1663326653306614 w1: 0.1603206412825653
w0: 4.719438877755511 w1: 0.42685370741482975
w0: -0.4288577154308617 w1: 0.1603206412825653
w0: 0.5110220440881763 w1: 0.5531062124248498
w0: 3.9058116232464934 w1: 0.6933867735470942
sub_idx = range(15)
sub_sampling(sub_idx)
w0: 3.1903807615230457 w1: 0.2585170340681362
w0: 0.5250501002004007 w1: 0.6653306613226455
w0: 2.4048096192384767 w1: 0.623246492985972
w0: 2.6713426853707416 w1: 0.5811623246492985
w0: 2.3486973947895793 w1: 0.6092184368737477
sub_idx = range(35,50)
sub_sampling(sub_idx)
w0: 4.158316633266534 w1: 0.8056112224448899
w0: 3.849699398797595 w1: 0.3426853707414832
w0: 3.9058116232464934 w1: 0.3707414829659319
w0: 4.915831663326653 w1: 0.5390781563126255
w0: 4.49498997995992 w1: 1.2124248496993988
sub_idx = list(range(7))+list(range(42,50))
sub_sampling(sub_idx)
w0: 4.438877755511022 w1: 0.6513026052104207
w0: 3.6392785571142285 w1: 0.623246492985972
w0: 3.9759519038076157 w1: 0.5671342685370742
w0: 4.312625250501002 w1: 0.5951903807615229
w0: 4.803607214428858 w1: 0.7494989979959921
From Wikipedia: "the Metropolis–Hastings algorithm is a Markov chain Monte Carlo (MCMC) method for obtaining a sequence of random samples from a probability distribution from which direct sampling is difficult" https://en.wikipedia.org/wiki/Metropolis%E2%80%93Hastings_algorithm
In our case the full posterior is a 2D pdf that is hard to sample from directly. The algorithm goes as follows:
we start with some initial parameters $\Theta$ and evaluate the (unnormalized) posterior probability at $\Theta$.
In each iteration: propose new parameters $\Theta'$ from a proposal distribution centered at the current $\Theta$; accept the jump with probability $\min\left(1, \frac{P(\Theta' \mid D)}{P(\Theta \mid D)}\right)$, otherwise stay at $\Theta$.
import scipy.stats
# adapted from https://letianzj.github.io/mcmc-linear-regression.html
def prior_probability(w_vector):
    a = w_vector[0]  # intercept
    b = w_vector[1]  # slope
    # flat prior: every (a, b) is equally likely
    a_prior = 1
    b_prior = 1
    # a_prior = scipy.stats.norm(0.5, 0.5).pdf(a)
    # b_prior = scipy.stats.norm(0.5, 0.5).pdf(b)
    return np.log(a_prior) + np.log(b_prior)
# Given w, the likelihood of seeing x and t
def likelihood_probability(w_vector):
    a = w_vector[0]  # intercept
    b = w_vector[1]  # slope
    t_predict = a + b * X
    single_likelihoods = scipy.stats.norm(t_predict, np.sqrt(sigma2)).pdf(T)  # noise variance sigma2 is assumed known
    return np.sum(np.log(single_likelihoods))
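# Sketch of an alternative (an addition, not in the original): scipy's logpdf
# returns the log-density directly, avoiding the exp/log round trip that can
# underflow when many per-point likelihoods are multiplied together.
def likelihood_probability_logpdf(w_vector):
    a, b = w_vector
    return scipy.stats.norm(a + b * X, np.sqrt(sigma2)).logpdf(T).sum()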
# We don't need the normalizing constant P(D):
# it cancels out in the acceptance ratio
def posterior_probability(w_vector):
    return likelihood_probability(w_vector) + prior_probability(w_vector)
# jump from w to a new w
# proposal function is a Gaussian centered at w
def proposal_function(w_vector):
    a = w_vector[0]
    b = w_vector[1]
    a_new = np.random.normal(a, 0.5)
    b_new = np.random.normal(b, 0.5)
    w_new = [a_new, b_new]
    return w_new
# run the Monte Carlo
n_samples = 30000
initial_w_vector = [0.5, 0.5]  # start value
results = np.zeros([n_samples, 2])  # record the chain
results[0, 0] = initial_w_vector[0]
results[0, 1] = initial_w_vector[1]
for step in range(1, n_samples):  # loop 30,000 times
    if step % 5000 == 0:
        print('step: {}'.format(step))
    w_old = results[step-1, :]
    w_proposal = proposal_function(w_old)
    # np.exp turns the log-posterior difference back into a probability ratio
    prob = np.exp(posterior_probability(w_proposal) - posterior_probability(w_old))
    if np.random.uniform(0, 1) < prob:
        results[step, :] = w_proposal  # jump
    else:
        results[step, :] = w_old  # stay
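# Sketch of an added diagnostic (not in the original): the fraction of
# accepted proposals. For random-walk Metropolis, an acceptance rate of
# roughly 0.2-0.5 is commonly considered healthy.
accepted = np.any(results[1:] != results[:-1], axis=1)
print('acceptance rate: {:.2f}'.format(accepted.mean()))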
burn_in = 10000
w_posterior = results[burn_in:, :]
print(w_posterior.mean(axis=0)) # use average as point estimates
step: 5000
step: 10000
step: 15000
step: 20000
step: 25000
[3.18341861 0.54985753]
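Beyond point estimates, the same samples give credible intervals directly; a sketch using percentiles (an addition, not in the original):
lo, hi = np.percentile(w_posterior, [2.5, 97.5], axis=0)  # 95% credible intervals
print('w0 in [{:.3f}, {:.3f}], w1 in [{:.3f}, {:.3f}]'.format(lo[0], hi[0], lo[1], hi[1]))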
# present the results
fig, axes = plt.subplots(1,3, figsize=(12,3), dpi=100)
axes[0].hist(w_posterior[:,0], bins=20, color='blue')
axes[0].axvline(w_posterior.mean(axis=0)[0], color='red', linestyle='dashed', linewidth=2)
axes[0].title.set_text('Posterior -- w0')
axes[1].hist(w_posterior[:,1], bins=20, color='blue')
axes[1].axvline(w_posterior.mean(axis=0)[1], color='red', linestyle='dashed', linewidth=2)
axes[1].title.set_text('Posterior -- w1')
axes[2].hist2d(*w_posterior.T, bins=50)
axes[2].title.set_text('Posterior -- w0,w1')
axes[2].axvline(w_posterior.mean(axis=0)[0], color='red', linestyle='dashed', linewidth=2)
axes[2].axhline(w_posterior.mean(axis=0)[1], color='red', linestyle='dashed', linewidth=2)
axes[2].set_xlabel('w0')
axes[2].set_ylabel('w1')
plt.subplots_adjust(wspace=(0.4))
plt.show()
import seaborn as sns
sns.jointplot(x=w_posterior[:,0], y=w_posterior[:,1])
plt.plot(results[:,0], label='full chain (incl. burn-in)')
plt.plot(np.arange(burn_in, len(results)), w_posterior[:,0], label='$w_0$ samples')
plt.legend()
plt.show()
from statsmodels.graphics import tsaplots
tsaplots.plot_acf(w_posterior[:,0], lags=60)
plt.ylabel('Correlation')
plt.xlabel('Lag')
plt.show()
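Given the autocorrelation above, one can thin the chain to get roughly independent draws; a sketch assuming a thinning interval of 60, read off where the ACF decays (both the interval and this step are additions):
thin = 60  # assumed interval, roughly where the autocorrelation dies out
w_thinned = w_posterior[::thin]
print(w_thinned.shape[0], 'approximately independent samples, mean:', w_thinned.mean(axis=0))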
lag = 300
plt.scatter(w_posterior[:-lag,0], w_posterior[lag:,0], alpha=0.05)
plt.ylabel(r'$w_0^{\tau _0+l}$', fontsize=14)
plt.xlabel(r'$w_0^{\tau _0}$', fontsize=14)
plt.title(f'Lag = {lag}')
plt.show()