%pylab inline
# import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Populating the interactive namespace from numpy and matplotlib

a = 100
b = 10
c = 100
sigma = 5

data_QTL_0 = np.random.normal(loc=a, scale=sigma, size=1000)
data_QTL_1 = np.random.normal(loc=a+b, scale=sigma, size=1000)
data_NQTL = np.random.normal(loc=c, scale=sigma, size=1000)
all_data = [data_QTL_0, data_QTL_1, data_NQTL]
bs = np.linspace(min(np.concatenate(all_data)), max(np.concatenate(all_data)), 50)

fig, ax = plt.subplots(2, 1, figsize=(9, 6), sharex=True)

# plot QTL case
ax[0].set_title('QTL model: a = {:.0f}, b = {:.0f}, $\sigma$ = {:.0f}'.format(a, b, sigma))
ax[0].hist(data_QTL_0, bins=bs,
         alpha=0.5, label='$g=0: N(a, \sigma)$')
ax[0].axvline(a, ls='--', c='C0', label='a')
ax[0].hist(data_QTL_1, bins=bs,
         alpha=0.5, label='$g=1: N(a+b, \sigma)$')
ax[0].axvline(a+b, ls='--', c='C1', label='a+b')
ax[0].legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0)

# plot NQTL case
ax[1].set_title('NQTL model: c = {:.0f}, $\sigma$ = {:.0f}'.format(c, sigma))
ax[1].hist(data_NQTL, bins=bs,
           alpha=0.5, color='C2', label='$N(c, \sigma)$')
ax[1].axvline(c, ls='--', c='C2', label='c')
ax[1].legend(loc='upper left', bbox_to_anchor=(1.05, 1), borderaxespad=0)

plt.show()

def evidenceH1(data, ls, tm=1,tp=20):

    logsigma = np.log(ls[-1] - ls[0])
    logarea  = logsigma - np.log(len(ls))

    N = len(data)
    sum_t = data.sum()
    logterm = []

    for ld in ls:

        lognum  = -sum_t/ld
        logz    = np.log(ld) + np.log(np.exp(-tm/ld) - np.exp(-tp/ld))
        logden  = N*logz
        for t in data:
            logpost = lognum - logden - logsigma + logarea

        logterm.append(logpost)

    logev   = logsumexp(logterm)

    return logev

def evidenceH2(data, ls, tm=1,tp=20, etas = [0.5]):

    logsigma = np.log(ls[-1] - ls[0])
    logarea  = logsigma - np.log(len(ls))
    logterm = []

    for eta in etas:
        for ld1 in ls:

            logz1 = np.log(ld1) + np.log(np.exp(-tm/ld1) - np.exp(-tp/ld1))
            row = []
            for ld2 in ls:

                logz2 = np.log(ld2) + np.log(np.exp(-tm/ld2) - np.exp(-tp/ld2))
                logval=0

                for t in data:
                    logval += np.log(np.exp(np.log(eta) - t/ld1 - logz1) + np.exp(np.log(1-eta) - t/ld2 - logz2))

                row.append(logval -2*logsigma + 2*logarea)
            logterm.append(row)

    logev = logsumexp(logterm)

    return logev

%pylab inline

Populating the interactive namespace from numpy and matplotlib

# Simulate

w1 = 0.5
w0 = 3
sigma2 = 3
N = 100
err = np.random.normal(0,sigma2,N)
X = np.array(sorted(np.random.uniform(-10,10,N)))
T = w0 + X*w1 + err

plt.plot(X,T,".")
plt.show()

def posterior(x, t, w0, w1,sigma2):

    logconst = -0.5*np.log(2*np.pi)*sigma2
    logprob = 0        # flat prior
    logprob = (-(t - w0 - w1*x)*(t - w0 - w1*x)/2/sigma2 + logconst).sum()

    return np.exp(logprob)



nbin   = 500
w0_min = -2
w0_max = +5
w1_min = -2
w1_max = +5

x0 = np.linspace(w0_min, w0_max, nbin)
x1 = np.linspace(w1_min, w1_max, nbin)

def posterior_pdf(X,T):
    h2 = []
    bsize_w0 = (w0_max - w0_min)/nbin #x0[1]-x0[0]
    bsize_w1 = (w1_max - w1_min)/nbin
    for b0 in range(nbin):
        w0 = w0_min + b0*bsize_w0
        row = []
        for b1 in range(nbin):
            w1 = w1_min + b1*bsize_w1
            ev = posterior(X, T, w0, w1,sigma2) * bsize_w0 * bsize_w1
            row.append(ev)
        h2.append(row)

    # normalization
    h2  = np.array(h2)/np.sum(np.array(h2))

    return h2


pdf= posterior_pdf(X,T)

w0 ,w1 = np.unravel_index(pdf.argmax(), pdf.shape)
w0,w1

(346, 179)

x0[346], x1[179]

(2.8537074148296595, 0.5110220440881763)

fig,axs = subplots(ncols= 2,nrows =1)
fig.set_figwidth(20)
fig.set_figheight(10)
axs[0].imshow(pdf)

pcm = axs[1].imshow(pdf[int(w0-30):int(w0+30),int(w1-30):int(w1+30)])

axs[0].axvline(w1, c = 'r', ls ='--')
axs[0].axhline(w0, c = 'r', ls ='--')

fig.colorbar(pcm,ax=axs[0], shrink =0.35)
fig.colorbar(pcm,ax=axs[1], shrink =0.35)

axs[0].set_title('posterior matrix')
axs[0].set_ylabel(r'$w0$ index')
axs[0].set_xlabel(r'$w1$ index')
axs[1].set_xlabel(r'$w1$ index')
axs[1].set_title('posterior matrix matrix  zoomed')

Text(0.5, 1.0, 'posterior matrix matrix  zoomed')

# from scipy.stats.contingency import margins
def marginalization(pdf):

    pdf_w0,pdf_w1 = pdf.sum(axis=1),pdf.sum(axis=0) #margins(pdf) # ~
    pdf_w0 = pdf_w0.T
    cdf_w0 = np.cumsum(pdf_w0)
    cdf_w1 = np.cumsum(pdf_w1)
    return cdf_w0,cdf_w1, pdf_w0,pdf_w1


cdf_w0,cdf_w1, pdf_w0,pdf_w1 = marginalization(pdf)

fig,axs = subplots(ncols= 2,nrows =2)
fig.set_figwidth(20)
fig.set_figheight(10)
axs[0][0].plot(x0,pdf_w0)
axs[0][1].plot(x1,pdf_w1)
axs[1][0].plot(x0,cdf_w0)
axs[1][1].plot(x1,cdf_w1)

[<matplotlib.lines.Line2D at 0x7f546a8cf8d0>]

def parameters_sample(cdf_w0,cdf_w1):
    r = np.random.uniform(low=0.0, high=1.0, size=2)
    i = int(np.argwhere(cdf_w0>r[0])[0] -1)
    i = max(i,0)
    w0_sampled = x0[i]
    i = int(np.argwhere(cdf_w1>r[1])[0] -1)
    i = max(i,0)

    w1_sampled = x0[i]

    return w0_sampled,w1_sampled

def plot_sample(X,T,cdf_w0,cdf_w1):
    for i in range(5):
        w0_sampled,w1_sampled = parameters_sample(cdf_w0,cdf_w1)
        print("w0: ", w0_sampled," w1: ",w1_sampled)
        Y = []
        for i in range(len(X)):
            Y.append(w0_sampled + w1_sampled * X[i])

        plt.plot(X,T,".")
        plt.plot(X,Y)
    plt.show()

plot_sample(X,T,cdf_w0,cdf_w1)

w0:  3.2324649298597192  w1:  0.49699398797595196
w0:  2.965931863727455  w1:  0.5250501002004007
w0:  2.8677354709418834  w1:  0.5250501002004007
w0:  2.9378757515030056  w1:  0.4689378757515028
w0:  3.2745490981963927  w1:  0.45490981963927846

def sub_sampling(sub_idx):
    X1 = X[sub_idx]
    T1 = T[sub_idx]

    pdf1=posterior_pdf(X1,T1)
    cdf_w0_1,cdf_w1_1,_,_ = marginalization(pdf1)

    plot_sample(X1,T1,cdf_w0_1,cdf_w1_1)

sub_idx = np.random.choice(N, 10)
sub_sampling(sub_idx)

w0:  2.6993987975951903  w1:  0.7074148296593186
w0:  2.993987975951904  w1:  0.6372745490981964
w0:  4.018036072144288  w1:  0.5671342685370742
w0:  2.993987975951904  w1:  0.7354709418837677
w0:  3.7935871743486977  w1:  0.49699398797595196

sub_idx = range(17,32)
sub_sampling(sub_idx)

w0:  -0.9759519038076152  w1:  0.7915831663326656
w0:  -0.30260521042084165  w1:  0.006012024048096087
w0:  -1.8597194388777556  w1:  -0.34468937875751493
w0:  -1.9438877755511021  w1:  -0.3587174348697395
w0:  -2.0  w1:  -0.008016032064128265

sub_idx = range(15)
sub_sampling(sub_idx)

w0:  1.591182364729459  w1:  0.3006012024048097
w0:  -0.751503006012024  w1:  0.4128256513026054
w0:  3.5410821643286576  w1:  0.4408817635270541
w0:  -0.3306613226452906  w1:  0.5811623246492985
w0:  3.3026052104208414  w1:  0.35671342685370755

sub_idx = range(35,50)
sub_sampling(sub_idx)

w0:  3.3867735470941884  w1:  0.6372745490981964
w0:  1.8717434869739478  w1:  0.42685370741482975
w0:  2.0400801603206418  w1:  0.7494989979959921
w0:  3.2745490981963927  w1:  0.9599198396793587
w0:  1.6893787575150303  w1:  0.6372745490981964

sub_idx = list(range(7))+list(range(42,50))
sub_sampling(sub_idx)

w0:  1.6192384769539077  w1:  0.21643286573146314
w0:  1.7595190380761525  w1:  0.5390781563126255
w0:  1.6613226452905812  w1:  0.42685370741482975
w0:  2.657314629258517  w1:  0.27254509018036055
w0:  2.3206412825651306  w1:  0.21643286573146314

%pylab inline
# Simulate points
w1 = 0.5
w0 = 3
sigma2 = 3
N = 50
err = np.random.normal(0,sigma2,N)
X = np.array(sorted(np.random.uniform(-10,10,N)))
T = w0 + X*w1 + err



plt.plot(X,T,".")
plt.show()

Populating the interactive namespace from numpy and matplotlib

from scipy.special import logsumexp
def log_posterior(x, t, w0, w1,sigma2):
    logconst = -0.5*np.log(2*np.pi)*sigma2
    logP = 0        # flat prior
    logP = sum(-(t - w0 - w1*x)*(t - w0 - w1*x)/2/sigma2 + logconst)
    return logP



w0_prev = 0
w1_prev = 0

posterior_prev=  log_posterior(X, T, w0_prev, w1_prev,sigma2)


iterations = 100000

w0_sampled = np.empty(iterations)
w1_sampled = np.empty(iterations)
for i in range(iterations):
    # draw a new set of parameters using previous value
    w0_cur = w0_prev+np.random.normal(0, 0.1)
    w1_cur = w1_prev+np.random.normal(0, 0.1)

    # calculate the posterior with the new parameters
    posterior_cur =  log_posterior(X, T, w0_cur, w1_cur,sigma2)

    #calculate the ratio
    alpha = logsumexp(posterior_cur-posterior_prev)
    if alpha >0:
        u = np.random.uniform()
        if u>=alpha: # reject the current
            w0_prev = w0_prev
            w1_prev = w1_prev
            posterior_prev = posterior_prev

        elif u<alpha: # accept with probability alpha
            w0_prev = w0_cur
            w1_prev = w1_cur
            posterior_prev = posterior_cur

    # Update the result arrays
    w0_sampled[i] = w0_prev
    w1_sampled[i] = w1_prev

plt.plot(list(range(iterations)), w0_sampled)
plt.hlines(np.mean(w0_sampled[1000:]), -2000, iterations+2000, 'r')
plt.xlabel("Iterations")
plt.ylabel("sampled w_0")
plt.show()

plt.plot(list(range(iterations)), w1_sampled)
plt.hlines(np.mean(w1_sampled[1000:]), -2000, iterations+2000,'r')
plt.xlabel("Iterations")
plt.ylabel("sampled w_1")
plt.show()

from scipy.special import logsumexp
def log_posterior(x, t, w0, w1,sigma2):
    logconst = -0.5*np.log(2*np.pi)*sigma2
    logP = 0        # flat prior
    logP = sum(-(t - w0 - w1*x)*(t - w0 - w1*x)/2/sigma2 + logconst)
    return logP



w0_prev = 0
w1_prev = 0

posterior_prev=  log_posterior(X, T, w0_prev, w1_prev,sigma2)


iterations = 100000

w0_sampled = []
w1_sampled = []
i = 0
stop=1
while stop == 1:

    # draw a new set of parameters using previous value
    w0_cur = w0_prev+np.random.normal(0, 0.1)
    w1_cur = w1_prev+np.random.normal(0, 0.1)

    # calculate the posterior with the new parameters
    posterior_cur =  log_posterior(X, T, w0_cur, w1_cur,sigma2)

    #calculate the ratio
    alpha = logsumexp(posterior_cur-posterior_prev)
    if alpha >0:
        u = np.random.uniform()
        if u>=alpha: # reject the current
            w0_prev = w0_prev
            w1_prev = w1_prev
            posterior_prev = posterior_prev

        elif u<alpha: # accept with probability alpha
            w0_prev = w0_cur
            w1_prev = w1_cur
            posterior_prev = posterior_cur

    # Update the result arrays
    w0_sampled.append(w0_prev)
    w1_sampled.append(w1_prev)

    i += 1

    if i >1000:
        if (w0_sampled[-1] - w0_sampled[-2] == 0) and (w1_sampled[-1] - w1_sampled[-2] == 0):
            stop = 0

len(w0_sampled)

1001

plt.plot(list(range(len(w0_sampled))), w0_sampled)
plt.hlines(np.mean(w0_sampled[1000:]), 0, 2000, 'r')
plt.xlabel("Iterations")
plt.ylabel("sampled w_0")
plt.show()

plt.plot(list(range(len(w1_sampled))), w1_sampled)
plt.hlines(np.mean(w1_sampled[1000:]), 0, 2000,'r')
plt.xlabel("Iterations")
plt.ylabel("sampled w_1")
plt.show()

i

2

Goals for today's section¶

What's the general problem?¶

Technical note¶

This is a derived from ITILA 28.2¶