Confidence intervals¶
from scipy import stats
from math import sqrt
import numpy as np
import matplotlib.pyplot as plt
Known variance¶
Experimenting with interval estimates (aka confidence intervals, CI). We deal with the case when our measurements are sampled from a normal distribution $N(\mu, \sigma^2)$.
For simplicity, we start with a simplifying assumption: we know $\sigma$ in advance and only need to estimate $\mu$.
The function below gives us a random sample of several independent numbers all of the same distribution. This is the place to change if we are to deal with a different kind of random variable.
def sampleX(sigma, n, theta):
"""
Returns a sample of n independent values
from a normal random variable of standard deviation sigma
and "unknown" mean theta.
"""
return stats.norm.rvs(loc=theta, scale=sigma, size=n)
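For instance, we can draw a quick sample (the particular parameters are arbitrary and the values change with every run):
sampleX(sigma=1, n=3, theta=0)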
Below are two functions giving the confidence interval. The first one computes the bounds explicitly; the second one uses the built-in stats.norm.interval.
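Both rely on the standard formula: since $\bar X_n \sim N(\mu, \sigma^2/n)$, the two-sided interval at confidence level $1-\alpha$ is
$$\left(\bar X_n - z_{1-\alpha/2}\,\frac{\sigma}{\sqrt n},\ \ \bar X_n + z_{1-\alpha/2}\,\frac{\sigma}{\sqrt n}\right),\qquad z_{1-\alpha/2} = \Phi^{-1}\!\left(1-\tfrac{\alpha}{2}\right).$$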
def CI1(x, sigma, alpha):
"""
    Returns the two-sided confidence interval at level 1-alpha
based on measurement -- realization -- x.
(We assume x is a numpy array.)
"""
n = len(x)
z = stats.norm.ppf(1-alpha/2)
a = x.mean() - z*sigma/sqrt(n)
b = x.mean() + z*sigma/sqrt(n)
return (a,b)
def CI2(x, sigma, alpha):
"""
    Returns the two-sided confidence interval at level 1-alpha
based on measurement -- realization -- x.
(We assume x is a numpy array.)
"""
return stats.norm.interval(confidence=1-alpha,
loc=x.mean(),
scale=sigma/sqrt(len(x)))
Exercise part a).
CI1(np.array([2.9]), 1, .05)
(0.9400360154599459, 4.8599639845400535)
CI2(np.array([2.9]), 1, .05)
(0.9400360154599459, 4.8599639845400535)
The value $1.96 = \Phi^{-1}(1-0.025)$ is a good one to remember -- the above interval is:
2.9-1.96, 2.9+1.96
(0.94, 4.859999999999999)
Exercise part b).
data = np.array([1.82, 1.00, 2.50, 3.00, 0.50, 2.97, 1.76, 1.35, 3.41])
CI1(data, sigma=1, alpha=0.05)
(1.3811231162644266, 2.6877657726244624)
CI2(data, sigma=1, alpha=0.05)
(1.3811231162644266, 2.6877657726244624)
The number 1.96 again -- here $\sigma/\sqrt{n} = 1/3$ since $n = 9$ ...
data.mean() - 1.96/3, data.mean() + 1.96/3
(1.3811111111111112, 2.687777777777778)
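Unknown variance¶
So far we assumed $\sigma$ is known. When it is not, we estimate it by the sample standard deviation $s$ and replace the normal quantile by a Student t quantile with $n-1$ degrees of freedom. The interval at level $1-\alpha$ becomes
$$\left(\bar X_n - t_{1-\alpha/2,\,n-1}\,\frac{s}{\sqrt n},\ \ \bar X_n + t_{1-\alpha/2,\,n-1}\,\frac{s}{\sqrt n}\right).$$
The two functions below implement this, again explicitly and via scipy.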
def CI3(x, alpha):
"""
    Returns the two-sided confidence interval at level 1-alpha
    based on measurement -- realization -- x.
    The standard deviation is estimated from x; Student t quantiles are used.
    (We assume x is a numpy array.)
"""
n = len(x)
sigma = x.std(ddof=1)
z = stats.t.ppf(1-alpha/2, n-1)
a = x.mean() - z*sigma/sqrt(n)
b = x.mean() + z*sigma/sqrt(n)
return (a,b)
def CI4(x, alpha):
"""
    Returns the two-sided confidence interval at level 1-alpha
    based on measurement -- realization -- x.
    The standard deviation is estimated from x (via stats.sem); Student t quantiles are used.
    (We assume x is a numpy array.)
"""
return stats.t.interval(confidence=1-alpha,
df=len(x)-1,
loc=x.mean(),
scale=stats.sem(x))
data2 = np.array([8.47, 10.91, 10.87, 9.46, 10.40])
We compute the mean and standard deviation using numpy. (Which version of the std do we need?)
data2.mean()
10.022
data2.std(ddof=1), data2.std(ddof=0)
(1.0458345949527579, 0.93542289901413)
As a check, the same computation in plain Python.
m = sum(data2)/len(data2); m
10.022
sqrt(sum([ (x - m)**2 for x in data2])/(len(data2)-1)), sqrt(sum([ (x - m)**2 for x in data2])/len(data2))
(1.0458345949527579, 0.93542289901413)
We will need the (estimated) standard deviation of the mean, which is $\sqrt{n}$ times smaller. There is also a direct function, stats.sem(), for this.
data2.std(ddof=1)/sqrt(len(data2))
0.467711449507065
stats.sem(data2)
0.467711449507065
The following is the confidence interval as discussed in class.
CI3(data2, 0.05)
(8.723424835371143, 11.320575164628858)
CI4(data2, 0.05)
(8.723424835371143, 11.320575164628858)
If we knew that data2.std(ddof=1) were the true standard deviation, we could use the method from the first part. This gives a narrower interval -- but it will not have the confidence level we want.
CI1(data2, data2.std(ddof=1), 0.05)
(9.105302403809128, 10.938697596190872)
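A quick width comparison (a small sketch using the two functions defined above) makes the trade-off explicit: the normal-based interval comes out roughly 1.83 wide versus about 2.60 for the t-based one, but its actual coverage falls short of 95 %.
# width of the (incorrect) normal-based interval vs. the t-based interval
a1, b1 = CI1(data2, data2.std(ddof=1), 0.05)
a3, b3 = CI3(data2, 0.05)
b1 - a1, b3 - a3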
Estimates of confidence levels¶
def estimate_is_correct(interval, theta):
return interval[0] <= theta <= interval[1]
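A quick sanity check with made-up numbers (both the interval and the tested values are arbitrary):
estimate_is_correct((0.94, 4.86), 2.454), estimate_is_correct((0.94, 4.86), 5.0)
(True, False)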
First we know the true sigma¶
theta = 2.454
correct = 0
alpha = .1
num = 10**4
L = []
D = []
for _ in range(num):
    data = sampleX(2,5,theta)
D.append(data)
I = CI1(x=data,sigma=2,alpha=alpha)
L.append(I)
if estimate_is_correct(I,theta):
correct = correct + 1
print(f"Correct estimates: {correct}, should be about: {(1-alpha)*num}" )
Correct estimates: 8900, should be about: 9000.0
Now the same thing illustrated by a figure: each line represents one run of the experiment and plots the interval obtained from the measurements (green dots). The red dotted line is the true value of the mean.
For the sake of a cleaner picture we only plot the first num=20 runs.
%matplotlib notebook
fig = plt.figure()
num = 20
for i in range(num):
plt.plot([L[i][0],L[i][1]], [i/num,i/num], 'b-', linewidth=1.5)
plt.plot(D[i], [i/num for _ in D[i]], 'go', markersize=2)
plt.plot([theta, theta], [-0.1, 1.1], 'r:', linewidth=1)
plt.show()
Now we don't know the true sigma, but use the same method¶
We ask for confidence level 90 % but get only about 82 %. (Here we have $n=5$ samples; with more samples the difference would be smaller -- see the quantile check after the experiment below.)
theta = 2.454
correct = 0
alpha = .1
num = 10**4
for _ in range(num):
data = sampleX(2,5,theta)
I = CI1(x=data,sigma=data.std(ddof=1),alpha=alpha)
if estimate_is_correct(I,theta):
correct = correct + 1
print(f"Correct estimates: {correct}, should be about: {(1-alpha)*num}" )
Correct estimates: 8237, should be about: 9000.0
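A quick look at the quantiles shows why the coverage drops and why the gap shrinks with more samples: for $n=5$ (4 degrees of freedom) the Student t quantile is roughly 2.13, noticeably larger than the normal quantile of about 1.64, while for 30 samples it is already down to about 1.70.
# compare the normal quantile with Student t quantiles for small and moderate n
stats.norm.ppf(0.95), stats.t.ppf(0.95, df=4), stats.t.ppf(0.95, df=29)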
Finally, we use the Student t function¶
theta = 2.454
correct = 0
alpha = .1
num = 10**4
for _ in range(num):
data = sampleX(2,5,theta)
I = CI3(x=data,alpha=alpha)
if estimate_is_correct(I,theta):
correct = correct + 1
print(f"Correct estimates: {correct}, should be about: {(1-alpha)*num}" )
Correct estimates: 9028, should be about: 9000.0