import numpy as np
There are 3 ways to estimate the variance of a sample of values drawn from a distribution.
def f(X):
mean = X.mean()
N = X.shape[0]
return np.sum((X-mean)**2) / (N-1)
def g(X):
mean = X.mean()
N = X.shape[0]
return np.sum((X-mean)**2) / N
def h(X):
mean = X.mean()
N = X.shape[0]
return np.sum((X-mean)**2) / (N+1)
num_experiments = 20000
sample_size = 2000
samples = np.random.randn(num_experiments, sample_size)
true_variance = 1
print('Our plan is running %d experiments to test an estimator.' % num_experiments)
print('In every experiment, we provide the estimator a sample of %d values drawn from the standard Gaussian distribution.' %
sample_size)
print()
print('The true variance is 1, but the estimator will not know this, and will give us its estimate in every experiment.')
print('Then we will calculate the bias, variance, and MSE (mean sqaured error) of the estimator after %d experiments.' %
num_experiments)
print()
print('Note that we will calculate the MSE in two equivalent ways. Think about why they give the same result.')
def evaluate_an_estimator(func, samples, true_value):
estimates = np.empty(samples.shape[0])
for i, sample in enumerate(samples):
estimates[i] = func(sample)
estimator_bias = (estimates - true_value ).mean()
estimator_var = np.square(estimates - estimates.mean()).mean()
estimator_MSE_1 = np.square(estimates - true_value ).mean()
estimator_MSE_2 = estimator_bias**2 + estimator_var
print('%s: bias = %12.5e var = %12.5e MSE_1 = %12.5e MSE_2 = %12.5e' %
(str(func), estimator_bias, estimator_var, estimator_MSE_1, estimator_MSE_2))
evaluate_an_estimator(f, samples, true_variance)
evaluate_an_estimator(g, samples, true_variance)
evaluate_an_estimator(h, samples, true_variance)
We have tested 3 variance estimators for the standard Gaussian distribution and make the following conclusions: