# -*- coding: utf-8 -*- """ Created on Fri Sep 10 16:59:10 2021 @author: Rene """ import numpy as np import pandas as pd from scipy.stats import t import matplotlib.pyplot as plt def two_sided_t_test(x, mu_0, alpha): n = len(x) x_bar = np.mean(x) t_quantile = t.ppf(1-(alpha/2),n-1) st_dev_x_bar = np.std(x,ddof=1)/(n**0.5) t_statistic = (x_bar-mu_0)/st_dev_x_bar p_value = 2*(1-t.cdf(abs(t_statistic),n-1)) results = np.zeros(3) results[0] = t_statistic results[1] = t_quantile results[2] = p_value return(results) df = pd.read_csv('OldFaithFul.csv') Duration = df["Duration (s)"] # H_0: mu = mu_0 H_1: mu not = mu_0 mu_0 = 204 alpha = 0.05 n = len(Duration) s_n = np.std(Duration,ddof=1) x_bar = np.mean(Duration) results_t_test = two_sided_t_test(Duration, mu_0, alpha) t_density_at_quantile = t.pdf(results_t_test[1],n-1) # Assuming H_0 is True and with estimate for sigma evaluating theoretical pdf LB = -5 UB = 9 x_points = np.linspace(-5,5,1000) y_points = t.pdf(x_points,df=n-1) off_set_x = 0 off_set_y = 0.01 max_y = 0.4 # Comparing dataset with the theoretical pdf and mu_0 with x_bar # Setting the attributes for the Figure plt.rc('font', family='serif', size='10') Figure = plt.figure() Figure.set_figwidth(9) Figure.set_figheight(4.5) Figure.subplots_adjust(hspace=0.4, wspace=0.4) x_lims = (LB-off_set_x,UB+off_set_x) y_lims = (0-off_set_y,max_y+off_set_y) plt.axvline(0, lw=2, ls = '-',color = 'lightgray',alpha=0.5) plt.axhline(0, lw=2, ls = '-',color = 'lightgray',alpha=0.5) plt.xlim(x_lims) plt.ylim(y_lims) plt.xlabel('t') plt.ylabel('student - t (n-1) density') plt.plot(x_points,y_points,ls='-',lw=0.5,color='blue',label = 'student - t (n-1) density') plt.axvline(mu_0, lw=1, ls = '--',color = 'blue',alpha=0.5) plt.axvline(results_t_test[0], lw=1, ls = '--',color = 'red',alpha=0.5) plt.axvline(-results_t_test[0], lw=1, ls = '--',color = 'red',alpha=0.5) t_x_values = [results_t_test[1], results_t_test[1]] t_y_values = [0, t_density_at_quantile] plt.plot(t_x_values, t_y_values, color = 'darkgreen', linestyle="--",lw=1) t_x_values = [-results_t_test[1], -results_t_test[1]] t_y_values = [0, t_density_at_quantile] plt.plot(t_x_values, t_y_values, color = 'darkgreen', linestyle="--",lw=1) t_x_values = np.linspace(results_t_test[1],5,100) t_y_values = t.pdf(t_x_values,n-1) plt.fill_between(t_x_values, np.zeros(100), t_y_values, color = 'indianred',alpha=1) t_x_values = np.linspace(results_t_test[0],5,100) t_y_values = t.pdf(t_x_values,n-1) plt.fill_between(t_x_values, np.zeros(100), t_y_values, color = 'lightblue',alpha=0.5) t_x_values = np.linspace(LB,-results_t_test[1],100) t_y_values = t.pdf(t_x_values,n-1) plt.fill_between(t_x_values, np.zeros(100), t_y_values, color = 'indianred',alpha=1) t_x_values = np.linspace(-5,-results_t_test[0],100) t_y_values = t.pdf(t_x_values,n-1) plt.fill_between(t_x_values, np.zeros(100), t_y_values, color = 'lightblue',alpha=0.5) text_str = r'$H_0: \mu = \mu_0$' plt.text(LB-off_set_x+0.25,0.38,text_str,horizontalalignment='left',color = 'red') text_str = r'$H_1: \mu \neq \mu_0$' plt.text(LB-off_set_x+0.25,0.35,text_str,horizontalalignment='left',color = 'red') text_str = r'$\overline{x} = $' + f'{x_bar:2.2f}' plt.text(LB-off_set_x+0.25,0.32,text_str,horizontalalignment='left',color = 'red') text_str = r'$s_n = $' + f'{s_n:2.3f}' plt.text(LB-off_set_x+0.25,0.29,text_str,horizontalalignment='left',color = 'red') text_str = r'$t_{1-\alpha/2} = $' + f'{results_t_test[1]:2.2f}' plt.text(LB-off_set_x+0.25,0.26,text_str,horizontalalignment='left',color = 'red') text_str = r'$t_0\,=\,(\overline{x}-\mu_0)/(s_n/\sqrt{n})\,=$'+ f'{results_t_test[0]:2.2f}' plt.text(results_t_test[0]+0.25,0.20,text_str,horizontalalignment='left',color = 'red') text_str = r'$X \sim F(\cdot), E[X] = \mu$' plt.text(results_t_test[0]+0.25,0.23,text_str,horizontalalignment='left',color = 'red') text_str = r'$t_0\,=$'+ f'{-results_t_test[0]:2.2f}' plt.text(-results_t_test[0]-0.25,0.20,text_str,horizontalalignment='right',color = 'red') text_str = r'$p-value\,=\,2\times Pr(|T|>t_0)\,=$'+ f'{100*results_t_test[2]:2.2f}'+'%' plt.text(results_t_test[0]+0.25,0.17,text_str,horizontalalignment='left',color = 'red') text_str = r'Old Faithfull Duration Example: $T\,=\,(\overline{X}-\mu_0)/(S_n/\sqrt{n}):\mu_0=24,\,n=272,\,\alpha=$ 'f'{100*alpha:2.0f}'+'%' Figure.suptitle(text_str,size='14') plt.legend(loc = 'best') plt.savefig('Old_Faithfull.png', dpi=300)