# -*- coding: utf-8 -*-
"""
Created on Sat Mar 7 10:41:51 2020

@author: Johan Rene van Dorp

Plot the empirical CDFs of the Old Faithful eruption durations and waiting
times side by side, mark the 25%, 50% and 75% empirical quantiles and the
interquartile range (IQR) on each panel, and save the figure to
``Old_Faithful_IQR.png``.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import Dist_Library as dl


def _plot_ecdf_panel(panel, sample, *, lb, ub, margin, color, x_label,
                     y_label, n_label_x, quantile_label_dx, head_length,
                     iqr_label_dx):
    """Draw one empirical-CDF panel annotated with quartiles and the IQR.

    Parameters
    ----------
    panel : matplotlib Axes
        Axes to draw on.
    sample : pandas Series (or other 1-D sequence)
        Observations; passed unchanged to ``dl.empirical_quantile``.
    lb, ub : number
        Nominal lower/upper bound of the data range shown on the x-axis.
    margin : number
        Padding subtracted from ``lb`` / added to ``ub``; the padded values
        are used both as the x-limits and as the two artificial end points
        of the step curve.
    color : str
        Line color of the step curve.
    x_label, y_label : str
        Axis labels.
    n_label_x : number
        x-position of the ``n = ...`` sample-size annotation.
    quantile_label_dx : sequence of 3 numbers
        Horizontal offsets of the printed 25%, 50% and 75% quantile values
        relative to the quantiles themselves.
    head_length : number
        Arrow-head length (in x data units) of the IQR arrows.
    iqr_label_dx : number
        Horizontal offset of the ``IQR = ...`` text relative to the midpoint
        between the 25% and 75% quantiles.

    Returns
    -------
    tuple
        ``(x_25, x_50, x_75, iqr)``.
    """
    # Only the first element returned by dl.empirical_quantile is used; the
    # script treats it as the p-th empirical quantile.
    x_25, x_50, x_75 = (
        dl.empirical_quantile(sample, p)[0] for p in (0.25, 0.50, 0.75)
    )
    iqr = x_75 - x_25

    # Use this sample's own size for the probability grid, so the x and y
    # arrays handed to step() always have matching lengths.
    n = len(sample)
    x_lims = (lb - margin, ub + margin)
    y_lims = (-0.05, 1.1)

    # Sorted observations, padded with one point at each x-limit so the
    # curve spans the full width of the panel (at height 0 and 1).
    ordered = np.concatenate(([x_lims[0]], np.sort(sample), [x_lims[1]]))
    cum_prob = np.concatenate(([0], np.linspace(0, 1, n), [1]))

    panel.step(ordered, cum_prob, lw=1, color=color)
    panel.set_xlim(x_lims)
    panel.set_ylim(y_lims)
    panel.set_xlabel(x_label)
    panel.set_ylabel(y_label)

    # Light reference lines at x = 0, F = 0 and F = 1.
    panel.axvline(0, lw=2, ls='-', color='lightgray', alpha=0.5)
    panel.axhline(0, lw=2, ls='-', color='lightgray', alpha=0.5)
    panel.axhline(1, lw=2, ls=':', color='lightgray', alpha=0.5)

    panel.text(n_label_x, 0.9, f'n = {n}', color='red', size=10)

    # (probability level, quantile, line style): the median is dotted, the
    # outer quartiles are dashed.
    quartiles = (
        (0.25, x_25, '--'),
        (0.50, x_50, ':'),
        (0.75, x_75, '--'),
    )
    for level, x_q, style in quartiles:
        panel.vlines(x_q, 0, level, color='black', linestyles=style, lw=1)
    for level, x_q, style in quartiles:
        panel.hlines(level, 0, x_q, color='black', linestyles=style, lw=1)
    for level, _x_q, _style in quartiles:
        panel.text(x_lims[0] + 3, level + 0.025, f'{level:.2f}', color='red')
    for (_level, x_q, _style), dx in zip(quartiles, quantile_label_dx):
        panel.text(x_q + dx, 0.025, f'{x_q:3.0f}', color='red')

    # Two opposing arrows form a double-headed arrow spanning the IQR.
    arrow_y = 0.1
    for start, length in ((x_25, iqr), (x_25 + iqr, -iqr)):
        panel.arrow(start, arrow_y, length, 0, lw=0.1, color='blue',
                    head_length=head_length, head_width=0.03,
                    length_includes_head=True)
    panel.text((x_25 + x_75) / 2 + iqr_label_dx, arrow_y + 0.03,
               'IQR = ' + f'{iqr:2.0f}', color='blue', size=6)

    return x_25, x_50, x_75, iqr


df = pd.read_csv('OldFaithFul.csv')
Durations = df["Duration (s)"]
WaitingTimes = df["Waiting Time (Min)"]

# Plotting Empirical Cumulative Distribution Functions
plt.rc('font', family='serif', size='10')
# A single 9 x 4.5 inch figure holds both panels (no stray empty figure).
ECDF_Figure = plt.figure(figsize=(9, 4.5))
ECDF_Figure.subplots_adjust(hspace=0.4, wspace=0.4)
off_set_x = 10

# Plotting the Empirical CDF of the Durations
_plot_ecdf_panel(
    ECDF_Figure.add_subplot(1, 2, 1),
    Durations,
    lb=90,
    ub=330,
    margin=off_set_x,
    color='indianred',
    x_label='Duration (in Sec)',
    # Raw string: the backslashes belong to the mathtext markup.
    y_label=r'Empirical CDF $\hat{F}(d) = Pr(D \leq d)$',
    n_label_x=160,
    quantile_label_dx=(3, -40, 3),
    head_length=15,
    iqr_label_dx=-28,
)

# Plotting the Empirical CDF of the Waiting Times
_plot_ecdf_panel(
    ECDF_Figure.add_subplot(1, 2, 2),
    WaitingTimes,
    lb=40,
    ub=100,
    margin=off_set_x,
    color='skyblue',
    x_label='Waiting Times (in Min)',
    y_label=r'Empirical CDF $\hat{F}(w) = Pr(W \leq w)$',
    n_label_x=55,
    quantile_label_dx=(0, -10, 0),
    head_length=4,
    iqr_label_dx=-8,
)

ECDF_Figure.suptitle('Empirical CDFs Old Faithful Data and IQR', size='14')
ECDF_Figure.savefig('Old_Faithful_IQR.png', dpi=300)