# -*- coding: utf-8 -*-
"""
Created on Sat Mar 7 10:41:51 2020

@author: Johan Rene van Dorp

Draws a two-panel figure for the Old Faithful data set: a density histogram
with a Gaussian kernel density estimate overlaid, once for the eruption
durations (left panel) and once for the waiting times (right panel). The
figure is written to 'Old_Faithful.png'.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import Dist_Library as dl
from sklearn.neighbors import KernelDensity


def _add_hist_kde_panel(figure, position, data, *, lower, upper,
                        number_of_bins, x_margin, y_max, bar_color,
                        bandwidth, x_label, label_xy):
    """Add one density-histogram panel with a Gaussian KDE curve on top.

    Parameters
    ----------
    figure : matplotlib.figure.Figure
        Figure that receives the new panel.
    position : int
        1-based column index of the panel in a 1 x 2 subplot grid.
    data : pandas.Series
        Observations to summarise.
    lower, upper : float
        Outer edges of the histogram bins.
    number_of_bins : int
        Number of equal-width bins between ``lower`` and ``upper``.
    x_margin : float
        Extra room on either side of [lower, upper], used both for the
        x-axis limits and for the range over which the KDE is evaluated.
    y_max : float
        Upper limit of the density axis (the lower limit is 0).
    bar_color : str
        Colour handed to the Dist_Library histogram drawing helper.
    bandwidth : float
        Bandwidth of the Gaussian kernel.
    x_label : str
        Label for the x-axis.
    label_xy : tuple of float
        Data coordinates (x, y) of the '# Bins ...' annotation.

    Returns
    -------
    The histogram table produced by
    ``dl.Estimate_empirical_histogram_table`` for ``data``.
    """
    bins = np.linspace(lower, upper, number_of_bins + 1)
    hist_table = dl.Estimate_empirical_histogram_table(bins, data)

    # The drawing helper receives the full list of bin boundaries (first
    # lower bound followed by every upper bound) and a density vector of the
    # same length, padded with a leading 0.
    # NOTE(review): presumably the leading 0 pairs with the first boundary
    # to start a step outline -- confirm against Dist_Library.
    the_bounds = np.append(hist_table["LB"][0], hist_table["UB"])
    the_density = np.append(0, hist_table["Bin PDF"])

    x_range = (lower - x_margin, upper + x_margin)

    panel = figure.add_subplot(1, 2, position)
    dl.add_pmf_density_hist_to_figure_panel(panel, the_bounds, the_density,
                                            bar_color)
    panel.set_xlim(x_range)
    panel.set_ylim((0, y_max))
    panel.set_xlabel(x_label)
    panel.set_ylabel('Density')
    panel.text(label_xy[0], label_xy[1], '# Bins ' + str(number_of_bins),
               color='red', size=10)

    # Overlay a Gaussian kernel density estimate. scikit-learn expects a 2-D
    # (n_samples, n_features) array, hence the [:, None] reshapes, and
    # score_samples returns log-densities, hence the np.exp.
    samples = data.to_numpy()
    kde = KernelDensity(bandwidth=bandwidth, kernel='gaussian')
    kde.fit(samples[:, None])
    x_grid = np.linspace(x_range[0], x_range[1], 1000)
    log_density = kde.score_samples(x_grid[:, None])
    panel.plot(x_grid, np.exp(log_density), alpha=1, color='navy')

    return hist_table


df = pd.read_csv('OldFaithFul.csv')
Durations = df["Duration (s)"]
WaitingTimes = df["Waiting Time (Min)"]

# Plotting a two panel histogram plot of the durations and the waiting times
plt.rc('font', family='serif', size=10)

# Create exactly one figure and give it the intended size. (Calling
# plt.figure(figsize=...) and then plt.figure() again would leave the sized
# figure unused and draw on a second, default-sized one.)
Hist_Figure = plt.figure(figsize=(10, 5))
Hist_Figure.subplots_adjust(hspace=0.4, wspace=0.4)

off_set_x = 10
Number_of_bins = 22

# Left panel: empirical density histogram and KDE of the durations
Duration_hist_data = _add_hist_kde_panel(
    Hist_Figure, 1, Durations,
    lower=90, upper=330,
    number_of_bins=Number_of_bins,
    x_margin=off_set_x,
    y_max=0.014,
    bar_color='indianred',
    bandwidth=9.0,
    x_label='Duration (in Sec)',
    label_xy=(160, 0.013),
)

# Right panel: empirical density histogram and KDE of the waiting times
WaitingTime_hist_data = _add_hist_kde_panel(
    Hist_Figure, 2, WaitingTimes,
    lower=40, upper=100,
    number_of_bins=Number_of_bins,
    x_margin=off_set_x,
    y_max=0.07,
    bar_color='skyblue',
    bandwidth=3.0,
    x_label='Waiting Time (in Min)',
    label_xy=(60, 0.065),
)

Hist_Figure.suptitle('Histograms Old Faithful Data', size=14)
Hist_Figure.savefig('Old_Faithful.png', dpi=1200)