# -*- coding: utf-8 -*- """ Created on Sat Mar 7 10:41:51 2020 @author: Johan Rene van Dorp """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import Dist_Library as dl from sklearn.neighbors import KernelDensity df = pd.read_csv('SoftwareFailure.csv') FailTimes = df["FailureTimes"] n_data = len(FailTimes) # Plotting a two panel histogram plot of the durations # the waiting times plt.rc('font', family='serif', size='10') plt.figure(figsize=(10, 5)) Figure = plt.figure() Figure.subplots_adjust(hspace=0.4, wspace=0.4) off_set_x = 250 # Calculating the histogram data for the left panel Number_of_bins = 15 LB = min(FailTimes) UB = max(FailTimes) bins = np.linspace(LB, UB, Number_of_bins+1) hist_data = dl.Estimate_empirical_histogram_table(bins,FailTimes) # Adding the left top panel with the theoretical pmf the_bounds = np.append(hist_data["LB"][0],hist_data["UB"]) the_density = np.append(0,hist_data["Bin PDF"]) x_lims = (LB-off_set_x,UB+off_set_x) y_lims = (0,0.002) Panel = Figure.add_subplot(1,2,1) dl.add_pmf_density_hist_to_figure_panel(Panel,the_bounds,the_density,'indianred') plt.xlim(x_lims) plt.ylim(y_lims) plt.xlabel('Software Failure Times') plt.ylabel('Density') text_str = '# Bins '+str(Number_of_bins) plt.text(2250,0.0017,text_str,color = 'red',size = 10) # Adding a Kernel Density to the plot FT = FailTimes.to_numpy() kde = KernelDensity(bandwidth=100, kernel='gaussian') kde.fit(FT[:, None]) x_d = np.linspace(LB-off_set_x, UB+off_set_x, 1000) logprob = kde.score_samples(x_d[:, None]) plt.plot(x_d, np.exp(logprob), alpha=1, color = 'navy',label ='Kernel Density') plt.legend(loc = 'best') # Plotting the Empirical CDF of the Waiting Times FailTimes.ordered = np.sort(FailTimes) FailTimes.ordered = np.append(LB-off_set_x,FailTimes.ordered) FailTimes.ordered = np.append(FailTimes.ordered,UB+off_set_x) F = np.arange(1,(n_data+1))/n_data F = np.append(0,F) F = np.append(F,1) x_lims = (LB-off_set_x,UB+off_set_x) y_lims = (-0.05,1.1) Panel = Figure.add_subplot(1,2,2) plt.step(FailTimes.ordered,F,lw = 1,color ='skyblue') plt.xlim(x_lims) plt.ylim(y_lims) plt.xlabel('Software Failure Times') plt.ylabel('Empirical CDF $\hat{F}(t) = Pr(T \leq t)$') plt.axvline(0, lw=2, ls = '-',color = 'lightgray',alpha=0.5) plt.axhline(0, lw=2, ls = '-',color = 'lightgray',alpha=0.5) plt.axhline(1, lw=2, ls = ':',color = 'lightgray',alpha=0.5) text_str = 'n = '+str(n_data) plt.text(100,1.025,text_str,color = 'red',size = 10) q_level = 0.8 emp_quantile, q_index, x_k, x_k_plus_1 = dl.empirical_quantile(FailTimes,q_level) Panel.vlines(emp_quantile, 0, q_level, color='black', linestyles=':', lw=1) Panel.hlines(q_level, 0, emp_quantile, color='black', linestyles=':', lw=1) text_str = 'p = ' + str(q_level) plt.text(emp_quantile+115,q_level,text_str, color = 'red') text_str = '$\hat{x}_p = $' + f'{emp_quantile:5.1f}' plt.text(emp_quantile+115,0.025,text_str, color = 'red') Figure.suptitle('Histogram and ECDF Software Failure Data',size='14') plt.savefig('SoftwareFailure.png', dpi=1200)