# -*- coding: utf-8 -*-
"""
Created on Sat Mar 7 10:41:51 2020

@author: Johan Rene van Dorp

Scatter plot of the Old Faithful data read from 'OldFaithFul.csv' with a
least-squares regression line of waiting time on eruption duration, a marked
forecast for one chosen duration, and an annotation of the fitted equation.
The figure is written to 'Old_Faithful_Scatter.png'.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import Dist_Library as dl

# Load the two columns used for the plot.
df = pd.read_csv('OldFaithFul.csv')
Durations = df["Duration (s)"]
WaitingTimes = df["Waiting Time (Min)"]
n_d = len(Durations)

# Plotting a scatter plot and a regression line.
plt.rc('font', family='serif', size=10)
# Create the figure once, with the intended size. Calling plt.figure() a
# second time would open a new default-sized figure and discard the figsize.
Scatter_Figure = plt.figure(figsize=(10, 5))

# Axis ranges for the data plus a margin on each side.
off_set_x = 10
off_set_y = 10
LB_d = 90
UB_d = 330
LB_w = 40
UB_w = 100

# Plotting the scatter plot
x_lims = (LB_d - off_set_x, UB_d + off_set_x)
y_lims = (LB_w - off_set_y, UB_w + off_set_y)
Panel = Scatter_Figure.add_subplot(1, 1, 1)
plt.scatter(Durations, WaitingTimes, color='indianred', s=10)
plt.xlim(x_lims)
plt.ylim(y_lims)
plt.xlabel('Duration (in Sec) of Eruption')
plt.ylabel('Waiting Times (in Min) for next Eruption')
plt.axvline(0, lw=2, ls='-', color='lightgray', alpha=0.5)
plt.axhline(0, lw=2, ls='-', color='lightgray', alpha=0.5)

# Degree-1 least-squares fit: a is the slope, b the intercept.
a, b = np.polyfit(Durations, WaitingTimes, 1)
f_linear = np.poly1d((a, b))
plt.plot((LB_d, UB_d), f_linear((LB_d, UB_d)), '-', lw=1, color='royalblue')

# Forecast the waiting time for a chosen duration and mark it with dashed
# guide lines. The forecast is evaluated at forecast_d itself so the two
# values cannot drift apart.
forecast_d = 225
forecast_w = f_linear(forecast_d)
plt.vlines(forecast_d, 0, forecast_w, color='black', linestyles='--', lw=1)
plt.hlines(forecast_w, 0, forecast_d, color='black', linestyles='--', lw=1)
text_str = f'{forecast_d} Sec'
plt.text(forecast_d + 5, 32, text_str, color='red', size=10)
text_str = f'{forecast_w:5.2f} Min'
plt.text(LB_d, forecast_w + 2, text_str, color='red', size=10)

# Annotate the fitted equation and R^2.
# NOTE(review): dl.rsquared is presumably a fraction in [0, 1], since it is
# scaled by 100 and printed with '%' -- confirm against Dist_Library.
R_Squared = 100 * dl.rsquared(Durations, WaitingTimes)
# The middle piece is a raw string so that the LaTeX command \cdot is not
# parsed as an (invalid) Python escape sequence.
text_str = (
    f'$y = ${a:4.3f}'
    r'$ \cdot x + $'
    f'{b:4.3f}, $R^2 =${R_Squared:4.1f}%'
)
plt.text(LB_d, 100, text_str, color='royalblue', size=10)
text_str = f'$n = ${n_d}'
plt.text(LB_d, 105, text_str, color='red', size=10)

Scatter_Figure.suptitle('Old Faithful Geyser Data', size=14)
plt.savefig('Old_Faithful_Scatter.png', dpi=1200)