# -*- coding: utf-8 -*-
"""
Created on Wed Mar 4 09:50:31 2020

@author: Johan Rene van Dorp
"""

from collections import Counter

import numpy as np
import matplotlib.patches as patches
import pandas as pd
from scipy.stats import linregress


def Estimate_empirical_pmf(outcomes, samples):
    """Estimate the empirical pmf of ``samples`` over ``outcomes``.

    Parameters
    ----------
    outcomes : sequence of hashable values
        The distinct possible outcomes. The returned pmf is aligned with
        this sequence, whatever its ordering.
    samples : iterable of hashable values
        The observed values; every one must be a member of ``outcomes``.

    Returns
    -------
    numpy.ndarray
        Float array with ``pmf[i]`` the relative frequency of
        ``outcomes[i]`` in ``samples``. Outcomes that never occur get 0.
        With no samples at all the division is 0/0 and the entries are NaN.

    Raises
    ------
    ValueError
        If ``samples`` contains a value that is not in ``outcomes``.
    """
    outcome_list = list(outcomes)
    counts = Counter(samples)

    # A sample outside the support cannot be assigned to any pmf entry;
    # report it explicitly instead of failing later with an index error.
    unexpected = set(counts).difference(outcome_list)
    if unexpected:
        raise ValueError(
            "samples contain values that are not in outcomes: "
            + ", ".join(sorted(repr(value) for value in unexpected))
        )

    # Look the counts up outcome by outcome so that position i of the result
    # always refers to outcomes[i] (a Counter yields 0 for unseen outcomes).
    the_counts = np.array(
        [counts[outcome] for outcome in outcome_list], dtype=float
    )

    # Finally convert the counts to empirical pmf values.
    return the_counts / the_counts.sum()


def Calc_boundaries_and_widths(outcomes):
    """Determine the bar bounds and bar widths for plotting a pmf.

    The interior bounds are the midpoints between consecutive outcomes; the
    first and last bounds are the first and last outcome themselves.

    Parameters
    ----------
    outcomes : array-like of numbers
        The possible outcomes, in plotting order. Lists are accepted as
        well as numpy arrays.

    Returns
    -------
    bounds : numpy.ndarray
        ``len(outcomes) + 1`` bar edges.
    widths : numpy.ndarray
        ``len(outcomes)`` bar widths, ``bounds[i + 1] - bounds[i]``.

    Raises
    ------
    IndexError
        If ``outcomes`` is empty.
    """
    points = np.asarray(outcomes)
    mid_points = (points[:-1] + points[1:]) / 2
    bounds = np.concatenate(([points[0]], mid_points, [points[-1]]))
    widths = np.diff(bounds)
    return (bounds, widths)


def add_pmf_prob_hist_to_figure_panel(figure_panel, outcomes, probs, color,
                                      the_alpha):
    """Draw a pmf as adjacent rectangles on ``figure_panel``.

    Parameters
    ----------
    figure_panel : object with an ``add_patch`` method
        Presumably a matplotlib Axes; only ``add_patch`` is called here.
    outcomes : array-like of numbers
        The possible outcomes; they fix the horizontal extent of each bar
        through ``Calc_boundaries_and_widths``.
    probs : indexable
        ``probs[i]`` is the height of the bar belonging to ``outcomes[i]``.
    color : matplotlib color
        Face color of the bars (edges are always black).
    the_alpha : float
        Transparency of the bars.
    """
    bounds, widths = Calc_boundaries_and_widths(outcomes)
    for i, width in enumerate(widths):
        bar = patches.Rectangle(
            (bounds[i], 0),
            width,
            probs[i],
            facecolor=color,
            edgecolor='black',
            alpha=the_alpha,
        )
        figure_panel.add_patch(bar)


def add_pmf_density_hist_to_figure_panel(figure_panel, bounds, density, color,
                                         the_alpha=0.5):
    """Draw a density histogram as adjacent rectangles on ``figure_panel``.

    Parameters
    ----------
    figure_panel : object with an ``add_patch`` method
        Presumably a matplotlib Axes; only ``add_patch`` is called here.
    bounds : array-like of numbers
        The ``n`` bar edges; ``n - 1`` bars are drawn.
    density : indexable
        ``density[i + 1]`` is the height of the bar between ``bounds[i]``
        and ``bounds[i + 1]``; ``density[0]`` is never drawn.
        NOTE(review): presumably entry 0 belongs to the region below the
        first bound, as produced by Estimate_empirical_histogram - confirm.
    color : matplotlib color
        Face color of the bars (edges are always black).
    the_alpha : float, optional
        Transparency of the bars; defaults to 0.5.
    """
    widths = np.diff(bounds)
    for i, width in enumerate(widths):
        bar = patches.Rectangle(
            (bounds[i], 0),
            width,
            density[i + 1],
            facecolor=color,
            edgecolor='black',
            alpha=the_alpha,
        )
        figure_panel.add_patch(bar)


def Estimate_empirical_histogram(bounds,samples): n = len(bounds) 
m = len(samples) empirical_pmf = np.zeros(n) df = pd.DataFrame({'The_Sample':samples}) cdf_lb = df[df