# -*- coding: utf-8 -*-
"""
Created on Mon Mar  2 08:44:28 2020

@author: Johan Rene van Dorp

Simulates the normalized Hamming distance between two people ("John" and
"Mary") identified by n Bernoulli(p) coin tosses, compares the empirical
pmf/cdf of that distance against the theoretical Binomial distribution, and
saves two comparison figures (Figure1.png, Figure2.png).
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom
from scipy.stats import t
import pandas as pd
import Dist_Library as dl


def Hamming_distance(x, y):
    """Return the normalized Hamming distance between two equal-length
    binary vectors x and y (the fraction of positions where they differ)."""
    z = sum(abs(x - y))
    n = len(x)
    return (z / n)


def John_and_Mary_Difference(m, n, p):
    """Sample the distance between John and Mary m times.

    Each replication draws two independent Bernoulli(p) vectors of length n
    (the n coin tosses identifying John and Mary) and records their
    normalized Hamming distance.  Returns a numpy array of length m.
    """
    sample = np.zeros(m)
    for i in np.arange(m):
        # Bernoulli random sample of size n with success probability p for
        # both John and Mary (binomial with size-1 trials is Bernoulli)
        John = np.random.binomial(1, p, n)
        Mary = np.random.binomial(1, p, n)
        # Evaluating the Hamming distance between John and Mary
        sample[i] = Hamming_distance(John, Mary)
    return sample


# We are supposing that John and Mary are identified by the_n Bernoulli
# trials, each with success probability the_prob.
the_n = 10
bern_sample_size = 1
the_prob = 0.5

# Sampling from a Bernoulli random variable is the same as sampling from a
# binomial random variable with sample size 1.  Here we generate 10 coin
# tosses for John and for Mary.
John = np.random.binomial(bern_sample_size, the_prob, the_n)
Mary = np.random.binomial(bern_sample_size, the_prob, the_n)
H_dist = Hamming_distance(John, Mary)
print('John:', John)
print('Mary:', Mary)
print('H_dist:', H_dist)

# Create a random sample of size m of distances between John and Mary to
# estimate empirically the pmf of the Hamming distance between them.
the_m = 100
JM_Sample = John_and_Mary_Difference(the_m, the_n, the_prob)

# Evaluating the empirical and theoretical probabilities.  The distance
# H_n(J, M) takes the values k/n, k = 0..n, where k (the number of differing
# coordinates) is Binomial(n, 2p(1-p)): two independent Bernoulli(p) tosses
# differ with probability p(1-p) + (1-p)p.  For p = 0.5 this equals 0.5, so
# this generalizes (and reproduces) the original hard-coded use of the_prob.
the_outcomes = np.arange(0, the_n + 1) / the_n
the_emp_probs = dl.Estimate_empirical_pmf(the_outcomes, JM_Sample)
diff_prob = 2 * the_prob * (1 - the_prob)
the_theo_probs = binom.pmf(np.arange(0, the_n + 1), the_n, diff_prob)
JM_pmf_data = pd.DataFrame({'Outcomes': the_outcomes,
                            'Theoretical PMF': the_theo_probs,
                            'Empirical PMF': the_emp_probs})
print(JM_pmf_data)

# Evaluating the bounds and widths for the bars in the histograms
the_bounds, the_widths = dl.Calc_boundaries_and_widths(the_outcomes)

# ---------------------------------------------------------------------------
# Figure 1: theoretical and empirical probability mass functions
# ---------------------------------------------------------------------------
plt.rc('font', family='serif', size='12')
JM_figure = plt.figure(1)
JM_figure.set_figwidth(9)
JM_figure.set_figheight(4.5)
JM_figure.subplots_adjust(left=0.1, bottom=0.15, right=0.95, top=0.9,
                          hspace=0.1, wspace=0.3)

# Adding the left panel with the theoretical pmf
Left_Panel = JM_figure.add_subplot(1, 2, 1)
# Evaluating the x-axis and y-axis limits (shared by both panels)
x_lims = (the_bounds[0], the_bounds[len(the_bounds) - 1])
max_height = 1.1 * max(max(the_theo_probs), max(the_emp_probs))
y_lims = (0, max_height + max_height / 10)
plt.xlim(x_lims)
plt.ylim(y_lims)
plt.xlabel('$H_{10}(J,M)$ Distance')
plt.ylabel('Theoretical pmf')
dl.add_pmf_prob_hist_to_figure_panel(Left_Panel, the_outcomes,
                                     the_theo_probs, 'indianred', 0.5)

# Marking the theoretical probability of the fourth outcome.  A theoretical
# value carries no sampling error, hence the zero-width error bar.
bar_p = the_theo_probs[3]
Half_Width = 0
plt.errorbar(the_outcomes[3], bar_p, yerr=Half_Width, fmt='o',
             color='darkgreen', capsize=5, label='Theo. Value')
text_value = bar_p
text_str = f'{text_value:3.2f}'
plt.text(the_outcomes[3] - 0.075, text_value, text_str, color='red', size=12,
         verticalalignment='center', horizontalalignment='right')
plt.legend(loc='best')

# Adding the right panel with the empirical pmf
Right_Panel = JM_figure.add_subplot(1, 2, 2)
plt.xlim(x_lims)
plt.ylim(y_lims)
plt.xlabel('$H_{10}(J,M)$ Distance')
plt.ylabel('Empirical pmf')
dl.add_pmf_prob_hist_to_figure_panel(Right_Panel, the_outcomes,
                                     the_emp_probs, 'skyblue', 0.5)
text_str = 'm = ' + str(the_m)
plt.text(0.1, max_height, text_str, color='red')

# Adding a confidence-interval error bar for the fourth outcome, using the
# standard error of a sample proportion and a Student-t quantile.
bar_p = the_emp_probs[3]
alpha = 0.05
SE = ((bar_p * (1 - bar_p)) / the_m) ** 0.5
t_q = t.ppf(1 - alpha / 2, the_m - 1)
Half_Width = t_q * SE
plt.errorbar(the_outcomes[3], bar_p, yerr=Half_Width, fmt='o',
             color='darkgreen', capsize=5, label='95% Conf. Int.')
# Annotate the upper bound, lower bound, and the point estimate itself
for text_value in (bar_p + Half_Width, bar_p - Half_Width, bar_p):
    text_str = f'{text_value:3.2f}'
    plt.text(the_outcomes[3] - 0.075, text_value, text_str, color='red',
             size=12, verticalalignment='center',
             horizontalalignment='right')
plt.legend(loc='best')

JM_figure.suptitle('Distance between John and Mary', size='16')
plt.savefig('Figure1.png', dpi=300)

# ---------------------------------------------------------------------------
# Figure 2: theoretical and empirical cumulative distribution functions
# ---------------------------------------------------------------------------
plt.rc('font', family='serif', size='12')
JM_figure = plt.figure(2)
JM_figure.set_figwidth(9)
JM_figure.set_figheight(4.5)
JM_figure.subplots_adjust(left=0.1, bottom=0.15, right=0.95, top=0.9,
                          hspace=0.1, wspace=0.3)

# Adding the left panel with the theoretical cdf
Left_Panel = JM_figure.add_subplot(1, 2, 1)
# Evaluating the x-axis and y-axis limits (a cdf lives in [0, 1])
x_lims = (the_bounds[0], the_bounds[len(the_bounds) - 1])
y_lims = (0, 1.1)
plt.xlim(x_lims)
plt.ylim(y_lims)
plt.xlabel('$H_{10}(J,M)$ Distance')
plt.ylabel('Theoretical cdf')
dl.plot_discrete_cdf(Left_Panel, the_outcomes, the_theo_probs,
                     'indianred', 2)

# Marking the theoretical cdf value on the step between the fifth and sixth
# outcomes.  BUG FIX: the cdf at the fifth outcome is the cumulative sum of
# the first five pmf values (np.sum(the_theo_probs[:5])); the original
# np.sum(the_theo_probs[4]) summed a single scalar, i.e. plotted the pmf
# value on a cdf panel.
bar_p = np.sum(the_theo_probs[:5])
Half_Width = 0  # theoretical value: no sampling error
plt.errorbar((the_outcomes[4] + the_outcomes[5]) / 2, bar_p,
             yerr=Half_Width, fmt='o', color='darkgreen', capsize=5,
             label='Theo. Value')
text_value = bar_p
text_str = f'{text_value:3.2f}'
plt.text(the_outcomes[4] - 0.025, text_value, text_str, color='red', size=12,
         verticalalignment='center', horizontalalignment='right')
plt.legend(loc='best')

# Adding the right panel with the empirical cdf
Right_Panel = JM_figure.add_subplot(1, 2, 2)
plt.xlim(x_lims)
plt.ylim(y_lims)
plt.xlabel('$H_{10}(J,M)$ Distance')
plt.ylabel('Empirical cdf')
dl.plot_discrete_cdf(Right_Panel, the_outcomes, the_emp_probs, 'skyblue', 2)
text_str = 'm = ' + str(the_m)
plt.text(0.1, 0.9, text_str, color='red')

# Adding a confidence-interval error bar for the empirical cdf at the fifth
# outcome.  BUG FIX: use the cumulative empirical probability so the error
# bar sits on the plotted cdf step (the original used the single pmf value
# the_emp_probs[4]).
bar_p = np.sum(the_emp_probs[:5])
alpha = 0.05
SE = ((bar_p * (1 - bar_p)) / the_m) ** 0.5
t_q = t.ppf(1 - alpha / 2, the_m - 1)
Half_Width = t_q * SE
plt.errorbar((the_outcomes[4] + the_outcomes[5]) / 2, bar_p,
             yerr=Half_Width, fmt='o', color='darkgreen', capsize=5,
             label='95% Conf. Int.')
# Annotate the upper bound, lower bound, and the point estimate itself
for text_value in (bar_p + Half_Width, bar_p - Half_Width, bar_p):
    text_str = f'{text_value:3.2f}'
    plt.text(the_outcomes[4] - 0.025, text_value, text_str, color='red',
             size=12, verticalalignment='center',
             horizontalalignment='right')
plt.legend(loc='best')

JM_figure.suptitle('Distance between John and Mary', size='16')
plt.savefig('Figure2.png', dpi=300)