#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime
pd.set_option('display.max_columns',None)
data=pd.read_csv('Q2.csv')
print(data)
data.dtypes
###Check the data has missing value first.
Data_NULL = data.isnull().sum()*100/data.shape[0]
Data_NULL
###Then we need check for duplicate instances in this dataset.
dups=data.duplicated()
print('Number of duplicate rows=%d'%(dups.sum()))
data=data.drop_duplicates()
data
###Converting the "Measure Start Date" and "Measure End Date"
data['Measure Start Date'] = pd.to_datetime(data['Measure Start Date'])
data['Measure End Date'] = pd.to_datetime(data['Measure End Date'])
data.dtypes
###Calcuate Midpoint Date: ( [Measure Start Date] + [Measure End Data] ) / 2
data['Midpoint Date'] = data['Measure Start Date'] + (data['Measure End Date'] - data['Measure Start Date'] )/2
data
###Benchmark value
data['Benchmark'] = 73
###Create a binary column to convert above or below to 1/0
data['Above Benchmark']= np.where(data['Minutes to Pain Medication'] >= data['Benchmark'],1,0)
data
###Create a binary column to convert above or below to 1/0
data['Cum_AboveBenchmark'] = (data['Above Benchmark'] == 1).cumsum()
data['Consecutive Below Benchmark'] = data.groupby(data['Cum_AboveBenchmark']).cumcount()
data
data.dtypes
count_Yes = data.loc[data['Above Benchmark'] == 1, 'Above Benchmark'].count()
count_No = data.loc[data['Above Benchmark'] == 0, 'Above Benchmark'].count()
R_Above = count_Yes/count_No
R_Above
R_Below = count_No/count_Yes
R_Below
###Calculation of Ratio = count No / count Yes
Ratio = count_No/count_Yes
Ratio
### Calculate the upper control limit
data['UCL'] = (Ratio + 3*(math.sqrt(Ratio*(1+Ratio)))).round(2)
### Create the dataframe
data1 = pd.DataFrame(data, columns=['Midpoint Date', 'Benchmark', 'Above Benchmark', 'Consecutive Below Benchmark', 'UCL'])
data1
#Create the time chart
plt.plot('Midpoint Date', 'Consecutive Below Benchmark', data=data1, marker = 's', markerfacecolor = 'blue', linewidth =1, color = 'blue')
plt.plot('Midpoint Date','UCL', data=data1, markersize = 5, color = 'red', linewidth =1)
plt.legend()
plt.title('Response Time to Pain')
plt.xlabel('Midpoint Date')
plt.ylabel('Consecutive Below Benchmark')
plt.yticks(np.arange(0,3,0.5))