#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime
pd.set_option('display.max_columns',None)

Step1. Import Data¶

data=pd.read_csv('Q2.csv')
print(data)

  Measure Start Date Measure End Date  Minutes to Pain Medication  Sample
0           4/1/2013        3/31/2014                          78     132
1           7/1/2013        6/30/2014                          68     124
2           7/1/2013        6/30/2014                          68     124
3          10/1/2013        9/30/2014                          72     122
4           1/1/2014       12/31/2014                          76     138
5           4/1/2014        3/31/2015                          85     144
6           7/1/2014        6/30/2015                          86     137
7          10/1/2014        9/30/2015                          95     121
8           1/1/2015       12/31/2015                          99     111

data.dtypes

Measure Start Date            object
Measure End Date              object
Minutes to Pain Medication     int64
Sample                         int64
dtype: object

Step2. Data Cleaning¶

###Check the data has missing value first.
Data_NULL = data.isnull().sum()*100/data.shape[0]
Data_NULL

Measure Start Date            0.0
Measure End Date              0.0
Minutes to Pain Medication    0.0
Sample                        0.0
dtype: float64

###Then we need check for duplicate instances in this dataset.
dups=data.duplicated()
print('Number of duplicate rows=%d'%(dups.sum()))

Number of duplicate rows=1

data=data.drop_duplicates()
data

Step3.Calculate Midpoint Date¶

###Converting the "Measure Start Date" and "Measure End Date"
data['Measure Start Date'] = pd.to_datetime(data['Measure Start Date'])
data['Measure End Date'] = pd.to_datetime(data['Measure End Date'])
data.dtypes

Measure Start Date            datetime64[ns]
Measure End Date              datetime64[ns]
Minutes to Pain Medication             int64
Sample                                 int64
dtype: object

###Calcuate Midpoint Date: ( [Measure Start Date] + [Measure End Data] ) / 2  
data['Midpoint Date'] = data['Measure Start Date'] + (data['Measure End Date'] - data['Measure Start Date'] )/2
data

###Benchmark value
data['Benchmark'] = 73

###Create a binary column to convert above or below to 1/0
data['Above Benchmark']= np.where(data['Minutes to Pain Medication'] >= data['Benchmark'],1,0)
data

###Create a binary column to convert above or below to 1/0
data['Cum_AboveBenchmark'] = (data['Above Benchmark'] == 1).cumsum()

data['Consecutive Below Benchmark'] = data.groupby(data['Cum_AboveBenchmark']).cumcount()
data

data.dtypes

Measure Start Date             datetime64[ns]
Measure End Date               datetime64[ns]
Minutes to Pain Medication              int64
Sample                                  int64
Midpoint Date                  datetime64[ns]
Benchmark                               int64
Above Benchmark                         int32
Cum_AboveBenchmark                      int32
Consecutive Below Benchmark             int64
dtype: object

count_Yes = data.loc[data['Above Benchmark'] == 1, 'Above Benchmark'].count()
count_No = data.loc[data['Above Benchmark'] == 0, 'Above Benchmark'].count()

R_Above = count_Yes/count_No
R_Above

3.0

R_Below = count_No/count_Yes
R_Below

0.3333333333333333

###Calculation of Ratio = count No / count Yes
Ratio = count_No/count_Yes
Ratio

0.3333333333333333

### Calculate the upper control limit
data['UCL'] = (Ratio + 3*(math.sqrt(Ratio*(1+Ratio)))).round(2)

### Create the dataframe
data1 = pd.DataFrame(data, columns=['Midpoint Date', 'Benchmark', 'Above Benchmark', 'Consecutive Below Benchmark', 'UCL'])
data1

#Create the time chart
plt.plot('Midpoint Date', 'Consecutive Below Benchmark', data=data1, marker = 's', markerfacecolor = 'blue', linewidth =1, color = 'blue')
plt.plot('Midpoint Date','UCL', data=data1, markersize = 5, color = 'red', linewidth =1)
plt.legend()
plt.title('Response Time to Pain')
plt.xlabel('Midpoint Date')
plt.ylabel('Consecutive Below Benchmark')
plt.yticks(np.arange(0,3,0.5))

([<matplotlib.axis.YTick at 0x9068850>,
  <matplotlib.axis.YTick at 0x9068640>,
  <matplotlib.axis.YTick at 0x905bdf0>,
  <matplotlib.axis.YTick at 0x96efe50>,
  <matplotlib.axis.YTick at 0x9708100>,
  <matplotlib.axis.YTick at 0x9708388>],
 <a list of 6 Text major ticklabel objects>)

	Measure Start Date	Measure End Date	Minutes to Pain Medication	Sample	Midpoint Date
0	2013-04-01	2014-03-31	78	132	2013-09-30
1	2013-07-01	2014-06-30	68	124	2013-12-30
3	2013-10-01	2014-09-30	72	122	2014-04-01
4	2014-01-01	2014-12-31	76	138	2014-07-02
5	2014-04-01	2015-03-31	85	144	2014-09-30
6	2014-07-01	2015-06-30	86	137	2014-12-30
7	2014-10-01	2015-09-30	95	121	2015-04-01
8	2015-01-01	2015-12-31	99	111	2015-07-02

	Midpoint Date	Benchmark	Above Benchmark	Consecutive Below Benchmark	UCL
0	2013-09-30	73	1	0	2.33
1	2013-12-30	73	0	1	2.33
3	2014-04-01	73	0	2	2.33
4	2014-07-02	73	1	0	2.33
5	2014-09-30	73	1	0	2.33
6	2014-12-30	73	1	0	2.33
7	2015-04-01	73	1	0	2.33
8	2015-07-02	73	1	0	2.33

	Measure Start Date	Measure End Date	Minutes to Pain Medication	Sample
0	4/1/2013	3/31/2014	78	132
1	7/1/2013	6/30/2014	68	124
3	10/1/2013	9/30/2014	72	122
4	1/1/2014	12/31/2014	76	138
5	4/1/2014	3/31/2015	85	144
6	7/1/2014	6/30/2015	86	137
7	10/1/2014	9/30/2015	95	121
8	1/1/2015	12/31/2015	99	111