MathJax.Hub.Config({ tex2jax: { inlineMath: [ ['$','$'], ["\\(","\\)"] ], displayMath: [ ['$$','$$'], ["\\[","\\]"] ], processEscapes: true, processEnvironments: true }, // Center justify equations in code and markdown cells. Elsewhere // we use CSS to left justify single line equations in code cells. displayAlign: 'center', "HTML-CSS": { styles: {'.MathJax_Display': {"margin": 0}}, linebreaks: { automatic: true } } });
In [1]:
#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime
pd.set_option('display.max_columns',None)

Step1. Import Data

In [2]:
data=pd.read_csv('Q2.csv')
print(data)
  Measure Start Date Measure End Date  Minutes to Pain Medication  Sample
0           4/1/2013        3/31/2014                          78     132
1           7/1/2013        6/30/2014                          68     124
2           7/1/2013        6/30/2014                          68     124
3          10/1/2013        9/30/2014                          72     122
4           1/1/2014       12/31/2014                          76     138
5           4/1/2014        3/31/2015                          85     144
6           7/1/2014        6/30/2015                          86     137
7          10/1/2014        9/30/2015                          95     121
8           1/1/2015       12/31/2015                          99     111
In [3]:
data.dtypes
Out[3]:
Measure Start Date            object
Measure End Date              object
Minutes to Pain Medication     int64
Sample                         int64
dtype: object

Step2. Data Cleaning

In [4]:
###Check the data has missing value first.
Data_NULL = data.isnull().sum()*100/data.shape[0]
Data_NULL
Out[4]:
Measure Start Date            0.0
Measure End Date              0.0
Minutes to Pain Medication    0.0
Sample                        0.0
dtype: float64
In [5]:
###Then we need check for duplicate instances in this dataset.
dups=data.duplicated()
print('Number of duplicate rows=%d'%(dups.sum()))
Number of duplicate rows=1
In [6]:
data=data.drop_duplicates()
data
Out[6]:
Measure Start Date Measure End Date Minutes to Pain Medication Sample
0 4/1/2013 3/31/2014 78 132
1 7/1/2013 6/30/2014 68 124
3 10/1/2013 9/30/2014 72 122
4 1/1/2014 12/31/2014 76 138
5 4/1/2014 3/31/2015 85 144
6 7/1/2014 6/30/2015 86 137
7 10/1/2014 9/30/2015 95 121
8 1/1/2015 12/31/2015 99 111

Step3.Calculate Midpoint Date

In [7]:
###Converting the "Measure Start Date" and "Measure End Date"
data['Measure Start Date'] = pd.to_datetime(data['Measure Start Date'])
data['Measure End Date'] = pd.to_datetime(data['Measure End Date'])
data.dtypes
Out[7]:
Measure Start Date            datetime64[ns]
Measure End Date              datetime64[ns]
Minutes to Pain Medication             int64
Sample                                 int64
dtype: object
In [8]:
###Calcuate Midpoint Date: ( [Measure Start Date] + [Measure End Data] ) / 2  
data['Midpoint Date'] = data['Measure Start Date'] + (data['Measure End Date'] - data['Measure Start Date'] )/2
data
Out[8]:
Measure Start Date Measure End Date Minutes to Pain Medication Sample Midpoint Date
0 2013-04-01 2014-03-31 78 132 2013-09-30
1 2013-07-01 2014-06-30 68 124 2013-12-30
3 2013-10-01 2014-09-30 72 122 2014-04-01
4 2014-01-01 2014-12-31 76 138 2014-07-02
5 2014-04-01 2015-03-31 85 144 2014-09-30
6 2014-07-01 2015-06-30 86 137 2014-12-30
7 2014-10-01 2015-09-30 95 121 2015-04-01
8 2015-01-01 2015-12-31 99 111 2015-07-02
In [9]:
###Benchmark value
data['Benchmark'] = 73
In [10]:
###Create a binary column to convert above or below to 1/0
data['Above Benchmark']= np.where(data['Minutes to Pain Medication'] >= data['Benchmark'],1,0)
data
Out[10]:
Measure Start Date Measure End Date Minutes to Pain Medication Sample Midpoint Date Benchmark Above Benchmark
0 2013-04-01 2014-03-31 78 132 2013-09-30 73 1
1 2013-07-01 2014-06-30 68 124 2013-12-30 73 0
3 2013-10-01 2014-09-30 72 122 2014-04-01 73 0
4 2014-01-01 2014-12-31 76 138 2014-07-02 73 1
5 2014-04-01 2015-03-31 85 144 2014-09-30 73 1
6 2014-07-01 2015-06-30 86 137 2014-12-30 73 1
7 2014-10-01 2015-09-30 95 121 2015-04-01 73 1
8 2015-01-01 2015-12-31 99 111 2015-07-02 73 1
In [11]:
###Create a binary column to convert above or below to 1/0
data['Cum_AboveBenchmark'] = (data['Above Benchmark'] == 1).cumsum()
In [12]:
data['Consecutive Below Benchmark'] = data.groupby(data['Cum_AboveBenchmark']).cumcount()
data
Out[12]:
Measure Start Date Measure End Date Minutes to Pain Medication Sample Midpoint Date Benchmark Above Benchmark Cum_AboveBenchmark Consecutive Below Benchmark
0 2013-04-01 2014-03-31 78 132 2013-09-30 73 1 1 0
1 2013-07-01 2014-06-30 68 124 2013-12-30 73 0 1 1
3 2013-10-01 2014-09-30 72 122 2014-04-01 73 0 1 2
4 2014-01-01 2014-12-31 76 138 2014-07-02 73 1 2 0
5 2014-04-01 2015-03-31 85 144 2014-09-30 73 1 3 0
6 2014-07-01 2015-06-30 86 137 2014-12-30 73 1 4 0
7 2014-10-01 2015-09-30 95 121 2015-04-01 73 1 5 0
8 2015-01-01 2015-12-31 99 111 2015-07-02 73 1 6 0
In [13]:
data.dtypes
Out[13]:
Measure Start Date             datetime64[ns]
Measure End Date               datetime64[ns]
Minutes to Pain Medication              int64
Sample                                  int64
Midpoint Date                  datetime64[ns]
Benchmark                               int64
Above Benchmark                         int32
Cum_AboveBenchmark                      int32
Consecutive Below Benchmark             int64
dtype: object
In [14]:
count_Yes = data.loc[data['Above Benchmark'] == 1, 'Above Benchmark'].count()
count_No = data.loc[data['Above Benchmark'] == 0, 'Above Benchmark'].count()          
In [15]:
R_Above = count_Yes/count_No
R_Above
Out[15]:
3.0
In [16]:
R_Below = count_No/count_Yes
R_Below
Out[16]:
0.3333333333333333
In [17]:
###Calculation of Ratio = count No / count Yes
Ratio = count_No/count_Yes
Ratio
Out[17]:
0.3333333333333333
In [18]:
### Calculate the upper control limit
data['UCL'] = (Ratio + 3*(math.sqrt(Ratio*(1+Ratio)))).round(2)
In [19]:
### Create the dataframe
data1 = pd.DataFrame(data, columns=['Midpoint Date', 'Benchmark', 'Above Benchmark', 'Consecutive Below Benchmark', 'UCL'])
data1
Out[19]:
Midpoint Date Benchmark Above Benchmark Consecutive Below Benchmark UCL
0 2013-09-30 73 1 0 2.33
1 2013-12-30 73 0 1 2.33
3 2014-04-01 73 0 2 2.33
4 2014-07-02 73 1 0 2.33
5 2014-09-30 73 1 0 2.33
6 2014-12-30 73 1 0 2.33
7 2015-04-01 73 1 0 2.33
8 2015-07-02 73 1 0 2.33
In [20]:
#Create the time chart
plt.plot('Midpoint Date', 'Consecutive Below Benchmark', data=data1, marker = 's', markerfacecolor = 'blue', linewidth =1, color = 'blue')
plt.plot('Midpoint Date','UCL', data=data1, markersize = 5, color = 'red', linewidth =1)
plt.legend()
plt.title('Response Time to Pain')
plt.xlabel('Midpoint Date')
plt.ylabel('Consecutive Below Benchmark')
plt.yticks(np.arange(0,3,0.5))
Out[20]:
([<matplotlib.axis.YTick at 0x9068850>,
  <matplotlib.axis.YTick at 0x9068640>,
  <matplotlib.axis.YTick at 0x905bdf0>,
  <matplotlib.axis.YTick at 0x96efe50>,
  <matplotlib.axis.YTick at 0x9708100>,
  <matplotlib.axis.YTick at 0x9708388>],
 <a list of 6 Text major ticklabel objects>)
In [ ]: