In [53]:
#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [54]:
#Import data files 

d_201611=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20161110.csv', engine='python')
d_201608=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20160810.csv', engine='python')
d_201605=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20160504.csv', engine='python')
d_201512=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20151210.csv', engine='python')
d_201510=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20151008.csv', engine='python')
d_201507=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20150716.csv', engine='python')
d_201505=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20150506.csv', engine='python')
d_201504=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20150416.csv', engine='python')
d_201501=pd.read_csv (r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q1\data\TimelyandEffectiveCare_Hospital_20150122.csv', engine='python')
In [51]:
#View data in dataframe for 201611

d_201611.head(2)
Out[51]:
Provider ID Hospital Name Address City State ZIP Code County Name Phone Number Condition Measure ID Measure Name Score Sample Footnote Measure Start Date Measure End Date
0 10001 SOUTHEAST ALABAMA MEDICAL CENTER 1108 ROSS CLARK CIRCLE DOTHAN AL 36301 HOUSTON 3347938701 Heart Attack or Chest Pain AMI_7a Fibrinolytic Therapy Received Within 30 Minute... Not Available Not Available 7 - No cases met the criteria for this measure. 01/01/2015 12/31/2015
1 10001 SOUTHEAST ALABAMA MEDICAL CENTER 1108 ROSS CLARK CIRCLE DOTHAN AL 36301 HOUSTON 3347938701 Heart Attack or Chest Pain AMI_8a Primary PCI Received Within 90 Minutes of Hosp... 92 24 3 - Results are based on a shorter time period... 01/01/2015 09/30/2015
In [58]:
#Note that Provider ID for 201611 dataset is Integer while in other datasets it is a string value
#Sccore and Sample are also Object value and need to be converted to a float to allow for calculations

print("\033[1m" +'201611 data types' + "\033[0m")
print (d_201611.dtypes)
print("\033[1m" +'201608 data types' + "\033[0m")
print (d_201608.dtypes)
print("\033[1m" +'201605 data types' + "\033[0m")
print (d_201605.dtypes)
print("\033[1m" +'201512 data types' + "\033[0m")
print (d_201512.dtypes)
print("\033[1m" +'201510 data types' + "\033[0m")
print (d_201510.dtypes)
print("\033[1m" +'201507 data types' + "\033[0m")
print (d_201507.dtypes)
print("\033[1m" +'201505 data types' + "\033[0m")
print (d_201505.dtypes)
print("\033[1m" +'201504 data types' + "\033[0m")
print (d_201504.dtypes)
print("\033[1m" +'201501 data types' + "\033[0m")
print (d_201501.dtypes)
201611 data types
Provider ID            int64
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201608 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201605 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201512 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201510 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201507 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201505 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201504 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
201501 data types
Provider ID           object
Hospital Name         object
Address               object
City                  object
State                 object
ZIP Code               int64
County Name           object
Phone Number           int64
Condition             object
Measure ID            object
Measure Name          object
Score                 object
Sample                object
Footnote              object
Measure Start Date    object
Measure End Date      object
dtype: object
In [80]:
#Filter for SOUTHEAST ALABAMA MEDICAL CENTER for each year.  
#Filter for City = Dothan
#Select measure ID "SCIP_INF_3"; this measure refers to prophylactic antibiotic use. 

h = ['SOUTHEAST ALABAMA MEDICAL CENTER' ] 
c = ['DOTHAN']
m = ['SCIP_INF_3']

d_201611s=pd.DataFrame(d_201611, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201611s = d_201611s[(d_201611s['Hospital Name'].isin(h)) & (d_201611s['City'].isin (c)) & (d_201611s['Measure ID'].isin (m))]
print("\033[1m" +'201611 samc Filtered Dataset' + "\033[0m")
print(d_201611s.head())

d_201608s=pd.DataFrame(d_201608, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201608s = d_201608s[(d_201608s['Hospital Name'].isin(h)) & (d_201608s['City'].isin (c)) & (d_201608s['Measure ID'].isin (m))]
print("\033[1m" +'201608 samc Filtered Dataset' + "\033[0m")
print(d_201608s.head())


d_201605s=pd.DataFrame(d_201605, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201605s = d_201605s[(d_201605s['Hospital Name'].isin(h)) & (d_201605s['City'].isin (c)) & (d_201605s['Measure ID'].isin (m))]
print("\033[1m" +'201605 samc Filtered Dataset' + "\033[0m")
print(d_201605s.head())


d_201512s=pd.DataFrame(d_201512, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201512s = d_201512s[(d_201512s['Hospital Name'].isin(h)) & (d_201512s['City'].isin (c)) & (d_201512s['Measure ID'].isin (m))]
print("\033[1m" +'201512 samc Filtered Dataset' + "\033[0m")
print(d_201512s.head())

d_201510s=pd.DataFrame(d_201510, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201510s = d_201510s[(d_201510s['Hospital Name'].isin(h)) & (d_201510s['City'].isin (c)) & (d_201510s['Measure ID'].isin (m))]
print("\033[1m" +'201510 samc Filtered Dataset' + "\033[0m")
print(d_201510s.head())


d_201507s=pd.DataFrame(d_201507, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201507s = d_201507s[(d_201507s['Hospital Name'].isin(h)) & (d_201507s['City'].isin (c)) & (d_201507s['Measure ID'].isin (m))]
print("\033[1m" +'201507 samc Filtered Dataset' + "\033[0m")
print(d_201507s.head())

d_201505s=pd.DataFrame(d_201505, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201505s = d_201505s[(d_201505s['Hospital Name'].isin(h)) & (d_201505s['City'].isin (c)) & (d_201505s['Measure ID'].isin (m))]
print("\033[1m" +'201505 samc Filtered Dataset' + "\033[0m")
print(d_201505s.head())

d_201504s=pd.DataFrame(d_201504, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201504s = d_201505s[(d_201504s['Hospital Name'].isin(h)) & (d_201504s['City'].isin (c)) & (d_201504s['Measure ID'].isin (m))]
print("\033[1m" +'201504 samc Filtered Dataset' + "\033[0m")
print(d_201504s.head())

d_201501s=pd.DataFrame(d_201501, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
d_201501s = d_201501s[(d_201501s['Hospital Name'].isin(h)) & (d_201501s['City'].isin (c)) & (d_201501s['Measure ID'].isin (m))]
print("\033[1m" +'201501 samc Filtered Dataset' + "\033[0m")
print(d_201501s.head())
201611 samc Filtered Dataset
    Provider ID                     Hospital Name    City State  Measure ID  \
26        10001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
26    99    170         01/01/2015       09/30/2015  
201608 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
25       10001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
25   100    256          10/1/2014        9/30/2015  
201605 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
23       10001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
23    99    340           7/1/2014        6/30/2015  
201512 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
23       10001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
23    99    337           4/1/2014        3/31/2015  
201510 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
30      010001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
30    98    332         01/01/2014       12/31/2014  
201507 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
30      010001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
30    98    329         10/01/2013       09/30/2014  
201505 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
30      010001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
30    98    330         07/01/2013       06/30/2014  
201504 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
30      010001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
30    98    330         07/01/2013       06/30/2014  
201501 samc Filtered Dataset
   Provider ID                     Hospital Name    City State  Measure ID  \
30      010001  SOUTHEAST ALABAMA MEDICAL CENTER  DOTHAN    AL  SCIP_INF_3   

   Score Sample Measure Start Date Measure End Date  
30    98    324         04/01/2013       03/31/2014  
C:\software\Anaconda\v3\p36\lib\site-packages\ipykernel_launcher.py:55: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
In [81]:
#Find number of rows and columns for the 9 data frames


print(d_201611s.shape)
print(d_201608s.shape)
print(d_201605s.shape)
print(d_201512s.shape)
print(d_201510s.shape)
print(d_201507s.shape)
print(d_201505s.shape)
print(d_201504s.shape)
print(d_201501s.shape)
(1, 9)
(1, 9)
(1, 9)
(1, 9)
(1, 9)
(1, 9)
(1, 9)
(1, 9)
(1, 9)
In [84]:
#Check resulting dataframe for 201611s
d_201611s.head()
Out[84]:
Provider ID Hospital Name City State Measure ID Score Sample Measure Start Date Measure End Date
26 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 170 01/01/2015 09/30/2015
In [87]:
#Check for non numeric values in the HCAHPS Linear Mean Value column for the above data frames.   
#The traceback feedback includes a ValueError and shows that it could not convert the 'Not Applicable' string to a float. 

d_201611s['Score'].astype('float')
Out[87]:
26    99.0
Name: Score, dtype: float64
In [161]:
#Append the 9 data frames into one dataset

dfs = [d_201611s, d_201608s, d_201605s, d_201512s,  d_201510s,d_201507s,  d_201505s,  d_201504s,d_201501s ]  
samc = pd.concat(dfs)
samc
Out[161]:
Provider ID Hospital Name City State Measure ID Score Sample Measure Start Date Measure End Date
26 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 170 01/01/2015 09/30/2015
25 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 100 256 10/1/2014 9/30/2015
23 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 340 7/1/2014 6/30/2015
23 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 337 4/1/2014 3/31/2015
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 332 01/01/2014 12/31/2014
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 329 10/01/2013 09/30/2014
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 330 07/01/2013 06/30/2014
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 330 07/01/2013 06/30/2014
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 324 04/01/2013 03/31/2014
In [162]:
samc.dtypes
Out[162]:
Provider ID           object
Hospital Name         object
City                  object
State                 object
Measure ID            object
Score                 object
Sample                object
Measure Start Date    object
Measure End Date      object
dtype: object
In [163]:
#Convert data types

samc['Score'] = pd.to_numeric(samc['Score'])
samc['Sample'] = pd.to_numeric(samc['Sample'])
samc['Measure Start Date'] = pd.to_datetime(samc['Measure Start Date'])
samc['Measure End Date'] = pd.to_datetime(samc['Measure End Date'])
samc.dtypes
Out[163]:
Provider ID                   object
Hospital Name                 object
City                          object
State                         object
Measure ID                    object
Score                          int64
Sample                         int64
Measure Start Date    datetime64[ns]
Measure End Date      datetime64[ns]
dtype: object
In [195]:
#Calcuate Midway Point: ( [Measure Start Date] + [Measure End Data] ) / 2  

samc['MidP'] = samc['Measure Start Date'] + (samc['Measure End Date'] - samc['Measure Start Date'])/2

#Calcuate Rate of Overuse: [Score] / [Sample]
samc['RateOveruse'] = samc['Score']/samc['Sample']

#Grand Rate: SUM (Score) / SUM (Sample)
samc['Grandrate'] = sum(samc['Score'])/sum(samc['Sample'])

#Upper Limit: Grand Rate + 1.96*SQRT((Grand Rate*(1-Grand Rate))/Sample)
#Lower Limit: Grand Rate - 1.96*SQRT((Grand Rate*(1-Grand Rate))/Sample)

samc['UCL'] = samc['Grandrate'] + (1.96 * np.sqrt((samc['Grandrate'] * (1 - samc['Grandrate']))/samc['Sample']))
samc['LCL'] = samc['Grandrate'] - (1.96 * np.sqrt((samc['Grandrate'] * (1 - samc['Grandrate']))/samc['Sample']))
samc
Out[195]:
Provider ID Hospital Name City State Measure ID Score Sample Measure Start Date Measure End Date MidP RateOveruse Grandrate UCL LCL
26 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 170 2015-01-01 2015-09-30 2015-05-17 0.582353 0.32278 0.393063 0.252497
25 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 100 256 2014-10-01 2015-09-30 2015-04-01 0.390625 0.32278 0.380054 0.265507
23 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 340 2014-07-01 2015-06-30 2014-12-30 0.291176 0.32278 0.372478 0.273083
23 10001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 99 337 2014-04-01 2015-03-31 2014-09-30 0.293769 0.32278 0.372698 0.272862
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 332 2014-01-01 2014-12-31 2014-07-02 0.295181 0.32278 0.373073 0.272487
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 329 2013-10-01 2014-09-30 2014-04-01 0.297872 0.32278 0.373302 0.272259
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 330 2013-07-01 2014-06-30 2013-12-30 0.296970 0.32278 0.373225 0.272335
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 330 2013-07-01 2014-06-30 2013-12-30 0.296970 0.32278 0.373225 0.272335
30 010001 SOUTHEAST ALABAMA MEDICAL CENTER DOTHAN AL SCIP_INF_3 98 324 2013-04-01 2014-03-31 2013-09-30 0.302469 0.32278 0.373690 0.271870
In [229]:
#Create the p-chart
plt.figure(figsize=(15,5))
plt.plot('Measure Start Date', 'RateOveruse', data=samc, marker = 's', markerfacecolor = 'blue', linewidth =1, color = 'blue')
plt.plot('Measure Start Date', 'UCL', data=samc, markersize = 0, color = 'red', linewidth =1)
plt.plot('Measure Start Date', 'LCL', data=samc, markersize = 0, color = 'red', linewidth =1)
plt.rotation=60
plt.legend()
plt.title('Rate of Post-Op Antibiotic Overuse: Apr 2013 - Jan 2015')
plt.xlabel('Measure Start Date')
plt.ylabel('Rate of Overuse')
Out[229]:
Text(0,0.5,'Rate of Overuse')