In [1]:
#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
In [14]:
#Import files = 6 seperate datasets
data_2015=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20152010.csv', engine='python')
data_2016=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20162019.csv', engine='python')
data_2017=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20171024.csv', engine='python')
data_2018=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20181031.csv', engine='python')
data_2019=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20191030.csv', engine='python')
data_2020=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20200422.csv', engine='python')
data_2018.head(2)
Out[14]:
Provider ID Hospital Name Address City State ZIP Code County Name Phone Number Condition Measure ID Measure Name Score Sample Footnote Measure Start Date Measure End Date
0 10001 SOUTHEAST ALABAMA MEDICAL CENTER 1108 ROSS CLARK CIRCLE DOTHAN AL 36301 HOUSTON 3347938701 Emergency Department ED_1b ED1 231 601 2 - Data submitted were based on a sample of c... 1/1/2017 12/31/2017
1 10001 SOUTHEAST ALABAMA MEDICAL CENTER 1108 ROSS CLARK CIRCLE DOTHAN AL 36301 HOUSTON 3347938701 Emergency Department ED_2b ED2 68 601 2 - Data submitted were based on a sample of c... 1/1/2017 12/31/2017
In [23]:
#Filter BROOKDALE HOSPITAL MEDICAL CENTER

h = ['BROOKDALE HOSPITAL MEDICAL CENTER'] 
m = ['IMM_3_OP_27_FAC_ADHPCT']
mm = ['IMM_3']

data_2015A=pd.DataFrame(data_2015, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date', 'Start Date'] )
data_2015A = data_2015A[(data_2015A ['Hospital Name'].isin(h)) & (data_2015A ['Measure ID'].isin (m))]
data_2015A.head()

data_2016A=pd.DataFrame(data_2016, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2016A = data_2016A[(data_2016A ['Hospital Name'].isin(h)) & (data_2016A ['Measure ID'].isin (m))]
print(data_2016A.head())

data_2017A=pd.DataFrame(data_2017, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2017A = data_2017A[(data_2017A ['Hospital Name'].isin(h)) & (data_2017A ['Measure ID'].isin (m))]
data_2017A.head()

data_2018A=pd.DataFrame(data_2018, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2018A = data_2018A[(data_2018A ['Hospital Name'].isin(h)) & (data_2018A ['Measure ID'].isin (m))]
data_2018A.head()

data_2019A=pd.DataFrame(data_2019, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2019A = data_2019A[(data_2019A ['Hospital Name'].isin(h)) & (data_2019A ['Measure ID'].isin (mm))]
data_2019A.head()

data_2020A=pd.DataFrame(data_2020, columns =['Facility ID', 'Facility Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Start Date', 'End Date'] )
data_2020A = data_2020A[(data_2020A ['Facility Name'].isin(h)) & (data_2020A ['Measure ID'].isin (mm))]

print(data_2015A.head())
print(data_2016A.head())
       Provider ID                      Hospital Name      City State  \
64376       330233  BROOKDALE HOSPITAL MEDICAL CENTER  BROOKLYN    NY   

                   Measure ID Score Sample Measure Start Date Measure End Date  
64376  IMM_3_OP_27_FAC_ADHPCT    82     39          10/1/2015        3/31/2016  
       Provider ID                      Hospital Name      City State  \
110747      330233  BROOKDALE HOSPITAL MEDICAL CENTER  BROOKLYN    NY   

                    Measure ID Score Sample Measure Start Date  \
110747  IMM_3_OP_27_FAC_ADHPCT    69   3970          10/1/2014   

       Measure End Date  Start Date  
110747        3/31/2015         NaN  
       Provider ID                      Hospital Name      City State  \
64376       330233  BROOKDALE HOSPITAL MEDICAL CENTER  BROOKLYN    NY   

                   Measure ID Score Sample Measure Start Date Measure End Date  
64376  IMM_3_OP_27_FAC_ADHPCT    82     39          10/1/2015        3/31/2016  
In [26]:
#Filter NEW YORK-PRESBYTERIAN HOSPITAL

h = ['NEW YORK-PRESBYTERIAN HOSPITAL'] 
m = ['IMM_3_OP_27_FAC_ADHPCT']
mm = ['IMM_3']

data_2015B=pd.DataFrame(data_2015, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2015B = data_2015B[(data_2015B ['Hospital Name'].isin(h)) & (data_2015B ['Measure ID'].isin (m)) ]
data_2015B.head()


data_2016B=pd.DataFrame(data_2016, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2016B = data_2016B[(data_2016B ['Hospital Name'].isin(h)) & (data_2016B ['Measure ID'].isin (m))]
data_2016B.head()

data_2017B=pd.DataFrame(data_2017, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2017B = data_2017B[(data_2017B ['Hospital Name'].isin(h)) & (data_2017B ['Measure ID'].isin (m))]
data_2017B.head()

data_2018B=pd.DataFrame(data_2018, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2018B = data_2018B[(data_2018B ['Hospital Name'].isin(h)) & (data_2018B ['Measure ID'].isin (m))]
data_2018B.head()

data_2019B=pd.DataFrame(data_2019, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2019B = data_2019B[(data_2019B ['Hospital Name'].isin(h)) & (data_2019B ['Measure ID'].isin (mm))]

data_2020B=pd.DataFrame(data_2020, columns =['Facility ID', 'Facility Name', 'City', 'State', 'Measure ID', 
                                                     'Score', 'Sample', 'Start Date', 'End Date'] )
data_2020B = data_2020B[(data_2020B ['Facility Name'].isin(h)) & (data_2020B ['Measure ID'].isin (mm))]

print(data_2017B.head())
print(data_2020B.head())
       Provider ID                   Hospital Name      City State  \
63012       330101  NEW YORK-PRESBYTERIAN HOSPITAL  NEW YORK    NY   

                   Measure ID Score Sample Measure Start Date Measure End Date  
63012  IMM_3_OP_27_FAC_ADHPCT    88  35405          10/1/2016        3/31/2017  
      Facility ID                   Facility Name      City State Measure ID  \
54076      330101  NEW YORK-PRESBYTERIAN HOSPITAL  NEW YORK    NY      IMM_3   

      Score Sample Start Date   End Date  
54076    90  37332  10/1/2018  3/31/2019  
In [27]:
#Rename columns for 2019 and 2020 becuase facility ID and Facility Name differ from previous dataset for that have Provider ID and Hospital Name
data_2019A.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
                              'End Date':'Measure End Date'}, inplace = True) 
print(data_2019A.head())

data_2020A.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
                              'End Date':'Measure End Date'}, inplace = True) 
print(data_2020A.head())

data_2019B.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
                              'End Date':'Measure End Date'}, inplace = True) 

data_2020B.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
                              'End Date':'Measure End Date'}, inplace = True) 
print(data_2020B.head())
       Provider ID                      Hospital Name      City State  \
55199          NaN  BROOKDALE HOSPITAL MEDICAL CENTER  BROOKLYN    NY   

      Measure ID Score Sample Measure Start Date Measure End Date  
55199      IMM_3    86   5252          10/1/2018        3/31/2019  
      Provider ID                      Hospital Name      City State  \
55273      330233  BROOKDALE HOSPITAL MEDICAL CENTER  BROOKLYN    NY   

      Measure ID Score Sample Measure Start Date Measure End Date  
55273      IMM_3    86   5252          10/1/2018        3/31/2019  
      Provider ID                   Hospital Name      City State Measure ID  \
54076      330101  NEW YORK-PRESBYTERIAN HOSPITAL  NEW YORK    NY      IMM_3   

      Score Sample Measure Start Date Measure End Date  
54076    90  37332          10/1/2018        3/31/2019  
In [29]:
#Append data frames for data A

dfA = [data_2015A, data_2016A, data_2017A, data_2018A, data_2019A,data_2020A ]  
data_A = pd.concat(dfA)
print(data_A)

#Append dataframe for data B
dfB = [data_2015B, data_2016B, data_2017B, data_2018B, data_2019B,data_2020B ]  
data_B = pd.concat(df)
print(data_B)
            City                      Hospital Name Measure End Date  \
110747  BROOKLYN  BROOKDALE HOSPITAL MEDICAL CENTER        3/31/2015   
64376   BROOKLYN  BROOKDALE HOSPITAL MEDICAL CENTER        3/31/2016   
64442   BROOKLYN  BROOKDALE HOSPITAL MEDICAL CENTER        3/31/2017   
67049   BROOKLYN  BROOKDALE HOSPITAL MEDICAL CENTER        3/31/2018   
55199   BROOKLYN  BROOKDALE HOSPITAL MEDICAL CENTER        3/31/2019   
55273   BROOKLYN  BROOKDALE HOSPITAL MEDICAL CENTER        3/31/2019   

                    Measure ID Measure Start Date Provider ID Sample Score  \
110747  IMM_3_OP_27_FAC_ADHPCT          10/1/2014      330233   3970    69   
64376   IMM_3_OP_27_FAC_ADHPCT          10/1/2015      330233     39    82   
64442   IMM_3_OP_27_FAC_ADHPCT          10/1/2016      330233   3616    79   
67049   IMM_3_OP_27_FAC_ADHPCT          10/1/2017      330233   3432    83   
55199                    IMM_3          10/1/2018         NaN   5252    86   
55273                    IMM_3          10/1/2018      330233   5252    86   

        Start Date State  
110747         NaN    NY  
64376          NaN    NY  
64442          NaN    NY  
67049          NaN    NY  
55199          NaN    NY  
55273          NaN    NY  
       Provider ID                   Hospital Name        City State  \
108070      330101  NEW YORK-PRESBYTERIAN HOSPITAL    NEW YORK    NY   
62946       330101  NEW YORK-PRESBYTERIAN HOSPITAL    NEW YORK    NY   
63012       330101  NEW YORK-PRESBYTERIAN HOSPITAL    NEW YORK    NY   
65209       330061  NEW YORK-PRESBYTERIAN HOSPITAL  BRONXVILLE    NY   
65554       330101  NEW YORK-PRESBYTERIAN HOSPITAL    NEW YORK    NY   
54076       330101  NEW YORK-PRESBYTERIAN HOSPITAL    NEW YORK    NY   

                    Measure ID Score Sample Measure Start Date  \
108070  IMM_3_OP_27_FAC_ADHPCT    92  32258          10/1/2014   
62946   IMM_3_OP_27_FAC_ADHPCT    88  33938          10/1/2015   
63012   IMM_3_OP_27_FAC_ADHPCT    88  35405          10/1/2016   
65209   IMM_3_OP_27_FAC_ADHPCT    85   2545          10/1/2017   
65554   IMM_3_OP_27_FAC_ADHPCT    82  39112          10/1/2017   
54076                    IMM_3    90  37332          10/1/2018   

       Measure End Date  
108070        3/31/2015  
62946         3/31/2016  
63012         3/31/2017  
65209         3/31/2018  
65554         3/31/2018  
54076         3/31/2019  
C:\Users\Debora Vito\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:4: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.

To retain the current behavior and silence the warning, pass sort=False

  after removing the cwd from sys.path.
In [7]:
#Convert data types for data A
data_A['Score'] = pd.to_numeric(data_A['Score'])
data_A['Measure Start Date'] = pd.to_datetime(data_A['Measure Start Date'])
data_A['Measure End Date'] = pd.to_datetime(data_A['Measure End Date'])
data_A.dtypes
Out[7]:
City                          object
Hospital Name                 object
Measure End Date      datetime64[ns]
Measure ID                    object
Measure Start Date    datetime64[ns]
Provider ID                   object
Sample                         int64
Score                          int64
Start Date            datetime64[ns]
State                         object
dtype: object
In [8]:
#Convert data types for data B
data_B['Score'] = pd.to_numeric(data_B['Score'])
data_B['Measure Start Date'] = pd.to_datetime(data_B['Measure Start Date'])
data_B['Measure End Date'] = pd.to_datetime(data_B['Measure End Date'])
data_B.dtypes
Out[8]:
Provider ID                   object
Hospital Name                 object
City                          object
State                         object
Measure ID                    object
Score                          int64
Sample                         int64
Measure Start Date    datetime64[ns]
Measure End Date      datetime64[ns]
dtype: object
In [9]:
#rename SCORE to RateVaccination since that is the percentage of vaccinations
data_A.rename(columns = {'Score':'RateVaccination'}, inplace = True) 
data_B.rename(columns = {'Score':'RateVaccination'}, inplace = True) 
In [10]:
#Create a new df for BrookDale Hospital Medical Center
BD=pd.DataFrame(data_A, columns =['Provider ID', 'Hospital Name', 'State', 'Measure ID', 'RateVaccination','Measure Start Date',
                                                       'Measure End Date'] )
#Calcuate Midway Point: ( [Measure Start Date] + [Measure End Data] ) / 2  

BD['MidP'] = BD['Measure Start Date'] + (BD['Measure End Date'] - BD['Measure Start Date'])/2
BD.head()
Out[10]:
Provider ID Hospital Name State Measure ID RateVaccination Measure Start Date Measure End Date MidP
110747 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 69 2014-10-01 2015-03-31 2014-12-30 12:00:00
64376 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 82 2015-10-01 2016-03-31 2015-12-31 00:00:00
64442 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 79 2016-10-01 2017-03-31 2016-12-30 12:00:00
67049 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 83 2017-10-01 2018-03-31 2017-12-30 12:00:00
55199 NaN BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3 86 2018-10-01 2019-03-31 2018-12-30 12:00:00
In [11]:
#Calculate average rate of vaccine for Brooksdale Hospital: Rate dervies from score that is in percentages already in provided data
BD['AvgRate_BD']= data_A['RateVaccination'].mean()

#Calcuate Rate of Vaccination for NY Presebrterian Hosp : Rate dervies from score that is in percentages already in provided data
BD['AvgRate_NYP']= data_B['RateVaccination'].mean()

#Calcuate Standards Dev for Rate of Vaccination for NY Presebryterian Hosp
BD['STDDEV_NYP']= data_B['RateVaccination'].std()

#Z-score
BD['ztsat'] = 1.96
BD.head()

#Calculate the UCL and LCL based on NY Presbyterian Hosp
BD['UCL'] = BD['AvgRate_NYP'] + BD['ztsat'] * BD['STDDEV_NYP']
BD['LCL'] = BD['AvgRate_NYP'] - BD['ztsat'] * BD['STDDEV_NYP']
BD
Out[11]:
Provider ID Hospital Name State Measure ID RateVaccination Measure Start Date Measure End Date MidP AvgRate_BD AvgRate_NYP STDDEV_NYP ztsat UCL LCL
110747 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 69 2014-10-01 2015-03-31 2014-12-30 12:00:00 80.833333 87.5 3.563706 1.96 94.484864 80.515136
64376 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 82 2015-10-01 2016-03-31 2015-12-31 00:00:00 80.833333 87.5 3.563706 1.96 94.484864 80.515136
64442 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 79 2016-10-01 2017-03-31 2016-12-30 12:00:00 80.833333 87.5 3.563706 1.96 94.484864 80.515136
67049 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3_OP_27_FAC_ADHPCT 83 2017-10-01 2018-03-31 2017-12-30 12:00:00 80.833333 87.5 3.563706 1.96 94.484864 80.515136
55199 NaN BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3 86 2018-10-01 2019-03-31 2018-12-30 12:00:00 80.833333 87.5 3.563706 1.96 94.484864 80.515136
55273 330233 BROOKDALE HOSPITAL MEDICAL CENTER NY IMM_3 86 2018-10-01 2019-03-31 2018-12-30 12:00:00 80.833333 87.5 3.563706 1.96 94.484864 80.515136
In [12]:
#Plot Data
plt.figure(figsize=(15,5))
plt.plot('MidP', 'RateVaccination', data=BD, marker = 's', markerfacecolor = 'blue', linewidth =1, color = 'blue')
plt.plot('MidP', 'UCL', data=BD, markersize = 0, color = 'red', linewidth =1)
plt.plot('MidP', 'LCL', data=BD, markersize = 0, color = 'red', linewidth =1)
plt.rotation=60
plt.legend()
plt.title('Rate of Flu Vaccination at Brookdale Hospital Medical Center NY')
plt.xlabel('Time Period')
plt.ylabel('Rate of Flu Vaccination')
Out[12]:
Text(0,0.5,'Rate of Flu Vaccination')