#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
#Import files = 6 seperate datasets
data_2015=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20152010.csv', engine='python')
data_2016=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20162019.csv', engine='python')
data_2017=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20171024.csv', engine='python')
data_2018=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20181031.csv', engine='python')
data_2019=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20191030.csv', engine='python')
data_2020=pd.read_csv(r'C:\Users\Debora Vito\Documents\HAP 725\Teachone\20200422.csv', engine='python')
data_2018.head(2)
#Filter BROOKDALE HOSPITAL MEDICAL CENTER
h = ['BROOKDALE HOSPITAL MEDICAL CENTER']
m = ['IMM_3_OP_27_FAC_ADHPCT']
mm = ['IMM_3']
data_2015A=pd.DataFrame(data_2015, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date', 'Start Date'] )
data_2015A = data_2015A[(data_2015A ['Hospital Name'].isin(h)) & (data_2015A ['Measure ID'].isin (m))]
data_2015A.head()
data_2016A=pd.DataFrame(data_2016, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2016A = data_2016A[(data_2016A ['Hospital Name'].isin(h)) & (data_2016A ['Measure ID'].isin (m))]
print(data_2016A.head())
data_2017A=pd.DataFrame(data_2017, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2017A = data_2017A[(data_2017A ['Hospital Name'].isin(h)) & (data_2017A ['Measure ID'].isin (m))]
data_2017A.head()
data_2018A=pd.DataFrame(data_2018, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2018A = data_2018A[(data_2018A ['Hospital Name'].isin(h)) & (data_2018A ['Measure ID'].isin (m))]
data_2018A.head()
data_2019A=pd.DataFrame(data_2019, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2019A = data_2019A[(data_2019A ['Hospital Name'].isin(h)) & (data_2019A ['Measure ID'].isin (mm))]
data_2019A.head()
data_2020A=pd.DataFrame(data_2020, columns =['Facility ID', 'Facility Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Start Date', 'End Date'] )
data_2020A = data_2020A[(data_2020A ['Facility Name'].isin(h)) & (data_2020A ['Measure ID'].isin (mm))]
print(data_2015A.head())
print(data_2016A.head())
#Filter NEW YORK-PRESBYTERIAN HOSPITAL
h = ['NEW YORK-PRESBYTERIAN HOSPITAL']
m = ['IMM_3_OP_27_FAC_ADHPCT']
mm = ['IMM_3']
data_2015B=pd.DataFrame(data_2015, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2015B = data_2015B[(data_2015B ['Hospital Name'].isin(h)) & (data_2015B ['Measure ID'].isin (m)) ]
data_2015B.head()
data_2016B=pd.DataFrame(data_2016, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2016B = data_2016B[(data_2016B ['Hospital Name'].isin(h)) & (data_2016B ['Measure ID'].isin (m))]
data_2016B.head()
data_2017B=pd.DataFrame(data_2017, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2017B = data_2017B[(data_2017B ['Hospital Name'].isin(h)) & (data_2017B ['Measure ID'].isin (m))]
data_2017B.head()
data_2018B=pd.DataFrame(data_2018, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2018B = data_2018B[(data_2018B ['Hospital Name'].isin(h)) & (data_2018B ['Measure ID'].isin (m))]
data_2018B.head()
data_2019B=pd.DataFrame(data_2019, columns =['Provider ID', 'Hospital Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Measure Start Date', 'Measure End Date'] )
data_2019B = data_2019B[(data_2019B ['Hospital Name'].isin(h)) & (data_2019B ['Measure ID'].isin (mm))]
data_2020B=pd.DataFrame(data_2020, columns =['Facility ID', 'Facility Name', 'City', 'State', 'Measure ID',
'Score', 'Sample', 'Start Date', 'End Date'] )
data_2020B = data_2020B[(data_2020B ['Facility Name'].isin(h)) & (data_2020B ['Measure ID'].isin (mm))]
print(data_2017B.head())
print(data_2020B.head())
#Rename columns for 2019 and 2020 becuase facility ID and Facility Name differ from previous dataset for that have Provider ID and Hospital Name
data_2019A.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
'End Date':'Measure End Date'}, inplace = True)
print(data_2019A.head())
data_2020A.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
'End Date':'Measure End Date'}, inplace = True)
print(data_2020A.head())
data_2019B.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
'End Date':'Measure End Date'}, inplace = True)
data_2020B.rename(columns = {'Facility ID':'Provider ID', 'Facility Name':'Hospital Name', 'Start Date':'Measure Start Date',
'End Date':'Measure End Date'}, inplace = True)
print(data_2020B.head())
#Append data frames for data A
dfA = [data_2015A, data_2016A, data_2017A, data_2018A, data_2019A,data_2020A ]
data_A = pd.concat(dfA)
print(data_A)
#Append dataframe for data B
dfB = [data_2015B, data_2016B, data_2017B, data_2018B, data_2019B,data_2020B ]
data_B = pd.concat(df)
print(data_B)
#Convert data types for data A
data_A['Score'] = pd.to_numeric(data_A['Score'])
data_A['Measure Start Date'] = pd.to_datetime(data_A['Measure Start Date'])
data_A['Measure End Date'] = pd.to_datetime(data_A['Measure End Date'])
data_A.dtypes
#Convert data types for data B
data_B['Score'] = pd.to_numeric(data_B['Score'])
data_B['Measure Start Date'] = pd.to_datetime(data_B['Measure Start Date'])
data_B['Measure End Date'] = pd.to_datetime(data_B['Measure End Date'])
data_B.dtypes
#rename SCORE to RateVaccination since that is the percentage of vaccinations
data_A.rename(columns = {'Score':'RateVaccination'}, inplace = True)
data_B.rename(columns = {'Score':'RateVaccination'}, inplace = True)
#Create a new df for BrookDale Hospital Medical Center
BD=pd.DataFrame(data_A, columns =['Provider ID', 'Hospital Name', 'State', 'Measure ID', 'RateVaccination','Measure Start Date',
'Measure End Date'] )
#Calcuate Midway Point: ( [Measure Start Date] + [Measure End Data] ) / 2
BD['MidP'] = BD['Measure Start Date'] + (BD['Measure End Date'] - BD['Measure Start Date'])/2
BD.head()
#Calculate average rate of vaccine for Brooksdale Hospital: Rate dervies from score that is in percentages already in provided data
BD['AvgRate_BD']= data_A['RateVaccination'].mean()
#Calcuate Rate of Vaccination for NY Presebrterian Hosp : Rate dervies from score that is in percentages already in provided data
BD['AvgRate_NYP']= data_B['RateVaccination'].mean()
#Calcuate Standards Dev for Rate of Vaccination for NY Presebryterian Hosp
BD['STDDEV_NYP']= data_B['RateVaccination'].std()
#Z-score
BD['ztsat'] = 1.96
BD.head()
#Calculate the UCL and LCL based on NY Presbyterian Hosp
BD['UCL'] = BD['AvgRate_NYP'] + BD['ztsat'] * BD['STDDEV_NYP']
BD['LCL'] = BD['AvgRate_NYP'] - BD['ztsat'] * BD['STDDEV_NYP']
BD
#Plot Data
plt.figure(figsize=(15,5))
plt.plot('MidP', 'RateVaccination', data=BD, marker = 's', markerfacecolor = 'blue', linewidth =1, color = 'blue')
plt.plot('MidP', 'UCL', data=BD, markersize = 0, color = 'red', linewidth =1)
plt.plot('MidP', 'LCL', data=BD, markersize = 0, color = 'red', linewidth =1)
plt.rotation=60
plt.legend()
plt.title('Rate of Flu Vaccination at Brookdale Hospital Medical Center NY')
plt.xlabel('Time Period')
plt.ylabel('Rate of Flu Vaccination')