#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
#Import data files
data= pd.read_excel(r'C:\Users\user1\Documents\muriyo\gmu\hap725\week5\q2\data\DataonFallsinNursingHome.xlsx')
data
Case | Time 1 | Time 2 | Time 3 | Time 4 | Time 5 | Time 6 | Time 7 | Time 8 | Time 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.25 | 0.55 | 0.40 | 0.15 | 0.55 | 0.75 | 0.20 | 0.35 | 0.40 |
1 | 2 | 0.40 | 0.25 | 0.70 | 0.45 | 0.60 | 0.45 | 0.15 | 0.80 | 0.50 |
2 | 3 | 0.70 | 0.40 | 0.60 | 0.70 | 0.45 | 0.05 | 0.10 | 0.50 | 0.25 |
3 | 4 | 0.40 | 0.45 | 0.55 | 0.80 | 0.50 | 0.90 | 0.25 | 0.55 | 0.70 |
4 | 5 | 0.15 | 0.20 | 0.70 | 0.45 | 0.65 | 0.50 | 0.60 | 0.75 | 0.40 |
5 | 6 | 0.20 | 0.65 | 0.60 | 0.60 | 0.65 | 0.60 | 0.70 | 0.35 | 0.55 |
6 | 7 | 0.50 | 0.10 | 0.55 | 0.25 | 0.25 | 0.70 | 0.40 | 0.60 | 0.30 |
7 | 8 | 0.50 | 0.50 | 0.30 | 0.10 | 0.35 | 0.35 | 0.35 | 0.45 | 0.75 |
8 | 9 | 0.30 | 0.75 | 0.65 | 0.80 | 0.60 | 0.65 | 0.50 | 0.30 | 0.20 |
9 | 10 | 0.20 | 0.35 | 0.60 | 0.40 | 0.40 | 0.40 | 0.75 | 0.65 | 0.60 |
10 | 11 | 0.40 | 0.65 | 0.05 | 0.25 | 0.35 | 0.60 | 0.65 | 0.75 | 0.55 |
11 | 12 | 0.30 | 0.20 | 0.25 | 0.65 | 0.10 | 0.25 | 0.70 | 0.40 | 0.60 |
12 | 13 | 0.45 | 0.65 | 0.45 | 0.80 | 0.40 | 0.75 | 0.55 | 0.45 | 0.65 |
13 | 14 | 0.25 | 0.30 | 0.65 | 0.25 | 0.50 | 0.30 | 0.65 | 0.55 | 0.75 |
14 | 15 | 0.25 | 0.25 | 0.70 | 0.60 | 0.25 | 0.25 | 0.70 | 0.35 | 0.60 |
15 | 16 | 0.40 | 0.45 | 0.60 | 0.80 | 0.65 | 0.40 | 0.35 | 0.75 | 0.75 |
16 | 17 | 0.45 | 0.30 | 0.25 | 0.85 | 0.25 | 0.75 | 0.65 | 0.60 | 0.45 |
17 | 18 | 0.35 | 0.50 | 0.75 | 0.45 | 0.45 | 0.75 | 0.40 | 0.25 | 0.45 |
18 | 19 | 0.25 | 0.75 | NaN | 0.50 | 0.70 | 0.55 | 0.70 | 0.50 | NaN |
19 | 20 | 0.10 | 0.60 | NaN | 0.20 | 0.60 | 0.70 | NaN | 0.65 | NaN |
20 | 21 | NaN | NaN | NaN | 0.45 | NaN | NaN | NaN | NaN | NaN |
21 | Reported Falls | 8.00 | 6.00 | 7.00 | 8.00 | 5.00 | 6.00 | 4.00 | 5.00 | 4.00 |
#drop the Case column
data=data.drop(columns = '\xa0Case', axis = 1)
data
Time 1 | Time 2 | Time 3 | Time 4 | Time 5 | Time 6 | Time 7 | Time 8 | Time 9 | |
---|---|---|---|---|---|---|---|---|---|
0 | 0.25 | 0.55 | 0.40 | 0.15 | 0.55 | 0.75 | 0.20 | 0.35 | 0.40 |
1 | 0.40 | 0.25 | 0.70 | 0.45 | 0.60 | 0.45 | 0.15 | 0.80 | 0.50 |
2 | 0.70 | 0.40 | 0.60 | 0.70 | 0.45 | 0.05 | 0.10 | 0.50 | 0.25 |
3 | 0.40 | 0.45 | 0.55 | 0.80 | 0.50 | 0.90 | 0.25 | 0.55 | 0.70 |
4 | 0.15 | 0.20 | 0.70 | 0.45 | 0.65 | 0.50 | 0.60 | 0.75 | 0.40 |
5 | 0.20 | 0.65 | 0.60 | 0.60 | 0.65 | 0.60 | 0.70 | 0.35 | 0.55 |
6 | 0.50 | 0.10 | 0.55 | 0.25 | 0.25 | 0.70 | 0.40 | 0.60 | 0.30 |
7 | 0.50 | 0.50 | 0.30 | 0.10 | 0.35 | 0.35 | 0.35 | 0.45 | 0.75 |
8 | 0.30 | 0.75 | 0.65 | 0.80 | 0.60 | 0.65 | 0.50 | 0.30 | 0.20 |
9 | 0.20 | 0.35 | 0.60 | 0.40 | 0.40 | 0.40 | 0.75 | 0.65 | 0.60 |
10 | 0.40 | 0.65 | 0.05 | 0.25 | 0.35 | 0.60 | 0.65 | 0.75 | 0.55 |
11 | 0.30 | 0.20 | 0.25 | 0.65 | 0.10 | 0.25 | 0.70 | 0.40 | 0.60 |
12 | 0.45 | 0.65 | 0.45 | 0.80 | 0.40 | 0.75 | 0.55 | 0.45 | 0.65 |
13 | 0.25 | 0.30 | 0.65 | 0.25 | 0.50 | 0.30 | 0.65 | 0.55 | 0.75 |
14 | 0.25 | 0.25 | 0.70 | 0.60 | 0.25 | 0.25 | 0.70 | 0.35 | 0.60 |
15 | 0.40 | 0.45 | 0.60 | 0.80 | 0.65 | 0.40 | 0.35 | 0.75 | 0.75 |
16 | 0.45 | 0.30 | 0.25 | 0.85 | 0.25 | 0.75 | 0.65 | 0.60 | 0.45 |
17 | 0.35 | 0.50 | 0.75 | 0.45 | 0.45 | 0.75 | 0.40 | 0.25 | 0.45 |
18 | 0.25 | 0.75 | NaN | 0.50 | 0.70 | 0.55 | 0.70 | 0.50 | NaN |
19 | 0.10 | 0.60 | NaN | 0.20 | 0.60 | 0.70 | NaN | 0.65 | NaN |
20 | NaN | NaN | NaN | 0.45 | NaN | NaN | NaN | NaN | NaN |
21 | 8.00 | 6.00 | 7.00 | 8.00 | 5.00 | 6.00 | 4.00 | 5.00 | 4.00 |
#Transpose the dataset columns
df_t = data.T
df_t
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 |
#Adding Time Period Column
df_t['TimePeriod']= ['Time 1','Time 2','Time 3','Time 4','Time 5','Time 6','Time 7','Time 8','Time 9']
df_t
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | TimePeriod | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 |
#Renaming columns names
df_t.rename(columns ={0: 'C0', 1: 'C1', 2: 'C2', 3: 'C3', 4: 'C4', 5: 'C5', 6: 'C6', 7: 'C7', 8: 'C8', 9: 'C9', 10: 'C10',
11: 'C11', 12: 'C12', 13: 'C13', 14: 'C14', 15: 'C15', 16: 'C16', 17: 'C17', 18: 'C18', 19: 'C19',
20: 'C20', 21:'ReportedFalls'}, inplace = True)
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 |
#Count the number of cases by time period
# Resulting count values took into effect the Reported Falls and TimePeriod column, which need to be excluded
df_t['Count'] = df_t.loc[:, 'C0':].count(axis=1)
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | Count | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 22 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 22 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 20 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 23 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 22 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 22 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 21 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 22 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 20 |
#Excluding Reported Falls and TimePeriod column
df_t['FinalCount'] = (df_t['Count']-2)
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | Count | FinalCount | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 22 | 20 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 22 | 20 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 20 | 18 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 23 | 21 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 22 | 20 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 22 | 20 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 21 | 19 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 22 | 20 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 20 | 18 |
#drop the Count column
df_t=df_t.drop(columns = 'Count', axis = 1)
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 |
#Calculate the Observed Rate = Number of Falls / Total Count
df_t['ObservedRate'] = (df_t['ReportedFalls'])/(df_t['FinalCount'])
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | ObservedRate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 | 0.400000 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 | 0.300000 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 | 0.388889 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 | 0.380952 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 | 0.250000 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 | 0.300000 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 | 0.210526 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 | 0.250000 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 | 0.222222 |
#Expected Rate = average probablity of all cases for each timeperiod
df_t['ExpectedRate'] = df_t[['C0','C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15','C16',
'C17','C18','C19','C20']].mean(axis=1)
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | ObservedRate | ExpectedRate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 | 0.400000 | 0.340000 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 | 0.300000 | 0.442500 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 | 0.388889 | 0.519444 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 | 0.380952 | 0.500000 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 | 0.250000 | 0.462500 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 | 0.300000 | 0.532500 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 | 0.210526 | 0.492105 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 | 0.250000 | 0.527500 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 | 0.222222 | 0.525000 |
#select specific columns from df_t dataframe
df2=pd.DataFrame(df_t.iloc[:,0:21])# first 20 columns of data frame (C0 to C20)
df2
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN |
#Expected Deviation (Part 1)
#Create new dataframe using data from df2 and calculate risk
df3=pd.DataFrame()
for (columnName, columnData) in df2.iteritems():
df3[columnName+'Risk']=(df2[columnName])*(1-df2[columnName])
#Print df3 dataframe
df3
C0Risk | C1Risk | C2Risk | C3Risk | C4Risk | C5Risk | C6Risk | C7Risk | C8Risk | C9Risk | C10Risk | C11Risk | C12Risk | C13Risk | C14Risk | C15Risk | C16Risk | C17Risk | C18Risk | C19Risk | C20Risk | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.1875 | 0.2400 | 0.2100 | 0.2400 | 0.1275 | 0.1600 | 0.2500 | 0.2500 | 0.2100 | 0.1600 | 0.2400 | 0.2100 | 0.2475 | 0.1875 | 0.1875 | 0.2400 | 0.2475 | 0.2275 | 0.1875 | 0.0900 | NaN |
Time 2 | 0.2475 | 0.1875 | 0.2400 | 0.2475 | 0.1600 | 0.2275 | 0.0900 | 0.2500 | 0.1875 | 0.2275 | 0.2275 | 0.1600 | 0.2275 | 0.2100 | 0.1875 | 0.2475 | 0.2100 | 0.2500 | 0.1875 | 0.2400 | NaN |
Time 3 | 0.2400 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.2275 | 0.2400 | 0.0475 | 0.1875 | 0.2475 | 0.2275 | 0.2100 | 0.2400 | 0.1875 | 0.1875 | NaN | NaN | NaN |
Time 4 | 0.1275 | 0.2475 | 0.2100 | 0.1600 | 0.2475 | 0.2400 | 0.1875 | 0.0900 | 0.1600 | 0.2400 | 0.1875 | 0.2275 | 0.1600 | 0.1875 | 0.2400 | 0.1600 | 0.1275 | 0.2475 | 0.2500 | 0.1600 | 0.2475 |
Time 5 | 0.2475 | 0.2400 | 0.2475 | 0.2500 | 0.2275 | 0.2275 | 0.1875 | 0.2275 | 0.2400 | 0.2400 | 0.2275 | 0.0900 | 0.2400 | 0.2500 | 0.1875 | 0.2275 | 0.1875 | 0.2475 | 0.2100 | 0.2400 | NaN |
Time 6 | 0.1875 | 0.2475 | 0.0475 | 0.0900 | 0.2500 | 0.2400 | 0.2100 | 0.2275 | 0.2275 | 0.2400 | 0.2400 | 0.1875 | 0.1875 | 0.2100 | 0.1875 | 0.2400 | 0.1875 | 0.1875 | 0.2475 | 0.2100 | NaN |
Time 7 | 0.1600 | 0.1275 | 0.0900 | 0.1875 | 0.2400 | 0.2100 | 0.2400 | 0.2275 | 0.2500 | 0.1875 | 0.2275 | 0.2100 | 0.2475 | 0.2275 | 0.2100 | 0.2275 | 0.2275 | 0.2400 | 0.2100 | NaN | NaN |
Time 8 | 0.2275 | 0.1600 | 0.2500 | 0.2475 | 0.1875 | 0.2275 | 0.2400 | 0.2475 | 0.2100 | 0.2275 | 0.1875 | 0.2400 | 0.2475 | 0.2475 | 0.2275 | 0.1875 | 0.2400 | 0.1875 | 0.2500 | 0.2275 | NaN |
Time 9 | 0.2400 | 0.2500 | 0.1875 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.1875 | 0.1600 | 0.2400 | 0.2475 | 0.2400 | 0.2275 | 0.1875 | 0.2400 | 0.1875 | 0.2475 | 0.2475 | NaN | NaN | NaN |
#Expected Deviation (Part 2)
#Determine Sum of Risk using df3 dataframe
df3['ExpectedRate'] = df3[['C0Risk','C1Risk','C2Risk','C3Risk','C4Risk','C5Risk','C6Risk','C7Risk','C8Risk','C9Risk',
'C10Risk','C11Risk','C12Risk','C13Risk','C14Risk','C15Risk','C16Risk',
'C17Risk','C18Risk','C19Risk','C20Risk']].sum(axis=1)
df3
C0Risk | C1Risk | C2Risk | C3Risk | C4Risk | C5Risk | C6Risk | C7Risk | C8Risk | C9Risk | C10Risk | C11Risk | C12Risk | C13Risk | C14Risk | C15Risk | C16Risk | C17Risk | C18Risk | C19Risk | C20Risk | ExpectedRate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.1875 | 0.2400 | 0.2100 | 0.2400 | 0.1275 | 0.1600 | 0.2500 | 0.2500 | 0.2100 | 0.1600 | 0.2400 | 0.2100 | 0.2475 | 0.1875 | 0.1875 | 0.2400 | 0.2475 | 0.2275 | 0.1875 | 0.0900 | NaN | 4.1000 |
Time 2 | 0.2475 | 0.1875 | 0.2400 | 0.2475 | 0.1600 | 0.2275 | 0.0900 | 0.2500 | 0.1875 | 0.2275 | 0.2275 | 0.1600 | 0.2275 | 0.2100 | 0.1875 | 0.2475 | 0.2100 | 0.2500 | 0.1875 | 0.2400 | NaN | 4.2125 |
Time 3 | 0.2400 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.2275 | 0.2400 | 0.0475 | 0.1875 | 0.2475 | 0.2275 | 0.2100 | 0.2400 | 0.1875 | 0.1875 | NaN | NaN | NaN | 3.8475 |
Time 4 | 0.1275 | 0.2475 | 0.2100 | 0.1600 | 0.2475 | 0.2400 | 0.1875 | 0.0900 | 0.1600 | 0.2400 | 0.1875 | 0.2275 | 0.1600 | 0.1875 | 0.2400 | 0.1600 | 0.1275 | 0.2475 | 0.2500 | 0.1600 | 0.2475 | 4.1050 |
Time 5 | 0.2475 | 0.2400 | 0.2475 | 0.2500 | 0.2275 | 0.2275 | 0.1875 | 0.2275 | 0.2400 | 0.2400 | 0.2275 | 0.0900 | 0.2400 | 0.2500 | 0.1875 | 0.2275 | 0.1875 | 0.2475 | 0.2100 | 0.2400 | NaN | 4.4425 |
Time 6 | 0.1875 | 0.2475 | 0.0475 | 0.0900 | 0.2500 | 0.2400 | 0.2100 | 0.2275 | 0.2275 | 0.2400 | 0.2400 | 0.1875 | 0.1875 | 0.2100 | 0.1875 | 0.2400 | 0.1875 | 0.1875 | 0.2475 | 0.2100 | NaN | 4.0525 |
Time 7 | 0.1600 | 0.1275 | 0.0900 | 0.1875 | 0.2400 | 0.2100 | 0.2400 | 0.2275 | 0.2500 | 0.1875 | 0.2275 | 0.2100 | 0.2475 | 0.2275 | 0.2100 | 0.2275 | 0.2275 | 0.2400 | 0.2100 | NaN | NaN | 3.9475 |
Time 8 | 0.2275 | 0.1600 | 0.2500 | 0.2475 | 0.1875 | 0.2275 | 0.2400 | 0.2475 | 0.2100 | 0.2275 | 0.1875 | 0.2400 | 0.2475 | 0.2475 | 0.2275 | 0.1875 | 0.2400 | 0.1875 | 0.2500 | 0.2275 | NaN | 4.4675 |
Time 9 | 0.2400 | 0.2500 | 0.1875 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.1875 | 0.1600 | 0.2400 | 0.2475 | 0.2400 | 0.2275 | 0.1875 | 0.2400 | 0.1875 | 0.2475 | 0.2475 | NaN | NaN | NaN | 3.9975 |
#Expected Deviation (Part 3)
#Determine Square Root of Risk using df3 dataframe
df3['SqRootRisk'] = df3['ExpectedRate']**(1/2)
df3
C0Risk | C1Risk | C2Risk | C3Risk | C4Risk | C5Risk | C6Risk | C7Risk | C8Risk | C9Risk | C10Risk | C11Risk | C12Risk | C13Risk | C14Risk | C15Risk | C16Risk | C17Risk | C18Risk | C19Risk | C20Risk | ExpectedRate | SqRootRisk | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.1875 | 0.2400 | 0.2100 | 0.2400 | 0.1275 | 0.1600 | 0.2500 | 0.2500 | 0.2100 | 0.1600 | 0.2400 | 0.2100 | 0.2475 | 0.1875 | 0.1875 | 0.2400 | 0.2475 | 0.2275 | 0.1875 | 0.0900 | NaN | 4.1000 | 2.024846 |
Time 2 | 0.2475 | 0.1875 | 0.2400 | 0.2475 | 0.1600 | 0.2275 | 0.0900 | 0.2500 | 0.1875 | 0.2275 | 0.2275 | 0.1600 | 0.2275 | 0.2100 | 0.1875 | 0.2475 | 0.2100 | 0.2500 | 0.1875 | 0.2400 | NaN | 4.2125 | 2.052438 |
Time 3 | 0.2400 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.2275 | 0.2400 | 0.0475 | 0.1875 | 0.2475 | 0.2275 | 0.2100 | 0.2400 | 0.1875 | 0.1875 | NaN | NaN | NaN | 3.8475 | 1.961505 |
Time 4 | 0.1275 | 0.2475 | 0.2100 | 0.1600 | 0.2475 | 0.2400 | 0.1875 | 0.0900 | 0.1600 | 0.2400 | 0.1875 | 0.2275 | 0.1600 | 0.1875 | 0.2400 | 0.1600 | 0.1275 | 0.2475 | 0.2500 | 0.1600 | 0.2475 | 4.1050 | 2.026080 |
Time 5 | 0.2475 | 0.2400 | 0.2475 | 0.2500 | 0.2275 | 0.2275 | 0.1875 | 0.2275 | 0.2400 | 0.2400 | 0.2275 | 0.0900 | 0.2400 | 0.2500 | 0.1875 | 0.2275 | 0.1875 | 0.2475 | 0.2100 | 0.2400 | NaN | 4.4425 | 2.107724 |
Time 6 | 0.1875 | 0.2475 | 0.0475 | 0.0900 | 0.2500 | 0.2400 | 0.2100 | 0.2275 | 0.2275 | 0.2400 | 0.2400 | 0.1875 | 0.1875 | 0.2100 | 0.1875 | 0.2400 | 0.1875 | 0.1875 | 0.2475 | 0.2100 | NaN | 4.0525 | 2.013082 |
Time 7 | 0.1600 | 0.1275 | 0.0900 | 0.1875 | 0.2400 | 0.2100 | 0.2400 | 0.2275 | 0.2500 | 0.1875 | 0.2275 | 0.2100 | 0.2475 | 0.2275 | 0.2100 | 0.2275 | 0.2275 | 0.2400 | 0.2100 | NaN | NaN | 3.9475 | 1.986832 |
Time 8 | 0.2275 | 0.1600 | 0.2500 | 0.2475 | 0.1875 | 0.2275 | 0.2400 | 0.2475 | 0.2100 | 0.2275 | 0.1875 | 0.2400 | 0.2475 | 0.2475 | 0.2275 | 0.1875 | 0.2400 | 0.1875 | 0.2500 | 0.2275 | NaN | 4.4675 | 2.113646 |
Time 9 | 0.2400 | 0.2500 | 0.1875 | 0.2100 | 0.2400 | 0.2475 | 0.2100 | 0.1875 | 0.1600 | 0.2400 | 0.2475 | 0.2400 | 0.2275 | 0.1875 | 0.2400 | 0.1875 | 0.2475 | 0.2475 | NaN | NaN | NaN | 3.9975 | 1.999375 |
#Add the SqRootRisk column from df3 into the df_t dataframe
df_t['SqRootRisk']= df3['SqRootRisk']
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | ObservedRate | ExpectedRate | SqRootRisk | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 | 0.400000 | 0.340000 | 2.024846 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 | 0.300000 | 0.442500 | 2.052438 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 | 0.388889 | 0.519444 | 1.961505 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 | 0.380952 | 0.500000 | 2.026080 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 | 0.250000 | 0.462500 | 2.107724 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 | 0.300000 | 0.532500 | 2.013082 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 | 0.210526 | 0.492105 | 1.986832 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 | 0.250000 | 0.527500 | 2.113646 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 | 0.222222 | 0.525000 | 1.999375 |
#Calculate Expected Deviation
df_t['ExpectedDeviation']=df_t['SqRootRisk']/df_t['FinalCount']
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | ObservedRate | ExpectedRate | SqRootRisk | ExpectedDeviation | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 | 0.400000 | 0.340000 | 2.024846 | 0.101242 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 | 0.300000 | 0.442500 | 2.052438 | 0.102622 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 | 0.388889 | 0.519444 | 1.961505 | 0.108972 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 | 0.380952 | 0.500000 | 2.026080 | 0.096480 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 | 0.250000 | 0.462500 | 2.107724 | 0.105386 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 | 0.300000 | 0.532500 | 2.013082 | 0.100654 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 | 0.210526 | 0.492105 | 1.986832 | 0.104570 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 | 0.250000 | 0.527500 | 2.113646 | 0.105682 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 | 0.222222 | 0.525000 | 1.999375 | 0.111076 |
#Add T-Value based on degrees of freedom for FinalCount Column in df_t dataframe
#T-Table source -- https://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
# dof 17, t = 2.110
# dof 18, t = 2.101
# dof 19, t = 2.093
# dof 20, t = 2.086
df_t['TValue']=[2.093, 2.093, 2.110, 2.086, 2.093, 2.093, 2.101, 2.093, 2.110]
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | ObservedRate | ExpectedRate | SqRootRisk | ExpectedDeviation | TValue | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 | 0.400000 | 0.340000 | 2.024846 | 0.101242 | 2.093 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 | 0.300000 | 0.442500 | 2.052438 | 0.102622 | 2.093 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 | 0.388889 | 0.519444 | 1.961505 | 0.108972 | 2.110 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 | 0.380952 | 0.500000 | 2.026080 | 0.096480 | 2.086 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 | 0.250000 | 0.462500 | 2.107724 | 0.105386 | 2.093 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 | 0.300000 | 0.532500 | 2.013082 | 0.100654 | 2.093 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 | 0.210526 | 0.492105 | 1.986832 | 0.104570 | 2.101 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 | 0.250000 | 0.527500 | 2.113646 | 0.105682 | 2.093 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 | 0.222222 | 0.525000 | 1.999375 | 0.111076 | 2.110 |
#Calculate UCL & LCL
#UCL = Expected Rate + (TValue * Expected Deviation)
#LCL = Expected Rate - (TValue * Expected Deviation)
df_t['UCL'] = round(df_t['ExpectedRate'] + (df_t['TValue']*df_t['ExpectedDeviation']), 2)
df_t['LCL'] = round(df_t['ExpectedRate'] - (df_t['TValue']*df_t['ExpectedDeviation']), 2)
df_t
C0 | C1 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C18 | C19 | C20 | ReportedFalls | TimePeriod | FinalCount | ObservedRate | ExpectedRate | SqRootRisk | ExpectedDeviation | TValue | UCL | LCL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Time 1 | 0.25 | 0.40 | 0.70 | 0.40 | 0.15 | 0.20 | 0.50 | 0.50 | 0.30 | 0.20 | 0.40 | 0.30 | 0.45 | 0.25 | 0.25 | 0.40 | 0.45 | 0.35 | 0.25 | 0.10 | NaN | 8.0 | Time 1 | 20 | 0.400000 | 0.340000 | 2.024846 | 0.101242 | 2.093 | 0.55 | 0.13 |
Time 2 | 0.55 | 0.25 | 0.40 | 0.45 | 0.20 | 0.65 | 0.10 | 0.50 | 0.75 | 0.35 | 0.65 | 0.20 | 0.65 | 0.30 | 0.25 | 0.45 | 0.30 | 0.50 | 0.75 | 0.60 | NaN | 6.0 | Time 2 | 20 | 0.300000 | 0.442500 | 2.052438 | 0.102622 | 2.093 | 0.66 | 0.23 |
Time 3 | 0.40 | 0.70 | 0.60 | 0.55 | 0.70 | 0.60 | 0.55 | 0.30 | 0.65 | 0.60 | 0.05 | 0.25 | 0.45 | 0.65 | 0.70 | 0.60 | 0.25 | 0.75 | NaN | NaN | NaN | 7.0 | Time 3 | 18 | 0.388889 | 0.519444 | 1.961505 | 0.108972 | 2.110 | 0.75 | 0.29 |
Time 4 | 0.15 | 0.45 | 0.70 | 0.80 | 0.45 | 0.60 | 0.25 | 0.10 | 0.80 | 0.40 | 0.25 | 0.65 | 0.80 | 0.25 | 0.60 | 0.80 | 0.85 | 0.45 | 0.50 | 0.20 | 0.45 | 8.0 | Time 4 | 21 | 0.380952 | 0.500000 | 2.026080 | 0.096480 | 2.086 | 0.70 | 0.30 |
Time 5 | 0.55 | 0.60 | 0.45 | 0.50 | 0.65 | 0.65 | 0.25 | 0.35 | 0.60 | 0.40 | 0.35 | 0.10 | 0.40 | 0.50 | 0.25 | 0.65 | 0.25 | 0.45 | 0.70 | 0.60 | NaN | 5.0 | Time 5 | 20 | 0.250000 | 0.462500 | 2.107724 | 0.105386 | 2.093 | 0.68 | 0.24 |
Time 6 | 0.75 | 0.45 | 0.05 | 0.90 | 0.50 | 0.60 | 0.70 | 0.35 | 0.65 | 0.40 | 0.60 | 0.25 | 0.75 | 0.30 | 0.25 | 0.40 | 0.75 | 0.75 | 0.55 | 0.70 | NaN | 6.0 | Time 6 | 20 | 0.300000 | 0.532500 | 2.013082 | 0.100654 | 2.093 | 0.74 | 0.32 |
Time 7 | 0.20 | 0.15 | 0.10 | 0.25 | 0.60 | 0.70 | 0.40 | 0.35 | 0.50 | 0.75 | 0.65 | 0.70 | 0.55 | 0.65 | 0.70 | 0.35 | 0.65 | 0.40 | 0.70 | NaN | NaN | 4.0 | Time 7 | 19 | 0.210526 | 0.492105 | 1.986832 | 0.104570 | 2.101 | 0.71 | 0.27 |
Time 8 | 0.35 | 0.80 | 0.50 | 0.55 | 0.75 | 0.35 | 0.60 | 0.45 | 0.30 | 0.65 | 0.75 | 0.40 | 0.45 | 0.55 | 0.35 | 0.75 | 0.60 | 0.25 | 0.50 | 0.65 | NaN | 5.0 | Time 8 | 20 | 0.250000 | 0.527500 | 2.113646 | 0.105682 | 2.093 | 0.75 | 0.31 |
Time 9 | 0.40 | 0.50 | 0.25 | 0.70 | 0.40 | 0.55 | 0.30 | 0.75 | 0.20 | 0.60 | 0.55 | 0.60 | 0.65 | 0.75 | 0.60 | 0.75 | 0.45 | 0.45 | NaN | NaN | NaN | 4.0 | Time 9 | 18 | 0.222222 | 0.525000 | 1.999375 | 0.111076 | 2.110 | 0.76 | 0.29 |
#Plot the chart
plt.plot('TimePeriod','ObservedRate',data=df_t, marker = 's',color = 'blue')
plt.plot('TimePeriod','UCL',data=df_t, markersize=0, color='red')
plt.plot('TimePeriod','LCL',data=df_t, markersize=0, color='red')
plt.legend()
plt.title('Comparison of Rates')
plt.xlabel('Time Period')
plt.ylabel('Observed Rate')
Text(0,0.5,'Observed Rate')