import pandas as pd
import numpy as np
# Body-system name -> short code.
# NOTE(review): the numbered codes jump from bs14lr to bs16lr (there is no
# bs15lr entry) -- confirm that the gap is intentional.
v2c = {
    'Infectious_and_parasitic_diseases': 'bs01lr',
    'Neoplasms': 'bs02lr',
    'Endocrine,nutritional_and_metabolic diseases_and_immunity_disorders': 'bs03lr',
    'Diseases_of_the_blood_and_bloodforming_organs': 'bs04lr',
    'Mental_disorders': 'bs05lr',
    'Diseases_of_the_nervous_system_and_sense_organs': 'bs06lr',
    'Diseases_of_the_circulatory_system': 'bs07lr',
    'Diseases_of_the_respiratory_system': 'bs08lr',
    'Diseases_of_the_digestive_system': 'bs09lr',
    'Diseases_of_the_genitourinary_system': 'bs10lr',
    'Complications_of_pregnancy,_childbirth,_and_the_puerperium': 'bs11lr',
    'Diseases_of_the_skin_and_subcutaneous_tissue': 'bs12lr',
    'Diseases_of_the_musculoskeletal_system_and_connective_tissue': 'bs13lr',
    'Congenital_anomalies': 'bs14lr',
    'Symptoms,_signs,_and_ill_defined_conditions': 'bs16lr',
    'Injury_and_poisoning': 'bs17lr',
    'E_codes': 'bs18lr',
    'V_codes': 'bs19lr',
    'RareDX': 'rare_disease',
}
# Reverse lookup: short code -> body-system name.
c2v = {code: name for name, code in v2c.items()}
# Load the pairwise body-system table, translate the BC1/BC2 names into their
# short codes via v2c (an unknown name raises KeyError), shorten the count and
# day-difference column names, and keep only the eight columns used afterwards.
df = (
    pd.read_csv(
        's3://jeeforce-artifacts/alemi/diabetes/BCwithTime and Order.csv',
        low_memory=False,
    )
    .assign(
        bc_1=lambda frame: frame['BC1'].apply(lambda name: v2c[name]),
        bc_2=lambda frame: frame['BC2'].apply(lambda name: v2c[name]),
    )
    .rename(
        columns={
            'Total patnum': 'n',
            'BC1 first DX Patnum': 'n_1',
            'BC2 first DX Patnum': 'n_2',
            'Same day BC1 and BC2 Patnum': 'n_t',
            'SumDayDiff (total)': 'sum_day_diff',
            'AvgDaydiff': 'avg_day_diff',
        }
    )
    .drop(columns=['BC1', 'BC2'])
    .loc[:, ['bc_1', 'bc_2', 'n', 'n_1', 'n_2', 'n_t', 'sum_day_diff', 'avg_day_diff']]
)
df.shape
(171, 8)
df
bc_1 | bc_2 | n | n_1 | n_2 | n_t | sum_day_diff | avg_day_diff | |
---|---|---|---|---|---|---|---|---|
0 | bs01lr | bs02lr | 27020 | 13534 | 12005 | 1481 | 2780593 | 102 |
1 | bs01lr | bs03lr | 51702 | 17835 | 28878 | 4989 | -14513656 | -280 |
2 | bs01lr | bs04lr | 16303 | 8354 | 7065 | 884 | 2894519 | 177 |
3 | bs01lr | bs05lr | 52414 | 19656 | 28917 | 3841 | -10176298 | -194 |
4 | bs01lr | bs06lr | 53127 | 26189 | 24227 | 2711 | 3378359 | 63 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
166 | bs17lr | bs19lr | 96140 | 41135 | 52157 | 2848 | -16061590 | -167 |
167 | bs17lr | rare_disease | 69589 | 20703 | 20791 | 28096 | -1486110 | -21 |
168 | bs18lr | bs19lr | 35582 | 13906 | 20907 | 769 | -9989873 | -280 |
169 | bs18lr | rare_disease | 30637 | 8724 | 9606 | 12308 | -1870223 | -61 |
170 | bs19lr | rare_disease | 179395 | 87515 | 68316 | 23565 | 24914942 | 138 |
171 rows × 8 columns
import itertools
import numpy as np
def get_val(v1, v2):
    """Return the negated ``n_1 + n_2`` count stored for a pair of codes.

    The module-level ``df`` is searched for a row with ``bc_1 == v1`` and
    ``bc_2 == v2``; if there is none, the swapped orientation is tried.  The
    first matching row supplies the result ``-(n_1 + n_2)``.

    Returns ``np.nan`` when ``v1 == v2`` or when no row pairs the two codes.
    """
    if v1 == v2:
        return np.nan
    # Stored orientation first, then the mirrored one.
    for first, second in ((v1, v2), (v2, v1)):
        hits = df[(df['bc_1'] == first) & (df['bc_2'] == second)]
        if len(hits) > 0:
            row = hits.iloc[0]
            return -(row['n_1'] + row['n_2'])
    return np.nan
# Every code that appears on either side of a pair, in sorted order.
body_systems = sorted(set(df['bc_1'].unique()) | set(df['bc_2'].unique()))
# Square table of get_val(row_code, col_code); cells for which get_val returns
# NaN (including the whole diagonal) are filled with 0.0.
pair_values = [
    [get_val(row_code, col_code) for col_code in body_systems]
    for row_code in body_systems
]
M = pd.DataFrame(pair_values, index=body_systems, columns=body_systems).fillna(0.0)
# The diagonal is still zero here, so each row's absolute sum is its
# off-diagonal total; place that total on the diagonal.
row_totals = np.abs(M).sum(axis=1)
M = M + np.diag(row_totals)
M
bs01lr | bs02lr | bs03lr | bs04lr | bs05lr | bs06lr | bs07lr | bs08lr | bs09lr | bs10lr | bs11lr | bs12lr | bs13lr | bs14lr | bs16lr | bs17lr | bs18lr | bs19lr | rare_disease | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bs01lr | 637625.0 | -25539.0 | -46713.0 | -15419.0 | -48573.0 | -50416.0 | -48145.0 | -35390.0 | -41477.0 | -30942.0 | -107.0 | -33270.0 | -52303.0 | -3450.0 | -58821.0 | -24687.0 | -10835.0 | -73860.0 | -37678.0 |
bs02lr | -25539.0 | 893455.0 | -76389.0 | -25380.0 | -61200.0 | -75341.0 | -78020.0 | -50145.0 | -56412.0 | -47919.0 | -94.0 | -43067.0 | -70433.0 | -4632.0 | -85087.0 | -27333.0 | -12250.0 | -108658.0 | -45556.0 |
bs03lr | -46713.0 | -76389.0 | 1837422.0 | -41624.0 | -136904.0 | -175336.0 | -163878.0 | -99835.0 | -116089.0 | -94713.0 | -198.0 | -79501.0 | -159663.0 | -8227.0 | -183868.0 | -57651.0 | -24303.0 | -266932.0 | -105598.0 |
bs04lr | -15419.0 | -25380.0 | -41624.0 | 518129.0 | -33595.0 | -40487.0 | -46325.0 | -28972.0 | -34101.0 | -29774.0 | -55.0 | -21993.0 | -39135.0 | -2853.0 | -45582.0 | -16403.0 | -8571.0 | -57584.0 | -30276.0 |
bs05lr | -48573.0 | -61200.0 | -136904.0 | -33595.0 | 1680058.0 | -149652.0 | -135241.0 | -92287.0 | -109153.0 | -76799.0 | -304.0 | -71780.0 | -153221.0 | -7370.0 | -173381.0 | -63430.0 | -24713.0 | -241322.0 | -101133.0 |
bs06lr | -50416.0 | -75341.0 | -175336.0 | -40487.0 | -149652.0 | 1889799.0 | -177224.0 | -102281.0 | -122021.0 | -95987.0 | -259.0 | -81166.0 | -170218.0 | -8488.0 | -189985.0 | -63380.0 | -25489.0 | -265552.0 | -96517.0 |
bs07lr | -48145.0 | -78020.0 | -163878.0 | -46325.0 | -135241.0 | -177224.0 | 1850264.0 | -101596.0 | -114760.0 | -100083.0 | -113.0 | -79493.0 | -155918.0 | -8284.0 | -189170.0 | -55262.0 | -24451.0 | -272447.0 | -99854.0 |
bs08lr | -35390.0 | -50145.0 | -99835.0 | -28972.0 | -92287.0 | -102281.0 | -101596.0 | 1217719.0 | -79064.0 | -60762.0 | -209.0 | -54032.0 | -101529.0 | -5858.0 | -118668.0 | -42901.0 | -18460.0 | -155308.0 | -70422.0 |
bs09lr | -41477.0 | -56412.0 | -116089.0 | -34101.0 | -109153.0 | -122021.0 | -114760.0 | -79064.0 | 1401260.0 | -70168.0 | -191.0 | -61867.0 | -120766.0 | -6734.0 | -138066.0 | -49362.0 | -20587.0 | -182189.0 | -78253.0 |
bs10lr | -30942.0 | -47919.0 | -94713.0 | -29774.0 | -76799.0 | -95987.0 | -100083.0 | -60762.0 | -70168.0 | 1110989.0 | -263.0 | -48565.0 | -89969.0 | -5923.0 | -105469.0 | -34665.0 | -16355.0 | -141002.0 | -61631.0 |
bs11lr | -107.0 | -94.0 | -198.0 | -55.0 | -304.0 | -259.0 | -113.0 | -209.0 | -191.0 | -263.0 | 3460.0 | -154.0 | -304.0 | -12.0 | -331.0 | -127.0 | -45.0 | -481.0 | -213.0 |
bs12lr | -33270.0 | -43067.0 | -79501.0 | -21993.0 | -71780.0 | -81166.0 | -79493.0 | -54032.0 | -61867.0 | -48565.0 | -154.0 | 981037.0 | -81554.0 | -5144.0 | -92211.0 | -37046.0 | -15882.0 | -117168.0 | -57144.0 |
bs13lr | -52303.0 | -70433.0 | -159663.0 | -39135.0 | -153221.0 | -170218.0 | -155918.0 | -101529.0 | -120766.0 | -89969.0 | -304.0 | -81554.0 | 1870607.0 | -8584.0 | -189026.0 | -72231.0 | -28247.0 | -270537.0 | -106969.0 |
bs14lr | -3450.0 | -4632.0 | -8227.0 | -2853.0 | -7370.0 | -8488.0 | -8284.0 | -5858.0 | -6734.0 | -5923.0 | -12.0 | -5144.0 | -8584.0 | 108173.0 | -9595.0 | -3989.0 | -1922.0 | -11544.0 | -5564.0 |
bs16lr | -58821.0 | -85087.0 | -183868.0 | -45582.0 | -173381.0 | -189985.0 | -189170.0 | -118668.0 | -138066.0 | -105469.0 | -331.0 | -92211.0 | -189026.0 | -9595.0 | 2104293.0 | -73645.0 | -29656.0 | -293391.0 | -128341.0 |
bs17lr | -24687.0 | -27333.0 | -57651.0 | -16403.0 | -63430.0 | -63380.0 | -55262.0 | -42901.0 | -49362.0 | -34665.0 | -127.0 | -37046.0 | -72231.0 | -3989.0 | -73645.0 | 772278.0 | -15380.0 | -93292.0 | -41494.0 |
bs18lr | -10835.0 | -12250.0 | -24303.0 | -8571.0 | -24713.0 | -25489.0 | -24451.0 | -18460.0 | -20587.0 | -16355.0 | -45.0 | -15882.0 | -28247.0 | -1922.0 | -29656.0 | -15380.0 | 330289.0 | -34813.0 | -18330.0 |
bs19lr | -73860.0 | -108658.0 | -266932.0 | -57584.0 | -241322.0 | -265552.0 | -272447.0 | -155308.0 | -182189.0 | -141002.0 | -481.0 | -117168.0 | -270537.0 | -11544.0 | -293391.0 | -93292.0 | -34813.0 | 2741911.0 | -155831.0 |
rare_disease | -37678.0 | -45556.0 | -105598.0 | -30276.0 | -101133.0 | -96517.0 | -99854.0 | -70422.0 | -78253.0 | -61631.0 | -213.0 | -57144.0 | -106969.0 | -5564.0 | -128341.0 | -41494.0 | -18330.0 | -155831.0 | 1240804.0 |
# Sanity check: show each row's absolute off-diagonal sum next to its diagonal
# entry so the two can be compared side by side.
diag_values = np.diag(M)
off_diag_sums = (M - np.diag(diag_values)).sum(axis=1)
pd.DataFrame({
    'off': np.abs(off_diag_sums),
    'diag': pd.Series(diag_values, index=M.index),
})
off | diag | |
---|---|---|
bs01lr | 637625.0 | 637625.0 |
bs02lr | 893455.0 | 893455.0 |
bs03lr | 1837422.0 | 1837422.0 |
bs04lr | 518129.0 | 518129.0 |
bs05lr | 1680058.0 | 1680058.0 |
bs06lr | 1889799.0 | 1889799.0 |
bs07lr | 1850264.0 | 1850264.0 |
bs08lr | 1217719.0 | 1217719.0 |
bs09lr | 1401260.0 | 1401260.0 |
bs10lr | 1110989.0 | 1110989.0 |
bs11lr | 3460.0 | 3460.0 |
bs12lr | 981037.0 | 981037.0 |
bs13lr | 1870607.0 | 1870607.0 |
bs14lr | 108173.0 | 108173.0 |
bs16lr | 2104293.0 | 2104293.0 |
bs17lr | 772278.0 | 772278.0 |
bs18lr | 330289.0 | 330289.0 |
bs19lr | 2741911.0 | 2741911.0 |
rare_disease | 1240804.0 | 1240804.0 |
def get_b(v):
    """Return ``1 + 0.5 * (before - after)`` for body-system code ``v``.

    Over every row of the module-level ``df`` that involves ``v`` (as ``bc_1``
    or as ``bc_2``):

    * ``before`` sums the count on ``v``'s own side of the pair -- ``n_1`` when
      ``v`` is ``bc_1``, otherwise ``n_2``;
    * ``after`` sums the count on the other side.

    A code that appears in no row contributes zero to both sums, so the
    result is ``1.0``.
    """
    rows = df[(df['bc_1'] == v) | (df['bc_2'] == v)]
    # Column-wise selection instead of a row-by-row apply: same per-row choice,
    # and it stays well defined when ``rows`` is empty.
    v_is_first = rows['bc_1'] == v
    before = np.where(v_is_first, rows['n_1'], rows['n_2']).sum()
    after = np.where(v_is_first, rows['n_2'], rows['n_1']).sum()
    return 1 + 0.5 * (before - after)
# Right-hand-side vector: one get_b value per body system, labelled by code.
b = pd.Series(list(map(get_b, body_systems)), index=body_systems)
b
bs01lr -2325.5 bs02lr -32545.5 bs03lr 235467.0 bs04lr -25599.5 bs05lr 160016.0 bs06lr -91299.5 bs07lr 310095.0 bs08lr -5315.5 bs09lr 1696.0 bs10lr -17708.5 bs11lr -252.0 bs12lr -64952.5 bs13lr 5269.5 bs14lr -8357.5 bs16lr -127663.5 bs17lr -65917.0 bs18lr -37673.5 bs19lr -109307.5 rare_disease -123607.0 dtype: float64
# Same matrix as M with 2 added to every diagonal entry.
C = M + 2 * np.eye(len(M))
C
bs01lr | bs02lr | bs03lr | bs04lr | bs05lr | bs06lr | bs07lr | bs08lr | bs09lr | bs10lr | bs11lr | bs12lr | bs13lr | bs14lr | bs16lr | bs17lr | bs18lr | bs19lr | rare_disease | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
bs01lr | 637627.0 | -25539.0 | -46713.0 | -15419.0 | -48573.0 | -50416.0 | -48145.0 | -35390.0 | -41477.0 | -30942.0 | -107.0 | -33270.0 | -52303.0 | -3450.0 | -58821.0 | -24687.0 | -10835.0 | -73860.0 | -37678.0 |
bs02lr | -25539.0 | 893457.0 | -76389.0 | -25380.0 | -61200.0 | -75341.0 | -78020.0 | -50145.0 | -56412.0 | -47919.0 | -94.0 | -43067.0 | -70433.0 | -4632.0 | -85087.0 | -27333.0 | -12250.0 | -108658.0 | -45556.0 |
bs03lr | -46713.0 | -76389.0 | 1837424.0 | -41624.0 | -136904.0 | -175336.0 | -163878.0 | -99835.0 | -116089.0 | -94713.0 | -198.0 | -79501.0 | -159663.0 | -8227.0 | -183868.0 | -57651.0 | -24303.0 | -266932.0 | -105598.0 |
bs04lr | -15419.0 | -25380.0 | -41624.0 | 518131.0 | -33595.0 | -40487.0 | -46325.0 | -28972.0 | -34101.0 | -29774.0 | -55.0 | -21993.0 | -39135.0 | -2853.0 | -45582.0 | -16403.0 | -8571.0 | -57584.0 | -30276.0 |
bs05lr | -48573.0 | -61200.0 | -136904.0 | -33595.0 | 1680060.0 | -149652.0 | -135241.0 | -92287.0 | -109153.0 | -76799.0 | -304.0 | -71780.0 | -153221.0 | -7370.0 | -173381.0 | -63430.0 | -24713.0 | -241322.0 | -101133.0 |
bs06lr | -50416.0 | -75341.0 | -175336.0 | -40487.0 | -149652.0 | 1889801.0 | -177224.0 | -102281.0 | -122021.0 | -95987.0 | -259.0 | -81166.0 | -170218.0 | -8488.0 | -189985.0 | -63380.0 | -25489.0 | -265552.0 | -96517.0 |
bs07lr | -48145.0 | -78020.0 | -163878.0 | -46325.0 | -135241.0 | -177224.0 | 1850266.0 | -101596.0 | -114760.0 | -100083.0 | -113.0 | -79493.0 | -155918.0 | -8284.0 | -189170.0 | -55262.0 | -24451.0 | -272447.0 | -99854.0 |
bs08lr | -35390.0 | -50145.0 | -99835.0 | -28972.0 | -92287.0 | -102281.0 | -101596.0 | 1217721.0 | -79064.0 | -60762.0 | -209.0 | -54032.0 | -101529.0 | -5858.0 | -118668.0 | -42901.0 | -18460.0 | -155308.0 | -70422.0 |
bs09lr | -41477.0 | -56412.0 | -116089.0 | -34101.0 | -109153.0 | -122021.0 | -114760.0 | -79064.0 | 1401262.0 | -70168.0 | -191.0 | -61867.0 | -120766.0 | -6734.0 | -138066.0 | -49362.0 | -20587.0 | -182189.0 | -78253.0 |
bs10lr | -30942.0 | -47919.0 | -94713.0 | -29774.0 | -76799.0 | -95987.0 | -100083.0 | -60762.0 | -70168.0 | 1110991.0 | -263.0 | -48565.0 | -89969.0 | -5923.0 | -105469.0 | -34665.0 | -16355.0 | -141002.0 | -61631.0 |
bs11lr | -107.0 | -94.0 | -198.0 | -55.0 | -304.0 | -259.0 | -113.0 | -209.0 | -191.0 | -263.0 | 3462.0 | -154.0 | -304.0 | -12.0 | -331.0 | -127.0 | -45.0 | -481.0 | -213.0 |
bs12lr | -33270.0 | -43067.0 | -79501.0 | -21993.0 | -71780.0 | -81166.0 | -79493.0 | -54032.0 | -61867.0 | -48565.0 | -154.0 | 981039.0 | -81554.0 | -5144.0 | -92211.0 | -37046.0 | -15882.0 | -117168.0 | -57144.0 |
bs13lr | -52303.0 | -70433.0 | -159663.0 | -39135.0 | -153221.0 | -170218.0 | -155918.0 | -101529.0 | -120766.0 | -89969.0 | -304.0 | -81554.0 | 1870609.0 | -8584.0 | -189026.0 | -72231.0 | -28247.0 | -270537.0 | -106969.0 |
bs14lr | -3450.0 | -4632.0 | -8227.0 | -2853.0 | -7370.0 | -8488.0 | -8284.0 | -5858.0 | -6734.0 | -5923.0 | -12.0 | -5144.0 | -8584.0 | 108175.0 | -9595.0 | -3989.0 | -1922.0 | -11544.0 | -5564.0 |
bs16lr | -58821.0 | -85087.0 | -183868.0 | -45582.0 | -173381.0 | -189985.0 | -189170.0 | -118668.0 | -138066.0 | -105469.0 | -331.0 | -92211.0 | -189026.0 | -9595.0 | 2104295.0 | -73645.0 | -29656.0 | -293391.0 | -128341.0 |
bs17lr | -24687.0 | -27333.0 | -57651.0 | -16403.0 | -63430.0 | -63380.0 | -55262.0 | -42901.0 | -49362.0 | -34665.0 | -127.0 | -37046.0 | -72231.0 | -3989.0 | -73645.0 | 772280.0 | -15380.0 | -93292.0 | -41494.0 |
bs18lr | -10835.0 | -12250.0 | -24303.0 | -8571.0 | -24713.0 | -25489.0 | -24451.0 | -18460.0 | -20587.0 | -16355.0 | -45.0 | -15882.0 | -28247.0 | -1922.0 | -29656.0 | -15380.0 | 330291.0 | -34813.0 | -18330.0 |
bs19lr | -73860.0 | -108658.0 | -266932.0 | -57584.0 | -241322.0 | -265552.0 | -272447.0 | -155308.0 | -182189.0 | -141002.0 | -481.0 | -117168.0 | -270537.0 | -11544.0 | -293391.0 | -93292.0 | -34813.0 | 2741913.0 | -155831.0 |
rare_disease | -37678.0 | -45556.0 | -105598.0 | -30276.0 | -101133.0 | -96517.0 | -99854.0 | -70422.0 | -78253.0 | -61631.0 | -213.0 | -57144.0 | -106969.0 | -5564.0 | -128341.0 | -41494.0 | -18330.0 | -155831.0 | 1240806.0 |
from sklearn.linear_model import LinearRegression, Ridge, Lasso
# Fit ordinary least squares with the rows of C as samples and b as the target;
# the per-column coefficients are read from model.coef_ afterwards.
# NOTE(review): LinearRegression() is built with its defaults, which per the
# scikit-learn documentation include fitting an intercept, so coef_ need not be
# the exact solution of C @ x = b -- confirm whether fit_intercept=False (or
# np.linalg.solve) was intended.
model = LinearRegression()
model.fit(C, b)
LinearRegression()
# Fitted coefficients labelled by body-system code, largest first.
coef = pd.Series(model.coef_, index=C.columns).sort_values(ascending=False)
# Same ordering as a table with the readable name, the code and the value.
coef_df = (
    coef.to_frame(name='coefficient')
    .reset_index()
    .rename(columns={'index': 'code'})
    .assign(name=lambda frame: frame['code'].map(lambda code: c2v[code]))
    .loc[:, ['name', 'code', 'coefficient']]
)
coef_df
name | code | coefficient | |
---|---|---|---|
0 | Diseases_of_the_circulatory_system | bs07lr | 0.175061 |
1 | Endocrine,nutritional_and_metabolic diseases_a... | bs03lr | 0.138932 |
2 | Mental_disorders | bs05lr | 0.107442 |
3 | Diseases_of_the_musculoskeletal_system_and_con... | bs13lr | 0.022813 |
4 | Diseases_of_the_digestive_system | bs09lr | 0.021342 |
5 | Diseases_of_the_respiratory_system | bs08lr | 0.016024 |
6 | Infectious_and_parasitic_diseases | bs01lr | 0.014166 |
7 | Diseases_of_the_genitourinary_system | bs10lr | 0.006209 |
8 | V_codes | bs19lr | -0.010618 |
9 | Neoplasms | bs02lr | -0.013564 |
10 | Diseases_of_the_nervous_system_and_sense_organs | bs06lr | -0.020825 |
11 | Diseases_of_the_blood_and_bloodforming_organs | bs04lr | -0.027325 |
12 | Symptoms,_signs,_and_ill_defined_conditions | bs16lr | -0.033625 |
13 | Diseases_of_the_skin_and_subcutaneous_tissue | bs12lr | -0.043886 |
14 | Congenital_anomalies | bs14lr | -0.058136 |
15 | Complications_of_pregnancy,_childbirth,_and_th... | bs11lr | -0.062223 |
16 | Injury_and_poisoning | bs17lr | -0.064011 |
17 | RareDX | rare_disease | -0.073413 |
18 | E_codes | bs18lr | -0.094364 |
def get_avg_days_to(code1, code2):
    """Return the stored pair statistics for ``code1`` and ``code2``.

    The module-level ``df`` is searched for the first row that pairs the two
    codes in either orientation.  The result is a dict holding the requested
    order (``bc_current`` = ``code1``, ``bc_next`` = ``code2``), the orientation
    actually stored in the row (``bc_1``, ``bc_2``) and that row's ``n``,
    ``n_1``, ``n_2``, ``sum_day_diff`` and ``avg_day_diff``.

    Sign convention of the day differences (original author's note): a
    positive value means ``bc_1`` came before ``bc_2``; a negative value means
    ``bc_2`` came before ``bc_1``.

    Raises:
        IndexError: if no row of ``df`` pairs the two codes.
    """
    # Boolean masks rather than a string-built df.query(): a code containing a
    # quote character cannot break the expression.
    stored_as_given = (df['bc_1'] == code1) & (df['bc_2'] == code2)
    stored_swapped = (df['bc_1'] == code2) & (df['bc_2'] == code1)
    matches = df[stored_as_given | stored_swapped]
    if matches.empty:
        raise IndexError(f'no row in df pairs {code1!r} with {code2!r}')
    r = matches.iloc[0]
    return {
        'bc_current': code1,
        'bc_next': code2,
        'bc_1': r['bc_1'],
        'bc_2': r['bc_2'],
        'n': r['n'],
        'n_1': r['n_1'],
        'n_2': r['n_2'],
        'sum_day_diff': r['sum_day_diff'],
        'avg_day_diff': r['avg_day_diff'],
    }
# Pair every code with the one ranked directly after it and attach the stored
# statistics for that pair; the last code has no successor, so the join leaves
# its added columns empty.
codes = coef_df['code']
pair_stats = pd.DataFrame([
    get_avg_days_to(current, following)
    for current, following in zip(codes, codes.iloc[1:])
])
enriched_coef_df = coef_df.join(pair_stats)
enriched_coef_df
name | code | coefficient | bc_current | bc_next | bc_1 | bc_2 | n | n_1 | n_2 | sum_day_diff | avg_day_diff | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Diseases_of_the_circulatory_system | bs07lr | 0.175061 | bs07lr | bs03lr | bs03lr | bs07lr | 236654.0 | 71451.0 | 92427.0 | -16117593.0 | -68.0 |
1 | Endocrine,nutritional_and_metabolic diseases_a... | bs03lr | 0.138932 | bs03lr | bs05lr | bs03lr | bs05lr | 167953.0 | 70078.0 | 66826.0 | 7271597.0 | 43.0 |
2 | Mental_disorders | bs05lr | 0.107442 | bs05lr | bs13lr | bs05lr | bs13lr | 177852.0 | 88085.0 | 65136.0 | 29697660.0 | 166.0 |
3 | Diseases_of_the_musculoskeletal_system_and_con... | bs13lr | 0.022813 | bs13lr | bs09lr | bs09lr | bs13lr | 138272.0 | 60503.0 | 60263.0 | 894739.0 | 6.0 |
4 | Diseases_of_the_digestive_system | bs09lr | 0.021342 | bs09lr | bs08lr | bs08lr | bs09lr | 89307.0 | 39115.0 | 39949.0 | -1142146.0 | -12.0 |
5 | Diseases_of_the_respiratory_system | bs08lr | 0.016024 | bs08lr | bs01lr | bs01lr | bs08lr | 37774.0 | 17810.0 | 17580.0 | 1427342.0 | 37.0 |
6 | Infectious_and_parasitic_diseases | bs01lr | 0.014166 | bs01lr | bs10lr | bs01lr | bs10lr | 33429.0 | 15515.0 | 15427.0 | 3203398.0 | 95.0 |
7 | Diseases_of_the_genitourinary_system | bs10lr | 0.006209 | bs10lr | bs19lr | bs10lr | bs19lr | 150458.0 | 73732.0 | 67270.0 | -4018836.0 | -26.0 |
8 | V_codes | bs19lr | -0.010618 | bs19lr | bs02lr | bs02lr | bs19lr | 114789.0 | 54667.0 | 53991.0 | 2415423.0 | 21.0 |
9 | Neoplasms | bs02lr | -0.013564 | bs02lr | bs06lr | bs02lr | bs06lr | 80568.0 | 38094.0 | 37247.0 | 263825.0 | 3.0 |
10 | Diseases_of_the_nervous_system_and_sense_organs | bs06lr | -0.020825 | bs06lr | bs04lr | bs04lr | bs06lr | 42545.0 | 19340.0 | 21147.0 | -4051944.0 | -95.0 |
11 | Diseases_of_the_blood_and_bloodforming_organs | bs04lr | -0.027325 | bs04lr | bs16lr | bs04lr | bs16lr | 50861.0 | 23360.0 | 22222.0 | 2960253.0 | 58.0 |
12 | Symptoms,_signs,_and_ill_defined_conditions | bs16lr | -0.033625 | bs16lr | bs12lr | bs12lr | bs16lr | 98485.0 | 44621.0 | 47590.0 | 2243587.0 | 22.0 |
13 | Diseases_of_the_skin_and_subcutaneous_tissue | bs12lr | -0.043886 | bs12lr | bs14lr | bs12lr | bs14lr | 5582.0 | 2743.0 | 2401.0 | 549620.0 | 98.0 |
14 | Congenital_anomalies | bs14lr | -0.058136 | bs14lr | bs11lr | bs11lr | bs14lr | 13.0 | 5.0 | 7.0 | 1682.0 | 129.0 |
15 | Complications_of_pregnancy,_childbirth,_and_th... | bs11lr | -0.062223 | bs11lr | bs17lr | bs11lr | bs17lr | 134.0 | 46.0 | 81.0 | -17149.0 | -127.0 |
16 | Injury_and_poisoning | bs17lr | -0.064011 | bs17lr | rare_disease | bs17lr | rare_disease | 69589.0 | 20703.0 | 20791.0 | -1486110.0 | -21.0 |
17 | RareDX | rare_disease | -0.073413 | rare_disease | bs18lr | bs18lr | rare_disease | 30637.0 | 8724.0 | 9606.0 | -1870223.0 | -61.0 |
18 | E_codes | bs18lr | -0.094364 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Tabular form of b: readable name, code and the before/after value per code.
b_df = (
    b.to_frame(name='before_after')
    .reset_index()
    .rename(columns={'index': 'code'})
    .assign(name=lambda frame: frame['code'].map(lambda code: c2v[code]))
    .loc[:, ['name', 'code', 'before_after']]
)
b_df
name | code | before_after | |
---|---|---|---|
0 | Infectious_and_parasitic_diseases | bs01lr | -2325.5 |
1 | Neoplasms | bs02lr | -32545.5 |
2 | Endocrine,nutritional_and_metabolic diseases_a... | bs03lr | 235467.0 |
3 | Diseases_of_the_blood_and_bloodforming_organs | bs04lr | -25599.5 |
4 | Mental_disorders | bs05lr | 160016.0 |
5 | Diseases_of_the_nervous_system_and_sense_organs | bs06lr | -91299.5 |
6 | Diseases_of_the_circulatory_system | bs07lr | 310095.0 |
7 | Diseases_of_the_respiratory_system | bs08lr | -5315.5 |
8 | Diseases_of_the_digestive_system | bs09lr | 1696.0 |
9 | Diseases_of_the_genitourinary_system | bs10lr | -17708.5 |
10 | Complications_of_pregnancy,_childbirth,_and_th... | bs11lr | -252.0 |
11 | Diseases_of_the_skin_and_subcutaneous_tissue | bs12lr | -64952.5 |
12 | Diseases_of_the_musculoskeletal_system_and_con... | bs13lr | 5269.5 |
13 | Congenital_anomalies | bs14lr | -8357.5 |
14 | Symptoms,_signs,_and_ill_defined_conditions | bs16lr | -127663.5 |
15 | Injury_and_poisoning | bs17lr | -65917.0 |
16 | E_codes | bs18lr | -37673.5 |
17 | V_codes | bs19lr | -109307.5 |
18 | RareDX | rare_disease | -123607.0 |
# Write the matrices, the right-hand side and both ordering tables to S3.
# NOTE(review): index=False means M and C are written without their row
# labels; only the header row carries the codes -- confirm consumers rely on
# row order matching column order.
for _frame, _uri in (
    (M, 's3://jeeforce-artifacts/alemi/diabetes/M.csv'),
    (C, 's3://jeeforce-artifacts/alemi/diabetes/C.csv'),
    (b_df, 's3://jeeforce-artifacts/alemi/diabetes/b.csv'),
    (coef_df, 's3://jeeforce-artifacts/alemi/diabetes/ordering.csv'),
    (enriched_coef_df, 's3://jeeforce-artifacts/alemi/diabetes/ordering-enriched.csv'),
):
    _frame.to_csv(_uri, index=False)