In [1]:
import pandas as pd
import numpy as np

# Body-system category name -> short code. The names are the values looked up
# from the BC1/BC2 columns when the pair table is loaded; note that the code
# sequence has no 'bs15lr' entry.
v2c = dict([
    ('Infectious_and_parasitic_diseases', 'bs01lr'),
    ('Neoplasms', 'bs02lr'),
    ('Endocrine,nutritional_and_metabolic diseases_and_immunity_disorders', 'bs03lr'),
    ('Diseases_of_the_blood_and_bloodforming_organs', 'bs04lr'),
    ('Mental_disorders', 'bs05lr'),
    ('Diseases_of_the_nervous_system_and_sense_organs', 'bs06lr'),
    ('Diseases_of_the_circulatory_system', 'bs07lr'),
    ('Diseases_of_the_respiratory_system', 'bs08lr'),
    ('Diseases_of_the_digestive_system', 'bs09lr'),
    ('Diseases_of_the_genitourinary_system', 'bs10lr'),
    ('Complications_of_pregnancy,_childbirth,_and_the_puerperium', 'bs11lr'),
    ('Diseases_of_the_skin_and_subcutaneous_tissue', 'bs12lr'),
    ('Diseases_of_the_musculoskeletal_system_and_connective_tissue', 'bs13lr'),
    ('Congenital_anomalies', 'bs14lr'),
    ('Symptoms,_signs,_and_ill_defined_conditions', 'bs16lr'),
    ('Injury_and_poisoning', 'bs17lr'),
    ('E_codes', 'bs18lr'),
    ('V_codes', 'bs19lr'),
    ('RareDX', 'rare_disease'),
])

# Reverse lookup: short code -> category name.
c2v = dict(zip(v2c.values(), v2c.keys()))

# Load the pairwise body-system table, translate the BC1/BC2 category names to
# short codes through v2c (an unknown name raises KeyError), shorten the count
# and day-difference column names, and keep only the eight working columns.
df = (
    pd.read_csv('s3://jeeforce-artifacts/alemi/diabetes/BCwithTime and Order.csv', low_memory=False)
    .assign(
        bc_1=lambda frame: frame['BC1'].apply(lambda label: v2c[label]),
        bc_2=lambda frame: frame['BC2'].apply(lambda label: v2c[label]),
    )
    .rename(
        columns={
            'Total patnum': 'n',
            'BC1 first DX Patnum': 'n_1',
            'BC2 first DX Patnum': 'n_2',
            'Same day  BC1 and BC2 Patnum': 'n_t',
            'SumDayDiff (total)': 'sum_day_diff',
            'AvgDaydiff': 'avg_day_diff',
        }
    )
    .drop(columns=['BC1', 'BC2'])
    .loc[:, ['bc_1', 'bc_2', 'n', 'n_1', 'n_2', 'n_t', 'sum_day_diff', 'avg_day_diff']]
)

df.shape
Out[1]:
(171, 8)
In [2]:
df
Out[2]:
bc_1 bc_2 n n_1 n_2 n_t sum_day_diff avg_day_diff
0 bs01lr bs02lr 27020 13534 12005 1481 2780593 102
1 bs01lr bs03lr 51702 17835 28878 4989 -14513656 -280
2 bs01lr bs04lr 16303 8354 7065 884 2894519 177
3 bs01lr bs05lr 52414 19656 28917 3841 -10176298 -194
4 bs01lr bs06lr 53127 26189 24227 2711 3378359 63
... ... ... ... ... ... ... ... ...
166 bs17lr bs19lr 96140 41135 52157 2848 -16061590 -167
167 bs17lr rare_disease 69589 20703 20791 28096 -1486110 -21
168 bs18lr bs19lr 35582 13906 20907 769 -9989873 -280
169 bs18lr rare_disease 30637 8724 9606 12308 -1870223 -61
170 bs19lr rare_disease 179395 87515 68316 23565 24914942 138

171 rows × 8 columns

In [3]:
import itertools
import numpy as np

def get_val(v1, v2, data=None):
    """Return the negated count of ordered co-occurrences for a pair of codes.

    The pair is looked up in the pair table in either orientation; the row
    stored as (bc_1 == v1, bc_2 == v2) is preferred over the reversed one.
    The result is ``-(n_1 + n_2)`` taken from the first matching row.

    Parameters
    ----------
    v1, v2 : hashable
        Body-system codes as stored in the ``bc_1`` / ``bc_2`` columns.
    data : pandas.DataFrame, optional
        Pair table with columns ``bc_1``, ``bc_2``, ``n_1`` and ``n_2``.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    number
        ``-(n_1 + n_2)`` for the pair, or ``np.nan`` when ``v1 == v2`` or
        when the pair is not present in the table.
    """
    if v1 == v2:
        return np.nan

    frame = df if data is None else data

    # Try the stored orientation first, then the mirrored one.
    for first, second in ((v1, v2), (v2, v1)):
        match = frame[(frame['bc_1'] == first) & (frame['bc_2'] == second)]
        if len(match) > 0:
            row = match.iloc[0]
            return -(row['n_1'] + row['n_2'])

    return np.nan

# Every code that appears on either side of a pair, in sorted order.
body_systems = sorted(set(df['bc_1'].unique()).union(df['bc_2'].unique()))

# Square table over the codes: off-diagonal cells come from get_val, and NaN
# cells (the diagonal and any pair missing from df) are replaced by 0.
M = pd.DataFrame(
    data=[[get_val(row_code, col_code) for col_code in body_systems] for row_code in body_systems],
    index=body_systems,
    columns=body_systems,
).fillna(0.0)
# The diagonal is still 0 here, so each row's absolute sum is its off-diagonal
# total; that total is added onto the diagonal.
M = M + np.diag(M.abs().sum(axis=1))
M
Out[3]:
bs01lr bs02lr bs03lr bs04lr bs05lr bs06lr bs07lr bs08lr bs09lr bs10lr bs11lr bs12lr bs13lr bs14lr bs16lr bs17lr bs18lr bs19lr rare_disease
bs01lr 637625.0 -25539.0 -46713.0 -15419.0 -48573.0 -50416.0 -48145.0 -35390.0 -41477.0 -30942.0 -107.0 -33270.0 -52303.0 -3450.0 -58821.0 -24687.0 -10835.0 -73860.0 -37678.0
bs02lr -25539.0 893455.0 -76389.0 -25380.0 -61200.0 -75341.0 -78020.0 -50145.0 -56412.0 -47919.0 -94.0 -43067.0 -70433.0 -4632.0 -85087.0 -27333.0 -12250.0 -108658.0 -45556.0
bs03lr -46713.0 -76389.0 1837422.0 -41624.0 -136904.0 -175336.0 -163878.0 -99835.0 -116089.0 -94713.0 -198.0 -79501.0 -159663.0 -8227.0 -183868.0 -57651.0 -24303.0 -266932.0 -105598.0
bs04lr -15419.0 -25380.0 -41624.0 518129.0 -33595.0 -40487.0 -46325.0 -28972.0 -34101.0 -29774.0 -55.0 -21993.0 -39135.0 -2853.0 -45582.0 -16403.0 -8571.0 -57584.0 -30276.0
bs05lr -48573.0 -61200.0 -136904.0 -33595.0 1680058.0 -149652.0 -135241.0 -92287.0 -109153.0 -76799.0 -304.0 -71780.0 -153221.0 -7370.0 -173381.0 -63430.0 -24713.0 -241322.0 -101133.0
bs06lr -50416.0 -75341.0 -175336.0 -40487.0 -149652.0 1889799.0 -177224.0 -102281.0 -122021.0 -95987.0 -259.0 -81166.0 -170218.0 -8488.0 -189985.0 -63380.0 -25489.0 -265552.0 -96517.0
bs07lr -48145.0 -78020.0 -163878.0 -46325.0 -135241.0 -177224.0 1850264.0 -101596.0 -114760.0 -100083.0 -113.0 -79493.0 -155918.0 -8284.0 -189170.0 -55262.0 -24451.0 -272447.0 -99854.0
bs08lr -35390.0 -50145.0 -99835.0 -28972.0 -92287.0 -102281.0 -101596.0 1217719.0 -79064.0 -60762.0 -209.0 -54032.0 -101529.0 -5858.0 -118668.0 -42901.0 -18460.0 -155308.0 -70422.0
bs09lr -41477.0 -56412.0 -116089.0 -34101.0 -109153.0 -122021.0 -114760.0 -79064.0 1401260.0 -70168.0 -191.0 -61867.0 -120766.0 -6734.0 -138066.0 -49362.0 -20587.0 -182189.0 -78253.0
bs10lr -30942.0 -47919.0 -94713.0 -29774.0 -76799.0 -95987.0 -100083.0 -60762.0 -70168.0 1110989.0 -263.0 -48565.0 -89969.0 -5923.0 -105469.0 -34665.0 -16355.0 -141002.0 -61631.0
bs11lr -107.0 -94.0 -198.0 -55.0 -304.0 -259.0 -113.0 -209.0 -191.0 -263.0 3460.0 -154.0 -304.0 -12.0 -331.0 -127.0 -45.0 -481.0 -213.0
bs12lr -33270.0 -43067.0 -79501.0 -21993.0 -71780.0 -81166.0 -79493.0 -54032.0 -61867.0 -48565.0 -154.0 981037.0 -81554.0 -5144.0 -92211.0 -37046.0 -15882.0 -117168.0 -57144.0
bs13lr -52303.0 -70433.0 -159663.0 -39135.0 -153221.0 -170218.0 -155918.0 -101529.0 -120766.0 -89969.0 -304.0 -81554.0 1870607.0 -8584.0 -189026.0 -72231.0 -28247.0 -270537.0 -106969.0
bs14lr -3450.0 -4632.0 -8227.0 -2853.0 -7370.0 -8488.0 -8284.0 -5858.0 -6734.0 -5923.0 -12.0 -5144.0 -8584.0 108173.0 -9595.0 -3989.0 -1922.0 -11544.0 -5564.0
bs16lr -58821.0 -85087.0 -183868.0 -45582.0 -173381.0 -189985.0 -189170.0 -118668.0 -138066.0 -105469.0 -331.0 -92211.0 -189026.0 -9595.0 2104293.0 -73645.0 -29656.0 -293391.0 -128341.0
bs17lr -24687.0 -27333.0 -57651.0 -16403.0 -63430.0 -63380.0 -55262.0 -42901.0 -49362.0 -34665.0 -127.0 -37046.0 -72231.0 -3989.0 -73645.0 772278.0 -15380.0 -93292.0 -41494.0
bs18lr -10835.0 -12250.0 -24303.0 -8571.0 -24713.0 -25489.0 -24451.0 -18460.0 -20587.0 -16355.0 -45.0 -15882.0 -28247.0 -1922.0 -29656.0 -15380.0 330289.0 -34813.0 -18330.0
bs19lr -73860.0 -108658.0 -266932.0 -57584.0 -241322.0 -265552.0 -272447.0 -155308.0 -182189.0 -141002.0 -481.0 -117168.0 -270537.0 -11544.0 -293391.0 -93292.0 -34813.0 2741911.0 -155831.0
rare_disease -37678.0 -45556.0 -105598.0 -30276.0 -101133.0 -96517.0 -99854.0 -70422.0 -78253.0 -61631.0 -213.0 -57144.0 -106969.0 -5564.0 -128341.0 -41494.0 -18330.0 -155831.0 1240804.0
In [4]:
# Side-by-side check per code: absolute sum of the off-diagonal entries of the
# row ('off') next to the row's diagonal entry ('diag').
pd.DataFrame(
    {
        'off': M.sub(np.diag(np.diag(M))).sum(axis=1).abs(),
        'diag': pd.Series(np.diag(M), index=M.index),
    }
)
Out[4]:
off diag
bs01lr 637625.0 637625.0
bs02lr 893455.0 893455.0
bs03lr 1837422.0 1837422.0
bs04lr 518129.0 518129.0
bs05lr 1680058.0 1680058.0
bs06lr 1889799.0 1889799.0
bs07lr 1850264.0 1850264.0
bs08lr 1217719.0 1217719.0
bs09lr 1401260.0 1401260.0
bs10lr 1110989.0 1110989.0
bs11lr 3460.0 3460.0
bs12lr 981037.0 981037.0
bs13lr 1870607.0 1870607.0
bs14lr 108173.0 108173.0
bs16lr 2104293.0 2104293.0
bs17lr 772278.0 772278.0
bs18lr 330289.0 330289.0
bs19lr 2741911.0 2741911.0
rare_disease 1240804.0 1240804.0
In [5]:
def get_b(v, data=None):
    """Return ``1 + 0.5 * (before - after)`` for one body-system code.

    For every row of the pair table that mentions ``v``:

    * when ``v`` is the ``bc_1`` code, ``n_1`` counts towards *before* and
      ``n_2`` towards *after*;
    * when ``v`` is the ``bc_2`` code, ``n_2`` counts towards *before* and
      ``n_1`` towards *after*.

    (``n_1`` / ``n_2`` are the columns renamed from 'BC1 first DX Patnum' and
    'BC2 first DX Patnum' when the table is loaded.)

    NOTE(review): the ``1 + 0.5 * (wins - losses)`` shape resembles the
    right-hand side of Colley's rating method -- confirm that is the intent.

    Parameters
    ----------
    v : hashable
        Body-system code as stored in ``bc_1`` / ``bc_2``.
    data : pandas.DataFrame, optional
        Pair table with columns ``bc_1``, ``bc_2``, ``n_1`` and ``n_2``.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    float
        ``1 + 0.5 * (before - after)``; ``1.0`` when ``v`` does not occur in
        the table at all.
    """
    frame = df if data is None else data

    as_first = frame['bc_1'] == v
    # A row naming v on both sides is counted once, as a bc_1 row, which is
    # how the previous row-wise implementation treated it.
    as_second = (frame['bc_2'] == v) & ~as_first

    before = frame.loc[as_first, 'n_1'].sum() + frame.loc[as_second, 'n_2'].sum()
    after = frame.loc[as_first, 'n_2'].sum() + frame.loc[as_second, 'n_1'].sum()
    return 1 + 0.5 * (before - after)

# One get_b value per body-system code, labelled by the code.
b = pd.Series(list(map(get_b, body_systems)), index=body_systems)
b
Out[5]:
bs01lr           -2325.5
bs02lr          -32545.5
bs03lr          235467.0
bs04lr          -25599.5
bs05lr          160016.0
bs06lr          -91299.5
bs07lr          310095.0
bs08lr           -5315.5
bs09lr            1696.0
bs10lr          -17708.5
bs11lr            -252.0
bs12lr          -64952.5
bs13lr            5269.5
bs14lr           -8357.5
bs16lr         -127663.5
bs17lr          -65917.0
bs18lr          -37673.5
bs19lr         -109307.5
rare_disease   -123607.0
dtype: float64
In [6]:
# C is M with 2 added to every diagonal entry; off-diagonal entries are unchanged.
C = M + 2 * np.identity(len(M))
C
Out[6]:
bs01lr bs02lr bs03lr bs04lr bs05lr bs06lr bs07lr bs08lr bs09lr bs10lr bs11lr bs12lr bs13lr bs14lr bs16lr bs17lr bs18lr bs19lr rare_disease
bs01lr 637627.0 -25539.0 -46713.0 -15419.0 -48573.0 -50416.0 -48145.0 -35390.0 -41477.0 -30942.0 -107.0 -33270.0 -52303.0 -3450.0 -58821.0 -24687.0 -10835.0 -73860.0 -37678.0
bs02lr -25539.0 893457.0 -76389.0 -25380.0 -61200.0 -75341.0 -78020.0 -50145.0 -56412.0 -47919.0 -94.0 -43067.0 -70433.0 -4632.0 -85087.0 -27333.0 -12250.0 -108658.0 -45556.0
bs03lr -46713.0 -76389.0 1837424.0 -41624.0 -136904.0 -175336.0 -163878.0 -99835.0 -116089.0 -94713.0 -198.0 -79501.0 -159663.0 -8227.0 -183868.0 -57651.0 -24303.0 -266932.0 -105598.0
bs04lr -15419.0 -25380.0 -41624.0 518131.0 -33595.0 -40487.0 -46325.0 -28972.0 -34101.0 -29774.0 -55.0 -21993.0 -39135.0 -2853.0 -45582.0 -16403.0 -8571.0 -57584.0 -30276.0
bs05lr -48573.0 -61200.0 -136904.0 -33595.0 1680060.0 -149652.0 -135241.0 -92287.0 -109153.0 -76799.0 -304.0 -71780.0 -153221.0 -7370.0 -173381.0 -63430.0 -24713.0 -241322.0 -101133.0
bs06lr -50416.0 -75341.0 -175336.0 -40487.0 -149652.0 1889801.0 -177224.0 -102281.0 -122021.0 -95987.0 -259.0 -81166.0 -170218.0 -8488.0 -189985.0 -63380.0 -25489.0 -265552.0 -96517.0
bs07lr -48145.0 -78020.0 -163878.0 -46325.0 -135241.0 -177224.0 1850266.0 -101596.0 -114760.0 -100083.0 -113.0 -79493.0 -155918.0 -8284.0 -189170.0 -55262.0 -24451.0 -272447.0 -99854.0
bs08lr -35390.0 -50145.0 -99835.0 -28972.0 -92287.0 -102281.0 -101596.0 1217721.0 -79064.0 -60762.0 -209.0 -54032.0 -101529.0 -5858.0 -118668.0 -42901.0 -18460.0 -155308.0 -70422.0
bs09lr -41477.0 -56412.0 -116089.0 -34101.0 -109153.0 -122021.0 -114760.0 -79064.0 1401262.0 -70168.0 -191.0 -61867.0 -120766.0 -6734.0 -138066.0 -49362.0 -20587.0 -182189.0 -78253.0
bs10lr -30942.0 -47919.0 -94713.0 -29774.0 -76799.0 -95987.0 -100083.0 -60762.0 -70168.0 1110991.0 -263.0 -48565.0 -89969.0 -5923.0 -105469.0 -34665.0 -16355.0 -141002.0 -61631.0
bs11lr -107.0 -94.0 -198.0 -55.0 -304.0 -259.0 -113.0 -209.0 -191.0 -263.0 3462.0 -154.0 -304.0 -12.0 -331.0 -127.0 -45.0 -481.0 -213.0
bs12lr -33270.0 -43067.0 -79501.0 -21993.0 -71780.0 -81166.0 -79493.0 -54032.0 -61867.0 -48565.0 -154.0 981039.0 -81554.0 -5144.0 -92211.0 -37046.0 -15882.0 -117168.0 -57144.0
bs13lr -52303.0 -70433.0 -159663.0 -39135.0 -153221.0 -170218.0 -155918.0 -101529.0 -120766.0 -89969.0 -304.0 -81554.0 1870609.0 -8584.0 -189026.0 -72231.0 -28247.0 -270537.0 -106969.0
bs14lr -3450.0 -4632.0 -8227.0 -2853.0 -7370.0 -8488.0 -8284.0 -5858.0 -6734.0 -5923.0 -12.0 -5144.0 -8584.0 108175.0 -9595.0 -3989.0 -1922.0 -11544.0 -5564.0
bs16lr -58821.0 -85087.0 -183868.0 -45582.0 -173381.0 -189985.0 -189170.0 -118668.0 -138066.0 -105469.0 -331.0 -92211.0 -189026.0 -9595.0 2104295.0 -73645.0 -29656.0 -293391.0 -128341.0
bs17lr -24687.0 -27333.0 -57651.0 -16403.0 -63430.0 -63380.0 -55262.0 -42901.0 -49362.0 -34665.0 -127.0 -37046.0 -72231.0 -3989.0 -73645.0 772280.0 -15380.0 -93292.0 -41494.0
bs18lr -10835.0 -12250.0 -24303.0 -8571.0 -24713.0 -25489.0 -24451.0 -18460.0 -20587.0 -16355.0 -45.0 -15882.0 -28247.0 -1922.0 -29656.0 -15380.0 330291.0 -34813.0 -18330.0
bs19lr -73860.0 -108658.0 -266932.0 -57584.0 -241322.0 -265552.0 -272447.0 -155308.0 -182189.0 -141002.0 -481.0 -117168.0 -270537.0 -11544.0 -293391.0 -93292.0 -34813.0 2741913.0 -155831.0
rare_disease -37678.0 -45556.0 -105598.0 -30276.0 -101133.0 -96517.0 -99854.0 -70422.0 -78253.0 -61631.0 -213.0 -57144.0 -106969.0 -5564.0 -128341.0 -41494.0 -18330.0 -155831.0 1240806.0
In [7]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# Fit an ordinary least-squares model using the rows of C as samples and b as
# the target; the per-column coefficients are read from model.coef_ afterwards.
# NOTE(review): LinearRegression fits an intercept by default, so this is not a
# plain solve of C @ x = b -- confirm whether fit_intercept=False (or
# np.linalg.solve) was intended.
model = LinearRegression()
model.fit(C, b)
Out[7]:
LinearRegression()
In [8]:
# Fitted coefficients labelled by code, largest first.
coef = pd.Series(model.coef_, index=C.columns).sort_values(ascending=False)

# Same ranking as a table, with the category name looked up from the code.
coef_df = (
    coef
    .to_frame(name='coefficient')
    .reset_index()
    .rename(columns={'index': 'code'})
    .assign(name=lambda frame: frame['code'].apply(lambda code: c2v[code]))
    .loc[:, ['name', 'code', 'coefficient']]
)
coef_df
Out[8]:
name code coefficient
0 Diseases_of_the_circulatory_system bs07lr 0.175061
1 Endocrine,nutritional_and_metabolic diseases_a... bs03lr 0.138932
2 Mental_disorders bs05lr 0.107442
3 Diseases_of_the_musculoskeletal_system_and_con... bs13lr 0.022813
4 Diseases_of_the_digestive_system bs09lr 0.021342
5 Diseases_of_the_respiratory_system bs08lr 0.016024
6 Infectious_and_parasitic_diseases bs01lr 0.014166
7 Diseases_of_the_genitourinary_system bs10lr 0.006209
8 V_codes bs19lr -0.010618
9 Neoplasms bs02lr -0.013564
10 Diseases_of_the_nervous_system_and_sense_organs bs06lr -0.020825
11 Diseases_of_the_blood_and_bloodforming_organs bs04lr -0.027325
12 Symptoms,_signs,_and_ill_defined_conditions bs16lr -0.033625
13 Diseases_of_the_skin_and_subcutaneous_tissue bs12lr -0.043886
14 Congenital_anomalies bs14lr -0.058136
15 Complications_of_pregnancy,_childbirth,_and_th... bs11lr -0.062223
16 Injury_and_poisoning bs17lr -0.064011
17 RareDX rare_disease -0.073413
18 E_codes bs18lr -0.094364
In [9]:
def get_avg_days_to(code1, code2, data=None):
    """Return the pair-table statistics for two body-system codes.

    The pair is matched in either orientation and the first matching row (in
    table order) is used. ``bc_current`` / ``bc_next`` echo the arguments,
    while ``bc_1`` / ``bc_2`` report the orientation actually stored in the
    table, which is the one the counts and day differences refer to.

    Sign convention of the day differences (from the original author's note):
    if positive, bc_1 comes before bc_2; if negative, bc_2 comes before bc_1.

    Parameters
    ----------
    code1, code2 : hashable
        Body-system codes as stored in ``bc_1`` / ``bc_2``.
    data : pandas.DataFrame, optional
        Pair table with columns ``bc_1``, ``bc_2``, ``n``, ``n_1``, ``n_2``,
        ``sum_day_diff`` and ``avg_day_diff``. Defaults to the notebook-level
        ``df``.

    Returns
    -------
    dict
        Keys ``bc_current``, ``bc_next``, ``bc_1``, ``bc_2``, ``n``, ``n_1``,
        ``n_2``, ``sum_day_diff`` and ``avg_day_diff``.

    Raises
    ------
    IndexError
        If the table has no row for the pair in either orientation.
    """
    frame = df if data is None else data

    # Boolean masks instead of a string-built query, so codes containing
    # quotes or other expression syntax are compared literally.
    forward = (frame['bc_1'] == code1) & (frame['bc_2'] == code2)
    mirrored = (frame['bc_1'] == code2) & (frame['bc_2'] == code1)
    matches = frame[forward | mirrored]
    if len(matches) == 0:
        raise IndexError(f'no row in the pair table for codes {code1!r} and {code2!r}')

    r = matches.iloc[0]
    return {
        'bc_current': code1,
        'bc_next': code2,
        'bc_1': r['bc_1'],
        'bc_2': r['bc_2'],
        'n': r['n'],
        'n_1': r['n_1'],
        'n_2': r['n_2'],
        'sum_day_diff': r['sum_day_diff'],
        'avg_day_diff': r['avg_day_diff']
    }

# Pair each ranked code with the one ranked directly after it and attach that
# pair's table statistics; the last code has no successor, so its row is NaN.
codes = coef_df['code']
enriched_coef_df = coef_df.join(
    pd.DataFrame([
        get_avg_days_to(current_code, next_code)
        for current_code, next_code in zip(codes, codes.iloc[1:])
    ])
)
enriched_coef_df
Out[9]:
name code coefficient bc_current bc_next bc_1 bc_2 n n_1 n_2 sum_day_diff avg_day_diff
0 Diseases_of_the_circulatory_system bs07lr 0.175061 bs07lr bs03lr bs03lr bs07lr 236654.0 71451.0 92427.0 -16117593.0 -68.0
1 Endocrine,nutritional_and_metabolic diseases_a... bs03lr 0.138932 bs03lr bs05lr bs03lr bs05lr 167953.0 70078.0 66826.0 7271597.0 43.0
2 Mental_disorders bs05lr 0.107442 bs05lr bs13lr bs05lr bs13lr 177852.0 88085.0 65136.0 29697660.0 166.0
3 Diseases_of_the_musculoskeletal_system_and_con... bs13lr 0.022813 bs13lr bs09lr bs09lr bs13lr 138272.0 60503.0 60263.0 894739.0 6.0
4 Diseases_of_the_digestive_system bs09lr 0.021342 bs09lr bs08lr bs08lr bs09lr 89307.0 39115.0 39949.0 -1142146.0 -12.0
5 Diseases_of_the_respiratory_system bs08lr 0.016024 bs08lr bs01lr bs01lr bs08lr 37774.0 17810.0 17580.0 1427342.0 37.0
6 Infectious_and_parasitic_diseases bs01lr 0.014166 bs01lr bs10lr bs01lr bs10lr 33429.0 15515.0 15427.0 3203398.0 95.0
7 Diseases_of_the_genitourinary_system bs10lr 0.006209 bs10lr bs19lr bs10lr bs19lr 150458.0 73732.0 67270.0 -4018836.0 -26.0
8 V_codes bs19lr -0.010618 bs19lr bs02lr bs02lr bs19lr 114789.0 54667.0 53991.0 2415423.0 21.0
9 Neoplasms bs02lr -0.013564 bs02lr bs06lr bs02lr bs06lr 80568.0 38094.0 37247.0 263825.0 3.0
10 Diseases_of_the_nervous_system_and_sense_organs bs06lr -0.020825 bs06lr bs04lr bs04lr bs06lr 42545.0 19340.0 21147.0 -4051944.0 -95.0
11 Diseases_of_the_blood_and_bloodforming_organs bs04lr -0.027325 bs04lr bs16lr bs04lr bs16lr 50861.0 23360.0 22222.0 2960253.0 58.0
12 Symptoms,_signs,_and_ill_defined_conditions bs16lr -0.033625 bs16lr bs12lr bs12lr bs16lr 98485.0 44621.0 47590.0 2243587.0 22.0
13 Diseases_of_the_skin_and_subcutaneous_tissue bs12lr -0.043886 bs12lr bs14lr bs12lr bs14lr 5582.0 2743.0 2401.0 549620.0 98.0
14 Congenital_anomalies bs14lr -0.058136 bs14lr bs11lr bs11lr bs14lr 13.0 5.0 7.0 1682.0 129.0
15 Complications_of_pregnancy,_childbirth,_and_th... bs11lr -0.062223 bs11lr bs17lr bs11lr bs17lr 134.0 46.0 81.0 -17149.0 -127.0
16 Injury_and_poisoning bs17lr -0.064011 bs17lr rare_disease bs17lr rare_disease 69589.0 20703.0 20791.0 -1486110.0 -21.0
17 RareDX rare_disease -0.073413 rare_disease bs18lr bs18lr rare_disease 30637.0 8724.0 9606.0 -1870223.0 -61.0
18 E_codes bs18lr -0.094364 NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [10]:
# b as a table: category name, code and the before/after value.
b_df = (
    b
    .to_frame(name='before_after')
    .reset_index()
    .rename(columns={'index': 'code'})
    .assign(name=lambda frame: frame['code'].apply(lambda code: c2v[code]))
    .loc[:, ['name', 'code', 'before_after']]
)
b_df
Out[10]:
name code before_after
0 Infectious_and_parasitic_diseases bs01lr -2325.5
1 Neoplasms bs02lr -32545.5
2 Endocrine,nutritional_and_metabolic diseases_a... bs03lr 235467.0
3 Diseases_of_the_blood_and_bloodforming_organs bs04lr -25599.5
4 Mental_disorders bs05lr 160016.0
5 Diseases_of_the_nervous_system_and_sense_organs bs06lr -91299.5
6 Diseases_of_the_circulatory_system bs07lr 310095.0
7 Diseases_of_the_respiratory_system bs08lr -5315.5
8 Diseases_of_the_digestive_system bs09lr 1696.0
9 Diseases_of_the_genitourinary_system bs10lr -17708.5
10 Complications_of_pregnancy,_childbirth,_and_th... bs11lr -252.0
11 Diseases_of_the_skin_and_subcutaneous_tissue bs12lr -64952.5
12 Diseases_of_the_musculoskeletal_system_and_con... bs13lr 5269.5
13 Congenital_anomalies bs14lr -8357.5
14 Symptoms,_signs,_and_ill_defined_conditions bs16lr -127663.5
15 Injury_and_poisoning bs17lr -65917.0
16 E_codes bs18lr -37673.5
17 V_codes bs19lr -109307.5
18 RareDX rare_disease -123607.0
In [11]:
# Write each result table to S3 as CSV. index=False means the row labels of
# M and C are not written; only their column headers carry the codes.
for table, destination in (
    (M, 's3://jeeforce-artifacts/alemi/diabetes/M.csv'),
    (C, 's3://jeeforce-artifacts/alemi/diabetes/C.csv'),
    (b_df, 's3://jeeforce-artifacts/alemi/diabetes/b.csv'),
    (coef_df, 's3://jeeforce-artifacts/alemi/diabetes/ordering.csv'),
    (enriched_coef_df, 's3://jeeforce-artifacts/alemi/diabetes/ordering-enriched.csv'),
):
    table.to_csv(destination, index=False)
In [ ]: