/**************************************************** STEP 1: USING STOMACHE CANCER DATA CLEANING DATA *****************************************************/ SELECT [ID] ,Cast([Cancer] as float) Cancer ,[I305 1] I305_1 ,[I309 81] I309_81 ,[I311 ] I311 ,[IE849 7] IE849_7 ,[I150 9] I150_9 ,[I276 1] I276_1 ,[I276 8] I276_8 ,[I530 81] I530_81 ,[I263 9] I263_9 ,[I276 51] I276_51 ,[IV15 82] IV15_82 ,[I511 9] I511_9 ,[I401 9] I401_9 ,[I787 20] I787_20 ,[I564 00] I564_00 ,[I272 4] I272_4 ,[I280 9] I280_9 ,[I285 9] I285_9 ,[I496 ] I496 ,[I458 9] I458_9 ,[I486 ] I486 ,[IV58 61] IV58_61 ,[I197 7] I197_7 ,[I578 9] I578_9 ,[I584 9] I584_9 ,[IV66 7] IV66_7 ,[I244 9] I244_9 ,[I414 01] I414_01 ,[I599 0] I599_0 ,[I414 00] I414_00 ,[I585 9] I585_9 ,[I600 00] I600_00 ,[I428 0] I428_0 ,[I427 31] I427_31 ,[I403 90] I403_90 ,Cast([Dead] as float) Dead into StomachCancer_Table FROM [HAP823].[dbo].[StomachCancer] /************************************************ Ensuring that the table was created correctly ************************************************/ SELECT * FROM StomachCancer_Table /************************************************ STEP 2: Creating Strata ************************************************/ DROP TABLE #STRATA_TABLE SELECT ID ,Cancer ,Dead ,[I150_9]+[I197_7]+[I244_9]+[I263_9]+[I272_4]+[I276_1]+[I276_51]+[I276_8]+[I280_9] +[I285_9]+[I305_1]+[I309_81]+[I311]+[I401_9]+[I403_90]+[I414_00]+[I414_01]+[I427_31] +[I428_0]+[I458_9]+[I486]+[I496]+[I511_9]+[I530_81]+[I564_00]+[I578_9]+[I584_9]+[I585_9] +[I599_0]+[I600_00]+[I787_20]+[IE849_7]+[IV15_82]+[IV58_61]+[IV66_7] Strata INTO #STRATA_TABLE FROM StomachCancer_Table ORDER BY STRATA; /************************************************ Ensuring that the table was created correctly ************************************************/ SELECT * FROM #STRATA_TABLE /************************************************ STEP 3: Creating CASES ************************************************/ DROP TABLE #CASES --Cases Select Count(distinct ID) As nCases ,SUM(IIF(Dead = 1, 1., 0.)) as a ,SUM(IIF(Dead = 0, 1., 0.)) as b ,STRATA INTO #Cases FROM #STRATA_TABLE WHERE CANCER = 1 GROUP BY STRATA /************************************************ STEP 4: Creating CONTROLS ************************************************/ DROP TABLE #Controls -- Controls Select Count(distinct ID) As nControls ,SUM(IIF(Dead = 1, 1., 0.)) as C ,SUM(IIF(Dead = 0, 1., 0.)) as D ,STRATA INTO #Controls FROM #STRATA_TABLE WHERE CANCER = 0 GROUP BY STRATA --(26761 rows affected) /************************************************ STEP 5: INNER JOIN ************************************************/ /*Inner Join*/ DROP TABLE #MATCH SELECT Round(Cast([nCases] as float)/Cast([nControls] as float),2) As [Weight] ,nControls ,c -- no cancer & dead ,d -- no cancer & not dead ,#Cases.* INTO #MATCH FROM #Cases JOIN #Controls On #Cases.Strata = #Controls.STRATA ORDER BY [WEIGHT] /************************************************ STEP 5.1: FINDING OVERLAP ************************************************/ declare @tcases as float set @tcases = (SELECT SUM(nCases) From #Cases) select sum(ncases)*100/@tcases as overlap from #MATCH -- Overlap : 53.1400966183575 /********************************************************** STEP 6: LEFT JOINING TO IMPROVE OVERLAP! **********************************************************/ DROP TABLE #MATCH_LEFT /*Left Join*/ SELECT ROW_NUMBER() OVER (ORDER BY #Cases.STRATA DESC) as ID ,'0' STRATA_SET ,Round(Cast([nCases] as float)/Cast([nControls] as float),2) As [Weight] ,nControls ,c -- no cancer & dead ,d -- no cancer & not dead ,#Cases.* INTO #MATCH_LEFT FROM #Cases LEFT JOIN #Controls On #Cases.Strata = #Controls.STRATA SELECT * FROM #MATCH_LEFT /********************************************************** STEP 6.1: Creating two tables 1. Matches on cases 2. No matches on cases **********************************************************/ DROP TABLE #MATCH_NULL SELECT * INTO #MATCH_NULL ------ Match NULL FROM #MATCH_LEFT WHERE [WEIGHT] IS NULL --(96 rows affected) DROP TABLE #MATCH_NOT_NULL SELECT * INTO #MATCH_NOT_NULL ------ Match NOT NULL FROM #MATCH_LEFT WHERE [WEIGHT] IS NOT NULL --(84 rows affected) SELECT * FROM #MATCH_NOT_NULL /********************************************************** STEP 6.2: Creating table for Control Strata **********************************************************/ DROP TABLE #CONTROLS_STRATA SELECT nControls ,c ,d ,STRATA ,RIGHT(STRATA, LEN(STRATA) -1) STRATA_1 ,RIGHT(STRATA, LEN(STRATA) -2) STRATA_2 ,RIGHT(STRATA, LEN(STRATA) -3) STRATA_3 ,RIGHT(STRATA, LEN(STRATA) -4) STRATA_4 ,RIGHT(STRATA, LEN(STRATA) -5) STRATA_5 ,RIGHT(STRATA, LEN(STRATA) -6) STRATA_6 ,RIGHT(STRATA, LEN(STRATA) -7) STRATA_7 ,RIGHT(STRATA, LEN(STRATA) -8) STRATA_8 ,RIGHT(STRATA, LEN(STRATA) -9) STRATA_9 ,RIGHT(STRATA, LEN(STRATA) -10) STRATA_10 INTO #CONTROLS_STRATA FROM #Controls /********************************************************** STEP 6.3: Creating table for Case Strata **********************************************************/ DROP TABLE #Cases_STRATA SELECT ID ,nCases ,a ,b ,RIGHT(STRATA, LEN(STRATA) -1) STRATA_1 ,RIGHT(STRATA, LEN(STRATA) -2) STRATA_2 ,RIGHT(STRATA, LEN(STRATA) -3) STRATA_3 ,RIGHT(STRATA, LEN(STRATA) -4) STRATA_4 ,RIGHT(STRATA, LEN(STRATA) -5) STRATA_5 ,RIGHT(STRATA, LEN(STRATA) -6) STRATA_6 ,RIGHT(STRATA, LEN(STRATA) -7) STRATA_7 ,RIGHT(STRATA, LEN(STRATA) -8) STRATA_8 ,RIGHT(STRATA, LEN(STRATA) -9) STRATA_9 ,RIGHT(STRATA, LEN(STRATA) -10) STRATA_10 INTO #CASES_STRATA FROM #MATCH_NULL -- (96 rows affected) /********************************************************** STEP 7: Dropping 1 attributes **********************************************************/ DROP TABLE #MATCH_1 SELECT DISTINCT ID, '1' STRATA_SET ,round(CAST([nCases] as float)/Cast([nControls] as float),2) [Weight] ,nControls ,t2.c ,t2.d ,nCases ,a ,b ,t1.STRATA_1 STRATA INTO #MATCH_1 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_1 = T2.STRATA_1 DROP TABLE #MATCHED_NULL SELECT * INTO #MATCHED_NULL FROM #MATCH_1 WHERE [WEIGHT] IS NOT NULL /********************************************************** STEP 7.1: Dropping 2 attributes **********************************************************/ DROP TABLE #MATCH_2 SELECT DISTINCT ID ,'2' STRATA_SET ,round(CAST(SUM([nCases]) as float)/Cast(SUM([nControls]) as float),2) [Weight] ,SUM(nControls) nControls ,SUM(t2.c) C ,SUM(t2.d) D ,SUM(nCases) nCases ,SUM(a) A ,SUM(b) B ,t1.STRATA_2 STRATA INTO #MATCH_2 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_2 = T2.STRATA_2 WHERE ID NOT IN (SELECT ID FROM #MATCHED_NULL) GROUP BY ID,t1.STRATA_2 INSERT INTO #MATCHED_NULL SELECT * FROM #MATCH_2 WHERE [WEIGHT] IS NOT NULL SELECT * FROM #MATCHED_NULL order by ID /********************************************************** STEP 7.2: Dropping 3 attributes **********************************************************/ DROP TABLE #MATCH_3 SELECT DISTINCT ID ,'3' STRATA_SET ,round(CAST(SUM([nCases]) as float)/Cast(SUM([nControls]) as float),2) [Weight] ,SUM(nControls) nControls ,SUM(t2.c) C ,SUM(t2.d) D ,SUM(nCases) nCases ,SUM(a) A ,SUM(b) B ,t1.STRATA_3 STRATA INTO #MATCH_3 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_3 = T2.STRATA_3 WHERE ID NOT IN (SELECT ID FROM #MATCHED_NULL) GROUP BY ID,t1.STRATA_3 INSERT INTO #MATCHED_NULL SELECT * FROM #MATCH_3 WHERE [WEIGHT] IS NOT NULL SELECT * FROM #MATCHED_NULL ORDER BY ID /********************************************************** STEP 7.3: Dropping 4 attributes **********************************************************/ DROP TABLE #MATCH_4 SELECT DISTINCT ID ,'4' STRATA_SET ,round(CAST(SUM([nCases]) as float)/Cast(SUM([nControls]) as float),2) [Weight] ,SUM(nControls) nControls ,SUM(t2.c) C ,SUM(t2.d) D ,SUM(nCases) nCases ,SUM(a) A ,SUM(b) B ,t1.STRATA_4 STRATA INTO #MATCH_4 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_4 = T2.STRATA_4 WHERE ID NOT IN (SELECT ID FROM #MATCHED_NULL) GROUP BY ID,t1.STRATA_4 INSERT INTO #MATCHED_NULL SELECT * FROM #MATCH_4 WHERE [WEIGHT] IS NOT NULL SELECT * FROM #MATCHED_NULL ORDER BY ID /********************************************************** STEP 7.4: Dropping 5 attributes **********************************************************/ DROP TABLE #MATCH_5 SELECT DISTINCT ID ,'5' STRATA_SET ,round(CAST(SUM([nCases]) as float)/Cast(SUM([nControls]) as float),2) [Weight] ,SUM(nControls) nControls ,SUM(t2.c) C ,SUM(t2.d) D ,SUM(nCases) nCases ,SUM(a) A ,SUM(b) B ,t1.STRATA_5 STRATA INTO #MATCH_5 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_5 = T2.STRATA_5 WHERE ID NOT IN (SELECT ID FROM #MATCHED_NULL) GROUP BY ID,t1.STRATA_5 INSERT INTO #MATCHED_NULL SELECT * FROM #MATCH_5 WHERE [WEIGHT] IS NOT NULL SELECT * FROM #MATCHED_NULL ORDER BY ID /********************************************************** STEP 7.5: Dropping 6 attributes **********************************************************/ DROP TABLE #MATCH_6 SELECT DISTINCT ID ,'6' STRATA_SET ,round(CAST(SUM([nCases]) as float)/Cast(SUM([nControls]) as float),2) [Weight] ,SUM(nControls) nControls ,SUM(t2.c) C ,SUM(t2.d) D ,SUM(nCases) nCases ,SUM(a) A ,SUM(b) B ,t1.STRATA_6 STRATA INTO #MATCH_6 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_6 = T2.STRATA_6 WHERE ID NOT IN (SELECT ID FROM #MATCHED_NULL) GROUP BY ID,t1.STRATA_6 INSERT INTO #MATCHED_NULL SELECT * FROM #MATCH_6 WHERE [WEIGHT] IS NOT NULL SELECT * FROM #MATCHED_NULL ORDER BY ID /********************************************************** STEP 7.6: Dropping 7 attributes **********************************************************/ DROP TABLE #MATCH_7 SELECT DISTINCT ID ,'7' STRATA_SET ,round(CAST(SUM([nCases]) as float)/Cast(SUM([nControls]) as float),2) [Weight] ,SUM(nControls) nControls ,SUM(t2.c) C ,SUM(t2.d) D ,SUM(nCases) nCases ,SUM(a) A ,SUM(b) B ,t1.STRATA_7 STRATA INTO #MATCH_7 FROM #CASES_STRATA t1 LEFT JOIN #CONTROLS_STRATA t2 on t1.STRATA_7 = T2.STRATA_7 WHERE ID NOT IN (SELECT ID FROM #MATCHED_NULL) GROUP BY ID,t1.STRATA_7 INSERT INTO #MATCHED_NULL SELECT * FROM #MATCH_7 WHERE [WEIGHT] IS NOT NULL SELECT * FROM #MATCHED_NULL ORDER BY ID /********************************************************** STEP 8: CALCULATING OVERLAP **********************************************************/ DROP TABLE #FINAL SELECT * INTO #FINAL FROM #MATCH_NOT_NULL INSERT INTO #FINAL SELECT * FROM #MATCHED_NULL select * from #FINAL /**************************************************************************************************************** CASES THAT DID NOT MATCH ORIGINALLY ID STRATA_SET Weight nControls c d nCases a b STRATA ------------------------------------------------------------------------------------------------------------- 5 1 0.07 15 2.0 13.0 1 1.0 0.0 0001001000001001000000000000000000 6 1 0.02 53 4.0 49.0 1 0.0 1.0 0001000000010000000000100000000000 7 1 0.01 77 8.0 69.0 1 0.0 1.0 0001000000001000000000000100000000 11 1 0 786 45.0 741.0 1 0.0 1.0 0000000000010000000000000000000000 31 2 0.06 72 15.0 57.0 4 4.0 0.0 000000000000000000000000000010000 28 2 0.07 14 5.0 9.0 1 1.0 0.0 000000000001000100000000100000000 27 2 0.01 103 12.0 91.0 1 1.0 0.0 000000100000000000000000000000000 23 2 0.5 2 1.0 1.0 1 1.0 0.0 000001000001000100000100000000000 20 2 0.02 44 3.0 41.0 1 1.0 0.0 001000000001000000000000000000010 139 3 1 1 0.0 1.0 1 1.0 0.0 00000000010000000000110000000100 42 3 1 1 0.0 1.0 1 1.0 0.0 00000000011000100000100000000010 37 3 0.5 2 0.0 2.0 1 1.0 0.0 01010000000000000000000000000100 50 4 0.13 8 1.0 7.0 1 0.0 1.0 1000000000010000000100000000000 13 4 0.14 21 7.0 14.0 3 3.0 0.0 1000000000100000000000110000000 34 5 0.33 6 2.0 4.0 2 0.0 2.0 000000000000001010000000000000 160 5 0.5 2 2.0 0.0 1 1.0 0.0 000000000110110010000010000000 151 5 1 1 1.0 0.0 1 0.0 1.0 000000001000000000100000110000 12 5 0.2 5 2.0 3.0 1 1.0 0.0 000010000000000000000000010000 67 5 1 1 1.0 0.0 1 1.0 0.0 000010000001000000010000000001 117 5 1 1 0.0 1.0 1 0.0 1.0 000100001000100000100000000000 9 5 0.5 2 0.0 2.0 1 1.0 0.0 000110001000000000100000000000 105 5 1 1 1.0 0.0 1 1.0 0.0 010000001101110000000110000000 99 5 1 1 0.0 1.0 1 0.0 1.0 100000000001000000100000000000 53 6 0.09 92 21.0 71.0 8 8.0 0.0 00000000000000000000000010000 48 6 0.4 5 1.0 4.0 2 0.0 2.0 00000001000000000000000010100 97 6 1 1 0.0 1.0 1 1.0 0.0 00000010000000000000110000000 96 6 0.43 7 0.0 7.0 3 3.0 0.0 01000000000000000000000001000 141 7 1 1 1.0 0.0 1 1.0 0.0 0000010000000000010000010001 3 7 0.43 7 2.0 5.0 3 0.0 3.0 0000101000000010000000100000 55 7 1 2 0.0 2.0 2 2.0 0.0 0001000000000000000100001000 103 7 1 1 0.0 1.0 1 1.0 0.0 0100001000000000100000010000 54 7 1 1 0.0 1.0 1 1.0 0.0 0100001010000001000000000000 */ /****************************************************************************************************************/ /********************************************************** STEP 8.1: CALCULATING OVERLAP **********************************************************/ DECLARE @tcases as float set @tcases = (SELECT SUM(nCases) From #Cases) SELECT ROUND(SUM(a+b)*100/@tcases,5) as overlap from #FINAL /* Original overlap - 53.1401 Matching after dropping up to 7 attributes overlap - 77.77778 */