/*LICENSE This software is provided under the standard MIT License (below). Copyright (c) 2020, Boston University. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software, and the author should be referenced. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.*/ /******************************************************************** *bootstrap_macro_Y_M_C_binary.sas does the bootstrap for primary analysis *of outcome Y mediator M confounder C binary mediator analysis * *Author: Judith Lok * 4/21/2020 *Annotated: 9/26/2020, 9/27/2020, and 10/4/2020 *9/26-28/2020: Dataset that is being used has all observed datapoints, * and only those, no longer a need to create the dummy's * before calling the macro *Datasets used: in principle, derived.mediation *********************************************************************/ libname derived "C:\judith\mediation\code\derived"; libname temp "C:\judith\mediation\code\derived\temp"; filename myfile 'C:\judith\SAStemp\mylog.log'; proc format; value outdisctype 1 = "<=4 weeks" 2 = "5-8 weeks" 3 = ">8 weeks"; run; /****************** *NUMREPS: number of bootstrap samples *INDATASET: dataset to be analyzed * Needs to have a variable PATNUM that is a patient identifier *OUTDATASET: dataset that will have the resulting estimates for all bootstrap samples *OUTCOME: binary outcome Y of interest *CONFOUNDER: the common causes C of the mediator and the outcome *MEDIATOR: the binary mediator M ************************************/ %macro bootstrap_macro_M_C(NUMREPS=2, INDATASET=derived.mediation, OUTDATASET=results_wk4_M_C_binary, OUTCOME=sup_1_1000_week4, CONFOUNDER=NNRTIbased_num, MEDIATOR=Pre_ATI_SCA_below, OR=2); *Write to file if more than 3 bootstrap samples to avoid screen overflow; %if (&NUMREPS>3) %then %do; proc printto log=myfile; run; %end; %else %do; proc printto; run; %end; *Restrict analysis to observations with non-missing C and M; *Create variable indata which is 1 for original data; data analysisdata; set &INDATASET; if (&CONFOUNDER ne . and &MEDIATOR ne .); indata=1; run; *Create a dataset of unique patids to create bootstrap samples; data bootstrap; set analysisdata (keep=PATNUM indata); run; *Create dataset with all mediator values for all patients to obtain result estimates; * for those C and M combinations; *Variable indata was 1 for original data, will now be 0 for these dummy data; data analysisdatanotindata; set analysisdata; &MEDIATOR=1-&MEDIATOR; indata=0; run; data analysisdata; set analysisdata analysisdatanotindata; run; proc sort data=analysisdata; by PATNUM indata; run; *******************************************************************************; * Create new dataset that resamples with replacement from our original patids *; *******************************************************************************; data bootsamp; do sampnum=1 to &NUMREPS; do i=1 to nobs; x=ceil(ranuni(12)*nobs); set bootstrap nobs=nobs point=x; newPATNUM=i; output; end; end; stop; run; proc sort data=bootsamp; by sampnum PATNUM; run; /* wbootsamp will have sampnum, PATNUM and number=number of times that patid occurs in sample "sampnum" */ proc means data=bootsamp noprint; by sampnum PATNUM; var PATNUM; output out=wbootsamp N=number; run; %macro dobootstrap(); %local rep; %let rep=0; *********************************************************************************; * REP=0 is the actual dataset and REP=1 to numreps are the bootstrap replicates *; *********************************************************************************; *number is the number of times a particular patient is in the bootstrap sample; %do REP=0 %to &NUMREPS; %if &REP=0 %then %do; data bootanaldata; set analysisdata; sampnum=0; number=1; run; %end; %else %do; data bootanaldata; set wbootsamp; if (sampnum=&rep); run; data bootanaldata; merge bootanaldata (in=ini keep=sampnum PATNUM number) analysisdata; by PATNUM; if ini; run; %end; *We want to run weighted (by number) models, fit only on those with indata=1; * but store results for all; data bootanaldata; set bootanaldata; indata_number=indata*number; run; proc sort data=bootanaldata; by PATNUM indata; run; *Model for mediator given the common cause, fit on the real data and the real bootstrap sample; *Predictions also appear in observations with indata=0; *Print results for the main analysis and the first bootstrap sample, suppress for other bootstrap samples; %if (&REP=0 or &REP=1) %then %do; proc logistic data=bootanaldata DESCENDING; title "Mediator model"; model &MEDIATOR=&CONFOUNDER; weight indata_number; output out=p_Mtilde_eq_1 p=p_Mtilde_eq_1; run; %end; %else %do; proc logistic data=bootanaldata DESCENDING noprint; model &MEDIATOR=&CONFOUNDER; weight indata_number; output out=p_Mtilde_eq_1 p=p_Mtilde_eq_1; run; %end; *Model for the outcome given the mediator and the common cause, fit in the real data and the real bootstrap sample; *Predictions also appear in observations with indata=0; *Print results for the main analysis and the first bootstrap sample, suppress for other bootstrap samples; %if (&REP=0 or &REP=1) %then %do; proc logistic data=bootanaldata DESCENDING; title "Outcome model"; model &OUTCOME=&CONFOUNDER &MEDIATOR; weight indata_number; output out=p_OUTCOME p=p_OUTCOME; run; %end; %else %do; proc logistic data=bootanaldata DESCENDING noprint; model &OUTCOME=&CONFOUNDER &MEDIATOR; weight indata_number; output out=p_OUTCOME p=p_OUTCOME; run; %end; *Merge original data, mediator model output, outcome model output; data indirecteffect; merge bootanaldata p_Mtilde_eq_1 (keep=PATNUM indata p_Mtilde_eq_1) p_OUTCOME (keep=PATNUM indata p_OUTCOME); by PATNUM indata; run; data indirecteffect; set indirecteffect; if (&MEDIATOR=1) then pM0tilde=p_Mtilde_eq_1; if (&MEDIATOR=0) then pM0tilde=(1-p_Mtilde_eq_1); *OR refers to hypothesized OR of M1 being below; if (&MEDIATOR=1) then pM1tilde_OR2=2*p_Mtilde_eq_1/(1+p_Mtilde_eq_1); if (&MEDIATOR=0) then pM1tilde_OR2=(1-p_Mtilde_eq_1)/(1+p_Mtilde_eq_1); if (&MEDIATOR=1) then pM1tilde_OR3=3*p_Mtilde_eq_1/(1+2*p_Mtilde_eq_1); if (&MEDIATOR=0) then pM1tilde_OR3=(1-p_Mtilde_eq_1)/(1+2*p_Mtilde_eq_1); if (&MEDIATOR=1) then pM1tilde_OR10=10*p_Mtilde_eq_1/(1+9*p_Mtilde_eq_1); if (&MEDIATOR=0) then pM1tilde_OR10=(1-p_Mtilde_eq_1)/(1+9*p_Mtilde_eq_1); if (&MEDIATOR=1) then pM1tilde_ORasked=&OR*p_Mtilde_eq_1/(1+(&OR-1)*p_Mtilde_eq_1); if (&MEDIATOR=0) then pM1tilde_ORasked=(1-p_Mtilde_eq_1)/(1+(&OR-1)*p_Mtilde_eq_1); run; *Estimate indirect effect for various odds ratios for the mediator; data indirecteffect; set indirecteffect; pYeq1_Mtilde_OR2=pM1tilde_OR2*p_OUTCOME; pYeq1_Mtilde_OR3=pM1tilde_OR3*p_OUTCOME; pYeq1_Mtilde_OR10=pM1tilde_OR10*p_OUTCOME; pYeq1_Mtilde_ORasked=pM1tilde_ORasked*p_OUTCOME; *And to compare with no effect of treatment on the mediator, and model check; pY_0_eq1=pM0tilde*p_OUTCOME; run; *First two terms/2 of indirect effect for intervention with OR of below equal to 2; proc means data=indirecteffect noprint; var pYeq1_Mtilde_OR2; weight number; output out=term1_2indirect_OR2_div2 mean=term1_2indirect_OR2_div2; run; *First two terms/2 of indirect effect for intervention with OR of below equal to 3; proc means data=indirecteffect noprint; var pYeq1_Mtilde_OR3; weight number; output out=term1_2indirect_OR3_div2 mean=term1_2indirect_OR3_div2; run; *First two terms/2 of indirect effect for intervention with OR of below equal to 10; proc means data=indirecteffect noprint; var pYeq1_Mtilde_OR10; weight number; output out=term1_2indirect_OR10_div2 mean=term1_2indirect_OR10_div2; run; *First two terms of indirect effect for intervention that puts all mediators below; proc means data=indirecteffect noprint; var p_OUTCOME; weight number; output out=allbelow mean=allbelow; where (&mediator=1); run; *First two terms/2 of indirect effect for intervention with OR of below equal to &OR; proc means data=indirecteffect noprint; var pYeq1_Mtilde_ORasked; weight number; output out=term1_2indirect_ORasked_div2 mean=term1_2indirect_ORasked_div2; run; *And the mean of Y_0; proc means data=bootanaldata noprint; var &OUTCOME; weight number; where (indata=1); output out=meanY_0 mean=meanY_0; run; data indirect_estimate; merge term1_2indirect_OR2_div2 term1_2indirect_OR3_div2 term1_2indirect_OR10_div2 term1_2indirect_ORasked_div2 allbelow meanY_0; run; data indirect_estimate; set indirect_estimate; indirect_estimate_OReq2=2*term1_2indirect_OR2_div2-meanY_0; indirect_estimate_OReq3=2*term1_2indirect_OR3_div2-meanY_0; indirect_estimate_OReq10=2*term1_2indirect_OR10_div2-meanY_0; indirect_estimate_OReqasked=2*term1_2indirect_ORasked_div2-meanY_0; indirect_estimate_allbelow=allbelow-meanY_0; run; data indirect_estimates; set %if &REP>0 %then indirect_estimates; indirect_estimate (in=a); if a then sampnum=&REP; run; %end; /* do loop over REP=number of bootstrap samples */ %mend dobootstrap; %dobootstrap; proc print data=indirect_estimates; title "POINT ESTIMATES results &OUTCOME &MEDIATOR &CONFOUNDER OR asked is &OR"; where (sampnum=0); var indirect_estimate_OReq2 indirect_estimate_OReq3 indirect_estimate_OReq10 indirect_estimate_OReqasked indirect_estimate_allbelow meanY_0; run; %let alphalev = .05; %let a1 = %sysevalf(&alphalev/2*100); %let a2 = %sysevalf((1 - &alphalev/2)*100); *Create confidence interval, Efrons percentile method; %let effectlist=indirect_estimate_OReq2 indirect_estimate_OReq3 indirect_estimate_OReq10 indirect_estimate_OReqasked indirect_estimate_allbelow; proc univariate data=indirect_estimates alpha=.05; title "PERCENTILES results &OUTCOME &mediator &confounder ORasked &OR"; var &effectlist; output out=pmethod std = &EFFECTLIST pctlpts=&a1 &a2 pctlpre = &EFFECTLIST pctlname = _lb _ub ; where sampnum>0; run; proc print data=pmethod; title "FINAL RESULTS BOOTSTRAP, &OUTCOME &mediator &confounder ORasked &OR"; var indirect_estimate_OReq2_lb indirect_estimate_OReq3_lb indirect_estimate_OReq10_lb indirect_estimate_OReqasked_lb indirect_estimate_allbelow_lb indirect_estimate_OReq2_ub indirect_estimate_OReq3_ub indirect_estimate_OReq10_ub indirect_estimate_OReqasked_ub indirect_estimate_allbelow_ub; run; data &OUTDATASET; set indirect_estimates; run; %mend;