/*LICENSE This software is provided under the standard MIT License (below). Copyright (c) 2020, Boston University. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software, and the author should be referenced. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.*/ /******************************************************************** *bootstrap_macro_Y_M_C_continuous.sas does the bootstrap for primary analysis *of outcome Y mediator M confounder C * *Continuous mediator with assay limit analysis * *Author: Judith Lok * 4/21/2020 *Annotated: 9/27/2020 and 10/5/2020 *********************************************************************/ libname derived "C:\judith\mediation\code\derived"; libname temp "C:\judith\mediation\code\derived\temp"; filename myfile 'C:\judith\SAStemp\mylog.log'; proc format; value outdisctype 1 = "<=4 weeks" 2 = "5-8 weeks" 3 = ">8 weeks"; run; /****************** *NUMREPS: number of bootstrap samples *INDATASET: dataset to be analyzed * Needs to have a variable PATNUM that is a patient identifier *OUTDATASET: dataset that will have the resulting estimates for all bootstrap samples *OUTCOME: binary outcome Y of interest *CONFOUNDER: the common causes C of the mediator and the outcome *MEDIATORBIN: the binary mediator M: 1 if below the assay limit, 0 if above *MEDIATORCONT: the continuous mediator (in our application on the log scale) * if it is above the assay limit, * 0 if it is below the assay limit. * if MEDIATORCONT has non-zero values below the assay limit, the macro will correct that *SHIFT: Shift of the mediator (to the left) under treatment *ASSAYLIMIT_log: Assay limit of the mediator on the same scale as the mediator ************************************/ %macro bootstrap_macro_Y_M_C_cont(NUMREPS=2, INDATASET=derived.mediationdata, OUTDATASET=results_wk4_M_C_continuous, OUTCOME=sup_1_1000_week4, CONFOUNDER=NNRTIbased_num, MEDIATORBIN=Pre_ATI_SCA_below, MEDIATORCONT=Pre_ATI_SCA_log, SHIFT=1, ASSAYLIMIT_log=0); *Write to file if more than 3 bootstrap samples to avoid screen overflow; %if (&NUMREPS>3) %then %do; proc printto log=myfile; run; %end; %else %do; proc printto; run; %end; *Restrict analysis to observations with non-missing C and M; data analysisdata; set &INDATASET; if (&CONFOUNDER ne . and &MEDIATORBIN ne .); *It is possible the assay limit was not programmed into &MEDIATORCONT; *So create &MEDIATORCONT as the interaction with being above &ASSAYLIMIT_log; *That is, set it to 0 if below the assay limit; *And keep it if above the assay limit, that is, if &MEDIATORBIN=0; &MEDIATORCONT=&MEDIATORCONT*(1-&MEDIATORBIN); indata=1; run; *Create a dataset of unique patids to create bootstrap samples; data bootstrap; set analysisdata (keep=PATNUM indata); run; *Create dataset with shifted mediator values for all patients to store modeling results; *Create variable indata which is 1 for original data and 0 for dummy data; data analysisdatanotindata; set analysisdata; indata=0; if(&MEDIATORCONT-&SHIFT<=&ASSAYLIMIT_log) then do; &MEDIATORBIN=1; &MEDIATORCONT=0; end; else do; &MEDIATORBIN=0; &MEDIATORCONT=&MEDIATORCONT-&SHIFT; end; run; data analysisdata; set analysisdata analysisdatanotindata; run; proc sort data=analysisdata; by PATNUM indata; run; *******************************************************************************; * Create new dataset that resamples with replacement from our original patids *; *******************************************************************************; data bootsamp; do sampnum=1 to &numreps; do i=1 to nobs; x=ceil(ranuni(12)*nobs); set bootstrap nobs=nobs point=x; newPATNUM=i; output; end; end; stop; run; proc sort data=bootsamp; by sampnum PATNUM; run; /* wbootsamp will have sampnum, PATNUM and number of times that patid occurs in sample "sampnum" */ proc means data=bootsamp noprint; by sampnum PATNUM; var PATNUM; output out=wbootsamp N=number; run; %macro dobootstrap(); %local rep; %let rep=0; *********************************************************************************; * REP=0 is the actual dataset and REP=1 to numreps are the bootstrap replicates *; *********************************************************************************; *number is the number of times a particular patient is in the bootstrap sample; %do REP=0 %to &NUMREPS; %if &REP=0 %then %do; data bootanaldata; set analysisdata; sampnum=0; number=1; run; %end; %else %do; data bootanaldata; set wbootsamp; if (sampnum=&rep); run; data bootanaldata; merge bootanaldata (in=ini keep=sampnum PATNUM number) analysisdata; by PATNUM; if ini; run; %end; *We want to run weighted (by number) models, fit only on those with indata=1; * but store results for all; data bootanaldata; set bootanaldata; indata_number=indata*number; run; proc sort data=bootanaldata; by PATNUM indata; run; *Model for the outcome given the mediator and the common cause, fit in the real data and the real bootstrap sample; *Predictions also appear in observations with indata=0; *Print results for the main analysis and the first bootstrap sample, suppress for other bootstrap samples; %if (&REP=0 or &REP=1) %then %do; proc logistic data=bootanaldata DESCENDING; title "Outcome model"; model &OUTCOME=&CONFOUNDER &MEDIATORBIN &MEDIATORCONT; weight indata_number; output out=p_OUTCOME p=p_OUTCOME; run; %end; %else %do; proc logistic data=bootanaldata DESCENDING noprint; title "Outcome model"; model &OUTCOME=&CONFOUNDER &MEDIATORBIN &MEDIATORCONT; weight indata_number; output out=p_OUTCOME p=p_OUTCOME; run; %end; *To obtain the average outcome under an organic intervention with a certain mediator shift; *Average the expected outcome under the shifted mediator; *The shifted mediator is in the records with indata=0; proc means data=p_OUTCOME noprint; title "Expected outcome under intervention &OUTCOME &MEDIATORBIN &MEDIATORCONT &CONFOUNDER; shift=&SHIFT"; var p_OUTCOME; where (indata=0); weight number; output out=term1_indir mean=term1_indir; run; *And the mean of Y_0; proc means data=bootanaldata noprint; title "Average observed outcome"; var &OUTCOME; where (indata=1); weight number; output out=meanY_0 mean=meanY_0; run; data indirect_estimate; merge term1_indir meanY_0; run; data indirect_estimate; set indirect_estimate; indirect_estimate=term1_indir-meanY_0; run; data indirect_estimates; set %if &REP>0 %then indirect_estimates; indirect_estimate (in=a); if a then sampnum=&REP; run; %end; /* do loop over REP=number of bootstrap samples */ %mend dobootstrap; %dobootstrap; proc print data=indirect_estimates; title "POINT ESTIMATES &OUTCOME &MEDIATORBIN &MEDIATORCONT &CONFOUNDER; shift=&SHIFT"; where (sampnum=0); run; %let alphalev = .05; %let a1 = %sysevalf(&alphalev/2*100); %let a2 = %sysevalf((1 - &alphalev/2)*100); * creating confidence interval, percentile method; %let effectlist=indirect_estimate; proc univariate data=indirect_estimates alpha=.05; title "Bootstrap PERCENTILES results &OUTCOME &MEDIATORBIN &MEDIATORCONT &CONFOUNDER; shift=&SHIFT"; var &effectlist; output out=pmethod std = &EFFECTLIST pctlpts=&a1 &a2 pctlpre = &EFFECTLIST pctlname = _lb _ub ; where sampnum>0; run; proc print data=pmethod; title "FINAL RESULTS BOOTSTRAP, &OUTCOME &MEDIATORBIN &MEDIATORCONT &CONFOUNDER; shift=&SHIFT"; run; data &OUTDATASET; set indirect_estimates; run; %mend;