|
A New View of Statistics | |
See the previous simulation for summary of findings on bias.
Method here is for known population between-subject variance,
i.e. no extra variance in the effect size from the denominator.
My simulations all use variables with unit variance, so here I put
denominator=1;
As I described on the main page, I used the fact that the width of
confidence intervals is proportional to 1/root(n). For this method,
you also need to know the acceptable widths of the confidence interval
for a given ES. I got those from the boundaries of the steps
in the magnitude scale. Here are the confidence intervals that come
to the boundaries of each step, and the corresponding effect sizes:
ES CI
0 0.402
0.408 0.43
0.893 0.525
1.56 0.805
3.03 2.14
-0.408 0.43
-0.893 0.525
-1.56 0.805
-3.03 2.14
I fitted a 4th-order polynomial to these data with a graphing program
(Deltagraph). This is the curve in the figure:
CI = 3.575348E-3*ES**4 + 1.565327E-1*ES**2 + 4.015905E-1
[Note: When the sample SD is used to calculate effect size,
the data and equation are slightly different.
ES CI
0 0.402
0.405 0.43
0.88 0.525
1.54 0.805
2.9 2.14
-0.405 0.43
-0.88 0.525
-1.54 0.805
-2.9 2.14
CI = 6.163550E-3*ES**4 + 1.547127E-1*ES**2 + 4.029553E-1
The above equation was used for the simulations for effect size in
cross-sectional studies and ditto longitudinal studies using the sample SD.]
I then used this equation to predict the target confidence interval (cipred)
for a given sample ES. I divided that into the actual confidence interval,
squared the result, and multiplied by the current sample size to get the
next sample size.
I calculated the exact confidence interval for the given ES using
confint=2*1.96*sdes, where sdes, the standard deviation of the effect size,
is sqrt(2/n).
The simulation now requires an additional parameter, the reliability
of the dependent variable. I ran the simulations for reliabilities
of 0.5, 0.8, 0.9, and 0.95.
Right, then, here is the simulation.
options linesize=85 pagesize=30;

/* Macro WHATEVER: builds DAT1, the starting sample for every trial.
   Reads macro variables trialn (number of trials), startn (subjects per
   trial), r (weight given to the shared true score) and es (shift added
   to the second measurement). Only trial, id and ydiff are stored. */
%macro whatever;
*effect size=&es, rely=&r;
data dat1(keep=trial ydiff id);
  do trial=1 to &trialn;
    do id=1 to &startn;
      /* one true score per subject, shared by both measurements */
      truesc=rannor(0);
      pre =sqrt(&r)*truesc+sqrt(1-&r)*rannor(0);
      post=sqrt(&r)*truesc+sqrt(1-&r)*rannor(0)+&es;
      /* change score analysed by every later step */
      ydiff=post-pre;
      output;
    end;
  end;
run;
%mend;
/* Parameters for the confidence-interval-width simulation.
   nmax1 caps the total sample size after iteration 1 and nmax2 after
   iteration 2; in iterations 3 and 4 the cap statement is commented out,
   so no limit is applied there. */
%let trialn=1000; *no. of trials;
%let r=0.95; *reliability;
%let es=0.2; *effect size;
%let startn=10; *initial sample size;
%let nmax1=20; *total size limit for 1st iteration;
%let nmax2=20; *total size limit for 2nd iteration;
/* Generate DAT1, the starting sample for every trial. */
%whatever;
/* Keep a tagged copy of the starting samples for the final comparison. */
data dat0;
  set dat1;
  dataset="initial";
run;

/* Iteration 1: n, mean and SD of the change score for each trial. */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

/* For each trial, compare the observed confidence-interval width with the
   target width predicted from the sample effect size. When the observed
   interval is too wide, generate the extra subjects; DAT2 holds only those
   new subjects (rows are written by the explicit OUTPUT in the loop). */
data dat2(keep=trial ydiff id nnew iter);
  set dat;
  iter=1;
  samplees=ydiff; /* denominator=1 */
  confint=2*1.96*stdydiff/sqrt(n);
  cipred=3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint>cipred then do;
    /* interval width scales as 1/sqrt(n), hence the squared ratio */
    nnew=round(n*(confint/cipred)**2)-n;
    /* total sample size may not exceed nmax1 */
    if nnew+n>&nmax1 then nnew=&nmax1-n;
    if nnew then do;
      do id=n+1 to nnew+n;
        truesc=rannor(0);
        pre =sqrt(&r)*truesc+sqrt(1-&r)*rannor(0);
        post=sqrt(&r)*truesc+sqrt(1-&r)*rannor(0)+&es;
        ydiff=post-pre;
        output;
      end;
    end;
  end;
run;
*proc print;
*run;
/*
*This bit checks that I've got the formulae right, by comparing the
sampling distribution with the Becker-derived es and its conf int;
*It's slashed off when the main simulation is run;
proc univariate noprint;
var samplees;
output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
title "Sampling distn for es in x-over, r=&r es=&es n=&startn";
title2 "for sample effect size";
proc print;
proc means mean std min max maxdec=2 data=dat2;
var samplees crrctes confint cipred;
title "Stats for es in x-over, r=&r es=&es n=&startn";
title2 "for Becker-derived es and its confidence interval";
run;
*/
/* 2nd iteration: add the subjects generated in iteration 1 to the sample. */
data dat1;
  set dat1 dat2;
run;
proc sort data=dat1;
  by trial;
run;
/* debugging aid, disabled: proc print; run; */

/* Recompute n, mean and SD of the change score for each enlarged trial. */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

/* Same stopping rule as iteration 1, with the total capped at nmax2.
   DAT2 again holds only the newly generated subjects. */
data dat2(keep=trial ydiff id nnew iter);
  set dat;
  iter=2;
  samplees=ydiff; /* denom=1 */
  confint=2*1.96*stdydiff/sqrt(n);
  cipred=3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint>cipred then do;
    nnew=round(n*(confint/cipred)**2)-n;
    if nnew+n>&nmax2 then nnew=&nmax2-n;
    if nnew then do;
      do id=n+1 to nnew+n;
        truesc=rannor(0);
        pre =sqrt(&r)*truesc+sqrt(1-&r)*rannor(0);
        post=sqrt(&r)*truesc+sqrt(1-&r)*rannor(0)+&es;
        ydiff=post-pre;
        output;
      end;
    end;
  end;
run;
/* 3rd iteration: add the subjects generated in iteration 2 to the sample. */
data dat1;
  set dat1 dat2;
run;
proc sort data=dat1;
  by trial;
run;

/* Recompute n, mean and SD of the change score for each enlarged trial. */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

/* Same stopping rule as before, but no upper limit on the total sample
   size is applied in this iteration. */
data dat2(keep=trial ydiff id nnew iter);
  set dat;
  iter=3;
  samplees=ydiff; /* denom=1 */
  confint=2*1.96*stdydiff/sqrt(n);
  cipred=3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint>cipred then do;
    nnew=round(n*(confint/cipred)**2)-n;
    /* disabled cap: if nnew+n>nmax2 then nnew=nmax2-n */
    if nnew then do;
      do id=n+1 to nnew+n;
        truesc=rannor(0);
        pre =sqrt(&r)*truesc+sqrt(1-&r)*rannor(0);
        post=sqrt(&r)*truesc+sqrt(1-&r)*rannor(0)+&es;
        ydiff=post-pre;
        output;
      end;
    end;
  end;
run;
/* 4th iteration: add the subjects generated in iteration 3 to the sample. */
data dat1;
  set dat1 dat2;
run;
proc sort data=dat1;
  by trial;
run;

/* Recompute n, mean and SD of the change score for each enlarged trial. */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

/* Last top-up round; as in iteration 3, no upper limit on the total
   sample size is applied. DAT2 holds only the new subjects. */
data dat2(keep=trial ydiff id nnew iter);
  set dat;
  iter=4;
  samplees=ydiff; /* denom=1 */
  confint=2*1.96*stdydiff/sqrt(n);
  cipred=3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint>cipred then do;
    nnew=round(n*(confint/cipred)**2)-n;
    /* disabled cap: if nnew+n>nmax2 then nnew=nmax2-n */
    if nnew then do;
      do id=n+1 to nnew+n;
        truesc=rannor(0);
        pre =sqrt(&r)*truesc+sqrt(1-&r)*rannor(0);
        post=sqrt(&r)*truesc+sqrt(1-&r)*rannor(0)+&es;
        ydiff=post-pre;
        output;
      end;
    end;
  end;
run;
/* Output results.
   Fix: the final-sample-size title used to reference the macro variable
   nmax, which this simulation never defines (only nmax1 and nmax2 exist),
   so the title printed an unresolved reference. It now reports nmax1 and
   nmax2. Intermediate datasets are named explicitly instead of relying on
   the most recently created dataset. */

/* Add the iteration-4 subjects and tag the accumulated sample as final. */
data dat1;
  set dat1 dat2;
  dataset="final";
run;

/* Stack the initial and final samples so both are summarised the same way. */
data datboth;
  set dat0 dat1;
run;
proc sort data=datboth;
  by dataset trial;
run;

/* Per-trial n, mean and SD of the change score, for each dataset. */
proc means noprint data=datboth;
  var ydiff;
  by dataset trial;
  output out=dat n=n mean= std=stdydiff;
run;

/* Per-trial effect size and 95% confidence limits (normal approximation). */
data dat2;
  set dat;
  samplees=ydiff; /* denom=1 */
  confint=2*1.96*stdydiff/sqrt(n);
  confliml=samplees-confint/2;
  conflimu=samplees+confint/2;
run;

/* Average the per-trial statistics over trials: initial versus final. */
proc means noprint data=dat2;
  var n samplees confint confliml conflimu;
  by dataset;
  output out=essumm mean=;
run;
proc print noobs data=essumm;
  var dataset n samplees confint confliml conflimu;
  format _numeric_ 5.2 n 4.;
  title "ES stats rely=&r es=&es startn=&startn trials=&trialn";
  title2 "nmax1=&nmax1 nmax2=&nmax2, longitudinal study, no control group";
run;

/* Sampling distribution of the final effect size across trials. */
data datfinal;
  set dat2;
  if dataset="final";
run;
proc univariate noprint data=datfinal;
  var samplees;
  output out=espctl mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;
proc print noobs data=espctl;
  format _numeric_ 5.2;
  title "Sampling distn for es, r=&r es=&es n=&startn";
  title2 "for final sample effect size";
run;

/* Distribution of the final sample size across trials. */
proc means n mean std min max maxdec=0 data=datfinal;
  var n;
  title "Stats for final sample size r=&r es=&es n=&startn nmax1=&nmax1 nmax2=&nmax2";
run;

/* Number of extra subjects added at each iteration.
   nnew is constant within a trial and iteration, so the first PROC MEANS
   reduces each trial/iteration to one row. */
proc sort data=dat1;
  by trial iter;
run;
proc means noprint data=dat1;
  var nnew;
  by trial iter;
  output out=nnewtr mean=;
run;
proc sort data=nnewtr;
  by iter;
run;
proc means noprint data=nnewtr;
  var nnew;
  by iter;
  output out=nnewit n=n mean= std=std min=min max=max;
run;
/* Rows from the starting sample have a missing iter; drop them. */
data nnewrep;
  set nnewit;
  if iter;
run;
proc print noobs data=nnewrep;
  var iter n nnew std min max;
  format _numeric_ 5.0;
  title "Number of extra observations at each iteration";
  title2 "for rely=&r es=&es startn=&startn nmax1=&nmax1 nmax2=&nmax2";
run;
**************************;
*Here is a simulation using statistical significance to stop sampling;
*I set the sample-size limit to 80 for each step, because you can
get huge predictions for the sample size for the next round;
/* Simulation 2: keep sampling until the effect is statistically significant.

   Changes from the original listing:
   - the sample-size limit, previously the literal 80 repeated in every
     iteration, is now the macro variable nmax;
   - titles no longer report nmax1 and nmax2 (marked "not used" here) or the
     undefined nmax; they report the limit actually applied;
   - the four copy-pasted iterations are generated by one macro, SIGSTEP. */

/* Macro WHATEVER: builds DAT1, the starting sample for every trial.
   Reads macro variables trialn, startn, r and es. */
%macro whatever;
*effect size=&es, rely=&r;
data dat1;
  do trial=1 to &trialn;
    do id=1 to &startn;
      true=rannor(0);
      y1=sqrt(&r)*true+sqrt(1-&r)*rannor(0);
      y2=sqrt(&r)*true+sqrt(1-&r)*rannor(0)+&es;
      ydiff=y2-y1;
      output;
    end;
  end;
  keep trial ydiff id;
run;
%mend;

/* Macro SIGSTEP: one round of the sampling-to-significance procedure.
   iter - iteration number stored in the ITER variable. For iter>1 the
           subjects generated in the previous round (DAT2) are first
           appended to DAT1.
   Reads macro variables r, es and nmax. Leaves the newly generated
   subjects in DAT2. */
%macro sigstep(iter);
  %if &iter>1 %then %do;
    data dat1;
      set dat1 dat2;
    run;
    proc sort data=dat1;
      by trial;
    run;
  %end;

  /* per-trial n, mean and SD of the change score */
  proc means noprint data=dat1;
    var ydiff;
    by trial;
    output out=dat n=n mean= std=stdydiff;
  run;

  data dat2;
    set dat;
    iter=&iter;
    samplees=ydiff; /* denominator=1 */
    /* half-width of the 95% confidence interval */
    tail=1.96*stdydiff/sqrt(n);
    /* not significant: the interval still overlaps zero */
    if abs(samplees)<tail then do;
      /* sample size at which the half-width would equal the observed effect */
      nnew=round(n*(tail/samplees)**2)-n;
      if nnew then do;
        /* predictions can be huge, so cap the total sample size */
        if n+nnew>&nmax then nnew=&nmax-n;
        do id=n+1 to nnew+n;
          true=rannor(0);
          y1=sqrt(&r)*true+sqrt(1-&r)*rannor(0);
          y2=sqrt(&r)*true+sqrt(1-&r)*rannor(0)+&es;
          ydiff=y2-y1;
          output;
        end;
      end;
    end;
    keep trial ydiff id nnew iter;
  run;
%mend;

%let trialn=1000; *no. of trials;
%let r=0.8; *reliability;
%let es=0.41; *effect size;
%let startn=10; *initial sample size;
%let nmax=80; *total sample size limit at every iteration;

%whatever;

/* Keep a tagged copy of the starting samples for the final comparison. */
data dat0;
  set dat1;
  dataset="initial";
run;

%sigstep(1)
%sigstep(2)
%sigstep(3)
%sigstep(4)

/* Output results. */
/* Add the iteration-4 subjects and tag the accumulated sample as final. */
data dat1;
  set dat1 dat2;
  dataset="final";
run;
data datboth;
  set dat0 dat1;
run;
proc sort data=datboth;
  by dataset trial;
run;

/* Per-trial n, mean and SD of the change score, for each dataset. */
proc means noprint data=datboth;
  var ydiff;
  by dataset trial;
  output out=dat n=n mean= std=stdydiff;
run;

/* Per-trial effect size and 95% confidence limits (normal approximation). */
data dat2;
  set dat;
  samplees=ydiff; /* denom=1 */
  confint=2*1.96*stdydiff/sqrt(n);
  confliml=samplees-confint/2;
  conflimu=samplees+confint/2;
run;

/* Average the per-trial statistics over trials: initial versus final. */
proc means noprint data=dat2;
  var n samplees confint confliml conflimu;
  by dataset;
  output out=essumm mean=;
run;
proc print noobs data=essumm;
  var dataset n samplees confint confliml conflimu;
  format _numeric_ 5.2 n 4.;
  title "ES stats rely=&r es=&es startn=&startn trials=&trialn";
  title2 "nmax=&nmax, longitudinal study, no control group";
run;

/* Sampling distribution of the final effect size across trials. */
data datfinal;
  set dat2;
  if dataset="final";
run;
proc univariate noprint data=datfinal;
  var samplees;
  output out=espctl mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;
proc print noobs data=espctl;
  format _numeric_ 5.2;
  title "Sampling distn for es, r=&r es=&es n=&startn";
  title2 "for final sample effect size";
run;

/* Distribution of the final sample size across trials. */
proc means n mean std min max maxdec=0 data=datfinal;
  var n;
  title "Stats for final sample size r=&r es=&es n=&startn nmax=&nmax";
run;

/* Number of extra subjects added at each iteration.
   nnew is constant within a trial and iteration, so the first PROC MEANS
   reduces each trial/iteration to one row. */
proc sort data=dat1;
  by trial iter;
run;
proc means noprint data=dat1;
  var nnew;
  by trial iter;
  output out=nnewtr mean=;
run;
proc sort data=nnewtr;
  by iter;
run;
proc means noprint data=nnewtr;
  var nnew;
  by iter;
  output out=nnewit n=n mean= std=std min=min max=max;
run;
/* Rows from the starting sample have a missing iter; drop them. */
data nnewrep;
  set nnewit;
  if iter;
run;
proc print noobs data=nnewrep;
  var iter n nnew std min max;
  format _numeric_ 5.0;
  title "Number of extra observations at each iteration";
  title2 "for rely=&r es=&es startn=&startn nmax=&nmax";
run;
Go to: Previous · Contents · Search
· Home
resources=AT=sportsci.org · webmaster=AT=sportsci.org · Sportsci Homepage · Copyright
©1997
Last updated 1 June 97