A New View of Statistics Go to: Previous · Contents · Search · Home
On The Fly for the Effect-Size Statistic in
LONGITUDINAL STUDIES WITHOUT A CONTROL GROUP

```See the previous simulation for a summary of the findings on bias.

Method here is for known population between-subject variance,
i.e. no extra variance in the effect size from the denominator.
My simulations all use variables with unit variance, so here I put
denominator=1;

As I described on the main page, I used the fact that the width of
confidence intervals is proportional to 1/root(n).  For this method,
you also need to know the acceptable widths of the confidence interval
for a given ES. I got those from the boundaries of the steps
in the magnitude scale.  Here are the confidence intervals that come
to the boundaries of each step, and the corresponding effect sizes:

ES       CI
0       0.402
0.408   0.43
0.893   0.525
1.56    0.805
3.03    2.14
-0.408  0.43
-0.893  0.525
-1.56   0.805
-3.03   2.14

I fitted a 4th-order polynomial to these data with a graphing program
(Deltagraph). This is the curve in the figure:
CI = 3.575348E-3*ES**4 + 1.565327E-1*ES**2 + 4.015905E-1

[Note: When the sample SD is used to calculate effect size,
the data and equation are slightly different.
ES      CI
0      0.402
0.405  0.43
0.88   0.525
1.54   0.805
2.9    2.14
-0.405 0.43
-0.88  0.525
-1.54  0.805
-2.9   2.14

CI = 6.163550E-3*ES**4 + 1.547127E-1*ES**2 + 4.029553E-1
The above equation was used for the simulations for effect size in
cross-sectional studies and ditto longitudinal studies using the sample SD.]

I then used this equation to predict the target confidence interval (cipred)
for a given sample ES. I divided that into the actual confidence interval,
squared the result, and multiplied by the current sample size to get the
next sample size.

I calculated the exact confidence interval for the given ES using
confint=2*1.96*sdes, where sdes, the standard deviation of the effect size,
is sqrt(2/n).

The simulation now requires an additional parameter, the reliability
of the dependent variable.  I ran the simulations for reliabilities
of 0.5, 0.8, 0.9, and 0.95.

Right, then, here is the simulation.

/* Listing layout: 85 columns wide, 30 lines per page. */
options linesize=85 pagesize=30;

%macro whatever;
  /* Builds data set dat1 holding the starting sample for every trial:
     trialn trials, each with startn subjects.  Every subject gets a
     random true score; the two measurements add independent random
     error weighted by the reliability r, and the second measurement is
     shifted by the effect size es.  Only the change score (ydiff) and
     the trial and subject identifiers are kept. */
  data dat1;
    do trial = 1 to &trialn;
      do id = 1 to &startn;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
    keep trial ydiff id;
  run;
%mend;

/* Settings for the confidence-interval simulation; the %whatever call then
   generates the starting samples in data set dat1. */
%let trialn=1000; *no. of trials;
%let r=0.95; *reliability;
%let es=0.2; *effect size;
%let startn=10;  *initial sample size;
%let nmax1=20; *total size limit for 1st iteration;
%let nmax2=20; *total size limit for 2nd iteration (the matching cap is commented out in the 3rd and 4th iterations);
%whatever;

/* Keep a labelled copy of the starting samples so that they can be
   compared with the final samples in the summary output. */
data dat0;
  set dat1;
  dataset = "initial";
run;

/* Sample size, mean and SD of the change score for each trial. */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

/* Iteration 1: compare the observed confidence-interval width with the
   target width predicted from the sample effect size.  When the observed
   interval is too wide, work out how many more subjects are needed
   (total capped at nmax1) and generate them.  Trials that need no extra
   subjects contribute no rows to dat2. */
data dat2;
  set dat;
  iter = 1;
  samplees = ydiff;                      /* denominator = 1 */
  confint = 2*1.96*stdydiff/sqrt(n);
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint > cipred then do;
    nnew = round(n*(confint/cipred)**2) - n;
    if nnew + n > &nmax1 then nnew = &nmax1 - n;
    if nnew then do;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

*proc print;
*run;

/*
*This bit checks that I've got the formulae right, by comparing the
sampling distribution with the Becker-derived es and its conf int;
*It's slashed off when the main simulation is run;

proc univariate noprint;
var samplees;
output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
title "Sampling distn for es in x-over, r=&r es=&es n=&startn";
title2 "for sample effect size";

proc print;

proc means mean std min max maxdec=2 data=dat2;
var samplees crrctes confint cipred;
title "Stats for es in x-over, r=&r es=&es n=&startn";
title2 "for Becker-derived es and its confidence interval";

run;
*/

/* 2nd iteration: append the subjects added so far, recompute the
   per-trial statistics and top up again where the interval is still
   wider than the target (total capped at nmax2). */
data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

/* proc print; run;   (debug listing, disabled) */

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  iter = 2;
  samplees = ydiff;                      /* denominator = 1 */
  confint = 2*1.96*stdydiff/sqrt(n);
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint > cipred then do;
    nnew = round(n*(confint/cipred)**2) - n;
    if nnew + n > &nmax2 then nnew = &nmax2 - n;
    if nnew then do;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* 3rd iteration: same procedure as before, except that the cap on the
   total sample size is switched off in this step. */
data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  iter = 3;
  samplees = ydiff;                      /* denominator = 1 */
  confint = 2*1.96*stdydiff/sqrt(n);
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint > cipred then do;
    nnew = round(n*(confint/cipred)**2) - n;
    /* disabled cap:  if nnew+n > nmax2 then nnew = nmax2-n;  */
    if nnew then do;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* 4th iteration: last top-up round; as in the 3rd iteration the cap on
   the total sample size is switched off. */
data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  iter = 4;
  samplees = ydiff;                      /* denominator = 1 */
  confint = 2*1.96*stdydiff/sqrt(n);
  cipred = 3.575348E-3*samplees**4 + 1.565327E-1*samplees**2 + 4.015905E-1;
  if confint > cipred then do;
    nnew = round(n*(confint/cipred)**2) - n;
    /* disabled cap:  if nnew+n > nmax2 then nnew = nmax2-n;  */
    if nnew then do;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* Output results: fold in the subjects from the last iteration, label the
   completed samples "final", stack them under the "initial" samples and
   summarise effect size and confidence limits for both. */
data dat1;
  set dat1 dat2;
  dataset = "final";
run;

data datboth;
  set dat0 dat1;
run;

proc sort data=datboth;
  by dataset trial;
run;

/* One row per trial within each of the two sets of samples. */
proc means data=datboth noprint;
  by dataset trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  samplees = ydiff;                      /* denominator = 1 */
  confint  = 2*1.96*stdydiff/sqrt(n);
  confliml = samplees - confint/2;
  conflimu = samplees + confint/2;
run;

/* Average the per-trial results over trials; the automatically named
   output data set is what the following PROC PRINT lists. */
proc means data=dat2 noprint;
  by dataset;
  var n samplees confint confliml conflimu;
  output mean=;
run;

proc print noobs;
  var dataset n samplees confint confliml conflimu;
  format _numeric_ 5.2 n 4.;
  title "ES stats rely=&r es=&es startn=&startn trials=&trialn";
  title2 "nmax1=&nmax1 nmax2=&nmax2, longitudinal study, no control group";
run;

/* Sampling distribution of the final effect size, and statistics for the
   final sample size.  Reads the per-trial summary in dat2 and keeps only
   the rows for the completed ("final") samples. */
data datfinal;
  set dat2;
  if dataset="final";
run;

/* Mean plus 2.5th, 50th and 97.5th percentiles of the final effect size;
   the automatically named output data set is listed by PROC PRINT. */
proc univariate data=datfinal noprint;
  var samplees;
  output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;

proc print noobs;
  format _numeric_ 5.2;
  title "Sampling distn for es, r=&r es=&es n=&startn";
  title2 "for final sample effect size";
run;

/* The title used to reference a macro variable named nmax, which this
   program never defines (only nmax1 and nmax2 are set), so SAS left the
   reference unresolved.  Report the two limits that are actually set. */
proc means n mean std min max maxdec=0 data=datfinal;
  var n;
  title "Stats for final sample size  r=&r es=&es n=&startn nmax1=&nmax1 nmax2=&nmax2";
run;

/* Number of extra observations at each iteration.  nnew is constant
   within a trial and iteration, so the first PROC MEANS collapses the
   added subjects to one row per trial and iteration; the second then
   summarises those rows by iteration.  The intermediate data sets are
   automatically named and picked up as the most recently created one. */
proc sort data=dat1;
  by trial iter;
run;

proc means data=dat1 noprint;
  by trial iter;
  var nnew;
  output mean=;
run;

proc sort;
  by iter;
run;

proc means noprint;
  by iter;
  var nnew;
  output n=n mean= std=std min=min max=max;
run;

/* Drop the group with no iteration number (the starting samples). */
data;
  set;
  if iter;
run;

proc print noobs;
  var iter n nnew std min max;
  format _numeric_ 5.0;
  title "Number of extra observations at each iteration";
  title2 "for rely=&r es=&es startn=&startn nmax1=&nmax1 nmax2=&nmax2";
run;

**************************;

*Here is a simulation using statistical significance to stop sampling;
*I set the sample-size limit to 80 for each step,  because you can
get huge predictions for the sample size for the next round;

%macro whatever;
  /* Builds data set dat1 holding the starting sample for every trial:
     trialn trials, each with startn subjects.  Every subject gets a
     random true score; the two measurements add independent random
     error weighted by the reliability r, and the second measurement is
     shifted by the effect size es.  Only the change score (ydiff) and
     the trial and subject identifiers are kept. */
  data dat1;
    do trial = 1 to &trialn;
      do id = 1 to &startn;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
    keep trial ydiff id;
  run;
%mend;

/* Settings for the significance-based simulation; the %whatever call then
   generates the starting samples in data set dat1. */
%let trialn=1000; *no. of trials;
%let r=0.8; *reliability;
%let es=0.41; *effect size;
%let startn=10;  *initial sample size;
/* NOTE(review): the next two lines look disabled, but SAS may still execute
   a macro assignment that sits inside an asterisk-style comment in open
   code - confirm.  The data steps of this simulation use a hard-coded
   limit of 80 rather than these two settings. */
*%let nmax1=20; *not used;
*%let nmax2=20; *not used;
%whatever;

/* Keep a labelled copy of the starting samples so that they can be
   compared with the final samples in the summary output. */
data dat0;
  set dat1;
  dataset = "initial";
run;

/* Sample size, mean and SD of the change score for each trial. */
proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

/* Iteration 1: tail is half the width of the 95% confidence interval.
   When the sample effect lies inside +/- tail (not significant), work out
   the sample size at which this effect would just reach significance,
   limit the total to 80, and generate the extra subjects.  Trials that
   need no extra subjects contribute no rows to dat2. */
data dat2;
  set dat;
  iter = 1;
  samplees = ydiff;                      /* denominator = 1 */
  tail = 1.96*stdydiff/sqrt(n);
  if tail > abs(samplees) then do;
    nnew = round(n*(tail/samplees)**2) - n;
    if nnew then do;
      if n + nnew > 80 then nnew = 80 - n;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* 2nd iteration: append the subjects added so far, recompute the
   per-trial statistics and top up again where the effect is still not
   significant (total limited to 80). */
data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

/* proc print; run;   (debug listing, disabled) */

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  iter = 2;
  samplees = ydiff;                      /* denominator = 1 */
  tail = 1.96*stdydiff/sqrt(n);
  if tail > abs(samplees) then do;
    nnew = round(n*(tail/samplees)**2) - n;
    if nnew then do;
      if n + nnew > 80 then nnew = 80 - n;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* 3rd iteration: same procedure as the 2nd (total limited to 80). */
data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  iter = 3;
  samplees = ydiff;                      /* denominator = 1 */
  tail = 1.96*stdydiff/sqrt(n);
  if tail > abs(samplees) then do;
    nnew = round(n*(tail/samplees)**2) - n;
    if nnew then do;
      if n + nnew > 80 then nnew = 80 - n;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* 4th iteration: last top-up round (total limited to 80). */
data dat1;
  set dat1 dat2;
run;

proc sort data=dat1;
  by trial;
run;

proc means data=dat1 noprint;
  by trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  iter = 4;
  samplees = ydiff;                      /* denominator = 1 */
  tail = 1.96*stdydiff/sqrt(n);
  if tail > abs(samplees) then do;
    nnew = round(n*(tail/samplees)**2) - n;
    if nnew then do;
      if n + nnew > 80 then nnew = 80 - n;
      do id = n + 1 to nnew + n;
        truescore = rannor(0);
        pre  = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0);
        post = sqrt(&r)*truescore + sqrt(1-&r)*rannor(0) + &es;
        ydiff = post - pre;
        output;
      end;
    end;
  end;
  keep trial ydiff id nnew iter;
run;

/* Output results: fold in the subjects from the last iteration, label the
   completed samples "final", stack them under the "initial" samples and
   summarise effect size and confidence limits for both. */
data dat1;
  set dat1 dat2;
  dataset = "final";
run;

data datboth;
  set dat0 dat1;
run;

proc sort data=datboth;
  by dataset trial;
run;

/* One row per trial within each of the two sets of samples. */
proc means data=datboth noprint;
  by dataset trial;
  var ydiff;
  output out=dat n=n mean= std=stdydiff;
run;

data dat2;
  set dat;
  samplees = ydiff;                      /* denominator = 1 */
  confint  = 2*1.96*stdydiff/sqrt(n);
  confliml = samplees - confint/2;
  conflimu = samplees + confint/2;
run;

/* Average the per-trial results over trials; the automatically named
   output data set is what the following PROC PRINT lists. */
proc means data=dat2 noprint;
  by dataset;
  var n samplees confint confliml conflimu;
  output mean=;
run;

/* The second title line used to report nmax1 and nmax2, which this
   simulation marks as not used; the limit applied in its data steps is
   the hard-coded 80, so that is what the title now states. */
proc print noobs;
  var dataset n samplees confint confliml conflimu;
  format _numeric_ 5.2 n 4.;
  title "ES stats rely=&r es=&es startn=&startn trials=&trialn";
  title2 "nmax=80, stop on significance, longitudinal study, no control group";
run;

/* Sampling distribution of the final effect size, and statistics for the
   final sample size.  Reads the per-trial summary in dat2 and keeps only
   the rows for the completed ("final") samples. */
data datfinal;
  set dat2;
  if dataset="final";
run;

/* Mean plus 2.5th, 50th and 97.5th percentiles of the final effect size;
   the automatically named output data set is listed by PROC PRINT. */
proc univariate data=datfinal noprint;
  var samplees;
  output mean=mean pctlpre=Q pctlpts=2.5 50 97.5;
run;

proc print noobs;
  format _numeric_ 5.2;
  title "Sampling distn for es, r=&r es=&es n=&startn";
  title2 "for final sample effect size";
run;

/* The title used to reference a macro variable named nmax, which this
   program never defines.  The limit applied in this simulation is the
   hard-coded 80, so the title states that value directly. */
proc means n mean std min max maxdec=0 data=datfinal;
  var n;
  title "Stats for final sample size  r=&r es=&es n=&startn nmax=80";
run;

/* Number of extra observations at each iteration.  nnew is constant
   within a trial and iteration, so the first PROC MEANS collapses the
   added subjects to one row per trial and iteration; the second then
   summarises those rows by iteration.  The intermediate data sets are
   automatically named and picked up as the most recently created one. */
proc sort data=dat1;
  by trial iter;
run;

proc means data=dat1 noprint;
  by trial iter;
  var nnew;
  output mean=;
run;

proc sort;
  by iter;
run;

proc means noprint;
  by iter;
  var nnew;
  output n=n mean= std=std min=min max=max;
run;

/* Drop the group with no iteration number (the starting samples). */
data;
  set;
  if iter;
run;

/* The second title line used to report nmax1 and nmax2, which this
   simulation marks as not used; the limit applied in its data steps is
   the hard-coded 80, so that is what the title now states. */
proc print noobs;
  var iter n nnew std min max;
  format _numeric_ 5.0;
  title "Number of extra observations at each iteration";
  title2 "for rely=&r es=&es startn=&startn nmax=80";

run;```

Go to: Previous · Contents · Search · Home
resources=AT=sportsci.org · webmaster=AT=sportsci.org · Sportsci Homepage · Copyright ©1997
Last updated 1 June 97