Type : CREATION_INTERNE
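Los datos se generan completamente dentro del script. La macro `%makeRegressorData` crea las variables independientes: las continuas (`x1`, `x2`, …) con valores pseudoaleatorios normales y las de clasificación (`c1`, `c2`, …) con valores pseudoaleatorios binomiales. A continuación, la macro `%AddDepVar` añade la variable dependiente `y`, calculada como una fórmula lineal de las variables creadas previamente más un término de error normal cuya escala se controla con `errorStd`.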
| 1 | /* makeRegressorData: create a data set of randomly generated regressors. |
| 2 | DATA= name of the output data set (created or replaced). |
| 3 | nObs= number of observations to write (default 500). |
| 4 | nCont= number of continuous variables x1 to x<nCont>, each filled by RANNOR (default 5). |
| 5 | nClass= number of classification variables c1 to c<nClass>, each filled by RANBIN (default 5). |
| 6 | nLev= number of binomial trials handed to RANBIN (default 3); a binomial count over |
| 7 | nLev trials takes the integer values 0 to nLev, i.e. nLev+1 distinct levels. |
| 8 | seed= seed handed to every RANNOR and RANBIN call (default 1, the value that |
| 9 | used to be hard-coded, so existing calls produce the same data). */ |
| 10 | %macro makeRegressorData(DATA=,nObs=500,nCont=5,nClass=5,nLev=3,seed=1); |
| 11 | data &DATA; |
| 12 | /* Loop counters are work variables only and must not reach the output. */ |
| 13 | drop i j; |
| 14 | /* Arrays are declared only when at least one variable of that kind is requested. */ |
| 15 | %if &nCont > 0 %then %do; |
| 16 | array x{&nCont} x1-x&nCont; |
| 17 | %end; |
| 18 | %if &nClass > 0 %then %do; |
| 19 | array c{&nClass} c1-c&nClass; |
| 20 | %end; |
| 21 | do i = 1 to &nObs; |
| 22 | %if &nCont > 0 %then %do; |
| 23 | /* Continuous regressors: one RANNOR draw per variable. */ |
| 24 | do j = 1 to &nCont; |
| 25 | x{j} = rannor(&seed); |
| 26 | end; |
| 27 | %end; |
| 28 | %if &nClass > 0 %then %do; |
| 29 | /* Class regressors: the success probability cycles with the variable index */ |
| 30 | /* (0.5 for c1, 0.4 for c2, 0.6 for c3, then the pattern repeats). */ |
| 31 | do j = 1 to &nClass; |
| 32 | if mod(j,3) = 0 then c{j} = ranbin(&seed,&nLev,.6); |
| 33 | else if mod(j,3) = 1 then c{j} = ranbin(&seed,&nLev,.5); |
| 34 | else if mod(j,3) = 2 then c{j} = ranbin(&seed,&nLev,.4); |
| 35 | end; |
| 36 | %end; |
| 37 | /* One explicit OUTPUT per loop pass writes exactly nObs observations. */ |
| 38 | output; |
| 39 | end; |
| 40 | run; |
| 41 | %mend makeRegressorData; |
| 23 | |
| 24 | /* AddDepVar: append the response variable y to an existing data set. |
| 25 | DATA= data set that is read and then overwritten under the same name. |
| 26 | modelRHS= DATA step expression giving the noise-free part of y. |
| 27 | errorStd= multiplier applied to the RANNOR draw that is added as noise (default 1). |
| 28 | seed= seed passed to RANNOR (default 1, the value that used to be hard-coded, |
| 29 | so existing calls produce the same data). |
| 30 | NOTE(review): a DATA step that also seeds its generators with 1, as |
| 31 | makeRegressorData does, presumably starts from the same random stream, so |
| 32 | pass a different seed here if the noise must not repeat those draws. Confirm. */ |
| 33 | %macro AddDepVar(DATA=,modelRHS =,errorStd = 1,seed = 1); |
| 34 | data &DATA; |
| 35 | set &DATA; |
| 36 | /* The parentheses keep an argument such as errorStd=1+2 from being split by */ |
| 37 | /* operator precedence; for a plain number the result is unchanged. */ |
| 38 | y = &modelRHS + (&errorStd) * rannor(&seed); |
| 39 | run; |
| 40 | %mend AddDepVar; |
| 1 | /* Build traindata: 500 observations with 5 continuous and 5 class regressors. */ |
| 2 | %makeRegressorData(DATA=traindata, nObs=500, nCont=5, nClass=5, nLev=3); |
| 3 | |
| 4 | /* Attach y. Only x1-x4 and c1 appear in the formula; x5 and c2-c5 do not. */ |
| 5 | %AddDepVar( |
| 6 | DATA=traindata, |
| 7 | modelRHS=x1 + 0.1*x2 - 0.1*x3 - 0.01*x4 - c1, |
| 8 | errorStd=1 |
| 9 | ); |
| 1 | /* Enable ODS Graphics so that the PLOTS= request below can produce output. */ |
| 2 | ods graphics on; |
| 3 | |
| 4 | /* LASSO selection on traindata using split effects. */ |
| 5 | proc glmselect data=traindata plots=coefficients; |
| 6 | /* SPLIT: per the GLMSELECT documentation, the design columns of a class */ |
| 7 | /* variable can enter or leave the model individually. */ |
| 8 | class c1-c5 / split; |
| 9 | /* Spline expansion of x1, likewise requested with the SPLIT option. */ |
| 10 | effect s1 = spline(x1 / split); |
| 11 | /* Candidates: s1, x2-x5 and every variable whose name starts with c. */ |
| 12 | /* At most 20 steps; the reported model is the one chosen by SBC. */ |
| 13 | model y = s1 x2-x5 c: |
| 14 | / selection=lasso(choose=sbc steps=20); |
| 15 | run; |
| 1 | /* Group LASSO selection on traindata: effects are kept together, not split. */ |
| 2 | proc glmselect data=traindata plots=coefficients; |
| 3 | class c1-c5; |
| 4 | /* s1: spline expansion of x1, handled as a single effect. */ |
| 5 | effect s1 = spline(x1); |
| 6 | /* s2: x2, x3 and x4 bundled into one collection effect. */ |
| 7 | effect s2 = collection(x2 x3 x4); |
| 8 | /* Candidates: s1, s2, x5 and every variable whose name starts with c. */ |
| 9 | /* At most 20 steps, model chosen by SBC; rho=0.8 is passed to the group */ |
| 10 | /* LASSO method (see the GLMSELECT documentation for its exact role). */ |
| 11 | model y = s1 s2 x5 c: |
| 12 | / selection=grouplasso(rho=0.8 choose=sbc steps=20); |
| 13 | run; |