This example uses the same data set as the first example, but Mallows'
$C_p$ statistic is used as the criterion rather than
$R^2$. Note that when Mallows'
$C_p$ statistic (or adjusted
$R^2$) is specified, the method
`setMaximumBestFound` is used to indicate the total number of "best"
regressions (rather than indicating the number of best regressions per subset
size, as in the case of the
$R^2$ criterion). In this
example, the three best regressions are found to be (1, 2), (1, 2, 4), and (1,
2, 3).
import java.text.*;
import com.imsl.stat.*;
import com.imsl.math.PrintMatrix;
import com.imsl.math.PrintMatrixFormat;
/**
 * Example: best-subset regression with {@code SelectionRegression} using
 * Mallows' Cp as the selection criterion.
 *
 * <p>The program fits a 13-observation, 4-regressor data set, prints the
 * criterion value and variable indices of the candidate regressions for each
 * subset size, and then prints the coefficient statistics of the overall best
 * regressions.
 */
public class SelectionRegressionEx2 {

    /**
     * Total number of "best" regressions requested via
     * {@code setMaximumBestFound}; also the number of coefficient tables
     * printed, so the two can never drift apart.
     */
    private static final int MAX_BEST = 3;

    /**
     * Name of the criterion shown in the printed headings. It must describe
     * the criterion actually selected in {@link #main}; earlier revisions of
     * this example printed "R-squared" here even though Mallows' Cp values
     * were being reported, so previously captured output shows that label.
     */
    private static final String CRITERION_LABEL = "Mallows Cp";

    /**
     * Runs the example and writes the results to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the regression computation reports an error
     */
    public static void main(String[] args) throws Exception {
        // One row per observation, one column per candidate regressor.
        double[][] x = {
            {7., 26., 6., 60.},
            {1., 29., 15., 52.},
            {11., 56., 8., 20.},
            {11., 31., 8., 47.},
            {7., 52., 6., 33.},
            {11., 55., 9., 22.},
            {3., 71., 17., 6.},
            {1., 31., 22., 44.},
            {2., 54., 18., 22.},
            {21., 47., 4., 26.},
            {1., 40., 23., 34.},
            {11., 66., 9., 12.},
            {10., 68., 8., 12.}};
        // Response values, one per row of x.
        double[] y = {
            78.5,
            74.3,
            104.3,
            87.6,
            95.9,
            109.2,
            102.7,
            72.5,
            93.1,
            115.9,
            83.8,
            113.3,
            109.4};

        // Derive the regressor count from the data instead of repeating "4".
        int nVars = x[0].length;

        SelectionRegression sr = new SelectionRegression(nVars);
        // Static constant: reference it through the class, not the instance.
        sr.setCriterionOption(SelectionRegression.MALLOWS_CP_CRITERION);
        sr.setMaximumBestFound(MAX_BEST);
        sr.compute(x, y);
        SelectionRegression.Statistics stats = sr.getStatistics();

        printCriterionValues(stats, nVars);
        printBestCoefficients(stats, MAX_BEST);
    }

    /**
     * Prints, for each index 1..nVars passed to the statistics object, the
     * criterion values and the variable indices of the reported regressions.
     *
     * @param stats statistics returned by the fitted selection regression
     * @param nVars number of candidate regressors (largest subset size)
     */
    private static void printCriterionValues(
            SelectionRegression.Statistics stats, int nVars) {
        MessageFormat heading =
                new MessageFormat("Regressions with {0} variable(s) ({1})");

        for (int size = 1; size <= nVars; size++) {
            double[] criterion = stats.getCriterionValues(size);
            int[][] variables = stats.getIndependentVariables(size);

            Object[] headingArgs = {Integer.valueOf(size), CRITERION_LABEL};
            System.out.println(heading.format(headingArgs));
            System.out.println(" Criterion Variables");

            for (int j = 0; j < criterion.length; j++) {
                // Build the row first so each regression is written once.
                StringBuilder row = new StringBuilder();
                row.append(" ").append(criterion[j]).append(" ");
                for (int k = 0; k < variables[j].length; k++) {
                    row.append(variables[j][k]).append(" ");
                }
                System.out.println(row);
            }
            System.out.println();
        }
    }

    /**
     * Prints the coefficient statistics table for each of the first
     * {@code count} regressions held by the statistics object.
     *
     * @param stats statistics returned by the fitted selection regression
     * @param count number of regressions to print (indices 0..count-1)
     */
    private static void printBestCoefficients(
            SelectionRegression.Statistics stats, int count) {
        MessageFormat heading = new MessageFormat(
                "Best Regressions with {0} variable(s) ({1})");

        for (int i = 0; i < count; i++) {
            System.out.println();
            double[][] coef = stats.getCoefficientStatistics(i);

            // The row count of the table is used as the variable count.
            Object[] headingArgs =
                    {Integer.valueOf(coef.length), CRITERION_LABEL};
            System.out.println(heading.format(headingArgs));
            System.out.println("Variable Coefficient"
                    + " Standard Error t-statistic p-value");

            NumberFormat nf = NumberFormat.getInstance();
            nf.setMinimumFractionDigits(4);

            PrintMatrixFormat format = new PrintMatrixFormat();
            format.setNoColumnLabels();
            format.setNoRowLabels();
            format.setNumberFormat(nf);

            PrintMatrix pm = new PrintMatrix();
            pm.setColumnSpacing(10);
            pm.print(format, coef);

            System.out.println();
            System.out.println();
        }
    }
}
Regressions with 1 variable(s) (R-squared)
Criterion Variables
138.73083349167865 4
142.48640693696262 2
202.54876912345225 1
315.15428414008386 3
Regressions with 2 variable(s) (R-squared)
Criterion Variables
2.6782415983184293 1 2
5.4958508247586515 1 4
22.373111964697628 3 4
138.2259197546432 2 4
198.09465256959135 1 3
Regressions with 3 variable(s) (R-squared)
Criterion Variables
3.0182334734873457 1 2 4
3.041279723064166 1 2 3
3.4968244423484762 1 3 4
7.337473995655984 2 3 4
Regressions with 4 variable(s) (R-squared)
Criterion Variables
5.0 1 2 3 4
Best Regressions with 2 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.0000 1.4683 0.1213 12.1047 0.0000
2.0000 0.6623 0.0459 14.4424 0.0000
Best Regressions with 3 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.0000 1.4519 0.1170 12.4100 0.0000
2.0000 0.4161 0.1856 2.2418 0.0517
4.0000 -0.2365 0.1733 -1.3650 0.2054
Best Regressions with 3 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.0000 1.6959 0.2046 8.2895 0.0000
2.0000 0.6569 0.0442 14.8508 0.0000
3.0000 0.2500 0.1847 1.3536 0.2089
Link to Java source.