Skip to content

Commit 0d49567

Browse files
authored
Add files via upload
1 parent 3d91ccd commit 0d49567

File tree

7 files changed

+1309
-0
lines changed

7 files changed

+1309
-0
lines changed

Latex File of Research Lasso, Ridge, Elastic Net.tex

Lines changed: 917 additions & 0 deletions
Large diffs are not rendered by default.

OscarExample.m

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
% Example: run OSCAR variable selection on the bundled soil data set.
% Fits soil-magnitude (column 16) on 15 soil-property predictors.

% Read the soil data set.  The data file ships next to this script, so a
% relative path is used (the original hard-coded an absolute path on one
% user's desktop, which made the example unrunnable elsewhere).  Row 1 of
% soil.csv is the header line, so reading starts at row 2 (csvread offsets
% are zero-based).
data = csvread('soil.csv', 1, 0);

% -------------- Separating features in separate variable ---------------
X = data(:, 1:15);

% -------------- Separating magnitude in separate variable --------------
y = data(:,16);

% Choose grid of parameter values to use
cvalues = [0; .01; .05; .1;.25;.5;.75;.9;1];
propvalues = [.0001; .001; .002; .00225; .0025; .00275; .0028; .0029; .003; .004; .005; .0075; .01;.025; .05; .1; .15;.2;.3;.4;.5;.6];

%%%% method = 2, chooses the sequential algorithm.

method = 2;
initcoef = []; % empty => OscarSelect computes an OLS initial estimate
[CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method); % Calls function to perform optimization on the grid.
22+

OscarReg.m

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
2+
3+
% OscarReg: exact OSCAR solver.  For every pair (c, proportion-of-bound)
% on the supplied grids, the OSCAR-constrained least-squares problem is
% solved as one quadratic program (via the TOMLAB package).  Returns, per
% grid point: the coefficient vector (CoefMatrix), the number of effective
% parameters (dfMatrix), and the residual sum of squares (SSMatrix).
%
% X should be a matrix whose rows are the observations and columns are the
% predictors (n by p). The intercept is omitted (so the response and each
% predictor should have mean zero).

function [CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef)

% p predictors; q = number of unordered predictor pairs.  The QP unknown
% is u = [beta+; beta-; eta] of length 2*p+q, where eta(k) is a slack
% bounding the pairwise maximum |beta_i|, |beta_j| for pair k.
p = length(X(1,:));
q=p*(p-1)/2;

% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
y = y-mean(y);

% Quadratic term F of the objective 0.5*u'*F*u + c'*u, built from sparse
% triplets.  Only the beta blocks are nonzero: the four p-by-p corners
% hold +/- X'X so that (beta+ - beta-)' X'X (beta+ - beta-) is recovered;
% the eta block contributes nothing to the objective.
corrmat=X'*X;
[ivec,jvec,svec]=find(corrmat);
Sparseavec=[ivec;ivec+p;ivec;ivec+p];
Sparsebvec=[jvec;jvec+p;jvec+p;jvec];
Element1vec=[svec;svec;-svec;-svec];

F=sparse(Sparseavec,Sparsebvec,Element1vec,2*p+q,2*p+q);

clear corrmat ivec jvec svec Sparseavec Sparsebvec Element1vec;

% Linear term reproduces -(X'y)'(beta+ - beta-); eta has zero cost.
c=[-X'*y;X'*y;zeros(q,1)];
% beta+ and beta- are nonnegative; eta is constrained only through A.
lowbound=[zeros(2*p,1);-inf(q,1)];

% Sparse triplets for the inequality matrix A (A*u <= b), 2*q+1 rows:
% row 1 is the OSCAR norm constraint (its weights are refilled per c value
% inside the grid loop below); each pair k contributes two rows of the
% form  b_i^+ + b_i^- - eta_k <= 0.  Rows 2-3 (pair (1,2)) are seeded
% here; the remaining pairs are appended in the loop.
Sparse1vec=[ones(2*p+q,1);2;2;2;3;3;3];
Sparse2vec=[(1:(2*p+q))';1;p+1;2*p+1;2;p+2;2*p+1];
Elementvec=[ones(2*p,1);ones(q,1);1;1;-1;1;1;-1];
rowcount=2;
paircount=2*p+1;

% Starting point: split the initial estimate into positive and negative
% parts, then append an eta value for every pair.
initcoef1=[max(initcoef,0);-min(initcoef,0)];

for i=1:p
for j=(i+1):p
initcoef1=[initcoef1;max(abs(initcoef(i)),abs(initcoef(j)))];
% Pair (1,2) already occupies rows 2-3, so skip it on the first pass.
if rowcount>2
Sparse1vec=[Sparse1vec;rowcount;rowcount;rowcount;rowcount+1;rowcount+1;rowcount+1];
Sparse2vec=[Sparse2vec;i;i+p;paircount;j;j+p;paircount];
Elementvec=[Elementvec;1;1;-1;1;1;-1];
end;
rowcount=rowcount+2;
paircount=paircount+1;
end;
end;

% Grid search over c values, then by proportion of bound. For each c, the
% constraint matrix is updated.

CoefMatrix = zeros(p,length(propvalues),length(cvalues));
SSMatrix = zeros(1,length(propvalues),length(cvalues));
dfMatrix = zeros(1,length(propvalues),length(cvalues));

for ccount = 1:length(cvalues)
cvalue = cvalues(ccount);
% Refill the norm-constraint row weights: (1-c) on each |beta| part and
% c on each pairwise-max slack eta.
tempvec=[(1-cvalue)*ones(2*p,1);cvalue*ones(q,1)];
Elementvec(1:(2*p+q))=tempvec;
clear tempvec;

A=sparse(Sparse1vec,Sparse2vec,Elementvec,2*q+1,2*p+q);
% OSCAR norm of the initial estimate; each grid bound t below is taken
% as a proportion of this value.
initialnorm=A(1,:)*initcoef1;
for propcount = 1:length(propvalues)
tbound = propvalues(propcount)*initialnorm;
b=[tbound; zeros(2*q,1)];

% Warm start from a neighbouring grid point (previous proportion for the
% same c, else same proportion for the previous c).  For the first c the
% zero start set at (1,1) is reused for all proportions.
if (ccount == 1)
if (propcount == 1)
start=zeros(2*p+q,1);
end;
elseif (propcount > 1)
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)];
% NOTE(review): abs(start(i)) is the POSITIVE part of coefficient i, so
% these eta starts ignore negative coefficients — looks like it was
% meant to be max over |coef_i|, |coef_j|; verify (affects only the
% solver's starting point, not the constraints).
for i=1:p
for j=(i+1):p
start=[start;max(abs(start(i)),abs(start(j)))];
end;
end;
else
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)];
for i=1:p
for j=(i+1):p
start=[start;max(abs(start(i)),abs(start(j)))];
end;
end;
end;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%
%%%%%%
%%%%%% The TOMLAB package is now used as a quadratic programming solver.

%%%%%% IF AN ALTERNATIVE QUADRATIC PROGRAMMING SOLVER IS AVAILABLE,
%%%%%% REPLACE THIS SECTION WITH A CALL TO THAT SOLVER BASED ON
%%%%%% THE FORMULATION

% minimize (over x) : 0.5 x' F x + c' x
% subject to A x <= b and x >= lowbound
%

Prob = qpAssign(F, c, A, [], b, lowbound, [], start, [], [], [], [], [], [], [], []);
Result = tomRun('sqopt', Prob, 0);

x=Result.x_k;
exitflag = Result.ExitFlag;
% TOMLAB reports ExitFlag 0 on success; anything else triggers the
% warning printed below.
conv = (exitflag == 0);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Recover beta = beta+ - beta-, rounded to 7 decimals so numerically tied
% magnitudes become exact ties for the df count below.
CoefMatrix(:,propcount,ccount) = round((x(1:p)-x(p+1:2*p))*10^7)*10^(-7);
SSMatrix(:,propcount,ccount) = sumsqr(y-X*CoefMatrix(:,propcount,ccount));

% Effective parameters = number of distinct nonzero coefficient magnitudes.
EffParVec=unique(abs(CoefMatrix(:,propcount,ccount)));
EffParVec=EffParVec(EffParVec>0);
dfMatrix(:,propcount,ccount) = length(EffParVec);

if conv == 0
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
else
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
end;

end;

end;
139+
140+

OscarSelect.m

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
% OscarSelect  Front end for OSCAR regression.
%
% Standardizes the data, applies defaults for trailing arguments,
% validates the tuning grids, builds an OLS initial estimate when none is
% supplied, and dispatches to the exact QP solver (method == 1) or the
% sequential algorithm (any other method).
function [CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method)

p = size(X,2);

% Standardize every predictor to mean 0 / variance 1 and center the
% response at mean 0.
for col = 1:p
X(:,col) = (X(:,col)-mean(X(:,col)))/std(X(:,col));
end;
y = y-mean(y);

% Defaults for omitted trailing arguments.
if nargin < 6
method = 2;
if nargin < 5
initcoef = [];
end;
end;

% Grids are searched in ascending order.
cvalues=sort(cvalues);
propvalues=sort(propvalues);

% No initial estimate supplied: fall back to ordinary least squares.
if isempty(initcoef)
[initcoef] = regress(y,X);
end;

% Validate the inputs before doing any expensive work.
if length(initcoef)<p
error('initial estimate must be of length p');
end;
if min(cvalues)<0
error('all values of c must be nonnegative');
end;
if max(cvalues)>1
error('values for c cannot exceed 1');
end;
if min(propvalues)<=0
error('values for proportion must be greater than 0');
end;
if max(propvalues)>=1
error('values for proportion must be smaller than 1');
end;

% Dispatch to the requested solver.
if method == 1
[CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef);
else
[CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef);
end;

OscarSeqOpt.m

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
2+
% OscarSeqOpt: solve one OSCAR problem (fixed bound tbound and pairwise
% weight cvalue) by the sequential constrained least-squares algorithm:
% solve under the current set of ordering constraints (one weighted-L1 row
% per ordering in OrderMatrix), derive the magnitude ordering of the new
% solution, add it as a constraint, and repeat until the coefficients stop
% changing.  Returns the coefficients, the effective-parameter count, the
% residual sum of squares, the augmented OrderMatrix, and a convergence flag.

function [coefs df ssquares OrderMatrix conv] = OscarSeqOpt (tbound, cvalue, Xmatrix, y, start, OrderMatrix)

% Xmatrix is [X -X] (positive/negative coefficient parts), so the number
% of predictors p is half its column count.
p = length(Xmatrix(1,:))/2;

sdresponse = std(y);
y = sqrt(p)*y/sdresponse;
% The rescaling of the response is for computational purposes only (makes the overall variance of y to be p), the
% coefficients will be rescaled back.

% The start vector lives on the same scale as the solution, so it must be
% rescaled identically.
start = sqrt(p)*start/sdresponse;
lowbound = zeros(2*p,1);

flagvalue = 0;
itervalue = 0;
% Signed coefficients of the incoming start, used for the convergence test.
SolCoef = start(1:p)-start((p+1):(2*p));

% The sequential constrained least squares problem is now solved by adding
% an additional constraint at each step and iterating until convergence.

% Iteration is capped at p^2 steps as a safety net against cycling.
while (flagvalue == 0) && (itervalue < p^2)
% One constraint row per recorded ordering: the coefficient ranked r by
% magnitude gets weight (1-c) + c*(p - r).  The row is duplicated across
% the beta+ and beta- halves so it bounds the weighted sum of |beta|.
nconstraints = length(OrderMatrix(:,1));
A1 = (1-cvalue)*ones(nconstraints,p)+cvalue*(p*ones(nconstraints,p)-OrderMatrix);
Amatrix = [A1 A1];
% The bound must be rescaled the same way as the response.
Bbound = (sqrt(p)*tbound/sdresponse)*ones(1,length(Amatrix(:,1)));


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% A quadratic programming solver is now used to solve the constrained least
% squares problem at each iteration.
%
% The problem is given as a constrained least squares in the form
% Minimize with respect to u:
%
% || y - Xmatrix u ||^2
%
% subject to:
% Amatrix u <= Bbound and u >= lowbound
%
% The following call to the least squares solver can be changed to use any solver if a more efficient one is
% available. This is the only thing that needs to be changed.

options = optimset('Display', 'off', 'LargeScale', 'off');
[x ssquare resid exitflag] = lsqlin(Xmatrix, y, Amatrix, Bbound, [], [], lowbound, [], start, options);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% Recover signed coefficients, rounded to 7 decimals so near-equal
% magnitudes are detected as exact ties below.
SolCoef1 = round((x(1:p)-x((p+1):(2*p)))*10^7)*10^(-7);
% Rank coefficients by decreasing magnitude (sort of the negated
% absolute values); tied magnitudes form blocks that receive the
% average rank of the block.
[currcoef, neworder] = sort(-abs(SolCoef1));
sameaslast = [0; (currcoef(2:p) == currcoef(1:(p-1)))];
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0];
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)];
nblocksame = sum(startblocksame);
vi = (1:p)';
visbs = vi(logical(startblocksame));
viebs = vi(logical(endblocksame));
for j = 1:nblocksame
blockmean = mean(vi(visbs(j):viebs(j)));
vi(visbs(j):viebs(j)) = blockmean * ones(viebs(j) - visbs(j) + 1,1);
end;
% Invert the sort permutation so a1weights holds each predictor's rank
% in original predictor order.
[tempinvsort,vind] = sort(neworder);
a1weights = vi(vind)';
currcoef = -currcoef;
% Record the new ordering (duplicate rows removed) and test convergence:
% stop once the rounded coefficient vector moved by <= 1e-7 in 2-norm.
OrderMatrix = unique([OrderMatrix; a1weights],'rows');
flagvalue = 1;
test = round(SolCoef*10^7)*10^(-7)-round(SolCoef1*10^7)*10^(-7);
SolCoef = SolCoef1;
flag2 = sqrt(sumsqr(test));
if (flag2>10^(-7))
flagvalue = 0;
end;
itervalue = itervalue+1;
% Warm-start the next lsqlin call from the current solution.
start = x;
end;

% Undo the response rescaling; df counts distinct nonzero magnitudes.
coefs = sdresponse*SolCoef/sqrt(p);
df = length(unique(currcoef(currcoef>0)));
ssquares = (sdresponse^2)*ssquare/p;
% Converged only if the loop stopped before the iteration cap and the
% final lsqlin call reported success (exitflag > 0).
conv = ((itervalue < p^2) && (exitflag > 0));

OscarSeqReg.m

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
2+
3+
% OscarSeqReg: sequential OSCAR solver.  For every pair (c, proportion-of-
% bound) on the supplied grids, the OSCAR problem is solved with
% OscarSeqOpt, warm-started from neighbouring grid points.  Returns, per
% grid point: the coefficient vector (CoefMatrix), the number of effective
% parameters (dfMatrix), and the residual sum of squares (SSMatrix).
%
% X should be a matrix whose rows are the observations and columns are the
% predictors (n by p). The intercept is omitted (so the response and each
% predictor should have mean zero).

function [CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef)

p = length(X(1,:));

% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
Xmatrix = [X -X]; % Need for positive and negative parts of beta
y = y-mean(y);

% Order initial estimate by magnitude, including ties.
% Initial estimate is used for first guess at ordering of coefficients and also to set maximum value
% for the bound t used in the constraint. The bound is given as proportion
% of the initial value.
% Tied magnitudes form blocks that receive the average rank of the block.

[initcoeford, currorder] = sort(-abs(initcoef));
sameaslast = [0; (initcoeford(2:p) == initcoeford(1:(p-1)))];
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0];
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)];
nblocksame = sum(startblocksame);
vi = (1:p)';
visbs = vi(logical(startblocksame));
viebs = vi(logical(endblocksame));
for i = 1:nblocksame;
blockmean = mean(vi(visbs(i):viebs(i)));
vi(visbs(i):viebs(i)) = blockmean * ones(viebs(i) - visbs(i) + 1,1);
end;
% Invert the sort permutation so a1 holds each predictor's rank in
% original predictor order.
[tempinvsort,vind] = sort(currorder);
a1 = vi(vind)';
initcoeford = -initcoeford;

% Grid search over c values, then by proportion of bound. For each c, the
% weighting is recomputed.

CoefMatrix = zeros(p,length(propvalues),length(cvalues));
SSMatrix = zeros(1,length(propvalues),length(cvalues));
dfMatrix = zeros(1,length(propvalues),length(cvalues));

for ccount = 1:length(cvalues)
% Reset the ordering-constraint set to the initial ordering for each c.
OrderMatrix = a1;
weighting = a1;
cvalue = cvalues(ccount);
% OSCAR weight for the coefficient ranked i by magnitude.
for i=1:p
weighting(i) = (1-cvalue)+cvalue*(p-i);
end;
for propcount = 1:length(propvalues)
% Bound t as a proportion of the initial estimate's OSCAR norm.
tbound = propvalues(propcount)*weighting*initcoeford;
% Warm start from a neighbouring grid point (previous proportion for the
% same c, else same proportion for the previous c); zeros at (1,1).
if (ccount == 1)
if (propcount == 1)
start=zeros(2*p,1);
end;
elseif (propcount > 1)
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)];
else
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)];
end;
% BUG FIX: OscarSeqOpt returns FIVE outputs
% [coefs df ssquares OrderMatrix conv].  The original call captured only
% four, so "conv" silently received the updated OrderMatrix instead of
% the convergence flag and the warning below could never fire.
% Capturing all five also carries the accumulated ordering constraints
% forward as warm starts within this c value (they are reset per c).
[coefs df ssquares OrderMatrix conv] = OscarSeqOpt(tbound, cvalue, Xmatrix, y, start, OrderMatrix);
CoefMatrix(:,propcount,ccount) = coefs;
SSMatrix(:,propcount,ccount) = ssquares;
dfMatrix(:,propcount,ccount) = df;
if conv == 0
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
else
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
end;
end;
end;
77+
78+

soil.csv

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
BaseSat,SumCation,CECbuffer,Ca,Mg,K,Na,P,Cu,Zn,Mn,HumicMatter,Density,pH,ExchAc,Diversity
2+
2.34,0.1576,0.614,0.0892,0.0328,0.0256,0.01,0,0.08,0.184,3.2,0.122,0.0822,0.516,0.466,0.2765957
3+
1.64,0.097,0.516,0.0454,0.0218,0.0198,0.01,0,0.064,0.112,2.734,0.0952,0.085,0.512,0.43,0.2613982
4+
5.2,0.452,0.828,0.3306,0.0758,0.0336,0.012,0.24,0.136,0.35,4.148,0.1822,0.0746,0.554,0.388,0.2553191
5+
4.1,0.3054,0.698,0.2118,0.0536,0.026,0.014,0.03,0.126,0.364,3.728,0.1646,0.0756,0.546,0.408,0.2401216
6+
2.7,0.2476,0.858,0.1568,0.0444,0.0304,0.016,0.384,0.078,0.376,4.756,0.2472,0.0692,0.45,0.624,0.1884498
7+
2.46,0.2256,0.828,0.1266,0.0478,0.0352,0.016,0.268,0.08,0.244,4.446,0.1984,0.0652,0.442,0.618,0.2370821
8+
6.02,0.688,1.068,0.5366,0.0974,0.04,0.014,0,0.168,0.17,3.364,0.245,0.0804,0.532,0.394,0.1641337
9+
5.54,0.6054,1.03,0.4684,0.0856,0.0354,0.016,0.038,0.198,0.146,3.46,0.2828,0.0778,0.52,0.442,0.2036474
10+
1.96,0.136,0.612,0.075,0.0278,0.0212,0.012,0.016,0.086,0.196,5.288,0.2706,0.0732,0.506,0.49,0.2553191
11+
2,0.1308,0.612,0.0686,0.0302,0.022,0.01,0.054,0.082,0.216,5.952,0.2662,0.075,0.504,0.49,0.2644377
12+
1.3,0.093,0.64,0.047,0.0172,0.0168,0.012,0.004,0.07,0.134,2.938,0.3824,0.0664,0.474,0.558,0.1702128
13+
1.22,0.0802,0.6,0.0382,0.0156,0.0164,0.01,0.026,0.07,0.124,2.178,0.3638,0.065,0.474,0.528,0.1854103
14+
1.6,0.1214,0.688,0.0526,0.0306,0.0282,0.01,0.006,0.076,0.204,3.002,0.2466,0.0694,0.494,0.578,0.224924
15+
1.28,0.0848,0.558,0.0304,0.0194,0.025,0.01,0,0.084,0.134,2.144,0.2264,0.0726,0.492,0.486,0.2340426
16+
2.88,0.2526,0.812,0.1678,0.0464,0.0204,0.018,0.256,0.164,0.318,3.426,0.3036,0.0704,0.466,0.576,0.2492401
17+
2.72,0.221,0.764,0.1452,0.042,0.0198,0.014,0.15,0.184,0.44,4.58,0.2394,0.0728,0.466,0.556,0.2492401
18+
2.08,0.1344,0.586,0.0756,0.026,0.0228,0.01,0,0.15,0.144,3.752,0.3234,0.073,0.522,0.462,0.2340426
19+
2.76,0.1788,0.59,0.1084,0.0382,0.0222,0.01,0,0.174,0.138,4.092,0.3048,0.0718,0.528,0.424,0.2462006
20+
5.48,0.7074,1.198,0.5622,0.0988,0.0264,0.02,0.598,0.146,0.298,4.08,0.307,0.0722,0.516,0.512,0.2036474
21+
5.6,0.659,1.142,0.5322,0.0856,0.0272,0.014,0.184,0.14,0.298,4.492,0.277,0.0796,0.516,0.498,0.2006079

0 commit comments

Comments
 (0)