Skip to content

Commit 0d49567

Browse files
authored
Add files via upload
1 parent 3d91ccd commit 0d49567

File tree

7 files changed

+1309
-0
lines changed

7 files changed

+1309
-0
lines changed

Latex File of Research Lasso, Ridge, Elastic Net.tex

Lines changed: 917 additions & 0 deletions
Large diffs are not rendered by default.

OscarExample.m

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
% Example: run OSCAR variable selection on the bundled soil data set.
% Fits soil-magnitude (column 16) on 15 soil-property predictors.

% Read the soil data set.  The data file ships next to this script, so a
% relative path is used (the original hard-coded an absolute path on one
% user's desktop, which made the example unrunnable elsewhere).  Row 1 of
% soil.csv is the header line, so reading starts at row 2 (csvread offsets
% are zero-based).
data = csvread('soil.csv', 1, 0);

% -------------- Separating features in separate variable ---------------
X = data(:, 1:15);

% -------------- Separating magnitude in separate variable --------------
y = data(:,16);

% Choose grid of parameter values to use
cvalues = [0; .01; .05; .1;.25;.5;.75;.9;1];
propvalues = [.0001; .001; .002; .00225; .0025; .00275; .0028; .0029; .003; .004; .005; .0075; .01;.025; .05; .1; .15;.2;.3;.4;.5;.6];

%%%% method = 2, chooses the sequential algorithm.

method = 2;
initcoef = []; % empty => OscarSelect computes an OLS initial estimate
[CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method); % Calls function to perform optimization on the grid.
22+

OscarReg.m

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
2+
3+
% OscarReg: exact OSCAR solver.  For every pair (c, proportion-of-bound)
% on the supplied grids, the OSCAR-constrained least-squares problem is
% solved as one quadratic program (via the TOMLAB package).  Returns, per
% grid point: the coefficient vector (CoefMatrix), the number of effective
% parameters (dfMatrix), and the residual sum of squares (SSMatrix).
%
% X should be a matrix whose rows are the observations and columns are the
% predictors (n by p). The intercept is omitted (so the response and each
% predictor should have mean zero).

function [CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef)

% p predictors; q = number of unordered predictor pairs.  The QP unknown
% is u = [beta+; beta-; eta] of length 2*p+q, where eta(k) is a slack
% bounding the pairwise maximum |beta_i|, |beta_j| for pair k.
p = length(X(1,:));
q=p*(p-1)/2;

% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
y = y-mean(y);

% Quadratic term F of the objective 0.5*u'*F*u + c'*u, built from sparse
% triplets.  Only the beta blocks are nonzero: the four p-by-p corners
% hold +/- X'X so that (beta+ - beta-)' X'X (beta+ - beta-) is recovered;
% the eta block contributes nothing to the objective.
corrmat=X'*X;
[ivec,jvec,svec]=find(corrmat);
Sparseavec=[ivec;ivec+p;ivec;ivec+p];
Sparsebvec=[jvec;jvec+p;jvec+p;jvec];
Element1vec=[svec;svec;-svec;-svec];

F=sparse(Sparseavec,Sparsebvec,Element1vec,2*p+q,2*p+q);

clear corrmat ivec jvec svec Sparseavec Sparsebvec Element1vec;

% Linear term reproduces -(X'y)'(beta+ - beta-); eta has zero cost.
c=[-X'*y;X'*y;zeros(q,1)];
% beta+ and beta- are nonnegative; eta is constrained only through A.
lowbound=[zeros(2*p,1);-inf(q,1)];

% Sparse triplets for the inequality matrix A (A*u <= b), 2*q+1 rows:
% row 1 is the OSCAR norm constraint (its weights are refilled per c value
% inside the grid loop below); each pair k contributes two rows of the
% form  b_i^+ + b_i^- - eta_k <= 0.  Rows 2-3 (pair (1,2)) are seeded
% here; the remaining pairs are appended in the loop.
Sparse1vec=[ones(2*p+q,1);2;2;2;3;3;3];
Sparse2vec=[(1:(2*p+q))';1;p+1;2*p+1;2;p+2;2*p+1];
Elementvec=[ones(2*p,1);ones(q,1);1;1;-1;1;1;-1];
rowcount=2;
paircount=2*p+1;

% Starting point: split the initial estimate into positive and negative
% parts, then append an eta value for every pair.
initcoef1=[max(initcoef,0);-min(initcoef,0)];

for i=1:p
for j=(i+1):p
initcoef1=[initcoef1;max(abs(initcoef(i)),abs(initcoef(j)))];
% Pair (1,2) already occupies rows 2-3, so skip it on the first pass.
if rowcount>2
Sparse1vec=[Sparse1vec;rowcount;rowcount;rowcount;rowcount+1;rowcount+1;rowcount+1];
Sparse2vec=[Sparse2vec;i;i+p;paircount;j;j+p;paircount];
Elementvec=[Elementvec;1;1;-1;1;1;-1];
end;
rowcount=rowcount+2;
paircount=paircount+1;
end;
end;

% Grid search over c values, then by proportion of bound. For each c, the
% constraint matrix is updated.

CoefMatrix = zeros(p,length(propvalues),length(cvalues));
SSMatrix = zeros(1,length(propvalues),length(cvalues));
dfMatrix = zeros(1,length(propvalues),length(cvalues));

for ccount = 1:length(cvalues)
cvalue = cvalues(ccount);
% Refill the norm-constraint row weights: (1-c) on each |beta| part and
% c on each pairwise-max slack eta.
tempvec=[(1-cvalue)*ones(2*p,1);cvalue*ones(q,1)];
Elementvec(1:(2*p+q))=tempvec;
clear tempvec;

A=sparse(Sparse1vec,Sparse2vec,Elementvec,2*q+1,2*p+q);
% OSCAR norm of the initial estimate; each grid bound t below is taken
% as a proportion of this value.
initialnorm=A(1,:)*initcoef1;
for propcount = 1:length(propvalues)
tbound = propvalues(propcount)*initialnorm;
b=[tbound; zeros(2*q,1)];

% Warm start from a neighbouring grid point (previous proportion for the
% same c, else same proportion for the previous c).  For the first c the
% zero start set at (1,1) is reused for all proportions.
if (ccount == 1)
if (propcount == 1)
start=zeros(2*p+q,1);
end;
elseif (propcount > 1)
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)];
% NOTE(review): abs(start(i)) is the POSITIVE part of coefficient i, so
% these eta starts ignore negative coefficients — looks like it was
% meant to be max over |coef_i|, |coef_j|; verify (affects only the
% solver's starting point, not the constraints).
for i=1:p
for j=(i+1):p
start=[start;max(abs(start(i)),abs(start(j)))];
end;
end;
else
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)];
for i=1:p
for j=(i+1):p
start=[start;max(abs(start(i)),abs(start(j)))];
end;
end;
end;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%
%%%%%%
%%%%%% The TOMLAB package is now used as a quadratic programming solver.

%%%%%% IF AN ALTERNATIVE QUADRATIC PROGRAMMING SOLVER IS AVAILABLE,
%%%%%% REPLACE THIS SECTION WITH A CALL TO THAT SOLVER BASED ON
%%%%%% THE FORMULATION

% minimize (over x) : 0.5 x' F x + c' x
% subject to A x <= b and x >= lowbound
%

Prob = qpAssign(F, c, A, [], b, lowbound, [], start, [], [], [], [], [], [], [], []);
Result = tomRun('sqopt', Prob, 0);

x=Result.x_k;
exitflag = Result.ExitFlag;
% TOMLAB reports ExitFlag 0 on success; anything else triggers the
% warning printed below.
conv = (exitflag == 0);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Recover beta = beta+ - beta-, rounded to 7 decimals so numerically tied
% magnitudes become exact ties for the df count below.
CoefMatrix(:,propcount,ccount) = round((x(1:p)-x(p+1:2*p))*10^7)*10^(-7);
SSMatrix(:,propcount,ccount) = sumsqr(y-X*CoefMatrix(:,propcount,ccount));

% Effective parameters = number of distinct nonzero coefficient magnitudes.
EffParVec=unique(abs(CoefMatrix(:,propcount,ccount)));
EffParVec=EffParVec(EffParVec>0);
dfMatrix(:,propcount,ccount) = length(EffParVec);

if conv == 0
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
else
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
end;

end;

end;
139+
140+

OscarSelect.m

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
2+
% OscarSelect  Front end for OSCAR regression.
%
% Standardizes the data, applies defaults for trailing arguments,
% validates the tuning grids, builds an OLS initial estimate when none is
% supplied, and dispatches to the exact QP solver (method == 1) or the
% sequential algorithm (any other method).
function [CoefMatrix dfMatrix SSMatrix] = OscarSelect(X, y, cvalues, propvalues, initcoef, method)

p = size(X,2);

% Standardize every predictor to mean 0 / variance 1 and center the
% response at mean 0.
for col = 1:p
X(:,col) = (X(:,col)-mean(X(:,col)))/std(X(:,col));
end;
y = y-mean(y);

% Defaults for omitted trailing arguments.
if nargin < 6
method = 2;
if nargin < 5
initcoef = [];
end;
end;

% Grids are searched in ascending order.
cvalues=sort(cvalues);
propvalues=sort(propvalues);

% No initial estimate supplied: fall back to ordinary least squares.
if isempty(initcoef)
[initcoef] = regress(y,X);
end;

% Validate the inputs before doing any expensive work.
if length(initcoef)<p
error('initial estimate must be of length p');
end;
if min(cvalues)<0
error('all values of c must be nonnegative');
end;
if max(cvalues)>1
error('values for c cannot exceed 1');
end;
if min(propvalues)<=0
error('values for proportion must be greater than 0');
end;
if max(propvalues)>=1
error('values for proportion must be smaller than 1');
end;

% Dispatch to the requested solver.
if method == 1
[CoefMatrix dfMatrix SSMatrix] = OscarReg(X, y, cvalues, propvalues, initcoef);
else
[CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef);
end;

OscarSeqOpt.m

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
2+
% OscarSeqOpt: solve one OSCAR problem (fixed bound tbound and pairwise
% weight cvalue) by the sequential constrained least-squares algorithm:
% solve under the current set of ordering constraints (one weighted-L1 row
% per ordering in OrderMatrix), derive the magnitude ordering of the new
% solution, add it as a constraint, and repeat until the coefficients stop
% changing.  Returns the coefficients, the effective-parameter count, the
% residual sum of squares, the augmented OrderMatrix, and a convergence flag.

function [coefs df ssquares OrderMatrix conv] = OscarSeqOpt (tbound, cvalue, Xmatrix, y, start, OrderMatrix)

% Xmatrix is [X -X] (positive/negative coefficient parts), so the number
% of predictors p is half its column count.
p = length(Xmatrix(1,:))/2;

sdresponse = std(y);
y = sqrt(p)*y/sdresponse;
% The rescaling of the response is for computational purposes only (makes the overall variance of y to be p), the
% coefficients will be rescaled back.

% The start vector lives on the same scale as the solution, so it must be
% rescaled identically.
start = sqrt(p)*start/sdresponse;
lowbound = zeros(2*p,1);

flagvalue = 0;
itervalue = 0;
% Signed coefficients of the incoming start, used for the convergence test.
SolCoef = start(1:p)-start((p+1):(2*p));

% The sequential constrained least squares problem is now solved by adding
% an additional constraint at each step and iterating until convergence.

% Iteration is capped at p^2 steps as a safety net against cycling.
while (flagvalue == 0) && (itervalue < p^2)
% One constraint row per recorded ordering: the coefficient ranked r by
% magnitude gets weight (1-c) + c*(p - r).  The row is duplicated across
% the beta+ and beta- halves so it bounds the weighted sum of |beta|.
nconstraints = length(OrderMatrix(:,1));
A1 = (1-cvalue)*ones(nconstraints,p)+cvalue*(p*ones(nconstraints,p)-OrderMatrix);
Amatrix = [A1 A1];
% The bound must be rescaled the same way as the response.
Bbound = (sqrt(p)*tbound/sdresponse)*ones(1,length(Amatrix(:,1)));


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% A quadratic programming solver is now used to solve the constrained least
% squares problem at each iteration.
%
% The problem is given as a constrained least squares in the form
% Minimize with respect to u:
%
% || y - Xmatrix u ||^2
%
% subject to:
% Amatrix u <= Bbound and u >= lowbound
%
% The following call to the least squares solver can be changed to use any solver if a more efficient one is
% available. This is the only thing that needs to be changed.

options = optimset('Display', 'off', 'LargeScale', 'off');
[x ssquare resid exitflag] = lsqlin(Xmatrix, y, Amatrix, Bbound, [], [], lowbound, [], start, options);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


% Recover signed coefficients, rounded to 7 decimals so near-equal
% magnitudes are detected as exact ties below.
SolCoef1 = round((x(1:p)-x((p+1):(2*p)))*10^7)*10^(-7);
% Rank coefficients by decreasing magnitude (sort of the negated
% absolute values); tied magnitudes form blocks that receive the
% average rank of the block.
[currcoef, neworder] = sort(-abs(SolCoef1));
sameaslast = [0; (currcoef(2:p) == currcoef(1:(p-1)))];
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0];
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)];
nblocksame = sum(startblocksame);
vi = (1:p)';
visbs = vi(logical(startblocksame));
viebs = vi(logical(endblocksame));
for j = 1:nblocksame
blockmean = mean(vi(visbs(j):viebs(j)));
vi(visbs(j):viebs(j)) = blockmean * ones(viebs(j) - visbs(j) + 1,1);
end;
% Invert the sort permutation so a1weights holds each predictor's rank
% in original predictor order.
[tempinvsort,vind] = sort(neworder);
a1weights = vi(vind)';
currcoef = -currcoef;
% Record the new ordering (duplicate rows removed) and test convergence:
% stop once the rounded coefficient vector moved by <= 1e-7 in 2-norm.
OrderMatrix = unique([OrderMatrix; a1weights],'rows');
flagvalue = 1;
test = round(SolCoef*10^7)*10^(-7)-round(SolCoef1*10^7)*10^(-7);
SolCoef = SolCoef1;
flag2 = sqrt(sumsqr(test));
if (flag2>10^(-7))
flagvalue = 0;
end;
itervalue = itervalue+1;
% Warm-start the next lsqlin call from the current solution.
start = x;
end;

% Undo the response rescaling; df counts distinct nonzero magnitudes.
coefs = sdresponse*SolCoef/sqrt(p);
df = length(unique(currcoef(currcoef>0)));
ssquares = (sdresponse^2)*ssquare/p;
% Converged only if the loop stopped before the iteration cap and the
% final lsqlin call reported success (exitflag > 0).
conv = ((itervalue < p^2) && (exitflag > 0));

OscarSeqReg.m

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
2+
3+
% OscarSeqReg: sequential OSCAR solver.  For every pair (c, proportion-of-
% bound) on the supplied grids, the OSCAR problem is solved with
% OscarSeqOpt, warm-started from neighbouring grid points.  Returns, per
% grid point: the coefficient vector (CoefMatrix), the number of effective
% parameters (dfMatrix), and the residual sum of squares (SSMatrix).
%
% X should be a matrix whose rows are the observations and columns are the
% predictors (n by p). The intercept is omitted (so the response and each
% predictor should have mean zero).

function [CoefMatrix dfMatrix SSMatrix] = OscarSeqReg(X, y, cvalues, propvalues, initcoef)

p = length(X(1,:));

% Standardize predictors to mean zero, variance 1, and center response to
% mean zero.

for i = 1:p
X(:,i) = (X(:,i)-mean(X(:,i)))/std(X(:,i));
end;
Xmatrix = [X -X]; % Need for positive and negative parts of beta
y = y-mean(y);

% Order initial estimate by magnitude, including ties.
% Initial estimate is used for first guess at ordering of coefficients and also to set maximum value
% for the bound t used in the constraint. The bound is given as proportion
% of the initial value.
% Tied magnitudes form blocks that receive the average rank of the block.

[initcoeford, currorder] = sort(-abs(initcoef));
sameaslast = [0; (initcoeford(2:p) == initcoeford(1:(p-1)))];
startblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) > 0); 0];
endblocksame = [((sameaslast(2:p) - sameaslast(1:(p-1))) < 0); sameaslast(p)];
nblocksame = sum(startblocksame);
vi = (1:p)';
visbs = vi(logical(startblocksame));
viebs = vi(logical(endblocksame));
for i = 1:nblocksame;
blockmean = mean(vi(visbs(i):viebs(i)));
vi(visbs(i):viebs(i)) = blockmean * ones(viebs(i) - visbs(i) + 1,1);
end;
% Invert the sort permutation so a1 holds each predictor's rank in
% original predictor order.
[tempinvsort,vind] = sort(currorder);
a1 = vi(vind)';
initcoeford = -initcoeford;

% Grid search over c values, then by proportion of bound. For each c, the
% weighting is recomputed.

CoefMatrix = zeros(p,length(propvalues),length(cvalues));
SSMatrix = zeros(1,length(propvalues),length(cvalues));
dfMatrix = zeros(1,length(propvalues),length(cvalues));

for ccount = 1:length(cvalues)
% Reset the ordering-constraint set to the initial ordering for each c.
OrderMatrix = a1;
weighting = a1;
cvalue = cvalues(ccount);
% OSCAR weight for the coefficient ranked i by magnitude.
for i=1:p
weighting(i) = (1-cvalue)+cvalue*(p-i);
end;
for propcount = 1:length(propvalues)
% Bound t as a proportion of the initial estimate's OSCAR norm.
tbound = propvalues(propcount)*weighting*initcoeford;
% Warm start from a neighbouring grid point (previous proportion for the
% same c, else same proportion for the previous c); zeros at (1,1).
if (ccount == 1)
if (propcount == 1)
start=zeros(2*p,1);
end;
elseif (propcount > 1)
start=[max(CoefMatrix(:,propcount-1,ccount),0);-min(CoefMatrix(:,propcount-1,ccount),0)];
else
start=[max(CoefMatrix(:,propcount,ccount-1),0);-min(CoefMatrix(:,propcount,ccount-1),0)];
end;
% BUG FIX: OscarSeqOpt returns FIVE outputs
% [coefs df ssquares OrderMatrix conv].  The original call captured only
% four, so "conv" silently received the updated OrderMatrix instead of
% the convergence flag and the warning below could never fire.
% Capturing all five also carries the accumulated ordering constraints
% forward as warm starts within this c value (they are reset per c).
[coefs df ssquares OrderMatrix conv] = OscarSeqOpt(tbound, cvalue, Xmatrix, y, start, OrderMatrix);
CoefMatrix(:,propcount,ccount) = coefs;
SSMatrix(:,propcount,ccount) = ssquares;
dfMatrix(:,propcount,ccount) = df;
if conv == 0
fprintf('Optimization may not have converged properly for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
else
fprintf('Optimization complete for c = %g and prop = %g.\n', cvalue, propvalues(propcount));
end;
end;
end;
77+
78+

soil.csv

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
BaseSat,SumCation,CECbuffer,Ca,Mg,K,Na,P,Cu,Zn,Mn,HumicMatter,Density,pH,ExchAc,Diversity
2+
2.34,0.1576,0.614,0.0892,0.0328,0.0256,0.01,0,0.08,0.184,3.2,0.122,0.0822,0.516,0.466,0.2765957
3+
1.64,0.097,0.516,0.0454,0.0218,0.0198,0.01,0,0.064,0.112,2.734,0.0952,0.085,0.512,0.43,0.2613982
4+
5.2,0.452,0.828,0.3306,0.0758,0.0336,0.012,0.24,0.136,0.35,4.148,0.1822,0.0746,0.554,0.388,0.2553191
5+
4.1,0.3054,0.698,0.2118,0.0536,0.026,0.014,0.03,0.126,0.364,3.728,0.1646,0.0756,0.546,0.408,0.2401216
6+
2.7,0.2476,0.858,0.1568,0.0444,0.0304,0.016,0.384,0.078,0.376,4.756,0.2472,0.0692,0.45,0.624,0.1884498
7+
2.46,0.2256,0.828,0.1266,0.0478,0.0352,0.016,0.268,0.08,0.244,4.446,0.1984,0.0652,0.442,0.618,0.2370821
8+
6.02,0.688,1.068,0.5366,0.0974,0.04,0.014,0,0.168,0.17,3.364,0.245,0.0804,0.532,0.394,0.1641337
9+
5.54,0.6054,1.03,0.4684,0.0856,0.0354,0.016,0.038,0.198,0.146,3.46,0.2828,0.0778,0.52,0.442,0.2036474
10+
1.96,0.136,0.612,0.075,0.0278,0.0212,0.012,0.016,0.086,0.196,5.288,0.2706,0.0732,0.506,0.49,0.2553191
11+
2,0.1308,0.612,0.0686,0.0302,0.022,0.01,0.054,0.082,0.216,5.952,0.2662,0.075,0.504,0.49,0.2644377
12+
1.3,0.093,0.64,0.047,0.0172,0.0168,0.012,0.004,0.07,0.134,2.938,0.3824,0.0664,0.474,0.558,0.1702128
13+
1.22,0.0802,0.6,0.0382,0.0156,0.0164,0.01,0.026,0.07,0.124,2.178,0.3638,0.065,0.474,0.528,0.1854103
14+
1.6,0.1214,0.688,0.0526,0.0306,0.0282,0.01,0.006,0.076,0.204,3.002,0.2466,0.0694,0.494,0.578,0.224924
15+
1.28,0.0848,0.558,0.0304,0.0194,0.025,0.01,0,0.084,0.134,2.144,0.2264,0.0726,0.492,0.486,0.2340426
16+
2.88,0.2526,0.812,0.1678,0.0464,0.0204,0.018,0.256,0.164,0.318,3.426,0.3036,0.0704,0.466,0.576,0.2492401
17+
2.72,0.221,0.764,0.1452,0.042,0.0198,0.014,0.15,0.184,0.44,4.58,0.2394,0.0728,0.466,0.556,0.2492401
18+
2.08,0.1344,0.586,0.0756,0.026,0.0228,0.01,0,0.15,0.144,3.752,0.3234,0.073,0.522,0.462,0.2340426
19+
2.76,0.1788,0.59,0.1084,0.0382,0.0222,0.01,0,0.174,0.138,4.092,0.3048,0.0718,0.528,0.424,0.2462006
20+
5.48,0.7074,1.198,0.5622,0.0988,0.0264,0.02,0.598,0.146,0.298,4.08,0.307,0.0722,0.516,0.512,0.2036474
21+
5.6,0.659,1.142,0.5322,0.0856,0.0272,0.014,0.184,0.14,0.298,4.492,0.277,0.0796,0.516,0.498,0.2006079

0 commit comments

Comments
 (0)