Skip to content

Commit 8799815

Browse files
committed
Weighting modification and deflat protection
1 parent 7fcc889 commit 8799815

11 files changed

+142
-57
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: ddsPLS
2-
Version: 1.1.6
3-
Date: 2020-03-18
2+
Version: 1.1.7
3+
Date: 2020-03-23
44
Title: Data-Driven Sparse Partial Least Squares Robust to Missing Samples for Mono and Multi-Block Data Sets
55
Description: Allows to build Multi-Data-Driven Sparse Partial Least Squares models. Multi-blocks with
66
high-dimensional settings are particularly sensible to this. It comes with visualization
@@ -20,7 +20,7 @@ Maintainer: Hadrien Lorenzo <hadrien.lorenzo.2015@gmail.com>
2020
License: MIT + file LICENSE
2121
Encoding: UTF-8
2222
ByteCompile: true
23-
RoxygenNote: 6.1.1
23+
RoxygenNote: 7.1.0
2424
Imports: RColorBrewer,MASS,graphics,stats,Rdpack,doParallel,foreach,parallel,corrplot,Rcpp (>= 0.12.18)
2525
RdMacros: Rdpack
2626
Suggests: knitr,rmarkdown,htmltools

R/mddsPLS.R

Lines changed: 42 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,10 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
176176
warning(paste("R not integer and estimated to ",R,sep=""),
177177
call. = FALSE)
178178
}
179+
if(deflat & is.null(mu)){
180+
stop("For now, deflation models are built only for the ddsPLS-Ridge models. Please change to ddsPLS-classic models or to ddsPLS-Ridge changing mu parameter to a positive real value.",
181+
call. = T)
182+
}
179183
#### Inside problems
180184
u_t_r = u_t_r_0 <- list()
181185
t_r <- list()
@@ -185,14 +189,14 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
185189
z_t[[k]] <- matrix(NA,q,R)
186190
}
187191
for(r in 1:R){
188-
t_r[[r]] <- matrix(NA,n,K)
192+
t_r[[r]] <- matrix(0,n,K)
189193
}
190194
# BETA_r <- list()
195+
Ms_k_selected_lengths <- matrix(0,K,R)
191196
for(k in 1:K){
192197
if(norm(Ms[[k]],"2")<NZV){
193198
svd_k <- list(v=matrix(0,nrow = ncol(Ms[[k]]),ncol = R),
194199
d=rep(0,R))
195-
for(r in 1:R) t_r[[r]][,k] <- rep(0,n)
196200
z_t[[k]] <- matrix(0,nrow = q,ncol = R)
197201
t_t[[k]] <- matrix(0,nrow = n,ncol = R)
198202
}
@@ -239,16 +243,16 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
239243
u_r_def <- u_r_def*0
240244
}
241245
t_r_def <- mmultC(X_0,u_r_def)
242-
t_r[[r]][,k] <- t_r_def
246+
t_r[[r]][,k] = t_t[[k]][,r] <- t_r_def
243247
z_t[[k]][,r] <- mmultC(Ms[[k]],u_r_def)
244-
t_t[[k]][,r] <- mmultC(X_0,u_r_def)
245248
if(norm_th_sc>NZV){
246249
nrom_t_r_2 <- sum(t_r_def^2)
247250
bXr <- crossprod(t_r_def,X_0)/nrom_t_r_2
248251
if(r>1){
249252
u_r_def <- mmultC(Phi_r[[k]],u_r_def)
253+
u_r_def <- u_r_def/sqrt(sum(u_r_def^2))
250254
}
251-
Phi_r[[k]] = mmultC(Phi_r[[k]],diag(ps[k])-mmultC(u_r_def,bXr))
255+
Phi_r[[k]] = Phi_r[[k]] - mmultC(mmultC(Phi_r[[k]],u_r_def),bXr)
252256
}
253257
svd_k$v[,r] <- u_r_def
254258
## Perform deflation
@@ -266,20 +270,19 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
266270
}
267271
}
268272
u_t_r[[k]] = u_t_r_0[[k]] <- svd_k$v
273+
## Count the number of selected variables
274+
Ms_k_selected_lengths[k,] <- unlist(apply(abs(u_t_r[[k]]),2,function(uu){length(which(uu>NZV))} ))
269275
for(r in 1:R){
270276
if(svd_k$d[r]<NZV){
271277
u_t_r[[k]][,r] <- u_t_r[[k]][,r]*0
272278
}
273279
}
274-
if(weight & svd_k$d[1]!=0){
275-
u_t_r[[k]] <- u_t_r[[k]]/length(which(rowSums(abs(u_t_r[[k]]))>NZV))
276-
}
277280
if(!deflat & is.null(mu)){
278-
if(k==1){
279-
for(r in 1:R){
280-
t_r[[r]] <- matrix(0,n,K)
281-
}
282-
}
281+
# if(k==1){
282+
# for(r in 1:R){
283+
# t_r[[r]] <- matrix(0,n,K)
284+
# }
285+
# }
283286
for(r in 1:R){
284287
if(svd_k$d[r]!=0){
285288
t_r[[r]][,k] <- mmultC(Xs[[k]],u_t_r[[k]][,r,drop=F])
@@ -291,12 +294,23 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
291294
# }
292295
t_t[[k]] <- mmultC(Xs[[k]],u_t_r[[k]])#crossprod(Y,Xs[[k]]%*%u_t_r[[k]])
293296
}
297+
if(weight & svd_k$d[1]!=0){
298+
for(r in 1:R){
299+
if(Ms_k_selected_lengths[k,r]!=0){
300+
z_t[[k]][,r] <- z_t[[k]][,r]/Ms_k_selected_lengths[k,r]#length(which(rowSums(abs(u_t_r[[k]]))>NZV))
301+
}
302+
}
303+
# t_t[[k]] <- t_t[[k]]/length(which(rowSums(abs(u_t_r[[k]]))>NZV))
304+
# for(r in 1:R){
305+
# t_r[[r]][,k] <- t_r[[r]][,k]/length(which(rowSums(abs(u_t_r[[k]]))>NZV))
306+
# }
307+
}
294308
}
295309
U_t_super = beta_list <- list()
296310
if(is.null(mu)){
297311
## Big SVD solution ######################### -----------------
298312
Z <- do.call(cbind,z_t)
299-
R_opt <- R
313+
R_opt <- min(R,q)
300314
svd_Z <- svd(Z,nu = R_opt,nv = R_opt)
301315
beta_all <- svd_Z$v
302316
for(k in 1:K){
@@ -386,8 +400,8 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
386400
for(k in 1:K){
387401
B_t <- matrix(NA,R,q)
388402
for(r in 1:R){
389-
B_t[r,] <- Q[count_reg,]
390-
count_reg <- count_reg + 1
403+
B_t[r,] <- Q[(r-1)*K+k,]#Q[count_reg,]
404+
#count_reg <- count_reg + 1
391405
}
392406
B[[k]] <- mmultC(u_t_r[[k]],B_t)
393407
for(jj in 1:q){
@@ -456,9 +470,9 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
456470
}
457471
}
458472
list(u=u_t_r,u_t_super=U_t_super,V_super=V_super,ts=t_r,beta_comb=u,
459-
T_super=T_super,S_super=S_super,
460-
t_ort=t_ort,s_ort=s_ort,B=B,
461-
mu_x_s=mu_x_s,sd_x_s=sd_x_s,mu_y=mu_y,sd_y=sd_y,R=R,q=q,Ms=Ms,lambda=lambda_in[1],mu=mu)
473+
T_super=T_super,S_super=S_super,t_ort=t_ort,s_ort=s_ort,B=B,
474+
mu_x_s=mu_x_s,sd_x_s=sd_x_s,mu_y=mu_y,sd_y=sd_y,R=R,q=q,Ms=Ms,
475+
lambda=lambda_in[1],mu=mu,weight=weight)
462476
}
463477

464478

@@ -469,15 +483,18 @@ MddsPLS_core <- function(Xs,Y,lambda=0,R=1,mode="reg",
469483
#' must be built on. The coefficient lambda regularizes the quality of proximity to the data choosing to forget the least correlated bounds between
470484
#' \eqn{X} and \eqn{Y} data sets.
471485
#'
486+
#' The parameter \strong{weight} allows to penalize the per-block components according to the number of variables selected in each block.
487+
#'
488+
#' Parameters \strong{mu} and \strong{deflat} allow to build deflated models but need for more theoretical verifications.
472489
#'
473490
#' @param Xs A matrix, if there is only one block, or a list of matrices,, if there is more than one block, of \strong{n} rows each, the number of individuals. Some rows must be missing. The different matrices can have different numbers of columns. The length of Xs is denoted by \strong{K}.
474491
#' @param Y A matrix of \strong{n} rows of a vector of length \strong{n} detailing the response matrix. No missing values are allowed in that matrix.
475492
#' @param lambda A real \eqn{[0,1]} where 1 means just perfect correlations will be used and 0 no regularization is used.
476493
#' @param R A strictly positive integer detailing the number of components to build in the model.
477494
#' @param L0 An integer non nul parameter giving the largest number of X variables that can be selected.
495+
#' @param weight Logical. If TRUE, the scores are divided by the number of selected variables of their corresponding block.
478496
#' @param mu A real positive. The Ridge parameter changing the bias of the regression model. If is NULL, consider the classical ddsPLS. Default to NULL.
479497
#' @param deflat Logical. If TRUE, the solution uses deflations to construct the weights.
480-
#' @param weight Logical. If TRUE, the scores are divided by the number of selected variables of their corresponding block.
481498
#' @param keep_imp_mod Logical. Whether or not to keep imputation \strong{mddsPLS} models. Initialized to \code{FALSE} due to the potential size of those models.
482499
#' @param mode A character chain. Possibilities are "\strong{(reg,lda,logit)}", which implies regression problem, linear discriminant analysis (through the paclkage \code{MASS}, function \code{lda}) and logistic regression (function \code{glm}). Default is \strong{reg}.
483500
#' @param NZV Float. The floatting value above which the weights are set to 0.
@@ -782,14 +799,9 @@ mddsPLS <- function(Xs,Y,lambda=0,R=1,mode="reg",
782799
iter <- 0
783800
# Consider no deflation in the non ridge case. To be published functionnality
784801
if(is.null(mu)){
785-
if(deflat & R>q){
786-
cat(" WARNING__________________________________________________________________________________\n")
787-
cat(" | No deflation supported for R>q if not ddsPLS-Ridge analysis. |\n")
788-
cat(" | Classical ddsPLS model built with R=q. |\n")
789-
cat(" | Please set a value to mu (different from NULL) to start ddsPLS-Ridge analysis for R>q. |\n")
790-
cat(" ________________________________________________________________________________________\n")
802+
if(!deflat & R>q){
803+
deflat <- F
791804
}
792-
deflat <- F
793805
}
794806
# Change R in case of no deflation with R>q.
795807
if(!deflat & R>q) R <- q
@@ -816,7 +828,9 @@ mddsPLS <- function(Xs,Y,lambda=0,R=1,mode="reg",
816828
}
817829
}
818830
if(K>1){
819-
mod_0 <- MddsPLS_core(Xs,Y,lambda=lambda,R=R,mode=mode,L0=L0,mu=mu,deflat=deflat,NZV=NZV,weight=weight)
831+
mod_0 <- MddsPLS_core(Xs,Y,lambda=lambda,R=R,mode=mode,
832+
L0=L0,mu=mu,deflat=deflat,NZV=NZV,
833+
weight=weight)
820834
if(sum(abs(as.vector(mod_0$S_super)))!=0){
821835
Mat_na <- matrix(0,n,K)
822836
for(k in 1:K){

R/perf_mddsPLS.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#' the cross-validation process is made on the given set
55
#' of parameters.
66
#'
7+
#' For now, parameter \strong{mu} is included in that function but cannot be cross-validated. It is to the user to build cross-validations for each of the \strong{mu} needed.
8+
#'
79
#' @param Xs A matrix, if there is only one block, or a list of matrices,
810
#' if there is more than one block, of \strong{n} rows each, the number of individuals.
911
#' Some rows must be missing. The different matrices can have different numbers of columns.

man/MddsPLS_core.Rd

Lines changed: 13 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/liverToxicity.Rd

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mddsPLS.Rd

Lines changed: 19 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/penicilliumYES.Rd

Lines changed: 4 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/perf_mddsPLS.Rd

Lines changed: 20 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/plot.mddsPLS.Rd

Lines changed: 20 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/plot.perf_mddsPLS.Rd

Lines changed: 14 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)