Skip to content

Commit 287b56f

Browse files
Merge pull request #111 from adrientaudiere/dev
v 0.12.1
2 parents 4e901d0 + 8d89a05 commit 287b56f

File tree

189 files changed

+594
-309
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

189 files changed

+594
-309
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: MiscMetabar
22
Type: Package
33
Title: Miscellaneous Functions for Metabarcoding Analysis
4-
Version: 0.12.0
4+
Version: 0.12.1
55
Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net",
66
role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182"))
77
Description: Facilitate the description, transformation, exploration, and reproducibility of metabarcoding analyses. 'MiscMetabar' is mainly built on top of the 'phyloseq', 'dada2' and 'targets' R packages. It helps to build reproducible and robust bioinformatics pipelines in R. 'MiscMetabar' makes ecological analysis of alpha and beta-diversity easier, more reproducible and more powerful by integrating a large number of tools. Important features are described in Taudière A. (2023) <doi:10.21105/joss.06038>.

NEWS.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1-
# MiscMetabar 0.2.0 (in development)
1+
# MiscMetabar 0.12.1 (in development)
2+
3+
- Add parameter name `min_bootstrap` in [add_new_taxonomy_pq()]
4+
- Bug fix in [assign_idtaxa()]
5+
- Add parameters `pattern_to_remove` and `remove_NA` to [simplify_taxo()]
6+
7+
# MiscMetabar 0.12.0
28

39
- Add functions [assign_idtaxa()] and [learn_idtaxa()] to facilitate taxonomic assignment using the idtaxa algorithm from the DECIPHER R package.
410
- Add option `idtaxa` to method in [add_new_taxonomy_pq()]
511
- Add function [tbl_sum_taxtable()] to summarize tax_table from a phyloseq object
612
- In function [assign_sintax()], add params `too_few` (default value "align_start") and `too_many` (default "merge") to allow databases with a variable number of ranks and parentheses in taxonomic names.
713

814

9-
# MiscMetabar 0.11.1 (in development)
15+
# MiscMetabar 0.11.1
1016

1117
- Add param `suffix` to `add_blast_info()` allowing multiple uses of the function on the same phyloseq object (e.g. in order to use different databases)
1218
- Add param `return_DNAStringSet` to `write_temp_fasta()` function to return a DNAStringSet object in place of a temporary file.

R/dada_phyloseq.R

Lines changed: 56 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1767,55 +1767,63 @@ select_one_sample <- function(physeq, sam_name, silent = FALSE) {
17671767
#' Note that if trainingSet is not NULL, the ref_fasta is overwritten by the
17681768
#' trainingSet parameter. To customize learning parameters of the idtaxa
17691769
#' algorithm you must use trainingSet computed by the function [learn_idtaxa()].
1770-
#' @param min_boostrap (Int. \[0:1\])
1771-
#'
1770+
#' @param min_bootstrap (Int. \[0:1\])
1771+
#'
17721772
#' Minimum bootstrap value to inform taxonomy. For each bootstrap
1773-
#' below the min_boostrap value, the taxonomy information is set to NA.
1774-
#'
1773+
#' below the min_bootstrap value, the taxonomy information is set to NA.
1774+
#'
17751775
#' Correspond to parameters :
1776-
#'
1776+
#'
17771777
#' - dada2: `minBoot`, default value = 0.5
1778-
#'
1779-
#' - sintax: `min_boostrap`, default value = 60
1780-
#'
1781-
#' - lca: `threshold`, default value = 60
1782-
#'
1783-
#' - idtaxa: `threshold`, default value = 60
1784-
#'
1778+
#'
1779+
#' - sintax: `min_bootstrap`, default value = 0.5
1780+
#'
1781+
#' - lca: `id`, default value = 0.5. Note in that case, the bootstrap value is different.
1782+
#' See the id parameter in [assign_vsearch_lca()]
1783+
#'
1784+
#' - idtaxa: `threshold`, default value = 0.6
1785+
#'
17851786
#' @param ... Other arguments passed on to the taxonomic assignation method.
17861787
#' @return A new \code{\link[phyloseq]{phyloseq-class}} object with a larger slot tax_table
17871788
#' @seealso [dada2::assignTaxonomy()], [assign_sintax()], [assign_vsearch_lca()], [assign_idtaxa()]
17881789
#' @export
17891790
#'
17901791
#' @author Adrien Taudière
17911792
#'
1792-
add_new_taxonomy_pq <- function(physeq, ref_fasta, suffix = NULL, method = c("dada2", "sintax", "lca", "idtaxa"), trainingSet = NULL, min_boostrap = NULL, ...) {
1793+
add_new_taxonomy_pq <- function(
1794+
physeq,
1795+
ref_fasta,
1796+
suffix = NULL,
1797+
method = c("dada2", "sintax", "lca", "idtaxa"),
1798+
trainingSet = NULL,
1799+
min_bootstrap = NULL,
1800+
...) {
17931801
method <- match.arg(method)
17941802

1795-
if(is.null(min_boostrap)){
1796-
min_boostrap <- ifelse(method=="idtaxa", 0.6, 0.5)
1803+
if (is.null(min_bootstrap)) {
1804+
min_bootstrap <- ifelse(method == "idtaxa", 0.6, 0.5)
17971805
}
17981806

17991807
if (is.null(suffix)) {
18001808
suffix <- paste0("_", basename(ref_fasta), "_", method)
18011809
}
18021810
if (method == "dada2") {
18031811
tax_tab <-
1804-
dada2::assignTaxonomy(physeq@refseq, refFasta = ref_fasta, minBoot = 100 * min_boostrap, ...)
1812+
dada2::assignTaxonomy(physeq@refseq, refFasta = ref_fasta, minBoot = 100 * min_bootstrap, ...)
18051813
colnames(tax_tab) <-
18061814
make.unique(paste0(colnames(tax_tab), suffix))
18071815
new_tax_tab <- tax_table(cbind(physeq@tax_table, tax_tab))
18081816
new_physeq <- physeq
18091817
tax_table(new_physeq) <- new_tax_tab
18101818
} else if (method == "sintax") {
1811-
new_physeq <- assign_sintax(physeq, ref_fasta = ref_fasta, suffix = suffix, behavior = "add_to_phyloseq", min_boostrap = min_boostrap, ...)
1819+
new_physeq <- assign_sintax(physeq, ref_fasta = ref_fasta, suffix = suffix, behavior = "add_to_phyloseq", min_bootstrap = min_bootstrap, ...)
18121820
} else if (method == "lca") {
18131821
new_physeq <- assign_vsearch_lca(physeq, ref_fasta = ref_fasta, suffix = suffix, behavior = "add_to_phyloseq", ...)
18141822
} else if (method == "idtaxa") {
18151823
if (is.null(trainingSet)) {
1816-
new_physeq <- assign_idtaxa(physeq, seq2search = ref_fasta, suffix = suffix, threshold = 100 * min_boostrap, ...)
1824+
new_physeq <- assign_idtaxa(physeq, fasta_for_training = ref_fasta, behavior = "add_to_phyloseq", suffix = suffix, threshold = 100 * min_bootstrap, ...)
18171825
} else {
1818-
new_physeq <- assign_idtaxa(physeq, trainingSet = trainingSet, suffix = suffix, ...)
1826+
new_physeq <- assign_idtaxa(physeq, trainingSet = trainingSet, behavior = "add_to_phyloseq", suffix = suffix, threshold = 100 * min_bootstrap, ...)
18191827
}
18201828
}
18211829

@@ -1927,7 +1935,8 @@ tbl_sum_taxtable <- function(physeq, taxonomic_ranks = NULL, ...) {
19271935
#'
19281936
#' @inheritParams clean_pq
19291937
#' @param taxLevels Name of the 7 columns in tax_table required by funguild
1930-
#'
1938+
#' @param db_url a length 1 character string giving the URL to retrieve the database
1939+
#' from
19311940
#' @return A new object of class `physeq` with Guild information added to
19321941
#' `tax_table` slot
19331942
#' @export
@@ -1952,7 +1961,7 @@ tbl_sum_taxtable <- function(physeq, taxonomic_ranks = NULL, ...) {
19521961
#' @details
19531962
#' This function is mainly a wrapper of the work of others.
19541963
#' Please make a reference to `FUNGuildR` package and the associate
1955-
#' publication (\doi{10.1016/j.funeco.2015.06.006}) if you
1964+
#' publication (\doi{doi:10.1016/j.funeco.2015.06.006}) if you
19561965
#' use this function.
19571966
#' @seealso [plot_guild_pq()]
19581967

@@ -1965,13 +1974,18 @@ add_funguild_info <- function(physeq,
19651974
"Family",
19661975
"Genus",
19671976
"Species"
1968-
)) {
1977+
),
1978+
db_url = "http://www.stbates.org/funguild_db_2.php") {
19691979
tax_tab <- physeq@tax_table
19701980
FUNGuild_assign <-
19711981
funguild_assign(data.frame(
19721982
"Taxonomy" =
19731983
apply(tax_tab[, taxLevels], 1, paste, collapse = ";")
1974-
))
1984+
), db_url = db_url)
1985+
if (is.null(FUNGuild_assign)) {
1986+
message("No http access to the funguild database. No information were added.")
1987+
return(physeq)
1988+
}
19751989
tax_tab <-
19761990
as.matrix(cbind(tax_tab, FUNGuild_assign))
19771991
physeq@tax_table <- tax_table(tax_tab)
@@ -3156,6 +3170,11 @@ rarefy_sample_count_by_modality <-
31563170
#'
31573171
#' - "add_to_phyloseq" return a phyloseq object with amended slot `@taxtable`.
31583172
#' Only available if using physeq input and not seq2search input.
3173+
#' @param threshold (Int, default 60) Numeric specifying the confidence at which
3174+
#' to truncate the output taxonomic classifications.
3175+
#' Lower values of threshold will classify deeper into the taxonomic tree at
3176+
#' the expense of accuracy, and vice versa for higher values of threshold. See
3177+
#' [DECIPHER::IdTaxa()] man page.
31593178
#' @param column_names (vector of character) names for the column of the
31603179
#' taxonomy
31613180
#' @param suffix (character) The suffix to name the new columns.
@@ -3199,11 +3218,12 @@ rarefy_sample_count_by_modality <-
31993218
assign_idtaxa <- function(physeq,
32003219
seq2search = NULL,
32013220
trainingSet = NULL,
3202-
fasta_for_training,
3221+
fasta_for_training = NULL,
32033222
behavior = "return_matrix",
3223+
threshold = 60,
32043224
column_names = c(
32053225
"Kingdom",
3206-
"Phyla",
3226+
"Phylum",
32073227
"Class",
32083228
"Order",
32093229
"Family",
@@ -3215,7 +3235,11 @@ assign_idtaxa <- function(physeq,
32153235
unite = FALSE,
32163236
verbose = TRUE,
32173237
...) {
3218-
if (is.null(trainingSet) && !is.null(fasta_for_training)) {
3238+
if (!is.null(trainingSet) && !is.null(fasta_for_training)) {
3239+
stop("Please provide either trainingSet or fasta_for_training parameters, not both.")
3240+
} else if (is.null(trainingSet) && is.null(fasta_for_training)) {
3241+
stop("Please provide either trainingSet or fasta_for_training parameters.")
3242+
} else if (is.null(trainingSet) && !is.null(fasta_for_training)) {
32193243
if (verbose) {
32203244
message("Training using fasta_for_training file.")
32213245
}
@@ -3230,7 +3254,7 @@ assign_idtaxa <- function(physeq,
32303254
return_DNAStringSet = TRUE
32313255
)
32323256

3233-
fasta2search <- OrientNucleotides(RemoveGaps(fasta2search))
3257+
fasta2search <- DECIPHER::OrientNucleotides(DECIPHER::RemoveGaps(fasta2search))
32343258

32353259
if (verbose) {
32363260
message("Classifing using training Set with IdTaxa.")
@@ -3240,6 +3264,7 @@ assign_idtaxa <- function(physeq,
32403264
test = fasta2search,
32413265
trainingSet = trainingSet_idtaxa,
32423266
processors = nproc,
3267+
threshold = threshold,
32433268
...
32443269
)
32453270

@@ -3352,9 +3377,9 @@ assign_idtaxa <- function(physeq,
33523377
#' Please make a reference to [DECIPHER::LearnTaxa()] if you
33533378
#' use this function.
33543379
learn_idtaxa <- function(fasta_for_training, output_Rdata = NULL, output_path_only = FALSE, unite = FALSE, ...) {
3355-
seqs <- readDNAStringSet(fasta_for_training)
3356-
seqs <- RemoveGaps(seqs)
3357-
seqs <- OrientNucleotides(seqs)
3380+
seqs <- Biostrings::readDNAStringSet(fasta_for_training)
3381+
seqs <- DECIPHER::RemoveGaps(seqs)
3382+
seqs <- DECIPHER::OrientNucleotides(seqs)
33583383

33593384
taxo_for_learning <- names(seqs)
33603385
if (unite) {
@@ -3374,7 +3399,7 @@ learn_idtaxa <- function(fasta_for_training, output_Rdata = NULL, output_path_on
33743399
taxo_for_learning[!grepl("^Root;", taxo_for_learning)] <-
33753400
paste0("Root;", taxo_for_learning[!grepl("^Root;", taxo_for_learning)])
33763401

3377-
train_idtaxa <- LearnTaxa(seqs, taxonomy = taxo_for_learning, ...)
3402+
train_idtaxa <- DECIPHER::LearnTaxa(seqs, taxonomy = taxo_for_learning, ...)
33783403
if (!is.null(output_Rdata)) {
33793404
save(train_idtaxa, output_Rdata)
33803405
}

R/funguild.R

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
#' modified by Adrien Taudière
2525
#'
2626
get_funguild_db <- function(db_url = "http://www.stbates.org/funguild_db_2.php") {
27+
if (httr::http_error(db_url)) {
28+
return(NULL)
29+
}
30+
2731
httr::GET(url = db_url) %>%
2832
httr::content(as = "text") %>%
2933
stringr::str_split("\n") %>%
@@ -81,13 +85,17 @@ get_funguild_db <- function(db_url = "http://www.stbates.org/funguild_db_2.php")
8185
#' "`p__`", ...) are also allowed.
8286
#' A `character` vector, representing only the taxonomic classification,
8387
#' is also accepted.
84-
#' @param tax_col A `character` string, optionally giving an alternate
85-
#' column name in `otu_table` to use instead of `otu_table$Taxonomy`.
88+
#'
89+
#' @param db_url a length 1 character string giving the URL to retrieve the database
90+
#' from
8691
#'
8792
#' @param db_funguild A `data.frame` representing the FUNGuild as returned by
8893
#' [get_funguild_db()]
8994
#' If not supplied, the default database will be downloaded.
9095
#'
96+
#' @param tax_col A `character` string, optionally giving an alternate
97+
#' column name in `otu_table` to use instead of `otu_table$Taxonomy`.
98+
#'
9199
#' @return A [`tibble::tibble`] containing all columns of
92100
#' `otu_table`, plus relevant columns of information from the FUNGuild
93101
#' @export
@@ -98,8 +106,18 @@ get_funguild_db <- function(db_url = "http://www.stbates.org/funguild_db_2.php")
98106
#' 20:241-248.
99107
#' @author Brendan Furneaux (orcid: [0000-0003-3522-7363](https://orcid.org/0000-0003-3522-7363)),
100108
#' modified by Adrien Taudière
101-
funguild_assign <- function(otu_table, db_funguild = get_funguild_db(),
102-
tax_col = "Taxonomy") {
109+
funguild_assign <- function(
110+
otu_table,
111+
db_url = NULL,
112+
db_funguild = NULL,
113+
tax_col = "Taxonomy") {
114+
if (is.null(db_funguild)) {
115+
db_funguild <- get_funguild_db(db_url = db_url)
116+
}
117+
118+
if (is.null(db_funguild)) {
119+
return(NULL)
120+
}
103121
if (is.character(otu_table)) {
104122
otu_table <- tibble::tibble(otu_table)
105123
names(otu_table) <- tax_col
@@ -120,7 +138,8 @@ funguild_assign <- function(otu_table, db_funguild = get_funguild_db(),
120138

121139
otu_table$taxkey <- make_taxkey(otu_table[[tax_col]])
122140
all_taxkey <- unique(otu_table$taxkey) %>% na.omit()
123-
`.` <- taxon <- taxkey <- searchkey <- taxonomicLevel <- NULL # to pass R CMD check
141+
`.` <- taxon <- taxkey <- searchkey <- taxonomicLevel <- NULL
142+
124143
db_funguild <- dplyr::mutate(
125144
db_funguild,
126145
searchkey = paste0("@", stringr::str_replace(taxon, "[ _]", "@"), "@")

R/miscellanous.R

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,18 +126,49 @@ all_object_size <- function() {
126126
#'
127127
#' Internally used in [clean_pq()]
128128
#' @inheritParams clean_pq
129+
#' @param pattern_to_remove (a vector of character) the pattern to remove using [base::gsub()] function.
129130
#' @param remove_space (logical; default TRUE): do we remove space?
131+
#' @param remove_NA (logical; default FALSE): do we remove the string "NA" (uppercase only)?
130132
#' @author Adrien Taudière
131133
#'
132134
#' @return A \code{\link[phyloseq]{phyloseq-class}} object with simplified taxonomy
133135
#' @export
134-
simplify_taxo <- function(physeq, remove_space = TRUE) {
136+
#' @examples
137+
#' d_fm <- data_fungi_mini
138+
#' d_fm@tax_table[, "Species"] <- paste0(rep(
139+
#' c("s__", "s:"),
140+
#' ntaxa(d_fm) / 2
141+
#' ), d_fm@tax_table[, "Species"])
142+
#'
143+
#' # First column is the new vector of Species,
144+
#' # second column is the column before simplification
145+
#' cbind(
146+
#' simplify_taxo(d_fm)@tax_table[, "Species"],
147+
#' d_fm@tax_table[, "Species"]
148+
#' )
149+
#' cbind(
150+
#' simplify_taxo(d_fm, remove_NA = TRUE)@tax_table[, "Species"],
151+
#' d_fm@tax_table[, "Species"]
152+
#' )
153+
simplify_taxo <- function(
154+
physeq,
155+
pattern_to_remove = c(".__", ".*:"),
156+
remove_space = TRUE,
157+
remove_NA = FALSE) {
135158
taxo <- physeq@tax_table
136-
taxo <- gsub(".__", "", taxo, perl = TRUE)
159+
for (p in pattern_to_remove) {
160+
taxo <- gsub(p, "", taxo)
161+
}
162+
137163
if (remove_space) {
138164
taxo <- gsub(" ", "", taxo)
139165
taxo <- gsub("\u00a0", "", taxo)
140166
}
167+
168+
if (remove_NA) {
169+
taxo <- gsub("NA", "", taxo, ignore.case = FALSE)
170+
}
171+
141172
physeq@tax_table <- tax_table(taxo)
142173
return(physeq)
143174
}

R/plot_functions.R

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2502,7 +2502,7 @@ biplot_pq <- function(physeq,
25022502
#' @inheritParams clean_pq
25032503
#' @param split_by (required if pairs is NULL) the name of the factor to make all combination
25042504
#' of couples of values
2505-
#' @param pairs (required if pairs is NULL) the name of the factor in physeq@sam_data` slot
2505+
#' @param pairs (required if split_by is NULL) the name of the factor in the `physeq@sam_data` slot
25062506
#' to make plot by pairs of samples. Each level must be present only two times.
25072507
#' Note that if you set pairs, you must also set the `fact` argument to be passed on to [biplot_pq()].
25082508
#' @param na_remove (logical, default TRUE) if TRUE remove all the samples
@@ -4856,14 +4856,14 @@ hill_curves_pq <- function(physeq,
48564856
#' # ggtitle("NMDS"))) +
48574857
#' # patchwork::plot_layout(guides = "collect")
48584858
#'
4859-
#' df_uwot <- umap_pq(data_fungi_mini, pkg = "uwot")
4859+
#' # df_uwot <- umap_pq(data_fungi_mini, pkg = "uwot")
48604860
#'
4861-
#' (ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) +
4862-
#' geom_point(size = 2) +
4863-
#' ggtitle("umap::umap")) /
4864-
#' (ggplot(df_uwot, aes(x = x_umap, y = y_umap, col = Height)) +
4865-
#' geom_point(size = 2) +
4866-
#' ggtitle("uwot::umap2"))
4861+
#' # (ggplot(df_umap, aes(x = x_umap, y = y_umap, col = Height)) +
4862+
#' # geom_point(size = 2) +
4863+
#' # ggtitle("umap::umap")) /
4864+
#' # (ggplot(df_uwot, aes(x = x_umap, y = y_umap, col = Height)) +
4865+
#' # geom_point(size = 2) +
4866+
#' # ggtitle("uwot::umap2"))
48674867
#'
48684868
#' @details
48694869
#' This function is mainly a wrapper of the work of others.

0 commit comments

Comments
 (0)