diff --git a/DESCRIPTION b/DESCRIPTION index fc59cb4..753539b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BioMonTools Type: Package Title: Biomonitoring and Bioassessment Calculations -Version: 1.2.4.9008 +Version: 1.2.4.9012 Authors@R: c( person("Erik W.", "Leppo", email="Erik.Leppo@tetratech.com", diff --git a/NAMESPACE b/NAMESPACE index a849114..e58d646 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,10 @@ export(metric.values.fish) export(metvalgrpxl) export(qc.checks) export(qc_taxa) +export(qc_taxa_match_official) +export(qc_taxa_values_ffg) +export(qc_taxa_values_habit) +export(qc_taxa_values_tolval) export(rarify) export(taxa_translate) importFrom(rlang,.data) diff --git a/NEWS b/NEWS index 68e84da..63b528f 100644 --- a/NEWS +++ b/NEWS @@ -4,10 +4,29 @@ NEWS - #> Last Update: 2026-03-18 20:26:19.353336 + #> Last Update: 2026-03-18 22:51:30.206983 # Version History +## Changes in version 1.2.4.9012 (2026-03-18) + +- feature: Add qc_taxa_values_tolval function + +## Changes in version 1.2.4.9011 (2026-03-18) + +- refactor: Add default column name to qc_taxa_values_ffg +- feature: Add qc_taxa_values_habit function + +## Changes in version 1.2.4.9010 (2026-03-18) + +- feature: Add qc_taxa_values_ffg function + +## Changes in version 1.2.4.9009 (2026-03-18) + +- deprecate: Change qc_taxa to qc_taxa_match_official + - Will be removed in a future version +- refactor: Add qc_taxa_match_official and update with new name + ## Changes in version 1.2.4.9008 (2026-03-18) - test: Add test for metric.values for collapsing, bugs and fish, Issue diff --git a/NEWS.md b/NEWS.md index 68e84da..63b528f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,10 +4,29 @@ NEWS - #> Last Update: 2026-03-18 20:26:19.353336 + #> Last Update: 2026-03-18 22:51:30.206983 # Version History +## Changes in version 1.2.4.9012 (2026-03-18) + +- feature: Add qc_taxa_values_tolval function + +## Changes in version 1.2.4.9011 (2026-03-18) + +- refactor: Add default column name to 
qc_taxa_values_ffg +- feature: Add qc_taxa_values_habit function + +## Changes in version 1.2.4.9010 (2026-03-18) + +- feature: Add qc_taxa_values_ffg function + +## Changes in version 1.2.4.9009 (2026-03-18) + +- deprecate: Change qc_taxa to qc_taxa_match_official + - Will be removed in a future version +- refactor: Add qc_taxa_match_official and update with new name + ## Changes in version 1.2.4.9008 (2026-03-18) - test: Add test for metric.values for collapsing, bugs and fish, Issue diff --git a/NEWS.rmd b/NEWS.rmd index d01a9b7..902fa8d 100644 --- a/NEWS.rmd +++ b/NEWS.rmd @@ -18,6 +18,25 @@ cat(paste0("Last Update: ",Sys.time())) # Version History +## Changes in version 1.2.4.9012 (2026-03-18) + +* feature: Add qc_taxa_values_tolval function + +## Changes in version 1.2.4.9011 (2026-03-18) + +* refactor: Add default column name to qc_taxa_values_ffg +* feature: Add qc_taxa_values_habit function + +## Changes in version 1.2.4.9010 (2026-03-18) + +* feature: Add qc_taxa_values_ffg function + +## Changes in version 1.2.4.9009 (2026-03-18) + +* deprecate: Change qc_taxa to qc_taxa_match_official + + Will be removed in a future version +* refactor: Add qc_taxa_match_official and update with new name + ## Changes in version 1.2.4.9008 (2026-03-18) * test: Add test for metric.values for collapsing, bugs and fish, Issue #131 diff --git a/R/qc_taxa.R b/R/qc_taxa.R index 3cab578..8040810 100644 --- a/R/qc_taxa.R +++ b/R/qc_taxa.R @@ -1,56 +1,10 @@ #' Quality Control Check on User Data Against Master Taxa List #' -#' This function compares the user's data frame to a data frame with the -#' official (or user supplied) master taxa list (benthic macroinvertebrates). +#' This function has been deprecated (March 2026). #' -#' Output is a data frame with matches. +#' The new function is qc_taxa_match_official. #' -#' Messages are output to the console with the number of matches and which user -#' taxa did not match the official list. 
-#' -#' The official list is stored online but the user can input their own saved -#' copy. -#' -#' Any columns in the user input file that match the official master taxa list -#' will be renamed with the "_NonOfficial" suffix. -#' -#' New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' -#' parameter. For taxa that did not match the master taxa list the user has -#' options on how to handle the differences for the phylogeny (e.g., columns for -#' phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, -#' tolerance value, etc.). The options are below. -#' -#' * only_official = use only official master taxa information. Any -#' non-matching taxa will not have any master taxa information. -#' -#' * only_user = only use the information provided by the user. Information -#' from the 'Official' will not be used. This should only be used for -#' non-official calculations. -#' -#' * add_new = hybrid approach that uses official master taxa information, when -#' present, but includes user information for non-matching taxa if the column -#' names match. -#' -#' Default master taxa lists are saved as CSV files online at: -#' -#' https://github.com/leppott/MBSStools_SupportFiles -#' -#' The files can be downloaded with the following code. -#' -#' **Benthic Macroinvertebrate** -#' -#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" -#' df_mt_bugs <- read.csv(url_mt_bugs) -#' -#' The master taxa files are periodically updated. Update dates will be logged -#' on the GitHub repository. -#' -#' Expected fields include: -#' -#' **Benthic Macroinvertebrates** -#' -#' + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, -#' FAM_TV, Habit, FinalTolVal07, Comment +#' This function exists only as a wrapper to avoid breaking older code. #' #' @param DF_User User taxa data. #' @param DF_Official Official master taxa list. 
Can be a local file or @@ -94,198 +48,11 @@ qc_taxa <- function(DF_User, DF_Official = NULL, fun.Community = NULL, useOfficialTaxaInfo = "only_Official") { - ##FUNCTION ~ mastertaxa ~START - # - boo_DEBUG <- FALSE - if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START - # # # Bugs - # DF_User<- taxa_bugs_genus - # DF_Official = NULL - # fun.Community = "bugs" - # useOfficialTaxaInfo = "only_Official" - # # - }##IF~boo_DEBUG~END - - # Col Suffixes - sfx_Official <- "_Official" - sfx_NonOfficial <- "_NonOfficial" - - # QC - ## inputs as data frames (just in case have a tibble) - DF_User <- data.frame(DF_User) - # DF_Official handled when checking URL - ## Community, convert community to lowercase - fun.Community <- tolower(fun.Community) - - # Taxa list, official - # run the proper sub function - if (fun.Community == "bugs") {##IF.START - url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" - col_mt <- c("Taxon", - "Phylum", - "Class", - "Order", - "Family", - "Genus", - "Other_Taxa", - "Tribe", - "FFG", - "FAM_TV", - "Habit", - "FinalTolVal07", - "Comment") - col_taxon <- col_mt[1] - # } else if(fun.Community == "fish"){ - # url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Fish.csv" - # col_mt <- c("SPECIES", "TYPE", "PTOLR", "NATIVE", "TROPHIC", "SILT" - # , "PIRHALLA","DATE.ADDED", "REASON", "SOURCE", "FAM", "GENUS" - # , "SP_SCI", "IN_KEY", "APPROX_ID" ) - # col_taxon <- col_mt[1] - # future functionality - } else { - msg <- "Valid values for fun.Community is only 'bugs'." 
- stop(msg) - }##IF ~ fun.community ~ END - - # Master Taxa - # Download "official" list if none provided - if(is.null(DF_Official)){ - # 404 Error if file not found - df_mt <- utils::read.csv(url_mt) - } else { - df_mt <- data.frame(DF_Official) - }## IF ~ is.null(DF_Official) ~ END - - # Names to upper case - names(DF_User) <- toupper(names(DF_User)) - names(df_mt) <- toupper(names(df_mt)) - # col_mt <- toupper(col_mt) - col_taxon <- toupper(col_taxon) - - # QC check for col_taxon - if (!col_taxon %in% names(DF_User)) { - stop(paste0("DF_User missing column; ", col_taxon)) - } ## IF, stop - - # taxa names to ALL CAPS for bugs and fish - DF_User[, col_taxon] <- toupper(DF_User[, col_taxon]) - - # Check Numbers - taxa_user <- sort(unique(DF_User[, col_taxon])) - taxa_user_n <- length(taxa_user) - boo_taxa_match <- taxa_user %in% df_mt[, col_taxon] - sum_taxa_match <- sum(boo_taxa_match) - taxa_nonmatch <- taxa_user[!boo_taxa_match] - # Output to Console - msg <- paste0("Taxa match, ", sum_taxa_match, " / ", taxa_user_n) - message(msg) - # Inform user of the non-matches - if(sum_taxa_match != taxa_user_n){ - n_nonmatch <- taxa_user_n - sum_taxa_match - str_tax <- ifelse(n_nonmatch == 1, "taxon", "taxa") - msg_1 <- paste0("The following user ", - str_tax, - " (", - n_nonmatch, - "/", - taxa_user_n, - ") did not match the master list.\n") - msg_2 <- paste0(taxa_nonmatch, collapse = "\n") - message(paste0(msg_1, msg_2)) - }##IF ~ non-matches ~ END - - - - # Merge and Munge Columns - ## Columns - # col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)] - # col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial) - # boo_col_match <- colnames(DF_User) %in% col_mt_nonTaxon - # col_mod <- colnames(DF_User)[boo_col_match] - ## Rename matching columns before merge - #names(DF_User)[boo_col_match] <- paste0(names(DF_User)[boo_col_match] - # , "_NonOfficial") - # more control than using suffixes in merge() # - ## Merge - # df_merge <- merge(DF_User, df_mt - # , by = 
col_taxon - # , all.x = TRUE) - ## Munge Cols - if(useOfficialTaxaInfo == "only_Official"){ - # Do Nothing - # leave in "_NonOfficial" columns - df_result <- merge(DF_User, df_mt, - by = col_taxon, - all.x = TRUE, - suffixes = c(sfx_NonOfficial, "")) - - #names(df_result) <- gsub(".x$", "", names(df_result)) - - # df_result <- dplyr::left_join(DF_User, df_mt - # , by = col_taxon - # , suffix = c(sfx_NonOfficial, "")) - - } else if(useOfficialTaxaInfo == "only_user"){ - # Reverse and keep _NonOfficial and remove official field - # # Remove Official Cols - # col_keep <- !(names(df_merge) %in% col_mod) - # df_result <- df_merge[, col_keep] - # # Revert "_NonOfficial" - # names(df_result) <- gsub("_NonOfficial$", "", names(df_result)) - - df_result <- merge(DF_User, df_mt, - by = col_taxon, - all.x = TRUE, - suffixes = c("", sfx_Official)) - - - # df_result <- dplyr::left_join(DF_User, df_mt - # , by = col_taxon - # , suffix = c("", sfx_Official)) - - } else if(useOfficialTaxaInfo == "add_new"){ - # add user info for new taxa to official columns - # df_result <- df_merge - # df_merge[df_merge[, col_taxon] == taxa_nonmatch, col_mod] <- - # df_merge[df_merge[, col_taxon] == taxa_nonmatch, paste0(col_mod - # , "_NonOfficial")] - - df_result <- merge(DF_User, df_mt, - by = col_taxon, - all.x = TRUE, - suffixes = c(sfx_NonOfficial, "")) - - # df_result <- dplyr::left_join(DF_User, df_mt - # , by = col_taxon - # , suffix = c(sfx_NonOfficial, "")) - - col_match_y <- names(df_result)[grepl(paste0(sfx_NonOfficial,"$") - , names(df_result))] - col_match_x <- gsub(paste0(sfx_NonOfficial,"$"), "", col_match_y) - df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_x] <- - df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_y] - - } else { - # Stop if wrong values - msg <- "Valid values for useOfficialTaxaInfo are - 'only_Official', 'only_user', or 'add_new'." 
- stop(msg) - } - - # QC - ## Missing Columns - - ## Valid values - # Bugs = "FFG", "FAM_TV", "Habit", "FinalTolVal07" - # Fish = TYPE, PTROLR, TROPHIC - - # Other columns for metric calculation - # Bugs = EXCLUDE, STRATA_R - # Fish = - - - # Output - return(df_result) + .Deprecated("qc_taxa_match_official") + qc_taxa_match_official(DF_User, + DF_Official, + fun.Community, + useOfficialTaxaInfo) # }##FUNCTION ~ qc_taxa ~ END diff --git a/R/qc_taxa_match_official.R b/R/qc_taxa_match_official.R new file mode 100644 index 0000000..d996337 --- /dev/null +++ b/R/qc_taxa_match_official.R @@ -0,0 +1,294 @@ +#' Quality Control Check on User Data Against Master Taxa List +#' +#' This function compares the user's data frame to a data frame with the +#' official (or user supplied) master taxa list (benthic macroinvertebrates). +#' +#' Output is a data frame with matches. +#' +#' Messages are output to the console with the number of matches and which user +#' taxa did not match the official list. +#' +#' The official list is stored online but the user can input their own saved +#' copy. +#' +#' Any columns in the user input file that match the official master taxa list +#' will be renamed with the "_NonOfficial" suffix. +#' +#' New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' +#' parameter. For taxa that did not match the master taxa list the user has +#' options on how to handle the differences for the phylogeny (e.g., columns for +#' phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, +#' tolerance value, etc.). The options are below. +#' +#' * only_official = use only official master taxa information. Any +#' non-matching taxa will not have any master taxa information. +#' +#' * only_user = only use the information provided by the user. Information +#' from the 'Official' will not be used. This should only be used for +#' non-official calculations. 
+#' +#' * add_new = hybrid approach that uses official master taxa information, when +#' present, but includes user information for non-matching taxa if the column +#' names match. +#' +#' Default master taxa lists are saved as CSV files online at: +#' +#' https://github.com/leppott/MBSStools_SupportFiles +#' +#' The files can be downloaded with the following code. +#' +#' **Benthic Macroinvertebrate** +#' +#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +#' df_mt_bugs <- read.csv(url_mt_bugs) +#' +#' The master taxa files are periodically updated. Update dates will be logged +#' on the GitHub repository. +#' +#' Expected fields include: +#' +#' **Benthic Macroinvertebrates** +#' +#' + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, +#' FAM_TV, Habit, FinalTolVal07, Comment +#' +#' This function was called qc_taxa prior to March 2026 update. +#' The older function has been deprecated and may be removed in a future +#' release. +#' +#' @param DF_User User taxa data. +#' @param DF_Official Official master taxa list. Can be a local file or +#' from a URL. +#' Default is NULL. A NULL value will use the official online files. +#' @param fun.Community Community name for which to compare the master taxa list +#' (bugs or fish). +#' @param useOfficialTaxaInfo Select how to handle new/different taxa. +#' See 'Details' for more information. +#' Valid values are "only_Official", "only_user", "add_new". +#' Default = "only_Official". +#' +#' @return input data frame with master taxa information added to it. 
+#' +#' @examples +#' # Example 1, Master Taxa List, Bugs +#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +#' df_mt_bugs <- read.csv(url_mt_bugs) +#' +#' # User data +#' DF_User <- data_benthos_MBSS +#' DF_Official <- NULL # NULL df_mt_bugs +#' fun.Community <- "bugs" +#' useOfficialTaxaInfo <- "only_Official" +#' # modify taxa id column +#' DF_User[, "TAXON"] <- DF_User[, "TAXAID"] +#' +#' df_qc_taxa_bugs <- qc_taxa_match_official(DF_User, +#' DF_Official, +#' fun.Community, +#' useOfficialTaxaInfo) +#' +#' # QC input/output +#' dim(DF_User) +#' dim(df_qc_taxa_bugs) +#' names(DF_User) +#' names(df_qc_taxa_bugs) +# +#' @export +qc_taxa_match_official <- function(DF_User, + DF_Official = NULL, + fun.Community = NULL, + useOfficialTaxaInfo = "only_Official") { + # DEBUG ---- + boo_DEBUG <- FALSE + if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START + # # # Bugs + # DF_User<- taxa_bugs_genus + # DF_Official = NULL + # fun.Community = "bugs" + # useOfficialTaxaInfo = "only_Official" + # # + }##IF~boo_DEBUG~END + + # Col Suffixes---- + sfx_Official <- "_Official" + sfx_NonOfficial <- "_NonOfficial" + + # QC---- + ## inputs as data frames (just in case have a tibble) + DF_User <- data.frame(DF_User) + # DF_Official handled when checking URL + ## Community, convert community to lowercase + fun.Community <- tolower(fun.Community) + + # Taxa list, official---- + # run the proper sub function + if (fun.Community == "bugs") {##IF.START + url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" + col_mt <- c("Taxon", + "Phylum", + "Class", + "Order", + "Family", + "Genus", + "Other_Taxa", + "Tribe", + "FFG", + "FAM_TV", + "Habit", + "FinalTolVal07", + "Comment") + col_taxon <- col_mt[1] + # } else if(fun.Community == "fish"){ + # url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Fish.csv" + # col_mt <- c("SPECIES", "TYPE", "PTOLR", "NATIVE", "TROPHIC", "SILT" + # , 
"PIRHALLA","DATE.ADDED", "REASON", "SOURCE", "FAM", "GENUS" + # , "SP_SCI", "IN_KEY", "APPROX_ID" ) + # col_taxon <- col_mt[1] + # future functionality + } else { + msg <- "Valid values for fun.Community is only 'bugs'." + stop(msg) + }##IF ~ fun.community ~ END + + # Master Taxa---- + # Download "official" list if none provided + if(is.null(DF_Official)){ + # 404 Error if file not found + df_mt <- utils::read.csv(url_mt) + } else { + df_mt <- data.frame(DF_Official) + }## IF ~ is.null(DF_Official) ~ END + + # Names to upper case---- + names(DF_User) <- toupper(names(DF_User)) + names(df_mt) <- toupper(names(df_mt)) + # col_mt <- toupper(col_mt) + col_taxon <- toupper(col_taxon) + + # QC check for col_taxon---- + if (!col_taxon %in% names(DF_User)) { + stop(paste0("DF_User missing column; ", col_taxon)) + } ## IF, stop + + # taxa names to ALL CAPS for bugs and fish---- + DF_User[, col_taxon] <- toupper(DF_User[, col_taxon]) + + # Check Numbers---- + taxa_user <- sort(unique(DF_User[, col_taxon])) + taxa_user_n <- length(taxa_user) + boo_taxa_match <- taxa_user %in% df_mt[, col_taxon] + sum_taxa_match <- sum(boo_taxa_match) + taxa_nonmatch <- taxa_user[!boo_taxa_match] + # Output to Console + msg <- paste0("Taxa match, ", sum_taxa_match, " / ", taxa_user_n) + message(msg) + # Inform user of the non-matches + if(sum_taxa_match != taxa_user_n){ + n_nonmatch <- taxa_user_n - sum_taxa_match + str_tax <- ifelse(n_nonmatch == 1, "taxon", "taxa") + msg_1 <- paste0("The following user ", + str_tax, + " (", + n_nonmatch, + "/", + taxa_user_n, + ") did not match the master list.\n") + msg_2 <- paste0(taxa_nonmatch, collapse = "\n") + message(paste0(msg_1, msg_2)) + }##IF ~ non-matches ~ END + + + + # Merge and Munge Columns---- + ## Columns + # col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)] + # col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial) + # boo_col_match <- colnames(DF_User) %in% col_mt_nonTaxon + # col_mod <- colnames(DF_User)[boo_col_match] + ## 
Rename matching columns before merge + #names(DF_User)[boo_col_match] <- paste0(names(DF_User)[boo_col_match] + # , "_NonOfficial") + # more control than using suffixes in merge() + # + ## Merge + # df_merge <- merge(DF_User, df_mt + # , by = col_taxon + # , all.x = TRUE) + ## Munge Cols + if(useOfficialTaxaInfo == "only_Official"){ + # Do Nothing + # leave in "_NonOfficial" columns + df_result <- merge(DF_User, df_mt, + by = col_taxon, + all.x = TRUE, + suffixes = c(sfx_NonOfficial, "")) + + #names(df_result) <- gsub(".x$", "", names(df_result)) + + # df_result <- dplyr::left_join(DF_User, df_mt + # , by = col_taxon + # , suffix = c(sfx_NonOfficial, "")) + + } else if(useOfficialTaxaInfo == "only_user"){ + # Reverse and keep _NonOfficial and remove official field + # # Remove Official Cols + # col_keep <- !(names(df_merge) %in% col_mod) + # df_result <- df_merge[, col_keep] + # # Revert "_NonOfficial" + # names(df_result) <- gsub("_NonOfficial$", "", names(df_result)) + + df_result <- merge(DF_User, df_mt, + by = col_taxon, + all.x = TRUE, + suffixes = c("", sfx_Official)) + + + # df_result <- dplyr::left_join(DF_User, df_mt + # , by = col_taxon + # , suffix = c("", sfx_Official)) + + } else if(useOfficialTaxaInfo == "add_new"){ + # add user info for new taxa to official columns + # df_result <- df_merge + # df_merge[df_merge[, col_taxon] == taxa_nonmatch, col_mod] <- + # df_merge[df_merge[, col_taxon] == taxa_nonmatch, paste0(col_mod + # , "_NonOfficial")] + + df_result <- merge(DF_User, df_mt, + by = col_taxon, + all.x = TRUE, + suffixes = c(sfx_NonOfficial, "")) + + # df_result <- dplyr::left_join(DF_User, df_mt + # , by = col_taxon + # , suffix = c(sfx_NonOfficial, "")) + + col_match_y <- names(df_result)[grepl(paste0(sfx_NonOfficial,"$") + , names(df_result))] + col_match_x <- gsub(paste0(sfx_NonOfficial,"$"), "", col_match_y) + df_result[df_result[, col_taxon] %in% taxa_nonmatch, col_match_x] <- + df_result[df_result[, col_taxon] %in% taxa_nonmatch, col_match_y] 
+ + } else { + # Stop if wrong values + msg <- "Valid values for useOfficialTaxaInfo are + 'only_Official', 'only_user', or 'add_new'." + stop(msg) + } + + # QC---- + ## Missing Columns + + ## Valid values + # Bugs = "FFG", "FAM_TV", "Habit", "FinalTolVal07" + # Fish = TYPE, PTROLR, TROPHIC + + # Other columns for metric calculation + # Bugs = EXCLUDE, STRATA_R + # Fish = + + + # Output---- + return(df_result) + # +}##FUNCTION ~ qc_taxa ~ END diff --git a/R/qc_taxa_values_ffg.R b/R/qc_taxa_values_ffg.R new file mode 100644 index 0000000..cc86a9e --- /dev/null +++ b/R/qc_taxa_values_ffg.R @@ -0,0 +1,81 @@ +#' QC Functional Feeding Group (FFG) Values +#' +#' Performs basic QC of the FFG column against a list of accepted values. +#' +#' Returns a data frame of the values from the input with counts (column = n) +#' from the FFG column and whether the value appeared in valid values (column = +#' valid). Values in the accepted values not appearing in the input are appended +#' to the bottom of the returned data frame. These values are marked as n = NA +#' and valid = TRUE. +#' +#' The default accepted values are the abbreviations used by metric.values(); +#' CF, CG, MH, OM, PA, PH, PI, PR, SC, SH, and XY. Users using FC and GC over +#' CF and CG can modify the accepted values. Both versions are accepted in +#' metric.values(). +#' +#' @param df_data A data frame containing taxa data. +#' @param col_ffg The column containing FFG values. Default = "FFG" +#' @param valid_vals Accepted values. +#' Default = c(CF, CG, MH, OM, PA, PH, PI, PR, SC, SH, XY) +#' +#' @return A data frame with col_ffg values, occurrence (n), and if valid (TRUE/ +#' FALSE). Additional values from valid_vals are appended. 
+#' +#' @examples +#' # Values, Default +#' qc_taxa_values_ffg(data_benthos_PacNW) +#' +#' # Values, User (full names) +#' qc_taxa_values_ffg(data_benthos_MBSS, +#' "FFG", +#' valid_vals = c("Collector", +#' "Filterer", +#' "Predator", +#' "Scraper", +#' "Shredder")) +#' +#' @export +qc_taxa_values_ffg <- function(df_data, + col_ffg = "FFG", + valid_vals = c("CF", + "CG", + "MH", + "OM", + "PA", + "PH", + "PI", + "PR", + "SC", + "SH", + "XY")) { + + # QC---- + if (!rlang::as_string(col_ffg) %in% names(df_data)) { + stop("Column '", + rlang::as_string(col_ffg), + "' is missing from input data.", call. = FALSE) + }# IF ~ col_ffg + + # Convert valid_vals to data frame + df_valid_vals <- as.data.frame(valid_vals) + names(df_valid_vals) <- col_ffg + + # occurrence---- + df_match <- df_data |> + # occurrence + dplyr::count(.data[[col_ffg]], name = "n") |> + # valid + ## T/F + dplyr::mutate(valid = .data[[col_ffg]] %in% valid_vals) |> + ## values + dplyr::full_join(y = df_valid_vals, + by = dplyr::join_by({{col_ffg}})) |> + ## convert NA to TRUE + dplyr::mutate(valid = dplyr::case_when(is.na(valid) ~ TRUE, + .default = valid)) + + # Result---- + return(df_match) + +}## FUNCTION ~ END + diff --git a/R/qc_taxa_values_habit.R b/R/qc_taxa_values_habit.R new file mode 100644 index 0000000..2c9d2ef --- /dev/null +++ b/R/qc_taxa_values_habit.R @@ -0,0 +1,75 @@ +#' QC Habit Values +#' +#' Performs basic QC of the Habit column against a list of accepted values. +#' +#' Returns a data frame of the values from the input with counts (column = n) +#' from the Habit column and whether the value appeared in valid values (column = +#' valid). Values in the accepted values not appearing in the input are appended +#' to the bottom of the returned data frame. These values are marked as n = NA +#' and valid = TRUE. +#' +#' The default accepted values are the abbreviations used by +#' metric.values(); BU, CB, CN, SK, SP, and SW. 
Values separated with "," are +#' first split apart and spaces removed. +#' +#' @param df_data A data frame containing taxa data. +#' @param col_habit The column containing Habit values. Default = "Habit" +#' @param valid_vals Accepted values. +#' Default = c(BU, CB, CN, SK, SP, SW) +#' +#' @return A data frame with col_habit values, occurrence (n), and if valid +#' (TRUE/FALSE). Additional values from valid_vals are appended. +#' +#' @examples +#' # Values, Default +#' qc_taxa_values_habit(data_benthos_MBSS) +#' +#' # Values, User +#' qc_taxa_values_habit(data_benthos_MBSS, +#' "Habit", +#' valid_vals = c("bu", "cb", "cn", "dv", "sk", "sp", "sw")) +#' +#' @export +qc_taxa_values_habit <- function(df_data, + col_habit = "Habit", + valid_vals = c("BU", + "CB", + "CN", + "SK", + "SP", + "SW")) { + + # QC---- + if (!rlang::as_string(col_habit) %in% names(df_data)) { + stop("Column '", + rlang::as_string(col_habit), + "' is missing from input data.", call. = FALSE) + }# IF ~ col_habit + + # Convert valid_vals to data frame + df_valid_vals <- as.data.frame(valid_vals) + names(df_valid_vals) <- col_habit + + # occurrence---- + df_match <- df_data |> + # get all values, split on comma with optional surrounding spaces + tidyr::separate_rows(.data[[col_habit]], sep = "\\s*,\\s*") |> + # remove spaces + dplyr::mutate({{col_habit}} := trimws(.data[[col_habit]])) |> + # occurrence + dplyr::count(.data[[col_habit]], name = "n") |> + # valid + ## T/F + dplyr::mutate(valid = .data[[col_habit]] %in% valid_vals) |> + ## values + dplyr::full_join(y = df_valid_vals, + by = dplyr::join_by({{col_habit}})) |> + ## convert NA to TRUE + dplyr::mutate(valid = dplyr::case_when(is.na(valid) ~ TRUE, + .default = valid)) + + # Result---- + return(df_match) + +}## FUNCTION ~ END + diff --git a/R/qc_taxa_values_tolval.R b/R/qc_taxa_values_tolval.R new file mode 100644 index 0000000..0692375 --- /dev/null +++ b/R/qc_taxa_values_tolval.R @@ -0,0 +1,68 @@ +#' QC Tolerance Values +#' +#' Performs 
basic QC of the Tolerance Value column. +#' +#' Returns a data frame the values from the input with counts (column = n) from +#' the TolVal column and whether the value appeared in valid values (column = +#' valid). +#' +#' The default accepted values are 0 - 10. +#' +#' @param df_data A data frame containing taxa data. +#' @param col_tolval The column containing Tolerance Values. Default = "TolVal" +#' @param valid_min Valid values range minimum. Default = 0. +#' @param valid_max Valid values range maximum. Default = 10. +#' +#' @return A data frame with col_tolval values, occurrence (n), and if valid +#' (TRUE/FALSE). +#' +#' @examples +#' qc_taxa_values_tolval(data_benthos_MBSS, "TOLVAL") +#' +#' @export +qc_taxa_values_tolval <- function(df_data, + col_tolval = "TolVal", + valid_min = 0, + valid_max = 10) { + + # QC---- + ## col_tolval in df_data + if (!rlang::as_string(col_tolval) %in% names(df_data)) { + stop("Column '", + rlang::as_string(col_tolval), + "' is missing from input data.", call. = FALSE) + }# IF ~ col_tolval exists + + ## col_tolval is numeric + if (!is.numeric(df_data[[rlang::as_string(col_tolval)]])) { + stop("Column '", + rlang::as_string(col_tolval), + "' must be numeric.", call. = FALSE) + }## IF ~ col_tolval is numeric + + ## valid_min is numeric + if (!is.numeric(valid_min)) { + stop("'valid_min' must be numeric.", call. = FALSE) + }## IF ~ valid_min is numeric + + ## valid_max is numeric + if (!is.numeric(valid_max)) { + stop("'valid_max' must be numeric.", call. 
= FALSE) + }## IF ~ valid_max is numeric + + # occurrence---- + df_match <- df_data |> + # occurrence + dplyr::count(.data[[col_tolval]], name = "n") |> + # valid + ## T/F + dplyr::mutate(valid = dplyr::case_when( + .data[[col_tolval]] >= valid_min & + .data[[col_tolval]] <= valid_max ~ TRUE, + .default = FALSE)) + + # Result---- + return(df_match) + +}## FUNCTION ~ END + diff --git a/man/qc_taxa.Rd b/man/qc_taxa.Rd index aa2f940..d33c7a0 100644 --- a/man/qc_taxa.Rd +++ b/man/qc_taxa.Rd @@ -30,58 +30,12 @@ Default = "only_Official".} input data frame with master taxa information added to it. } \description{ -This function compares the user's data frame to a data frame with the -official (or user supplied) master taxa list (benthic macroinvertebrates). +This function has been deprecated (March 2026). } \details{ -Output is a data frame with matches. +The new function is qc_taxa_match_official. -Messages are output to the console with the number of matches and which user -taxa did not match the official list. - -The official list is stored online but the user can input their own saved -copy. - -Any columns in the user input file that match the official master taxa list -will be renamed with the "_NonOfficial" suffix. - -New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' -parameter. For taxa that did not match the master taxa list the user has -options on how to handle the differences for the phylogeny (e.g., columns for -phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, -tolerance value, etc.). The options are below. - -* only_official = use only official master taxa information. Any -non-matching taxa will not have any master taxa information. - -* only_user = only use the information provided by the user. Information -from the 'Official' will not be used. This should only be used for -non-official calculations. 
- -* add_new = hybrid approach that uses official master taxa information, when -present, but includes user information for non-matching taxa if the column -names match. - -Default master taxa lists are saved as CSV files online at: - -https://github.com/leppott/MBSStools_SupportFiles - -The files can be downloaded with the following code. - -**Benthic Macroinvertebrate** - -url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" -df_mt_bugs <- read.csv(url_mt_bugs) - -The master taxa files are periodically updated. Update dates will be logged -on the GitHub repository. - -Expected fields include: - -**Benthic Macroinvertebrates** - - + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, - FAM_TV, Habit, FinalTolVal07, Comment +This function exists only as a wrapper to avoid breaking older code. } \examples{ # Example 1, Master Taxa List, Bugs diff --git a/man/qc_taxa_match_official.Rd b/man/qc_taxa_match_official.Rd new file mode 100644 index 0000000..34dca0f --- /dev/null +++ b/man/qc_taxa_match_official.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_match_official.R +\name{qc_taxa_match_official} +\alias{qc_taxa_match_official} +\title{Quality Control Check on User Data Against Master Taxa List} +\usage{ +qc_taxa_match_official( + DF_User, + DF_Official = NULL, + fun.Community = NULL, + useOfficialTaxaInfo = "only_Official" +) +} +\arguments{ +\item{DF_User}{User taxa data.} + +\item{DF_Official}{Official master taxa list. Can be a local file or +from a URL. +Default is NULL. A NULL value will use the official online files.} + +\item{fun.Community}{Community name for which to compare the master taxa list +(bugs or fish).} + +\item{useOfficialTaxaInfo}{Select how to handle new/different taxa. +See 'Details' for more information. +Valid values are "only_Official", "only_user", "add_new". 
+Default = "only_Official".} +} +\value{ +input data frame with master taxa information added to it. +} +\description{ +This function compares the user's data frame to a data frame with the +official (or user supplied) master taxa list (benthic macroinvertebrates). +} +\details{ +Output is a data frame with matches. + +Messages are output to the console with the number of matches and which user +taxa did not match the official list. + +The official list is stored online but the user can input their own saved +copy. + +Any columns in the user input file that match the official master taxa list +will be renamed with the "_NonOfficial" suffix. + +New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' +parameter. For taxa that did not match the master taxa list the user has +options on how to handle the differences for the phylogeny (e.g., columns for +phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, +tolerance value, etc.). The options are below. + +* only_official = use only official master taxa information. Any +non-matching taxa will not have any master taxa information. + +* only_user = only use the information provided by the user. Information +from the 'Official' will not be used. This should only be used for +non-official calculations. + +* add_new = hybrid approach that uses official master taxa information, when +present, but includes user information for non-matching taxa if the column +names match. + +Default master taxa lists are saved as CSV files online at: + +https://github.com/leppott/MBSStools_SupportFiles + +The files can be downloaded with the following code. + +**Benthic Macroinvertebrate** + +url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +df_mt_bugs <- read.csv(url_mt_bugs) + +The master taxa files are periodically updated. Update dates will be logged +on the GitHub repository. 
+ +Expected fields include: + +**Benthic Macroinvertebrates** + + + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, + FAM_TV, Habit, FinalTolVal07, Comment + +This function was called qc_taxa prior to March 2026 update. +The older function has been deprecated and may be removed in a future +release. +} +\examples{ +# Example 1, Master Taxa List, Bugs +url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +df_mt_bugs <- read.csv(url_mt_bugs) + +# User data +DF_User <- data_benthos_MBSS +DF_Official <- NULL # NULL df_mt_bugs +fun.Community <- "bugs" +useOfficialTaxaInfo <- "only_Official" +# modify taxa id column +DF_User[, "TAXON"] <- DF_User[, "TAXAID"] + +df_qc_taxa_bugs <- qc_taxa_match_official(DF_User, + DF_Official, + fun.Community, + useOfficialTaxaInfo) + +# QC input/output +dim(DF_User) +dim(df_qc_taxa_bugs) +names(DF_User) +names(df_qc_taxa_bugs) +} diff --git a/man/qc_taxa_values_ffg.Rd b/man/qc_taxa_values_ffg.Rd new file mode 100644 index 0000000..1d29008 --- /dev/null +++ b/man/qc_taxa_values_ffg.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_values_ffg.R +\name{qc_taxa_values_ffg} +\alias{qc_taxa_values_ffg} +\title{QC Functional Feeding Group (FFG) Values} +\usage{ +qc_taxa_values_ffg( + df_data, + col_ffg = "FFG", + valid_vals = c("CF", "CG", "MH", "OM", "PA", "PH", "PI", "PR", "SC", "SH", "XY") +) +} +\arguments{ +\item{df_data}{A data frame containing taxa data.} + +\item{col_ffg}{The column containing FFG values. Default = "FFG"} + +\item{valid_vals}{Accepted values. +Default = c(CF, CG, MH, OM ,PA, PH, PI, PR, SC, SH, XY)} +} +\value{ +A data frame with col_ffg values, occurrence (n), and if valid (TRUE/ +FALSE). Additional values from valid_vals are appended. +} +\description{ +Performs basic QC of the FFG column against a list of accepted values. 
+} +\details{ +Returns a data frame of the values from the input with counts (column = n) from +the FFG column and whether the value appeared in valid values (column = +valid). Values in the accepted values not appearing in the input are appended +to the bottom of the returned data frame. These values are marked as n = NA +and valid = TRUE. + +The default accepted values are the abbreviations used in +metric.values(): CF, CG, MH, OM, PA, PH, PI, PR, SC, SH, and XY. Users using FC +and GC instead of CF and CG can modify the accepted values. Both versions are +accepted in metric.values(). +} +\examples{ +# Values, Default +qc_taxa_values_ffg(data_benthos_PacNW) + +# Values, User (full names) +qc_taxa_values_ffg(data_benthos_MBSS, + "FFG", + valid_vals = c("Collector", + "Filterer", + "Predator", + "Scraper", + "Shredder")) + +} diff --git a/man/qc_taxa_values_habit.Rd b/man/qc_taxa_values_habit.Rd new file mode 100644 index 0000000..c966a94 --- /dev/null +++ b/man/qc_taxa_values_habit.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_values_habit.R +\name{qc_taxa_values_habit} +\alias{qc_taxa_values_habit} +\title{QC Habit Values} +\usage{ +qc_taxa_values_habit( + df_data, + col_habit = "Habit", + valid_vals = c("BU", "CB", "CN", "SK", "SP", "SW") +) +} +\arguments{ +\item{df_data}{A data frame containing taxa data.} + +\item{col_habit}{The column containing Habit values. Default = "Habit"} + +\item{valid_vals}{Accepted values. +Default = c(BU, CB, CN, SK, SP, SW)} +} +\value{ +A data frame with col_habit values, occurrence (n), and if valid +(TRUE/FALSE). Additional values from valid_vals are appended. +} +\description{ +Performs basic QC of the Habit column against a list of accepted values. +} +\details{ +Returns a data frame of the values from the input with counts (column = n) from +the Habit column and whether the value appeared in valid values (column = +valid). 
Values in the accepted values not appearing in the input are appended +to the bottom of the returned data frame. These values are marked as n = NA +and valid = TRUE. + +The default accepted values are the abbreviations used in +metric.values(): BU, CB, CN, SK, SP, and SW. Values separated with "," are +first split apart and spaces removed. +} +\examples{ +# Values, Default +qc_taxa_values_habit(data_benthos_MBSS) + +# Values, User +qc_taxa_values_habit(data_benthos_MBSS, + "Habit", + valid_vals = c("bu", "cb", "cn", "dv", "sk", "sp", "sw")) + +} diff --git a/man/qc_taxa_values_tolval.Rd b/man/qc_taxa_values_tolval.Rd new file mode 100644 index 0000000..fef4473 --- /dev/null +++ b/man/qc_taxa_values_tolval.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_values_tolval.R +\name{qc_taxa_values_tolval} +\alias{qc_taxa_values_tolval} +\title{QC Tolerance Values} +\usage{ +qc_taxa_values_tolval( + df_data, + col_tolval = "TolVal", + valid_min = 0, + valid_max = 10 +) +} +\arguments{ +\item{df_data}{A data frame containing taxa data.} + +\item{col_tolval}{The column containing Tolerance Values. Default = "TolVal"} + +\item{valid_min}{Valid values range minimum. Default = 0.} + +\item{valid_max}{Valid values range maximum. Default = 10.} +} +\value{ +A data frame with col_tolval values, occurrence (n), and if valid +(TRUE/FALSE). +} +\description{ +Performs basic QC of the Tolerance Value column. +} +\details{ +Returns a data frame of the values from the input with counts (column = n) from +the TolVal column and whether the value is within the valid range (column = +valid). + +The default accepted values are 0 - 10. +} +\examples{ +qc_taxa_values_tolval(data_benthos_MBSS, "TOLVAL") + +}