From e178020d2d8699e6cbdf878b22330a539ff3e89e Mon Sep 17 00:00:00 2001 From: Erik Leppo Date: Wed, 18 Mar 2026 20:59:08 -0400 Subject: [PATCH 1/4] v1.2.4.9009 * deprecate: Change qc_taxa to qc_taxa_match_official + Will be removed in a future version * refactor: Add qc_taxa_match_official and update with new name --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS | 8 +- NEWS.md | 8 +- NEWS.rmd | 6 + R/qc_taxa.R | 249 +--------------------------- R/qc_taxa_match_official.R | 294 ++++++++++++++++++++++++++++++++++ man/qc_taxa.Rd | 52 +----- man/qc_taxa_match_official.Rd | 113 +++++++++++++ 9 files changed, 440 insertions(+), 293 deletions(-) create mode 100644 R/qc_taxa_match_official.R create mode 100644 man/qc_taxa_match_official.Rd diff --git a/DESCRIPTION b/DESCRIPTION index fc59cb4..7729722 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BioMonTools Type: Package Title: Biomonitoring and Bioassessment Calculations -Version: 1.2.4.9008 +Version: 1.2.4.9009 Authors@R: c( person("Erik W.", "Leppo", email="Erik.Leppo@tetratech.com", diff --git a/NAMESPACE b/NAMESPACE index a849114..9af878e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(metric.values.fish) export(metvalgrpxl) export(qc.checks) export(qc_taxa) +export(qc_taxa_match_official) export(rarify) export(taxa_translate) importFrom(rlang,.data) diff --git a/NEWS b/NEWS index 68e84da..d06c9ef 100644 --- a/NEWS +++ b/NEWS @@ -4,10 +4,16 @@ NEWS - #> Last Update: 2026-03-18 20:26:19.353336 + #> Last Update: 2026-03-18 20:55:43.088572 # Version History +## Changes in version 1.2.4.9009 (2026-03-18) + +- deprecate: Change qc_taxa to qc_taxa_match_official + - Will be removed in a future version +- refactor: Add qc_taxa_match_official and update with new name + ## Changes in version 1.2.4.9008 (2026-03-18) - test: Add test for metric.values for collapsing, bugs and fish, Issue diff --git a/NEWS.md b/NEWS.md index 68e84da..d06c9ef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,10 +4,16 @@ NEWS 
- #> Last Update: 2026-03-18 20:26:19.353336 + #> Last Update: 2026-03-18 20:55:43.088572 # Version History +## Changes in version 1.2.4.9009 (2026-03-18) + +- deprecate: Change qc_taxa to qc_taxa_match_official + - Will be removed in a future version +- refactor: Add qc_taxa_match_official and update with new name + ## Changes in version 1.2.4.9008 (2026-03-18) - test: Add test for metric.values for collapsing, bugs and fish, Issue diff --git a/NEWS.rmd b/NEWS.rmd index d01a9b7..c180538 100644 --- a/NEWS.rmd +++ b/NEWS.rmd @@ -18,6 +18,12 @@ cat(paste0("Last Update: ",Sys.time())) # Version History +## Changes in version 1.2.4.9009 (2026-03-18) + +* deprecate: Change qc_taxa to qc_taxa_match_official + + Will be removed in a future version +* refactor: Add qc_taxa_match_official and update with new name + ## Changes in version 1.2.4.9008 (2026-03-18) * test: Add test for metric.values for collapsing, bugs and fish, Issue #131 diff --git a/R/qc_taxa.R b/R/qc_taxa.R index 3cab578..8040810 100644 --- a/R/qc_taxa.R +++ b/R/qc_taxa.R @@ -1,56 +1,10 @@ #' Quality Control Check on User Data Against Master Taxa List #' -#' This function compares the user's data frame to a data frame with the -#' official (or user supplied) master taxa list (benthic macroinvertebrates). +#' This function has been deprecated (March 2026). #' -#' Output is a data frame with matches. +#' The new function is qc_taxa_match_official. #' -#' Messages are output to the console with the number of matches and which user -#' taxa did not match the official list. -#' -#' The official list is stored online but the user can input their own saved -#' copy. -#' -#' Any columns in the user input file that match the official master taxa list -#' will be renamed with the "_NonOfficial" suffix. -#' -#' New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' -#' parameter. 
For taxa that did not match the master taxa list the user has -#' options on how to handle the differences for the phylogeny (e.g., columns for -#' phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, -#' tolerance value, etc.). The options are below. -#' -#' * only_official = use only official master taxa information. Any -#' non-matching taxa will not have any master taxa information. -#' -#' * only_user = only use the information provided by the user. Information -#' from the 'Official' will not be used. This should only be used for -#' non-official calculations. -#' -#' * add_new = hybrid approach that uses official master taxa information, when -#' present, but includes user information for non-matching taxa if the column -#' names match. -#' -#' Default master taxa lists are saved as CSV files online at: -#' -#' https://github.com/leppott/MBSStools_SupportFiles -#' -#' The files can be downloaded with the following code. -#' -#' **Benthic Macroinvertebrate** -#' -#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" -#' df_mt_bugs <- read.csv(url_mt_bugs) -#' -#' The master taxa files are periodically updated. Update dates will be logged -#' on the GitHub repository. -#' -#' Expected fields include: -#' -#' **Benthic Macroinvertebrates** -#' -#' + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, -#' FAM_TV, Habit, FinalTolVal07, Comment +#' This function exists only as a wrapper to avoid breaking older code. #' #' @param DF_User User taxa data. #' @param DF_Official Official master taxa list. 
Can be a local file or @@ -94,198 +48,11 @@ qc_taxa <- function(DF_User, DF_Official = NULL, fun.Community = NULL, useOfficialTaxaInfo = "only_Official") { - ##FUNCTION ~ mastertaxa ~START - # - boo_DEBUG <- FALSE - if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START - # # # Bugs - # DF_User<- taxa_bugs_genus - # DF_Official = NULL - # fun.Community = "bugs" - # useOfficialTaxaInfo = "only_Official" - # # - }##IF~boo_DEBUG~END - - # Col Suffixes - sfx_Official <- "_Official" - sfx_NonOfficial <- "_NonOfficial" - - # QC - ## inputs as data frames (just in case have a tibble) - DF_User <- data.frame(DF_User) - # DF_Official handled when checking URL - ## Community, convert community to lowercase - fun.Community <- tolower(fun.Community) - - # Taxa list, official - # run the proper sub function - if (fun.Community == "bugs") {##IF.START - url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" - col_mt <- c("Taxon", - "Phylum", - "Class", - "Order", - "Family", - "Genus", - "Other_Taxa", - "Tribe", - "FFG", - "FAM_TV", - "Habit", - "FinalTolVal07", - "Comment") - col_taxon <- col_mt[1] - # } else if(fun.Community == "fish"){ - # url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Fish.csv" - # col_mt <- c("SPECIES", "TYPE", "PTOLR", "NATIVE", "TROPHIC", "SILT" - # , "PIRHALLA","DATE.ADDED", "REASON", "SOURCE", "FAM", "GENUS" - # , "SP_SCI", "IN_KEY", "APPROX_ID" ) - # col_taxon <- col_mt[1] - # future functionality - } else { - msg <- "Valid values for fun.Community is only 'bugs'." 
- stop(msg) - }##IF ~ fun.community ~ END - - # Master Taxa - # Download "official" list if none provided - if(is.null(DF_Official)){ - # 404 Error if file not found - df_mt <- utils::read.csv(url_mt) - } else { - df_mt <- data.frame(DF_Official) - }## IF ~ is.null(DF_Official) ~ END - - # Names to upper case - names(DF_User) <- toupper(names(DF_User)) - names(df_mt) <- toupper(names(df_mt)) - # col_mt <- toupper(col_mt) - col_taxon <- toupper(col_taxon) - - # QC check for col_taxon - if (!col_taxon %in% names(DF_User)) { - stop(paste0("DF_User missing column; ", col_taxon)) - } ## IF, stop - - # taxa names to ALL CAPS for bugs and fish - DF_User[, col_taxon] <- toupper(DF_User[, col_taxon]) - - # Check Numbers - taxa_user <- sort(unique(DF_User[, col_taxon])) - taxa_user_n <- length(taxa_user) - boo_taxa_match <- taxa_user %in% df_mt[, col_taxon] - sum_taxa_match <- sum(boo_taxa_match) - taxa_nonmatch <- taxa_user[!boo_taxa_match] - # Output to Console - msg <- paste0("Taxa match, ", sum_taxa_match, " / ", taxa_user_n) - message(msg) - # Inform user of the non-matches - if(sum_taxa_match != taxa_user_n){ - n_nonmatch <- taxa_user_n - sum_taxa_match - str_tax <- ifelse(n_nonmatch == 1, "taxon", "taxa") - msg_1 <- paste0("The following user ", - str_tax, - " (", - n_nonmatch, - "/", - taxa_user_n, - ") did not match the master list.\n") - msg_2 <- paste0(taxa_nonmatch, collapse = "\n") - message(paste0(msg_1, msg_2)) - }##IF ~ non-matches ~ END - - - - # Merge and Munge Columns - ## Columns - # col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)] - # col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial) - # boo_col_match <- colnames(DF_User) %in% col_mt_nonTaxon - # col_mod <- colnames(DF_User)[boo_col_match] - ## Rename matching columns before merge - #names(DF_User)[boo_col_match] <- paste0(names(DF_User)[boo_col_match] - # , "_NonOfficial") - # more control than using suffixes in merge() # - ## Merge - # df_merge <- merge(DF_User, df_mt - # , by = 
col_taxon - # , all.x = TRUE) - ## Munge Cols - if(useOfficialTaxaInfo == "only_Official"){ - # Do Nothing - # leave in "_NonOfficial" columns - df_result <- merge(DF_User, df_mt, - by = col_taxon, - all.x = TRUE, - suffixes = c(sfx_NonOfficial, "")) - - #names(df_result) <- gsub(".x$", "", names(df_result)) - - # df_result <- dplyr::left_join(DF_User, df_mt - # , by = col_taxon - # , suffix = c(sfx_NonOfficial, "")) - - } else if(useOfficialTaxaInfo == "only_user"){ - # Reverse and keep _NonOfficial and remove official field - # # Remove Official Cols - # col_keep <- !(names(df_merge) %in% col_mod) - # df_result <- df_merge[, col_keep] - # # Revert "_NonOfficial" - # names(df_result) <- gsub("_NonOfficial$", "", names(df_result)) - - df_result <- merge(DF_User, df_mt, - by = col_taxon, - all.x = TRUE, - suffixes = c("", sfx_Official)) - - - # df_result <- dplyr::left_join(DF_User, df_mt - # , by = col_taxon - # , suffix = c("", sfx_Official)) - - } else if(useOfficialTaxaInfo == "add_new"){ - # add user info for new taxa to official columns - # df_result <- df_merge - # df_merge[df_merge[, col_taxon] == taxa_nonmatch, col_mod] <- - # df_merge[df_merge[, col_taxon] == taxa_nonmatch, paste0(col_mod - # , "_NonOfficial")] - - df_result <- merge(DF_User, df_mt, - by = col_taxon, - all.x = TRUE, - suffixes = c(sfx_NonOfficial, "")) - - # df_result <- dplyr::left_join(DF_User, df_mt - # , by = col_taxon - # , suffix = c(sfx_NonOfficial, "")) - - col_match_y <- names(df_result)[grepl(paste0(sfx_NonOfficial,"$") - , names(df_result))] - col_match_x <- gsub(paste0(sfx_NonOfficial,"$"), "", col_match_y) - df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_x] <- - df_result[df_result[, col_taxon] == taxa_nonmatch, col_match_y] - - } else { - # Stop if wrong values - msg <- "Valid values for useOfficialTaxaInfo are - 'only_Official', 'only_user', or 'add_new'." 
- stop(msg) - } - - # QC - ## Missing Columns - - ## Valid values - # Bugs = "FFG", "FAM_TV", "Habit", "FinalTolVal07" - # Fish = TYPE, PTROLR, TROPHIC - - # Other columns for metric calculation - # Bugs = EXCLUDE, STRATA_R - # Fish = - - - # Output - return(df_result) + .Deprecated("qc_taxa_match_official") + qc_taxa_match_official(DF_User, + DF_Official, + fun.Community, + useOfficialTaxaInfo) # }##FUNCTION ~ qc_taxa ~ END diff --git a/R/qc_taxa_match_official.R b/R/qc_taxa_match_official.R new file mode 100644 index 0000000..c90904f --- /dev/null +++ b/R/qc_taxa_match_official.R @@ -0,0 +1,294 @@ +#' Quality Control Check on User Data Against Master Taxa List +#' +#' This function compares the user's data frame to a data frame with the +#' official (or user supplied) master taxa list (benthic macroinvertebrates). +#' +#' Output is a data frame with matches. +#' +#' Messages are output to the console with the number of matches and which user +#' taxa did not match the official list. +#' +#' The official list is stored online but the user can input their own saved +#' copy. +#' +#' Any columns in the user input file that match the official master taxa list +#' will be renamed with the "_NonOfficial" suffix. +#' +#' New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' +#' parameter. For taxa that did not match the master taxa list the user has +#' options on how to handle the differences for the phylogeny (e.g., columns for +#' phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, +#' tolerance value, etc.). The options are below. +#' +#' * only_official = use only official master taxa information. Any +#' non-matching taxa will not have any master taxa information. +#' +#' * only_user = only use the information provided by the user. Information +#' from the 'Official' will not be used. This should only be used for +#' non-official calculations. 
+#' +#' * add_new = hybrid approach that uses official master taxa information, when +#' present, but includes user information for non-matching taxa if the column +#' names match. +#' +#' Default master taxa lists are saved as CSV files online at: +#' +#' https://github.com/leppott/MBSStools_SupportFiles +#' +#' The files can be downloaded with the following code. +#' +#' **Benthic Macroinvertebrate** +#' +#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +#' df_mt_bugs <- read.csv(url_mt_bugs) +#' +#' The master taxa files are periodically updated. Update dates will be logged +#' on the GitHub repository. +#' +#' Expected fields include: +#' +#' **Benthic Macroinvertebrates** +#' +#' + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, +#' FAM_TV, Habit, FinalTolVal07, Comment +#' +#' This function was called qc_taxa prior to March 2026 update. +#' The older function has been deprecated and may be removed in a future +#' release. +#' +#' @param DF_User User taxa data. +#' @param DF_Official Official master taxa list. Can be a local file or +#' from a URL. +#' Default is NULL. A NULL value will use the official online files. +#' @param fun.Community Community name for which to compare the master taxa list +#' (bugs or fish). +#' @param useOfficialTaxaInfo Select how to handle new/different taxa. +#' See 'Details' for more information. +#' Valid values are "only_Official", "only_user", "add_new". +#' Default = "only_Official". +#' +#' @return input data frame with master taxa information added to it. 
+#' +#' @examples +#' # Example 1, Master Taxa List, Bugs +#' url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +#' df_mt_bugs <- read.csv(url_mt_bugs) +#' +#' # User data +#' DF_User <- data_benthos_MBSS +#' DF_Official <- NULL # NULL df_mt_bugs +#' fun.Community <- "bugs" +#' useOfficialTaxaInfo <- "only_Official" +#' # modify taxa id column +#' DF_User[, "TAXON"] <- DF_User[, "TAXAID"] +#' +#' df_qc_taxa_bugs <- qc_taxa_match_official(DF_User, +#' DF_Official, +#' fun.Community, +#' useOfficialTaxaInfo) +#' +#' # QC input/output +#' dim(DF_User) +#' dim(df_qc_taxa_bugs) +#' names(DF_User) +#' names(df_qc_taxa_bugs) +# +#' @export +qc_taxa_match_official <- function(DF_User, + DF_Official = NULL, + fun.Community = NULL, + useOfficialTaxaInfo = "only_Official") { + # + boo_DEBUG <- FALSE + if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START + # # # Bugs + # DF_User<- taxa_bugs_genus + # DF_Official = NULL + # fun.Community = "bugs" + # useOfficialTaxaInfo = "only_Official" + # # + }##IF~boo_DEBUG~END + + # Col Suffixes + sfx_Official <- "_Official" + sfx_NonOfficial <- "_NonOfficial" + + # QC + ## inputs as data frames (just in case have a tibble) + DF_User <- data.frame(DF_User) + # DF_Official handled when checking URL + ## Community, convert community to lowercase + fun.Community <- tolower(fun.Community) + + # Taxa list, official + # run the proper sub function + if (fun.Community == "bugs") {##IF.START + url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" + col_mt <- c("Taxon", + "Phylum", + "Class", + "Order", + "Family", + "Genus", + "Other_Taxa", + "Tribe", + "FFG", + "FAM_TV", + "Habit", + "FinalTolVal07", + "Comment") + col_taxon <- col_mt[1] + # } else if(fun.Community == "fish"){ + # url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Fish.csv" + # col_mt <- c("SPECIES", "TYPE", "PTOLR", "NATIVE", "TROPHIC", "SILT" + # , "PIRHALLA","DATE.ADDED", 
"REASON", "SOURCE", "FAM", "GENUS" + # , "SP_SCI", "IN_KEY", "APPROX_ID" ) + # col_taxon <- col_mt[1] + # future functionality + } else { + msg <- "Valid values for fun.Community is only 'bugs'." + stop(msg) + }##IF ~ fun.community ~ END + + # Master Taxa + # Download "official" list if none provided + if(is.null(DF_Official)){ + # 404 Error if file not found + df_mt <- utils::read.csv(url_mt) + } else { + df_mt <- data.frame(DF_Official) + }## IF ~ is.null(DF_Official) ~ END + + # Names to upper case + names(DF_User) <- toupper(names(DF_User)) + names(df_mt) <- toupper(names(df_mt)) + # col_mt <- toupper(col_mt) + col_taxon <- toupper(col_taxon) + + # QC check for col_taxon + if (!col_taxon %in% names(DF_User)) { + stop(paste0("DF_User missing column; ", col_taxon)) + } ## IF, stop + + # taxa names to ALL CAPS for bugs and fish + DF_User[, col_taxon] <- toupper(DF_User[, col_taxon]) + + # Check Numbers + taxa_user <- sort(unique(DF_User[, col_taxon])) + taxa_user_n <- length(taxa_user) + boo_taxa_match <- taxa_user %in% df_mt[, col_taxon] + sum_taxa_match <- sum(boo_taxa_match) + taxa_nonmatch <- taxa_user[!boo_taxa_match] + # Output to Console + msg <- paste0("Taxa match, ", sum_taxa_match, " / ", taxa_user_n) + message(msg) + # Inform user of the non-matches + if(sum_taxa_match != taxa_user_n){ + n_nonmatch <- taxa_user_n - sum_taxa_match + str_tax <- ifelse(n_nonmatch == 1, "taxon", "taxa") + msg_1 <- paste0("The following user ", + str_tax, + " (", + n_nonmatch, + "/", + taxa_user_n, + ") did not match the master list.\n") + msg_2 <- paste0(taxa_nonmatch, collapse = "\n") + message(paste0(msg_1, msg_2)) + }##IF ~ non-matches ~ END + + + + # Merge and Munge Columns + ## Columns + # col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)] + # col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial) + # boo_col_match <- colnames(DF_User) %in% col_mt_nonTaxon + # col_mod <- colnames(DF_User)[boo_col_match] + ## Rename matching columns before merge + 
#names(DF_User)[boo_col_match] <- paste0(names(DF_User)[boo_col_match] + # , "_NonOfficial") + # more control than using suffixes in merge() + # + ## Merge + # df_merge <- merge(DF_User, df_mt + # , by = col_taxon + # , all.x = TRUE) + ## Munge Cols + if(useOfficialTaxaInfo == "only_Official"){ + # Do Nothing + # leave in "_NonOfficial" columns + df_result <- merge(DF_User, df_mt, + by = col_taxon, + all.x = TRUE, + suffixes = c(sfx_NonOfficial, "")) + + #names(df_result) <- gsub(".x$", "", names(df_result)) + + # df_result <- dplyr::left_join(DF_User, df_mt + # , by = col_taxon + # , suffix = c(sfx_NonOfficial, "")) + + } else if(useOfficialTaxaInfo == "only_user"){ + # Reverse and keep _NonOfficial and remove official field + # # Remove Official Cols + # col_keep <- !(names(df_merge) %in% col_mod) + # df_result <- df_merge[, col_keep] + # # Revert "_NonOfficial" + # names(df_result) <- gsub("_NonOfficial$", "", names(df_result)) + + df_result <- merge(DF_User, df_mt, + by = col_taxon, + all.x = TRUE, + suffixes = c("", sfx_Official)) + + + # df_result <- dplyr::left_join(DF_User, df_mt + # , by = col_taxon + # , suffix = c("", sfx_Official)) + + } else if(useOfficialTaxaInfo == "add_new"){ + # add user info for new taxa to official columns + # df_result <- df_merge + # df_merge[df_merge[, col_taxon] == taxa_nonmatch, col_mod] <- + # df_merge[df_merge[, col_taxon] == taxa_nonmatch, paste0(col_mod + # , "_NonOfficial")] + + df_result <- merge(DF_User, df_mt, + by = col_taxon, + all.x = TRUE, + suffixes = c(sfx_NonOfficial, "")) + + # df_result <- dplyr::left_join(DF_User, df_mt + # , by = col_taxon + # , suffix = c(sfx_NonOfficial, "")) + + col_match_y <- names(df_result)[grepl(paste0(sfx_NonOfficial,"$") + , names(df_result))] + col_match_x <- gsub(paste0(sfx_NonOfficial,"$"), "", col_match_y) + df_result[df_result[, col_taxon] %in% taxa_nonmatch, col_match_x] <- + df_result[df_result[, col_taxon] %in% taxa_nonmatch, col_match_y] + + } else { + # Stop if wrong values 
+ msg <- "Valid values for useOfficialTaxaInfo are + 'only_Official', 'only_user', or 'add_new'." + stop(msg) + } + + # QC + ## Missing Columns + + ## Valid values + # Bugs = "FFG", "FAM_TV", "Habit", "FinalTolVal07" + # Fish = TYPE, PTROLR, TROPHIC + + # Other columns for metric calculation + # Bugs = EXCLUDE, STRATA_R + # Fish = + + + # Output + return(df_result) + # +}##FUNCTION ~ qc_taxa ~ END diff --git a/man/qc_taxa.Rd b/man/qc_taxa.Rd index aa2f940..d33c7a0 100644 --- a/man/qc_taxa.Rd +++ b/man/qc_taxa.Rd @@ -30,58 +30,12 @@ Default = "only_Official".} input data frame with master taxa information added to it. } \description{ -This function compares the user's data frame to a data frame with the -official (or user supplied) master taxa list (benthic macroinvertebrates). +This function has been deprecated (March 2026). } \details{ -Output is a data frame with matches. +The new function is qc_taxa_match_official. -Messages are output to the console with the number of matches and which user -taxa did not match the official list. - -The official list is stored online but the user can input their own saved -copy. - -Any columns in the user input file that match the official master taxa list -will be renamed with the "_NonOfficial" suffix. - -New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' -parameter. For taxa that did not match the master taxa list the user has -options on how to handle the differences for the phylogeny (e.g., columns for -phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, -tolerance value, etc.). The options are below. - -* only_official = use only official master taxa information. Any -non-matching taxa will not have any master taxa information. - -* only_user = only use the information provided by the user. Information -from the 'Official' will not be used. This should only be used for -non-official calculations. 
- -* add_new = hybrid approach that uses official master taxa information, when -present, but includes user information for non-matching taxa if the column -names match. - -Default master taxa lists are saved as CSV files online at: - -https://github.com/leppott/MBSStools_SupportFiles - -The files can be downloaded with the following code. - -**Benthic Macroinvertebrate** - -url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" -df_mt_bugs <- read.csv(url_mt_bugs) - -The master taxa files are periodically updated. Update dates will be logged -on the GitHub repository. - -Expected fields include: - -**Benthic Macroinvertebrates** - - + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, - FAM_TV, Habit, FinalTolVal07, Comment +This function exists only as a wrapper to avoid breaking older code. } \examples{ # Example 1, Master Taxa List, Bugs diff --git a/man/qc_taxa_match_official.Rd b/man/qc_taxa_match_official.Rd new file mode 100644 index 0000000..34dca0f --- /dev/null +++ b/man/qc_taxa_match_official.Rd @@ -0,0 +1,113 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_match_official.R +\name{qc_taxa_match_official} +\alias{qc_taxa_match_official} +\title{Quality Control Check on User Data Against Master Taxa List} +\usage{ +qc_taxa_match_official( + DF_User, + DF_Official = NULL, + fun.Community = NULL, + useOfficialTaxaInfo = "only_Official" +) +} +\arguments{ +\item{DF_User}{User taxa data.} + +\item{DF_Official}{Official master taxa list. Can be a local file or +from a URL. +Default is NULL. A NULL value will use the official online files.} + +\item{fun.Community}{Community name for which to compare the master taxa list +(bugs or fish).} + +\item{useOfficialTaxaInfo}{Select how to handle new/different taxa. +See 'Details' for more information. +Valid values are "only_Official", "only_user", "add_new". 
+Default = "only_Official".} +} +\value{ +input data frame with master taxa information added to it. +} +\description{ +This function compares the user's data frame to a data frame with the +official (or user supplied) master taxa list (benthic macroinvertebrates). +} +\details{ +Output is a data frame with matches. + +Messages are output to the console with the number of matches and which user +taxa did not match the official list. + +The official list is stored online but the user can input their own saved +copy. + +Any columns in the user input file that match the official master taxa list +will be renamed with the "_NonOfficial" suffix. + +New/different taxa in the user data are handled by the 'useOfficialTaxaInfo' +parameter. For taxa that did not match the master taxa list the user has +options on how to handle the differences for the phylogeny (e.g., columns for +phylum, class, family, etc.) and autecology (e.g., columns for FFG, habit, +tolerance value, etc.). The options are below. + +* only_official = use only official master taxa information. Any +non-matching taxa will not have any master taxa information. + +* only_user = only use the information provided by the user. Information +from the 'Official' will not be used. This should only be used for +non-official calculations. + +* add_new = hybrid approach that uses official master taxa information, when +present, but includes user information for non-matching taxa if the column +names match. + +Default master taxa lists are saved as CSV files online at: + +https://github.com/leppott/MBSStools_SupportFiles + +The files can be downloaded with the following code. + +**Benthic Macroinvertebrate** + +url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +df_mt_bugs <- read.csv(url_mt_bugs) + +The master taxa files are periodically updated. Update dates will be logged +on the GitHub repository. 
+ +Expected fields include: + +**Benthic Macroinvertebrates** + + + TAXON, Phylum, Class, Order, Family, Genus, Other_Taxa, Tribe, FFG, + FAM_TV, Habit, FinalTolVal07, Comment + +This function was called qc_taxa prior to March 2026 update. +The older function has been deprecated and may be removed in a future +release. +} +\examples{ +# Example 1, Master Taxa List, Bugs +url_mt_bugs <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" +df_mt_bugs <- read.csv(url_mt_bugs) + +# User data +DF_User <- data_benthos_MBSS +DF_Official <- NULL # NULL df_mt_bugs +fun.Community <- "bugs" +useOfficialTaxaInfo <- "only_Official" +# modify taxa id column +DF_User[, "TAXON"] <- DF_User[, "TAXAID"] + +df_qc_taxa_bugs <- qc_taxa_match_official(DF_User, + DF_Official, + fun.Community, + useOfficialTaxaInfo) + +# QC input/output +dim(DF_User) +dim(df_qc_taxa_bugs) +names(DF_User) +names(df_qc_taxa_bugs) +} From 269de5af39ebfda107c0329b9913782a3a933d94 Mon Sep 17 00:00:00 2001 From: Erik Leppo Date: Wed, 18 Mar 2026 21:59:41 -0400 Subject: [PATCH 2/4] v1.2.4.9010 * feature: Add qc_taxa_values_ffg function --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS | 6 ++- NEWS.md | 6 ++- NEWS.rmd | 4 ++ R/qc_taxa_values_ffg.R | 80 +++++++++++++++++++++++++++++++++++++++ man/qc_taxa_values_ffg.Rd | 52 +++++++++++++++++++++++++ 7 files changed, 148 insertions(+), 3 deletions(-) create mode 100644 R/qc_taxa_values_ffg.R create mode 100644 man/qc_taxa_values_ffg.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 7729722..5fb8a1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BioMonTools Type: Package Title: Biomonitoring and Bioassessment Calculations -Version: 1.2.4.9009 +Version: 1.2.4.9010 Authors@R: c( person("Erik W.", "Leppo", email="Erik.Leppo@tetratech.com", diff --git a/NAMESPACE b/NAMESPACE index 9af878e..ccd284e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(metvalgrpxl) export(qc.checks) export(qc_taxa) 
export(qc_taxa_match_official) +export(qc_taxa_values_ffg) export(rarify) export(taxa_translate) importFrom(rlang,.data) diff --git a/NEWS b/NEWS index d06c9ef..5501e8a 100644 --- a/NEWS +++ b/NEWS @@ -4,10 +4,14 @@ NEWS - #> Last Update: 2026-03-18 20:55:43.088572 + #> Last Update: 2026-03-18 21:58:25.742973 # Version History +## Changes in version 1.2.4.9010 (2026-03-18) + +- feature: Add qc_taxa_values_ffg function + ## Changes in version 1.2.4.9009 (2026-03-18) - deprecate: Change qc_taxa to qc_taxa_match_official diff --git a/NEWS.md b/NEWS.md index d06c9ef..5501e8a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,10 +4,14 @@ NEWS - #> Last Update: 2026-03-18 20:55:43.088572 + #> Last Update: 2026-03-18 21:58:25.742973 # Version History +## Changes in version 1.2.4.9010 (2026-03-18) + +- feature: Add qc_taxa_values_ffg function + ## Changes in version 1.2.4.9009 (2026-03-18) - deprecate: Change qc_taxa to qc_taxa_match_official diff --git a/NEWS.rmd b/NEWS.rmd index c180538..1ea0e7a 100644 --- a/NEWS.rmd +++ b/NEWS.rmd @@ -18,6 +18,10 @@ cat(paste0("Last Update: ",Sys.time())) # Version History +## Changes in version 1.2.4.9010 (2026-03-18) + +* feature: Add qc_taxa_values_ffg function + ## Changes in version 1.2.4.9009 (2026-03-18) * deprecate: Change qc_taxa to qc_taxa_match_official diff --git a/R/qc_taxa_values_ffg.R b/R/qc_taxa_values_ffg.R new file mode 100644 index 0000000..2dc423b --- /dev/null +++ b/R/qc_taxa_values_ffg.R @@ -0,0 +1,80 @@ +#' QC Functional Feeding Group (FFG) Values +#' +#' Performs basic QC of the FFG column against a list of accepted values. +#' +#' Returns a data frame the values from the input with counts (column = n) from +#' the FFG column and whether the value appeared in valid values (column = +#' valid). Values in the accepted values not appearing in the input are appended +#' to the bottom of the returned data frame. These values are marked as n = NA +#' and valid = TRUE. 
+#' +#' The default accepted values are the abbreviations used by metric.values(): +#' CF, CG, MH, OM, PA, PH, PI, PR, SC, SH, and XY. Users using FC +#' and GC over CF and CG can modify the accepted values. Both versions are +#' accepted in metric.values(). +#' +#' @param df_data A data frame containing taxa data. +#' @param col_ffg The column containing FFG values (unquoted tidyselect style). +#' @param valid_vals Accepted values. +#' Default = c(CF, CG, MH, OM ,PA, PH, PI, PR, SC, SH, XY) +#' +#' @return A data frame with col_ffg values, occurrence (n), and if valid (TRUE/ +#' FALSE). Additional values from valid_vals are appended. +#' +#' @examples +#' # Values, Default +#' qc_taxa_values_ffg(data_benthos_PacNW, "FFG") +#' +#' # Values, User (full names) +#' qc_taxa_values_ffg(data_benthos_MBSS, "FFG", +#' valid_vals = c("Collector", +#' "Filterer", +#' "Predator", +#' "Scraper", +#' "Shredder")) +#' +#' @export +qc_taxa_values_ffg <- function(df_data, + col_ffg, + valid_vals = c("CF", + "CG", + "MH", + "OM", + "PA", + "PH", + "PI", + "PR", + "SC", + "SH", + "XY")) { + + # QC + if (!rlang::as_string(col_ffg) %in% names(df_data)) { + stop("Column '", + rlang::as_string(col_ffg), + "' is missing from input data.", call. 
= FALSE) + }# IF ~ col_ffg + + # Convert valid_vals to data frame + df_valid_vals <- as.data.frame(valid_vals) + names(df_valid_vals) <- col_ffg + + # occurrence + df_match <- df_data |> + # occurrence + dplyr::count(.data[[col_ffg]], name = "n") |> + # valid + ## T/F + dplyr::mutate(valid = .data[[col_ffg]] %in% valid_vals) |> + ## values + dplyr::full_join(y = df_valid_vals, + by = dplyr::join_by({{col_ffg}})) |> + ## convert NA to TRUE + dplyr::mutate(valid = dplyr::case_when(is.na(valid) ~ TRUE, + .default = valid)) + + # Result + return(df_match) + +}## FUNCTION ~ END + diff --git a/man/qc_taxa_values_ffg.Rd b/man/qc_taxa_values_ffg.Rd new file mode 100644 index 0000000..a4d1b37 --- /dev/null +++ b/man/qc_taxa_values_ffg.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_values_ffg.R +\name{qc_taxa_values_ffg} +\alias{qc_taxa_values_ffg} +\title{QC Functional Feeding Group (FFG) Values} +\usage{ +qc_taxa_values_ffg( + df_data, + col_ffg, + valid_vals = c("CF", "CG", "MH", "OM", "PA", "PH", "PI", "PR", "SC", "SH", "XY") +) +} +\arguments{ +\item{df_data}{A data frame containing taxa data.} + +\item{col_ffg}{The column containing FFG values (unquoted tidyselect style).} + +\item{valid_vals}{Accepted values. +Default = c(CF, CG, MH, OM ,PA, PH, PI, PR, SC, SH, XY)} +} +\value{ +A data frame with col_ffg values, occurrence (n), and if valid (TRUE/ +FALSE). Additional values from valid_vals are appended. +} +\description{ +Performs basic QC of the FFG column against a list of accepted values. +} +\details{ +Returns a data frame the values from the input with counts (column = n) from +the FFG column and whether the value appeared in valid values (column = +valid). Values in the accepted values not appearing in the input are appended +to the bottom of the returned data frame. These values are marked as n = NA +and valid = TRUE. 
+ +The default accepted values are the abbreviations are those used as +metric.values(); CF, CG, MH, OM, PA, PI, PR, SC, SH, and XY. User using FC +and GC over CF and CG can modify the accepted values. Both versions are +accepted in metric.values(). +} +\examples{ +# Values, Default +qc_taxa_values_ffg(data_benthos_PacNW, "FFG") + +# Values, User (full names) +qc_taxa_values_ffg(data_benthos_MBSS, "FFG", + valid_vals = c("Collector", + "Filterer", + "Predator", + "Scraper", + "Shredder")) + +} From 1d5afc2b7e7953fa0f09c1633c28392e8b386223 Mon Sep 17 00:00:00 2001 From: Erik Leppo Date: Wed, 18 Mar 2026 22:20:21 -0400 Subject: [PATCH 3/4] v1.2.4.9011 - refactor: Add default column name to qc_taxa_values_ffg - feature: Add qc_taxa_values_habit function --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS | 7 +++- NEWS.md | 7 +++- NEWS.rmd | 5 +++ R/qc_taxa_values_ffg.R | 9 +++-- R/qc_taxa_values_habit.R | 75 +++++++++++++++++++++++++++++++++++++ man/qc_taxa_values_ffg.Rd | 9 +++-- man/qc_taxa_values_habit.Rd | 48 ++++++++++++++++++++++++ 9 files changed, 152 insertions(+), 11 deletions(-) create mode 100644 R/qc_taxa_values_habit.R create mode 100644 man/qc_taxa_values_habit.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5fb8a1c..9261dcf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BioMonTools Type: Package Title: Biomonitoring and Bioassessment Calculations -Version: 1.2.4.9010 +Version: 1.2.4.9011 Authors@R: c( person("Erik W.", "Leppo", email="Erik.Leppo@tetratech.com", diff --git a/NAMESPACE b/NAMESPACE index ccd284e..625546f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,6 +16,7 @@ export(qc.checks) export(qc_taxa) export(qc_taxa_match_official) export(qc_taxa_values_ffg) +export(qc_taxa_values_habit) export(rarify) export(taxa_translate) importFrom(rlang,.data) diff --git a/NEWS b/NEWS index 5501e8a..6f25e40 100644 --- a/NEWS +++ b/NEWS @@ -4,10 +4,15 @@ NEWS - #> Last Update: 2026-03-18 21:58:25.742973 + #> Last Update: 2026-03-18 22:19:01.612552 
# Version History +## Changes in version 1.2.4.9011 (2026-03-18) + +- refactor: Add default column name to qc_taxa_values_ffg +- feature: Add qc_taxa_values_habit function + ## Changes in version 1.2.4.9010 (2026-03-18) - feature: Add qc_taxa_values_ffg function diff --git a/NEWS.md b/NEWS.md index 5501e8a..6f25e40 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,10 +4,15 @@ NEWS - #> Last Update: 2026-03-18 21:58:25.742973 + #> Last Update: 2026-03-18 22:19:01.612552 # Version History +## Changes in version 1.2.4.9011 (2026-03-18) + +- refactor: Add default column name to qc_taxa_values_ffg +- feature: Add qc_taxa_values_habit function + ## Changes in version 1.2.4.9010 (2026-03-18) - feature: Add qc_taxa_values_ffg function diff --git a/NEWS.rmd b/NEWS.rmd index 1ea0e7a..0bd8433 100644 --- a/NEWS.rmd +++ b/NEWS.rmd @@ -18,6 +18,11 @@ cat(paste0("Last Update: ",Sys.time())) # Version History +## Changes in version 1.2.4.9011 (2026-03-18) + +* refactor: Add default column name to qc_taxa_values_ffg +* feature: Add qc_taxa_values_habit function + ## Changes in version 1.2.4.9010 (2026-03-18) * feature: Add qc_taxa_values_ffg function diff --git a/R/qc_taxa_values_ffg.R b/R/qc_taxa_values_ffg.R index 2dc423b..b432e66 100644 --- a/R/qc_taxa_values_ffg.R +++ b/R/qc_taxa_values_ffg.R @@ -14,7 +14,7 @@ #' accepted in metric.values(). #' #' @param df_data A data frame containing taxa data. -#' @param col_ffg The column containing FFG values (unquoted tidyselect style). +#' @param col_ffg The column containing FFG values. Default = "FFG" #' @param valid_vals Accepted values. 
#' Default = c(CF, CG, MH, OM ,PA, PH, PI, PR, SC, SH, XY) #' @@ -23,10 +23,11 @@ #' #' @examples #' # Values, Default -#' qc_taxa_values_ffg(data_benthos_PacNW, "FFG") +#' qc_taxa_values_ffg(data_benthos_PacNW) #' #' # Values, User (full names) -#' qc_taxa_values_ffg(data_benthos_MBSS, "FFG", +#' qc_taxa_values_ffg(data_benthos_MBSS, +#' "FFG", #' valid_vals = c("Collector", #' "Filterer", #' "Predator", @@ -35,7 +36,7 @@ #' #' @export qc_taxa_values_ffg <- function(df_data, - col_ffg, + col_ffg = "FFG", valid_vals = c("CF", "CG", "MH", diff --git a/R/qc_taxa_values_habit.R b/R/qc_taxa_values_habit.R new file mode 100644 index 0000000..b820d2f --- /dev/null +++ b/R/qc_taxa_values_habit.R @@ -0,0 +1,75 @@ +#' QC Habit Values +#' +#' Performs basic QC of the Habit column against a list of accepted values. +#' +#' Returns a data frame of values from the input with counts (column = n) from +#' the Habit column and whether the value appeared in valid values (column = +#' valid). Values in the accepted values not appearing in the input are appended +#' to the bottom of the returned data frame. These values are marked as n = NA +#' and valid = TRUE. +#' +#' The default accepted values are the abbreviations used by +#' metric.values(); BU, CB, CN, SK, SP, and SW. Values separated with "," are +#' first split apart and spaces removed. +#' +#' @param df_data A data frame containing taxa data. +#' @param col_habit The column containing Habit values. Default = "Habit" +#' @param valid_vals Accepted values. +#' Default = c(BU, CB, CN, SK, SP, SW) +#' +#' @return A data frame with col_habit values, occurrence (n), and if valid +#' (TRUE/FALSE). Additional values from valid_vals are appended.
+#' +#' @examples +#' # Values, Default +#' qc_taxa_values_habit(data_benthos_MBSS) +#' +#' # Values, User +#' qc_taxa_values_habit(data_benthos_MBSS, +#' "Habit", +#' valid_vals = c("bu", "cb", "cn", "dv", "sk", "sp", "sw")) +#' +#' @export +qc_taxa_values_habit <- function(df_data, + col_habit = "Habit", + valid_vals = c("BU", + "CB", + "CN", + "SK", + "SP", + "SW")) { + + # QC + if (!rlang::as_string(col_habit) %in% names(df_data)) { + stop("Column '", + rlang::as_string(col_habit), + "' is missing from input data.", call. = FALSE) + }# IF ~ col_habit + + # Convert valid_vals to data frame + df_valid_vals <- as.data.frame(valid_vals) + names(df_valid_vals) <- col_habit + + # occurrence + df_match <- df_data |> + # get all values, split on comma with optional surrounding spaces + tidyr::separate_rows(.data[[col_habit]], sep = "\\s*,\\s*") |> + # remove spaces + dplyr::mutate({{col_habit}} := trimws(.data[[col_habit]])) |> + # occurrence + dplyr::count(.data[[col_habit]], name = "n") |> + # valid + ## T/F + dplyr::mutate(valid = .data[[col_habit]] %in% valid_vals) |> + ## values + dplyr::full_join(y = df_valid_vals, + by = dplyr::join_by({{col_habit}})) |> + ## convert NA to TRUE + dplyr::mutate(valid = dplyr::case_when(is.na(valid) ~ TRUE, + .default = valid)) + + # Result + return(df_match) + +}## FUNCTION ~ END + diff --git a/man/qc_taxa_values_ffg.Rd b/man/qc_taxa_values_ffg.Rd index a4d1b37..1d29008 100644 --- a/man/qc_taxa_values_ffg.Rd +++ b/man/qc_taxa_values_ffg.Rd @@ -6,14 +6,14 @@ \usage{ qc_taxa_values_ffg( df_data, - col_ffg, + col_ffg = "FFG", valid_vals = c("CF", "CG", "MH", "OM", "PA", "PH", "PI", "PR", "SC", "SH", "XY") ) } \arguments{ \item{df_data}{A data frame containing taxa data.} -\item{col_ffg}{The column containing FFG values (unquoted tidyselect style).} +\item{col_ffg}{The column containing FFG values. Default = "FFG"} \item{valid_vals}{Accepted values. 
Default = c(CF, CG, MH, OM ,PA, PH, PI, PR, SC, SH, XY)} @@ -39,10 +39,11 @@ accepted in metric.values(). } \examples{ # Values, Default -qc_taxa_values_ffg(data_benthos_PacNW, "FFG") +qc_taxa_values_ffg(data_benthos_PacNW) # Values, User (full names) -qc_taxa_values_ffg(data_benthos_MBSS, "FFG", +qc_taxa_values_ffg(data_benthos_MBSS, + "FFG", valid_vals = c("Collector", "Filterer", "Predator", diff --git a/man/qc_taxa_values_habit.Rd b/man/qc_taxa_values_habit.Rd new file mode 100644 index 0000000..c966a94 --- /dev/null +++ b/man/qc_taxa_values_habit.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_values_habit.R +\name{qc_taxa_values_habit} +\alias{qc_taxa_values_habit} +\title{QC Habit Values} +\usage{ +qc_taxa_values_habit( + df_data, + col_habit = "Habit", + valid_vals = c("BU", "CB", "CN", "SK", "SP", "SW") +) +} +\arguments{ +\item{df_data}{A data frame containing taxa data.} + +\item{col_habit}{The column containing Habit values. Default = "Habit"} + +\item{valid_vals}{Accepted values. +Default = c(BU, CB, CN, SK, SP, SW)} +} +\value{ +A data frame with col_habit values, occurrence (n), and if valid +(TRUE/FALSE). Additional values from valid_vals are appended. +} +\description{ +Performs basic QC of the Habit column against a list of accepted values. +} +\details{ +Returns a data frame of values from the input with counts (column = n) from +the Habit column and whether the value appeared in valid values (column = +valid). Values in the accepted values not appearing in the input are appended +to the bottom of the returned data frame. These values are marked as n = NA +and valid = TRUE. + +The default accepted values are the abbreviations used by +metric.values(); BU, CB, CN, SK, SP, and SW. Values separated with "," are +first split apart and spaces removed.
+} +\examples{ +# Values, Default +qc_taxa_values_habit(data_benthos_MBSS) + +# Values, User +qc_taxa_values_habit(data_benthos_MBSS, + "Habit", + valid_vals = c("bu", "cb", "cn", "dv", "sk", "sp", "sw")) + +} From e68d838186dd1720d2aed46b18d3ef7dae531e4d Mon Sep 17 00:00:00 2001 From: Erik Leppo Date: Wed, 18 Mar 2026 22:53:47 -0400 Subject: [PATCH 4/4] v1.2.4.9012 * feature: Add qc_taxa_values_tolval function --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS | 6 +++- NEWS.md | 6 +++- NEWS.rmd | 4 +++ R/qc_taxa_match_official.R | 24 ++++++------- R/qc_taxa_values_ffg.R | 6 ++-- R/qc_taxa_values_habit.R | 6 ++-- R/qc_taxa_values_tolval.R | 68 ++++++++++++++++++++++++++++++++++++ man/qc_taxa_values_tolval.Rd | 40 +++++++++++++++++++++ 10 files changed, 142 insertions(+), 21 deletions(-) create mode 100644 R/qc_taxa_values_tolval.R create mode 100644 man/qc_taxa_values_tolval.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 9261dcf..753539b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: BioMonTools Type: Package Title: Biomonitoring and Bioassessment Calculations -Version: 1.2.4.9011 +Version: 1.2.4.9012 Authors@R: c( person("Erik W.", "Leppo", email="Erik.Leppo@tetratech.com", diff --git a/NAMESPACE b/NAMESPACE index 625546f..e58d646 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -17,6 +17,7 @@ export(qc_taxa) export(qc_taxa_match_official) export(qc_taxa_values_ffg) export(qc_taxa_values_habit) +export(qc_taxa_values_tolval) export(rarify) export(taxa_translate) importFrom(rlang,.data) diff --git a/NEWS b/NEWS index 6f25e40..63b528f 100644 --- a/NEWS +++ b/NEWS @@ -4,10 +4,14 @@ NEWS - #> Last Update: 2026-03-18 22:19:01.612552 + #> Last Update: 2026-03-18 22:51:30.206983 # Version History +## Changes in version 1.2.4.9012 (2026-03-18) + +- feature: Add qc_taxa_values_tolval function + ## Changes in version 1.2.4.9011 (2026-03-18) - refactor: Add default column name to qc_taxa_values_ffg diff --git a/NEWS.md b/NEWS.md index 6f25e40..63b528f 100644 --- 
a/NEWS.md +++ b/NEWS.md @@ -4,10 +4,14 @@ NEWS - #> Last Update: 2026-03-18 22:19:01.612552 + #> Last Update: 2026-03-18 22:51:30.206983 # Version History +## Changes in version 1.2.4.9012 (2026-03-18) + +- feature: Add qc_taxa_values_tolval function + ## Changes in version 1.2.4.9011 (2026-03-18) - refactor: Add default column name to qc_taxa_values_ffg diff --git a/NEWS.rmd b/NEWS.rmd index 0bd8433..902fa8d 100644 --- a/NEWS.rmd +++ b/NEWS.rmd @@ -18,6 +18,10 @@ cat(paste0("Last Update: ",Sys.time())) # Version History +## Changes in version 1.2.4.9012 (2026-03-18) + +* feature: Add qc_taxa_values_tolval function + ## Changes in version 1.2.4.9011 (2026-03-18) * refactor: Add default column name to qc_taxa_values_ffg diff --git a/R/qc_taxa_match_official.R b/R/qc_taxa_match_official.R index c90904f..d996337 100644 --- a/R/qc_taxa_match_official.R +++ b/R/qc_taxa_match_official.R @@ -98,7 +98,7 @@ qc_taxa_match_official <- function(DF_User, DF_Official = NULL, fun.Community = NULL, useOfficialTaxaInfo = "only_Official") { - # + # DEBUG ---- boo_DEBUG <- FALSE if(boo_DEBUG==TRUE){##IF~boo_DEBUG~START # # # Bugs @@ -109,18 +109,18 @@ qc_taxa_match_official <- function(DF_User, # # }##IF~boo_DEBUG~END - # Col Suffixes + # Col Suffixes---- sfx_Official <- "_Official" sfx_NonOfficial <- "_NonOfficial" - # QC + # QC---- ## inputs as data frames (just in case have a tibble) DF_User <- data.frame(DF_User) # DF_Official handled when checking URL ## Community, convert community to lowercase fun.Community <- tolower(fun.Community) - # Taxa list, official + # Taxa list, official---- # run the proper sub function if (fun.Community == "bugs") {##IF.START url_mt <- "https://github.com/leppott/MBSStools_SupportFiles/raw/master/Data/CHAR_Bugs.csv" @@ -150,7 +150,7 @@ qc_taxa_match_official <- function(DF_User, stop(msg) }##IF ~ fun.community ~ END - # Master Taxa + # Master Taxa---- # Download "official" list if none provided if(is.null(DF_Official)){ # 404 Error if file not found 
@@ -159,21 +159,21 @@ qc_taxa_match_official <- function(DF_User, df_mt <- data.frame(DF_Official) }## IF ~ is.null(DF_Official) ~ END - # Names to upper case + # Names to upper case---- names(DF_User) <- toupper(names(DF_User)) names(df_mt) <- toupper(names(df_mt)) # col_mt <- toupper(col_mt) col_taxon <- toupper(col_taxon) - # QC check for col_taxon + # QC check for col_taxon---- if (!col_taxon %in% names(DF_User)) { stop(paste0("DF_User missing column; ", col_taxon)) } ## IF, stop - # taxa names to ALL CAPS for bugs and fish + # taxa names to ALL CAPS for bugs and fish---- DF_User[, col_taxon] <- toupper(DF_User[, col_taxon]) - # Check Numbers + # Check Numbers---- taxa_user <- sort(unique(DF_User[, col_taxon])) taxa_user_n <- length(taxa_user) boo_taxa_match <- taxa_user %in% df_mt[, col_taxon] @@ -199,7 +199,7 @@ qc_taxa_match_official <- function(DF_User, - # Merge and Munge Columns + # Merge and Munge Columns---- ## Columns # col_mt_nonTaxon <- col_mt[!(col_mt %in% col_taxon)] # col_mt_nonOfficial <- paste0(col_mt_nonTaxon, sfx_NonOfficial) @@ -276,7 +276,7 @@ qc_taxa_match_official <- function(DF_User, stop(msg) } - # QC + # QC---- ## Missing Columns ## Valid values @@ -288,7 +288,7 @@ qc_taxa_match_official <- function(DF_User, # Fish = - # Output + # Output---- return(df_result) # }##FUNCTION ~ qc_taxa ~ END diff --git a/R/qc_taxa_values_ffg.R b/R/qc_taxa_values_ffg.R index b432e66..cc86a9e 100644 --- a/R/qc_taxa_values_ffg.R +++ b/R/qc_taxa_values_ffg.R @@ -49,7 +49,7 @@ qc_taxa_values_ffg <- function(df_data, "SH", "XY")) { - # QC + # QC---- if (!rlang::as_string(col_ffg) %in% names(df_data)) { stop("Column '", rlang::as_string(col_ffg), @@ -60,7 +60,7 @@ qc_taxa_values_ffg <- function(df_data, df_valid_vals <- as.data.frame(valid_vals) names(df_valid_vals) <- col_ffg - # occurrence + # occurrence---- df_match <- df_data |> # occurrence dplyr::count(.data[[col_ffg]], name = "n") |> @@ -74,7 +74,7 @@ qc_taxa_values_ffg <- function(df_data, 
dplyr::mutate(valid = dplyr::case_when(is.na(valid) ~ TRUE, .default = valid)) - # Result + # Result---- return(df_match) }## FUNCTION ~ END diff --git a/R/qc_taxa_values_habit.R b/R/qc_taxa_values_habit.R index b820d2f..2c9d2ef 100644 --- a/R/qc_taxa_values_habit.R +++ b/R/qc_taxa_values_habit.R @@ -39,7 +39,7 @@ qc_taxa_values_habit <- function(df_data, "SP", "SW")) { - # QC + # QC---- if (!rlang::as_string(col_habit) %in% names(df_data)) { stop("Column '", rlang::as_string(col_habit), @@ -50,7 +50,7 @@ qc_taxa_values_habit <- function(df_data, df_valid_vals <- as.data.frame(valid_vals) names(df_valid_vals) <- col_habit - # occurrence + # occurrence---- df_match <- df_data |> # get all values, split on comma with optional surrounding spaces tidyr::separate_rows(.data[[col_habit]], sep = "\\s*,\\s*") |> @@ -68,7 +68,7 @@ qc_taxa_values_habit <- function(df_data, dplyr::mutate(valid = dplyr::case_when(is.na(valid) ~ TRUE, .default = valid)) - # Result + # Result---- return(df_match) }## FUNCTION ~ END diff --git a/R/qc_taxa_values_tolval.R b/R/qc_taxa_values_tolval.R new file mode 100644 index 0000000..0692375 --- /dev/null +++ b/R/qc_taxa_values_tolval.R @@ -0,0 +1,68 @@ +#' QC Tolerance Values +#' +#' Performs basic QC of the Tolerance Value column. +#' +#' Returns a data frame of values from the input with counts (column = n) from +#' the TolVal column and whether the value is within the valid range (column = +#' valid). +#' +#' The default accepted values are 0 - 10. +#' +#' @param df_data A data frame containing taxa data. +#' @param col_tolval The column containing Tolerance Values. Default = "TolVal" +#' @param valid_min Valid values range minimum. Default = 0. +#' @param valid_max Valid values range maximum. Default = 10. +#' +#' @return A data frame with col_tolval values, occurrence (n), and if valid +#' (TRUE/FALSE).
+#' +#' @examples +#' qc_taxa_values_tolval(data_benthos_MBSS, "TOLVAL") +#' +#' @export +qc_taxa_values_tolval <- function(df_data, + col_tolval = "TolVal", + valid_min = 0, + valid_max = 10) { + + # QC---- + ## col_tolval in df_data + if (!rlang::as_string(col_tolval) %in% names(df_data)) { + stop("Column '", + rlang::as_string(col_tolval), + "' is missing from input data.", call. = FALSE) + }# IF ~ col_tolval exists + + ## col_tolval is numeric + if (!is.numeric(df_data[[rlang::as_string(col_tolval)]])) { + stop("Column '", + rlang::as_string(col_tolval), + "' must be numeric.", call. = FALSE) + }## IF ~ col_tolval is numeric + + ## valid_min is numeric + if (!is.numeric(valid_min)) { + stop("'valid_min' must be numeric.", call. = FALSE) + }## IF ~ valid_min is numeric + + ## valid_max is numeric + if (!is.numeric(valid_max)) { + stop("'valid_max' must be numeric.", call. = FALSE) + }## IF ~ valid_max is numeric + + # occurrence---- + df_match <- df_data |> + # occurrence + dplyr::count(.data[[col_tolval]], name = "n") |> + # valid + ## T/F + dplyr::mutate(valid = dplyr::case_when( + .data[[col_tolval]] >= valid_min & + .data[[col_tolval]] <= valid_max ~ TRUE, + .default = FALSE)) + + # Result---- + return(df_match) + +}## FUNCTION ~ END + diff --git a/man/qc_taxa_values_tolval.Rd b/man/qc_taxa_values_tolval.Rd new file mode 100644 index 0000000..fef4473 --- /dev/null +++ b/man/qc_taxa_values_tolval.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/qc_taxa_values_tolval.R +\name{qc_taxa_values_tolval} +\alias{qc_taxa_values_tolval} +\title{QC Tolerance Values} +\usage{ +qc_taxa_values_tolval( + df_data, + col_tolval = "TolVal", + valid_min = 0, + valid_max = 10 +) +} +\arguments{ +\item{df_data}{A data frame containing taxa data.} + +\item{col_tolval}{The column containing Tolerance Values. Default = "TolVal"} + +\item{valid_min}{Valid values range minimum.
Default = 0.} + +\item{valid_max}{Valid values range maximum. Default = 10.} +} +\value{ +A data frame with col_tolval values, occurrence (n), and if valid +(TRUE/FALSE). +} +\description{ +Performs basic QC of the Tolerance Value column. +} +\details{ +Returns a data frame of values from the input with counts (column = n) from +the TolVal column and whether the value is within the valid range (column = +valid). + +The default accepted values are 0 - 10. +} +\examples{ +qc_taxa_values_tolval(data_benthos_MBSS, "TOLVAL") + +}