diff --git a/NAMESPACE b/NAMESPACE index f77623a..0f8fe2b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,9 @@ export(claims_by_first_report) export(claims_by_link_ratio) export(claims_by_wait_time) +export(fill_addl) +export(is.pol_char) +export(pol_char) export(policies_grow) export(policies_renew) export(policies_simulate) diff --git a/R/PolicyChars.R b/R/PolicyChars.R new file mode 100644 index 0000000..35182e5 --- /dev/null +++ b/R/PolicyChars.R @@ -0,0 +1,137 @@ +#' @title Policy Characteristics +#' @name pol_char +#' @description A structure for storing policy characteristics and how frequently they occur. +#' @param char_levels A named list of vectors. The vector names are characteristic names and the vectors contain all possible levels of each characteristic +#' @param frequencies A names list of numeric vectors. The names must match those in char_levels. The vectors must be positive numbers which indicate relative freqeuncy of each level within each characteristic. If the values in a vector do not sum to 1.0 then they will be off-balanced in the output. +#' @returns \code{pol_char()} checks that the inputs conform to requirements and returns a pol_char object which is a list containing char_list and freq_list +#' @details \code{pol_char()} does not do any calculations. It checks that the list which is input has a valid +#' structure to be a pol_char object. The benefit is that functions which use pol_char objects do not have to run +#' tests to check validity of the structure. The only have to do \code{is.pol_char(obj)} +#' @export +#' @examples +#' test_char <- pol_char( +#' list(state = c('AL','AR','AZ') +#' , line = c('auto', 'home') +#' , uw_score = LETTERS[1:5]) +#' ,list(state = c(.5, .2, .3) +#' , line = c(.6, .4) +#' , uw_score = c(.15, .25, .4, .2, .1)) +#' ) +#' +#' test_char <- pol_char( +#' list(state = c('AL','AR','AZ') +#' , line = c('auto', 'home') +#' , uw_score = LETTERS[1:5]) +#' ,list(state = c(5, 2, 3) +#' , line = c(6, 4) +#' , uw_score = c(15, 25, 40, 20, 10)) +#' ) +pol_char <- function(char_levels, frequencies) { + assertthat::assert_that(is.list(char_levels)) + assertthat::assert_that(is.list(frequencies)) + assertthat::assert_that(length(char_levels) == length(frequencies)) + assertthat::assert_that(all(names(char_levels) != '')) + assertthat::assert_that(all(sapply(char_levels, length) == sapply(frequencies, length))) + assertthat::assert_that(all(names(char_levels) == names(frequencies))) + assertthat::assert_that(length(names(char_levels)) == length(unique(names(char_levels)))) + assertthat::assert_that(all(sapply(frequencies, is.numeric))) + # check that levels of each characteristic are unique + assertthat::assert_that(all(sapply(char_levels, function(v) length(v) == length(unique(v))))) + return(structure( + list(char_list = char_levels + , freq_list = frequencies) + , class = 'pol_char') + ) +} + +#' is.pol_char +#' +#' @param plc A \code{pol_list} object +#' @returns Boolean value indicating if plc is a pol_char object +#' @export +is.pol_char <- function(plc) inherits(plc, 'pol_char') + +#' @title Fill the additional characteristics of a simulated policy table, or create +#' a simulated policy table filling in additional characteristics from a pol_char object +#' +#' @param plc a \code{pol_char} object used to fill additional fields +#' @param tbl_policy A simulated policy table. If NULL then one will be created +#' @param ... Used to for parameters to call \code{policies_simulate} if needed. +#' Do not include \code{additional_columns}; those will be filled with pol_char +#' @export +fill_addl <- function(plc, tbl_policy = NULL, ...) { + assertthat::assert_that(is.pol_char(plc)) + if (is.null(tbl_policy)) { + misc_params <- names(list(...)) + if (!('n' %in% misc_params)) { + stop('When tbl_policy is not supplied, a value for "n" must be supplied to create a simulated policy table') + } + if (!(any(c('policy_years', 'num_years') %in% misc_params))) { + stop("At least one of policy_years or num_years must be given.") + } + addl_cols <- list() + for (k in 1:length(plc$char_list)) { + addl_cols <- append(addl_cols, list(NA)) + } + names(addl_cols) <- names(plc$char_list) + tbl_policy <- policies_simulate(..., additional_columns = addl_cols) + } + assertthat::assert_that(nrow(tbl_policy) > 0) + + plc_tbl_nms <- names(plc$char_list) + pol_tbl_nms <- grep('policy_effective_date|policy_expiration_date|exposure|policyholder_id' + , names(tbl_policy), value = TRUE, invert = TRUE) + + if (length(setdiff(plc_tbl_nms, pol_tbl_nms)) > 0) { + message(paste('The following characteristics will be added to the policy table:' + , paste(setdiff(plc_tbl_nms, pol_tbl_nms), collapse = ', '), '\n')) + for (nm in setdiff(plc_tbl_nms, pol_tbl_nms)) { + tbl_policy[[nm]] <- NA + } + } + + if (length(setdiff(pol_tbl_nms, plc_tbl_nms)) > 0) { + warning(paste('The following policy characteristics are not affected:' + , paste(setdiff(pol_tbl_nms, plc_tbl_nms), collapse = ', '))) + } + + for (nm in plc_tbl_nms) { + tbl_policy[[nm]] <- sample(x = plc$char_list[[nm]] + , size = nrow(tbl_policy) + , replace = TRUE + , prob = plc$freq_list[[nm]]) + } + + return(tbl_policy) +} + +#' @name print.pol_char +#' @param plc A \code{pol_char} object +#' @param ... optional parameters for future use +print.pol_char <- function(plc, ...) { + assertthat::assert_that(is.pol_char(plc)) + charnames <- names(plc$char_list) + top_levels <- sapply(plc$char_list, + function(v) { + top <- paste(v[1:(min(3, length(v)))], collapse = ', ') + top <- ifelse(length(v) > 3, paste0(top, ', ...'), top) + } + ) + top_frq <- sapply(plc$freq_list, + function(v) { + top <- paste(v[1:(min(3, length(v)))], collapse = ', ') + top <- ifelse(length(v) > 3, paste0(top, ', ...'), top) + } + ) + top_levels <- as.vector(top_levels) + top_frq <- as.vector(top_frq) + cat('Characteric names and top three levels:\n') + result <- ascii::ascii(data.frame( + char_name = charnames + , levels = top_levels + , frequencies = top_frq + ), include.rownames = FALSE + , type = 'rest' + , header = F) + print(result) +} diff --git a/R/SimulatePolicies.R b/R/SimulatePolicies.R index 49f3caf..16a6630 100644 --- a/R/SimulatePolicies.R +++ b/R/SimulatePolicies.R @@ -230,7 +230,7 @@ FixGrowthVector <- function(vecIn, numRenewals, vec_kind) #' @param retention A vector indicating loss of policies #' @param growth A vector indicating the rate of growth of policies #' @param start_id Integer of the first number in the policy ID sequence -#' @param additional_columns A list of addtional column names and values +#' @param additional_columns A list of additional column names and values #' #' @return A data frame of policy data #' diff --git a/man/fill_addl.Rd b/man/fill_addl.Rd new file mode 100644 index 0000000..2a01786 --- /dev/null +++ b/man/fill_addl.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PolicyChars.R +\name{fill_addl} +\alias{fill_addl} +\title{Fill the additional characteristics of a simulated policy table, or create +a simulated policy table filling in additional characteristics from a pol_char object} +\usage{ +fill_addl(plc, tbl_policy = NULL, ...) +} +\arguments{ +\item{plc}{a \code{pol_char} object used to fill additional fields} + +\item{tbl_policy}{A simulated policy table. If NULL then one will be created} + +\item{...}{Used to for parameters to call \code{policies_simulate} if needed. +Do not include \code{additional_columns}; those will be filled with pol_char} +} +\description{ +Fill the additional characteristics of a simulated policy table, or create +a simulated policy table filling in additional characteristics from a pol_char object +} diff --git a/man/is.pol_char.Rd b/man/is.pol_char.Rd new file mode 100644 index 0000000..3361f08 --- /dev/null +++ b/man/is.pol_char.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PolicyChars.R +\name{is.pol_char} +\alias{is.pol_char} +\title{is.pol_char} +\usage{ +is.pol_char(plc) +} +\arguments{ +\item{plc}{A \code{pol_list} object} +} +\value{ +Boolean value indicating if plc is a pol_char object +} +\description{ +is.pol_char +} diff --git a/man/pol_char.Rd b/man/pol_char.Rd new file mode 100644 index 0000000..b9c8675 --- /dev/null +++ b/man/pol_char.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PolicyChars.R +\name{pol_char} +\alias{pol_char} +\title{Policy Characteristics} +\usage{ +pol_char(char_levels, frequencies) +} +\arguments{ +\item{char_levels}{A named list of vectors. The vector names are characteristic names and the vectors contain all possible levels of each characteristic} + +\item{frequencies}{A names list of numeric vectors. The names must match those in char_levels. The vectors must be positive numbers which indicate relative freqeuncy of each level within each characteristic. If the values in a vector do not sum to 1.0 then they will be off-balanced in the output.} +} +\value{ +\code{pol_char()} checks that the inputs conform to requirements and returns a pol_char object which is a list containing char_list and freq_list +} +\description{ +A structure for storing policy characteristics and how frequently they occur. +} +\details{ +\code{pol_char()} does not do any calculations. It checks that the list which is input has a valid +structure to be a pol_char object. The benefit is that functions which use pol_char objects do not have to run +tests to check validity of the structure. The only have to do \code{is.pol_char(obj)} +} +\examples{ +test_char <- pol_char( + list(state = c('AL','AR','AZ') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + ,list(state = c(.5, .2, .3) + , line = c(.6, .4) + , uw_score = c(.15, .25, .4, .2, .1)) +) + +test_char <- pol_char( + list(state = c('AL','AR','AZ') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + ,list(state = c(5, 2, 3) + , line = c(6, 4) + , uw_score = c(15, 25, 40, 20, 10)) +) +} diff --git a/man/policies_simulate.Rd b/man/policies_simulate.Rd index 8d82e63..da088de 100644 --- a/man/policies_simulate.Rd +++ b/man/policies_simulate.Rd @@ -30,7 +30,7 @@ policies_simulate( \item{start_id}{Integer of the first number in the policy ID sequence} -\item{additional_columns}{A list of addtional column names and values} +\item{additional_columns}{A list of additional column names and values} } \value{ A data frame of policy data diff --git a/tests/testthat/test-fill_addl.R b/tests/testthat/test-fill_addl.R new file mode 100644 index 0000000..6220913 --- /dev/null +++ b/tests/testthat/test-fill_addl.R @@ -0,0 +1,46 @@ +context("claims_by_first_report") + +test_that("fill_addl", { + + dfPolicy <- policies_simulate(100, num_years = 3) + dfPolicy2 <- policies_simulate(100, num_years = 3 + , additional_columns = list( + state = NA + , line = NA + , uw_score = NA + , ntr = NA + )) + dfPolicy3 <- policies_simulate(100, num_years = 3 + , additional_columns = list( + state = NA + , line = NA + )) + + + test_chars <- pol_char( + list(state = c('AL','AR','AZ') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + ,list(state = c(.5, .2, .3) + , line = c(.6, .4) + , uw_score = c(.15, .25, .4, .2, .1)) + ) + + less_chars <- pol_char( + list(state = c('AL','AR','AZ') + , line = c('auto', 'home') + ) + ,list(state = c(.5, .2, .3) + , line = c(.6, .4) + ) + ) + + expect_silent(imaginator::fill_addl(plc = test_chars, n=50, num_years = 2)) + expect_warning(imaginator::fill_addl(less_chars, dfPolicy2)) + expect_message(imaginator::fill_addl(test_chars, dfPolicy3)) + expect_error(imaginator::fill_addl(test_chars, n=50)) + expect_error(imaginator::fill_addl(test_chars, num_years=50)) + + + +}) diff --git a/tests/testthat/test-pol_char.R b/tests/testthat/test-pol_char.R new file mode 100644 index 0000000..3dc41bd --- /dev/null +++ b/tests/testthat/test-pol_char.R @@ -0,0 +1,43 @@ +context("claims_by_first_report") + +test_that("test inputs", { + + expect_error(imaginator::pol_char(c('state','line'))) + + expect_silent(imaginator::pol_char( + list(state = c('AL','AR','AZ') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + ,list(state = c(.5, .2, .3) + , line = c(.6, .4) + , uw_score = c(.15, .25, .4, .2, .1)) + )) + + expect_error(imaginator::pol_char( + list(state = c('AL','AR','AZ','DE') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + ,list(state = c(.5, .2, .3) + , line = c(.6, .4) + , uw_score = c(.15, .25, .4, .2, .1)) + )) + + expect_error(imaginator::pol_char( + list(c('AL','AR','AZ') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + , list(c(.5, .2, .3) + , line = c(.6, .4) + , uw_score = c(.15, .25, .4, .2, .1)) + )) + + expect_error(imaginator::pol_char( + list(state = c('AL','AR','AZ') + , line = c('auto', 'home') + , uw_score = LETTERS[1:5]) + , list(state = c(.5, .2, .3) + , line = c('.6', '.4') + , uw_score = c(.15, .25, .4, .2, .1)) + )) + +}) diff --git a/vignettes/PolCharacteristics.Rmd b/vignettes/PolCharacteristics.Rmd new file mode 100644 index 0000000..4563ede --- /dev/null +++ b/vignettes/PolCharacteristics.Rmd @@ -0,0 +1,65 @@ +--- +title: "Policy Characteristics" +author: "Steve Fiete" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Policy Characteristics} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +The policy characteristics class (pol_char) provides a way to create simulated policies with additional characteristics which are not uniform throughout the data set, but instead are generated randomly with a specified frequency. It also can be used to update or add additional characteristics to existing simulated policy tables. A modified example from the SimulatedPolicies vignette is used below. + +Here a `pol_char` object is created which specifies policy additional characteristics will be state, line, and uw_score, and how frequently each is expected to occur in randomly drawn samples. We can add those characteristics to an existing policy table. +```{r} +library(imaginator) +set.seed(42) +tbl_policies <- policies_simulate( + n = 30, + num_years = 5) + +sample_chars <- pol_char( + list( + state = c('AL' ,'AR', 'AZ'), + line = c('auto', 'home'), + uw_score = LETTERS[1:5] + ), + list( + state = c(.5, .2, .3) + , line = c(.6, .4) + , uw_score = c(.15, .25, .4, .2, .1) + ) +) + +tbl_policies <- fill_addl(plc = sample_chars, + tbl_policy = tbl_policies) + +``` +The message lets us know that new columns are being added to the policy table. If those columns had already been there, then there would be no message. It is also possible to create and fill a polciy table in a single function call. + +```{r} +tbl_policies <- fill_addl(plc = sample_chars, + n = 30, + num_years = 5) + +head(tbl_policies, 10) +``` + +Any parameters allowed in `policies_simulate` can be allowed in fill_addl. + +`fill_addl` adds columns to an existing policy table. Suppose we wanted to simulate data on the marketing channel for each policy, which could be "agent", "direct", or "affiliated". + +```{r} + +mkt_char <- pol_char( + list(mkting = c('agent', 'direct', 'affiliated')), + list(mkting = c(.7, .2, .1))) + + +tbl_policies <- fill_addl(mkt_char, tbl_policies) + +head(tbl_policies, 10) +``` + +Here there is a warning not not all of the additional characteristics were affected since they were not included in the `pol_char` object. It is a warning rather than simply a message to help the user catch typos.