Computes metrics (chosen according to estimation_type) in a cumulative, window-function style over the rules of the rulelist, in their existing order and ignoring the keys.

# S3 method for class 'rulelist'
calculate(x, metrics_to_exclude = NULL, ...)



x: A rulelist


metrics_to_exclude: (character vector) Names of metrics to exclude


...: Named list of custom metrics. See 'details'.


A dataframe of metrics with a rule_nbr column.


Default Metrics

These metrics are calculated by default:

  • cumulative_coverage: For nth rule in the rulelist, number of distinct row_nbrs (of new_data) covered by nth and all preceding rules (in order). In weighted case, we sum the weights corresponding to the distinct row_nbrs.

  • cumulative_overlap: Up to the nth rule in the rulelist, the number of distinct row_nbrs (of new_data) already covered by some preceding rule (in order). In the weighted case, we sum the weights corresponding to the distinct row_nbrs.

For classification:

  • cumulative_accuracy: For the nth rule in the rulelist, the fraction of row_nbrs covered by the nth and all preceding rules (in order) for which the RHS matches the y_name column (of new_data). In the weighted case, weighted accuracy is computed.

For regression:

  • cumulative_RMSE: For the nth rule in the rulelist, the weighted RMSE of all predictions (RHS) made by the nth rule and all preceding rules.

Custom metrics

Custom metrics to be computed should be passed as a named list of function(s) in `...`. Each custom metric function should take these arguments, in this order: rulelist, new_data, y_name, weight. Each custom metric function should return a numeric vector whose length equals the number of rows of the rulelist.

model_c5  = C50::C5.0(Attrition ~., data = modeldata::attrition, rules = TRUE)
tidy_c5   = tidy(model_c5) %>%
            set_validation_data(modeldata::attrition, "Attrition") %>%

# calculate default metrics (classification)
#> # A tidytable: 24 × 4
#>    rule_nbr cumulative_coverage cumulative_overlap cumulative_accuracy
#>       <int>               <dbl>              <dbl>               <dbl>
#>  1        1                  16                  0               1    
#>  2        2                 537                  0               0.944
#>  3        3                 545                  5               0.945
#>  4        4                 656                 89               0.941
#>  5        5                 664                 89               0.941
#>  6        6                 667                 94               0.942
#>  7        7                 681                 94               0.941
#>  8        8                 687                 94               0.942
#>  9        9                 699                 95               0.941
#> 10       10                1425                633               0.900
#> # ℹ 14 more rows

model_rpart = rpart::rpart(MonthlyIncome ~., data = modeldata::attrition)
tidy_rpart  =
  tidy(model_rpart) %>%
  set_validation_data(modeldata::attrition, "MonthlyIncome") %>%

# calculate default metrics (regression)
#> # A tidytable: 6 × 4
#>   rule_nbr cumulative_coverage cumulative_overlap cumulative_RMSE
#>      <int>               <dbl>              <dbl>           <dbl>
#> 1        1                 543                  0            748.
#> 2        2                1077                  0           1125.
#> 3        3                1255                  0           1153.
#> 4        4                1397                  0           1198.
#> 5        5                1437                  0           1191.
#> 6        6                1470                  0           1182.

# calculate default metrics with a custom metric
#' custom function to get cumulative MAE
get_cumulative_MAE = function(rulelist, new_data, y_name, weight){

  priority_df =
    rulelist %>%
    select(rule_nbr) %>%
    mutate(priority = 1:nrow(rulelist)) %>%
    select(rule_nbr, priority)

  pred_df =
    predict(rulelist, new_data) %>%
    left_join(priority_df, by = "rule_nbr") %>%
    mutate(weight = local(weight)) %>%
    select(rule_nbr, row_nbr, weight, priority)

  new_data2 =
    new_data %>%
    mutate(row_nbr = 1:n()) %>%
    select(all_of(c("row_nbr", y_name)))

  rmse_till_rule = function(rn){

    if (is.character(rulelist$RHS)) {
      inter_df =
        pred_df %>%
        tidytable::filter(priority <= rn) %>%
        left_join(mutate(new_data, row_nbr = 1:n()), by = "row_nbr") %>%
        left_join(select(rulelist, rule_nbr, RHS), by = "rule_nbr") %>%
        nest(.by = c("RHS", "rule_nbr", "row_nbr", "priority", "weight")) %>%
        mutate(RHS = purrr::map2_dbl(RHS,
                                     ~ eval(parse(text = .x), envir = .y)
               ) %>%
    } else {

      inter_df =
        pred_df %>%
        tidytable::filter(priority <= rn) %>%
        left_join(new_data2, by = "row_nbr") %>%
        left_join(select(rulelist, rule_nbr, RHS), by = "rule_nbr")

    inter_df %>%
      summarise(rmse = MetricsWeighted::mae(RHS,
                                             na.rm = TRUE
                ) %>%

  res = purrr::map_dbl(1:nrow(rulelist), rmse_till_rule)

          metrics_to_exclude = NULL,
          list("cumulative_mae" = get_cumulative_MAE)
#> # A tidytable: 6 × 5
#>   rule_nbr cumulative_coverage cumulative_overlap cumulative_RMSE cumulative_mae
#>      <int>               <dbl>              <dbl>           <dbl>          <dbl>
#> 1        1                 543                  0            748.           577.
#> 2        2                1077                  0           1125.           812.
#> 3        3                1255                  0           1153.           860.
#> 4        4                1397                  0           1198.           907.
#> 5        5                1437                  0           1191.           905.
#> 6        6                1470                  0           1182.           896.