BGG Models
  • Predictions
  • Methodology
  • Pipeline

On this page

  • 1 Load Models
  • 2 Load Games
  • 3 Predictions
  • 4 Save Predictions

Pipeline

Author

Phil Henrickson

Published

May 28, 2025

Show the code
# Load required packages
library(dplyr)
library(bggUtils)
library(tidymodels)
library(vetiver)
library(targets)
library(qs2)
library(reactable)
library(fs)

# Load source code: source the R scripts under "src" so the project's helper
# functions are available to the chunks below
tar_source("src")

# Project settings read through the config package; `config$bucket` and
# `config$board` are used further down when connecting to storage
config <- config::get()

# Authenticate to GCS
# NOTE(review): authenticate_to_gcs() is not defined on this page; presumably
# it comes from the scripts sourced above - confirm
authenticate_to_gcs()
Show the code
# Draw the pipeline's dependency graph, restricted to targets.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
targets::tar_visnetwork(targets_only = TRUE)

1 Load Models

Show the code
# Pull the objects this page needs out of the pipeline store: the hurdle
# threshold plus the vetiver metadata recorded for each of the four models
tar_load(hurdle_threshold)
tar_load(averageweight_vetiver)
tar_load(average_vetiver)
tar_load(usersrated_vetiver)
tar_load(hurdle_vetiver)

# Connect to the model board configured for this project
model_board <- gcs_model_board(bucket = config$bucket, prefix = config$board)

# Read each fitted model from the board using its metadata
averageweight_fit <- pin_read_model(model_board, averageweight_vetiver)
average_fit <- pin_read_model(model_board, average_vetiver)
usersrated_fit <- pin_read_model(model_board, usersrated_vetiver)
hurdle_fit <- pin_read_model(model_board, hurdle_vetiver)
Show the code
#' Build a one-row summary of a pinned model
#'
#' @param model_name Label stored in the `model_name` column.
#' @param vetiver_obj Object whose `hash` and `version` elements are reported.
#' @param board Object whose `bucket` and `prefix` elements are reported.
#' @return A tibble with columns `bucket`, `path`, `model_name`, `hash` and
#'   `version`.
extract_model_metadata <- function(model_name, vetiver_obj, board) {
    # Where the model lives
    board_bucket <- board$bucket
    board_prefix <- board$prefix

    # Which stored copy of the model is being described
    pin_hash <- vetiver_obj$hash
    pin_version <- vetiver_obj$version

    tibble(
        bucket = board_bucket,
        path = board_prefix,
        model_name = model_name,
        hash = pin_hash,
        version = pin_version
    )
}

# One metadata row per model, stacked in the order the models are listed
model_metadata <- bind_rows(
    extract_model_metadata(
        model_name = "averageweight",
        vetiver_obj = averageweight_vetiver,
        board = model_board
    ),
    extract_model_metadata(
        model_name = "average",
        vetiver_obj = average_vetiver,
        board = model_board
    ),
    extract_model_metadata(
        model_name = "usersrated",
        vetiver_obj = usersrated_vetiver,
        board = model_board
    ),
    extract_model_metadata(
        model_name = "hurdle",
        vetiver_obj = hurdle_vetiver,
        board = model_board
    )
)

# Render the metadata as a markdown table with reader-friendly headers
knitr::kable(
    model_metadata,
    caption = "Model Metadata",
    col.names = c("Bucket", "Prefix", "Model", "Hash", "Version"),
    format = "markdown"
)
Model Metadata
Bucket Prefix Model Hash Version
bgg_models dev/model/ averageweight 79313d79c1944fc8 20250304T221625Z-79313
bgg_models dev/model/ average 66c6d76df420a7cf 20250304T221529Z-66c6d
bgg_models dev/model/ usersrated 06e70d3dd6bb333a 20250304T221420Z-06e70
bgg_models dev/model/ hurdle 9de9a43cd3d69c74 20250304T221240Z-9de9a

2 Load Games

Show the code
#' List the stored entries for a single object in a GCS bucket
#'
#' Lists the objects under `prefix` in `bucket` and keeps only the rows whose
#' `name` is exactly `obj`.
#'
#' @param obj Exact object name to keep from the listing.
#' @param bucket Name of the bucket to list.
#' @param prefix Prefix passed to the listing call to narrow what is returned.
#' @param versions Passed through to
#'   `googleCloudStorageR::gcs_list_objects()`; `TRUE` asks for every stored
#'   version of an object rather than only the live one.
#' @param detail Level of detail, passed through to
#'   `googleCloudStorageR::gcs_list_objects()`.
#' @return The listing returned by `gcs_list_objects()`, filtered to `obj`.
list_gcs_objs <- function(
    obj = "raw/objects/games",
    bucket = "bgg_data",
    prefix = "raw/objects/games",
    versions = TRUE,
    detail = "full"
) {
    googleCloudStorageR::gcs_list_objects(
        bucket = bucket,
        prefix = prefix,
        # Forward the caller's choice instead of always requesting versions
        versions = versions,
        detail = detail
    ) |>
        filter(name == obj)
}

# Look up every stored version of the games object, newest first
games_objs <- list_gcs_objs() |>
    select(bucket, name, generation, size, updated) |>
    arrange(desc(updated))

# Generation ids, one per stored version of the games object, kept in the
# same newest-first order
games_generations <- unique(games_objs[["generation"]])

# Show the ten most recently updated versions
knitr::kable(
    head(games_objs, 10),
    caption = "Games",
    format = "markdown"
)
Games
bucket name generation size updated
bgg_data raw/objects/games 1748454988000060 77.8 Mb 2025-05-28 17:56:28
bgg_data raw/objects/games 1747834855255018 77.7 Mb 2025-05-21 13:40:55
bgg_data raw/objects/games 1747055380372986 77.5 Mb 2025-05-12 13:09:40
bgg_data raw/objects/games 1745860932921040 77.3 Mb 2025-04-28 17:22:12
bgg_data raw/objects/games 1745074120055107 77.1 Mb 2025-04-19 14:48:40
bgg_data raw/objects/games 1744392659954256 77 Mb 2025-04-11 17:30:59
bgg_data raw/objects/games 1743780453796724 76.9 Mb 2025-04-04 15:27:33
bgg_data raw/objects/games 1742943479411730 76.8 Mb 2025-03-25 22:57:59
bgg_data raw/objects/games 1742911503045718 76.7 Mb 2025-03-25 14:05:03
bgg_data raw/objects/games 1742489667404929 76.7 Mb 2025-03-20 16:54:27
Show the code
# Position in games_generations of the batch to score; the batch at the next
# position is used as the comparison batch
i <- 1

# Batch of games to score
games <- get_games_from_gcp(
    bucket = "bgg_data",
    generation = games_generations[i]
)

# Run that batch through the preprocessor
prepared_games <- prepare_games(games)

# Batch stored immediately before it, used later to spot newly added games
previous_games <- get_games_from_gcp(
    bucket = "bgg_data",
    generation = games_generations[i + 1]
)

Number of games per publication year (`yearpublished`), restricted to years after the validation period.

Show the code
# The validation predictions define which publication years count as "valid"
tar_load(valid_predictions)

# First and last publication year covered by the validation predictions
valid_years <- summarize(
    valid_predictions,
    min_year = min(yearpublished),
    max_year = max(yearpublished)
)

# Anything published after this year is treated as upcoming
end_valid_year <- valid_years[["max_year"]]

# Keep only the upcoming games
upcoming_games <- filter(prepared_games, yearpublished > end_valid_year)

# Tabulate how many upcoming games fall in each publication year
upcoming_games |>
    group_by(yearpublished) |>
    summarize(n = n(), .groups = "drop") |>
    gt::gt()
yearpublished n
2024 5556
2025 3519
2026 314
2027 11
2028 4

Upcoming games that appear in the most recent batch but not in the previous one.

Show the code
# Upcoming games that are in the current batch but not in the previous one.
# These are flagged so their predictions can be singled out later.
upcoming_games_new <- games |>
    # drop every game that was already present in the previous batch
    anti_join(previous_games, by = join_by(game_id)) |>
    # keep only games that are also in the upcoming set
    inner_join(upcoming_games, by = join_by(game_id)) |>
    # NOTE(review): unnest_info() is reached via `:::`, i.e. it is not part of
    # bggUtils' exported interface - confirm this is intended
    bggUtils:::unnest_info() |>
    select(game_id, name, yearpublished) |>
    mutate(first_time_prediction = TRUE)

# Interactive table of the new games, latest publication year first
upcoming_games_new |>
    select(game_id, name, yearpublished) |>
    arrange(desc(yearpublished)) |>
    mutate(game_id = as.factor(game_id)) |>
    reactable::reactable()

3 Predictions

Predict games with models.

Show the code
# Score the upcoming games with each model in turn, then mark the games that
# are being predicted for the first time
predictions <- upcoming_games |>
    impute_averageweight(
        model = averageweight_fit
    ) |>
    predict_hurdle(
        model = hurdle_fit,
        threshold = hurdle_threshold
    ) |>
    predict_bayesaverage(
        average_model = average_fit,
        usersrated_model = usersrated_fit
    ) |>
    # attach the flag from the table of newly added games ...
    left_join(
        select(upcoming_games_new, game_id, first_time_prediction),
        by = join_by(game_id)
    ) |>
    # ... and treat games without a match as not first-time
    mutate(first_time_prediction = replace_na(first_time_prediction, FALSE))

View predictions for games appearing for the first time

Show the code
# Interactive table of predictions for games seen for the first time,
# ordered by predicted bayesaverage (highest first)
predictions |>
    filter(first_time_prediction) |>
    select(yearpublished, game_id, name, starts_with(".pred")) |>
    # Round numeric predictions for display. The rounding arguments are given
    # in an anonymous function because forwarding extra arguments through
    # across() is deprecated.
    mutate(across(
        starts_with(".pred_") & where(is.numeric),
        \(x) round(x, digits = 2)
    )) |>
    # Strip the ".pred_" prefix; the dot is escaped so it only matches a
    # literal "."
    rename_with(
        \(nm) sub("^\\.pred_", "", nm),
        starts_with(".pred_")
    ) |>
    select(yearpublished, game_id, name, contains("hurdle"), everything()) |>
    arrange(desc(bayesaverage)) |>
    # Factors so the table shows ids and years as labels, not numbers
    mutate(
        game_id = as.factor(game_id),
        yearpublished = as.factor(yearpublished)
    ) |>
    reactable::reactable()

4 Save Predictions

Show the code
# Create directory if it doesn't exist
fs::dir_create("data/processed", recurse = TRUE)

# Keep only the identifying columns and the prediction columns
predictions_out <- predictions |>
    select(yearpublished, game_id, name, starts_with(".pred_"))

# Save locally first
local_board <- pins::board_folder("data/processed")
pins::pin_write(local_board, predictions_out, name = "predictions")

# Save to Google Cloud Storage
# The prefix is held in one variable so the board and the confirmation
# message below cannot disagree about where the pin was written.
# NOTE(review): predictions go under "data/", not under config$board -
# confirm this is the intended location
predictions_prefix <- "data/"
gcs_pred_board <- pins::board_gcs(
    bucket = config$bucket,
    prefix = predictions_prefix,
    versioned = TRUE
)

# Pin predictions to GCS
pins::pin_write(gcs_pred_board, predictions_out, name = "predictions")

# Print confirmation message reporting the prefix that was actually used
cat(
    "Predictions saved to GCS bucket:",
    config$bucket,
    "with prefix:",
    paste0(predictions_prefix, "predictions"),
    "\n"
)
Predictions saved to GCS bucket: bgg_models with prefix: dev/model//predictions