```r
# Load objects from pipeline
tar_load(hurdle_threshold)

# Create model board connection
model_board <- gcs_model_board(bucket = config$bucket, prefix = config$board)

# Load model metadata
tar_load(averageweight_vetiver)
tar_load(average_vetiver)
tar_load(usersrated_vetiver)
tar_load(hurdle_vetiver)

# load models
averageweight_fit <- pin_read_model(model_board, averageweight_vetiver)
average_fit <- pin_read_model(model_board, average_vetiver)
usersrated_fit <- pin_read_model(model_board, usersrated_vetiver)
hurdle_fit <- pin_read_model(model_board, hurdle_vetiver)
```
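`gcs_model_board()` and `pin_read_model()` are pipeline helpers rather than functions exported by {pins} or {vetiver}. As a rough, hypothetical sketch of what they are assumed to do, they can be thought of as thin wrappers around `pins::board_gcs()` and `vetiver::vetiver_pin_read()` (the `model_name` field on the vetiver targets is an assumption here):

```r
# Hypothetical sketch of the helpers used above; the real pipeline
# implementations may differ.
library(pins)
library(vetiver)

# assumed: a versioned pins board backed by a GCS bucket and prefix
gcs_model_board <- function(bucket, prefix) {
  board_gcs(bucket = bucket, prefix = prefix, versioned = TRUE)
}

# assumed: read a pinned vetiver model using the pin name and version
# carried on the target object (field names are assumptions)
pin_read_model <- function(board, vetiver_obj) {
  vetiver_pin_read(
    board,
    name = vetiver_obj$model_name,
    version = vetiver_obj$version
  )
}
```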
```r
# Function to extract model metadata
extract_model_metadata <- function(model_name, vetiver_obj, board) {
  tibble(
    bucket = board$bucket,
    path = board$prefix,
    model_name = model_name,
    hash = vetiver_obj$hash,
    version = vetiver_obj$version
  )
}

# Create a table with model metadata
model_metadata <- bind_rows(
  extract_model_metadata("averageweight", averageweight_vetiver, model_board),
  extract_model_metadata("average", average_vetiver, model_board),
  extract_model_metadata("usersrated", usersrated_vetiver, model_board),
  extract_model_metadata("hurdle", hurdle_vetiver, model_board)
)

# Display the model metadata table
model_metadata |>
  knitr::kable(
    caption = "Model Metadata",
    col.names = c("Bucket", "Prefix", "Model", "Hash", "Version"),
    format = "markdown"
  )
```
Table: Model Metadata

| Bucket     | Prefix     | Model         | Hash             | Version                |
|------------|------------|---------------|------------------|------------------------|
| bgg_models | dev/model/ | averageweight | 79313d79c1944fc8 | 20250304T221625Z-79313 |
| bgg_models | dev/model/ | average       | 66c6d76df420a7cf | 20250304T221529Z-66c6d |
| bgg_models | dev/model/ | usersrated    | 06e70d3dd6bb333a | 20250304T221420Z-06e70 |
| bgg_models | dev/model/ | hurdle        | 9de9a43cd3d69c74 | 20250304T221240Z-9de9a |
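Because the board is versioned, any of the models in the table above can be read back at a specific version. A minimal sketch, assuming the pin names match the model names shown in the table:

```r
# Read one model at a pinned version from the versioned GCS board
# (the pin name "averageweight" is assumed to match the table above)
averageweight_pinned <- vetiver::vetiver_pin_read(
  model_board,
  name = "averageweight",
  version = "20250304T221625Z-79313"
)

# List all stored versions of that pin
pins::pin_versions(model_board, "averageweight")
```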
## Load Games
```r
list_gcs_objs = function(obj = "raw/objects/games",
                         bucket = "bgg_data",
                         prefix = "raw/objects/games",
                         versions = TRUE,
                         detail = "full") {
  googleCloudStorageR::gcs_list_objects(
    bucket = bucket,
    prefix = prefix,
    versions = versions,
    detail = detail
  ) |>
    filter(name == obj)
}

# get details of games objects from gcp
games_objs = list_gcs_objs() |>
  select(bucket, name, generation, size, updated) |>
  arrange(desc(updated))

# generation ids for each stored version of the games object
games_generations = unique(games_objs$generation)

# show most recent games objects
games_objs |>
  head(10) |>
  knitr::kable(
    caption = "Games",
    format = "markdown"
  )
```
Table: Games

| bucket   | name              | generation       | size    | updated             |
|----------|-------------------|------------------|---------|---------------------|
| bgg_data | raw/objects/games | 1745074120055107 | 77.1 Mb | 2025-04-19 14:48:40 |
| bgg_data | raw/objects/games | 1744392659954256 | 77 Mb   | 2025-04-11 17:30:59 |
| bgg_data | raw/objects/games | 1743780453796724 | 76.9 Mb | 2025-04-04 15:27:33 |
| bgg_data | raw/objects/games | 1742943479411730 | 76.8 Mb | 2025-03-25 22:57:59 |
| bgg_data | raw/objects/games | 1742911503045718 | 76.7 Mb | 2025-03-25 14:05:03 |
| bgg_data | raw/objects/games | 1742489667404929 | 76.7 Mb | 2025-03-20 16:54:27 |
| bgg_data | raw/objects/games | 1741629678449522 | 73.6 Mb | 2025-03-10 18:01:18 |
| bgg_data | raw/objects/games | 1740875193707566 | 73.5 Mb | 2025-03-02 00:26:33 |
| bgg_data | raw/objects/games | 1732133366577344 | 72.1 Mb | 2024-11-20 20:09:26 |
| bgg_data | raw/objects/games | 1730333447804461 | 71.8 Mb | 2024-10-31 00:10:47 |
```r
i = 1

# most recent batch
games = get_games_from_gcp(
  bucket = "bgg_data",
  generation = games_generations[i]
)

# previous batch of games
previous_games = get_games_from_gcp(
  bucket = "bgg_data",
  generation = games_generations[i + 1]
)

# Prepare games with preprocessor
prepared_games <- prepare_games(games)
```
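`get_games_from_gcp()` is another pipeline helper. A hypothetical sketch of the idea, assuming the games object is stored as a serialized R object and that the installed version of googleCloudStorageR supports the `generation` argument to `gcs_get_object()`:

```r
# Hypothetical sketch of get_games_from_gcp(); the real helper may differ.
# Downloads one generation of the games object and reads it back
# (the .rds storage format is an assumption).
get_games_from_gcp_sketch <- function(bucket = "bgg_data",
                                      object = "raw/objects/games",
                                      generation = NULL) {
  tmp <- tempfile(fileext = ".rds")
  googleCloudStorageR::gcs_get_object(
    object_name = object,
    bucket = bucket,
    generation = generation,
    saveToDisk = tmp,
    overwrite = TRUE
  )
  readRDS(tmp)
}
```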
Number of games by yearpublished in recent and upcoming years.
```r
# Get valid years from targets to determine upcoming games
tar_load(valid_predictions)

valid_years <- valid_predictions |>
  summarize(min_year = min(yearpublished), max_year = max(yearpublished))

# Filter to upcoming games (games published after the validation period)
end_valid_year <- valid_years$max_year

# filter to only upcoming games
upcoming_games <- prepared_games |>
  filter(yearpublished > end_valid_year)

# count games by year
upcoming_games |>
  group_by(yearpublished) |>
  count() |>
  ungroup() |>
  gt::gt()
```
| yearpublished | n    |
|---------------|------|
| 2024          | 5514 |
| 2025          | 2840 |
| 2026          | 228  |
| 2027          | 7    |
| 2028          | 4    |
New and upcoming games in most recent batch.
```r
upcoming_games_new = games |>
  anti_join(previous_games, by = join_by(game_id)) |>
  inner_join(upcoming_games, by = join_by(game_id)) |>
  bggUtils:::unnest_info() |>
  select(game_id, name, yearpublished) |>
  mutate(first_time_prediction = TRUE)

upcoming_games_new |>
  select(game_id, name, yearpublished) |>
  arrange(desc(yearpublished)) |>
  mutate(game_id = as.factor(game_id)) |>
  reactable::reactable()
```
```r
# Create directory if it doesn't exist
fs::dir_create("data/processed", recurse = TRUE)

# trim down predictions
predictions_out = predictions |>
  select(yearpublished, game_id, name, starts_with(".pred_"))

# Save locally first
local_board = pins::board_folder("data/processed")
pins::pin_write(local_board, predictions_out, name = "predictions")

# Save to Google Cloud Storage
# Create a GCS board connection for predictions
gcs_pred_board <- pins::board_gcs(
  bucket = config$bucket,
  prefix = "data/",
  versioned = TRUE
)

# Pin predictions to GCS
pins::pin_write(gcs_pred_board, predictions_out, name = "predictions")

# Print confirmation message
cat(
  "Predictions saved to GCS bucket:", config$bucket,
  "with prefix:", paste0(config$board, "/predictions"), "\n"
)
```
Predictions saved to GCS bucket: bgg_models with prefix: dev/model//predictions
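Once written, the pin can be read back from either board with `pins::pin_read()`; for example:

```r
# Read the latest pinned predictions back from the GCS board
predictions_check <- pins::pin_read(gcs_pred_board, "predictions")

# Inspect the available versions of the pin
pins::pin_versions(gcs_pred_board, "predictions")
```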