This report analyzes Puzzle Wednesday data - the time it takes Branden and James working together as a team to complete the Sudoku, NY Times Crossword, and Regular Commuter Crossword in the Dallas Morning News Wednesday Edition.
The “Whose Paper” field indicates whose physical newspaper was used for that session (B = Branden brought his paper, J = James brought his paper) - not who completed the puzzles.
Key Findings:
puzzle_raw <- read_excel("data.xlsx") %>%
rename(
paper_date = `Paper Date`,
complete_date = `Complete Date`,
time = Time,
whose_paper = `Whose Paper?`,
notes = Notes
) %>%
mutate(
paper_date = as.Date(paper_date),
complete_date = as.Date(complete_date)
)
cat("Total records:", nrow(puzzle_raw), "\n")## Total records: 157
cat("Date range:", as.character(min(puzzle_raw$paper_date)), "to", as.character(max(puzzle_raw$paper_date)))## Date range: 2017-01-10 to 2025-12-31
puzzle_data <- puzzle_raw %>%
mutate(
time_hms = format(time, "%H:%M:%S"),
hours = as.numeric(substr(time_hms, 1, 2)),
minutes = as.numeric(substr(time_hms, 4, 5)),
seconds_part = as.numeric(substr(time_hms, 7, 8)),
time_seconds = hours * 3600 + minutes * 60 + seconds_part,
time_minutes = time_seconds / 60
) %>%
select(-hours, -minutes, -seconds_part)get_zodiac_sign <- function(date) {
month <- month(date)
day <- day(date)
if ((month == 3 && day >= 21) || (month == 4 && day <= 19)) return("Aries")
if ((month == 4 && day >= 20) || (month == 5 && day <= 20)) return("Taurus")
if ((month == 5 && day >= 21) || (month == 6 && day <= 20)) return("Gemini")
if ((month == 6 && day >= 21) || (month == 7 && day <= 22)) return("Cancer")
if ((month == 7 && day >= 23) || (month == 8 && day <= 22)) return("Leo")
if ((month == 8 && day >= 23) || (month == 9 && day <= 22)) return("Virgo")
if ((month == 9 && day >= 23) || (month == 10 && day <= 22)) return("Libra")
if ((month == 10 && day >= 23) || (month == 11 && day <= 21)) return("Scorpio")
if ((month == 11 && day >= 22) || (month == 12 && day <= 21)) return("Sagittarius")
if ((month == 12 && day >= 22) || (month == 1 && day <= 19)) return("Capricorn")
if ((month == 1 && day >= 20) || (month == 2 && day <= 18)) return("Aquarius")
if ((month == 2 && day >= 19) || (month == 3 && day <= 20)) return("Pisces")
return(NA_character_)
}
puzzle_data <- puzzle_data %>%
mutate(zodiac_sign = sapply(complete_date, get_zodiac_sign))moon_data <- getMoonIllumination(date = puzzle_data$complete_date)
get_moon_phase_name <- function(phase) {
if (phase < 0.025 || phase >= 0.975) return("New Moon")
if (phase < 0.25) return("Waxing Crescent")
if (phase < 0.275) return("First Quarter")
if (phase < 0.50) return("Waxing Gibbous")
if (phase < 0.525) return("Full Moon")
if (phase < 0.75) return("Waning Gibbous")
if (phase < 0.775) return("Last Quarter")
return("Waning Crescent")
}
puzzle_data <- puzzle_data %>%
mutate(
moon_illumination = moon_data$fraction,
moon_phase_value = moon_data$phase,
moon_phase_name = sapply(moon_data$phase, get_moon_phase_name)
)lat <- 33.0146
lon <- -97.0969
unique_dates <- sort(unique(puzzle_data$complete_date))
date_range_start <- min(unique_dates)
date_range_end <- max(unique_dates)
weather_url <- paste0(
"https://archive-api.open-meteo.com/v1/archive?",
"latitude=", lat,
"&longitude=", lon,
"&start_date=", date_range_start,
"&end_date=", date_range_end,
"&daily=temperature_2m_max,temperature_2m_min,temperature_2m_mean,",
"precipitation_sum,rain_sum,windspeed_10m_max,weathercode",
"&temperature_unit=fahrenheit",
"&windspeed_unit=mph",
"&precipitation_unit=inch",
"&timezone=America/Chicago"
)
weather_response <- tryCatch({
GET(weather_url, timeout(30))
}, error = function(e) NULL)
if (!is.null(weather_response) && status_code(weather_response) == 200) {
weather_json <- fromJSON(content(weather_response, "text", encoding = "UTF-8"))
get_weather_description <- function(code) {
weather_codes <- c(
"0" = "Clear sky", "1" = "Mainly clear", "2" = "Partly cloudy", "3" = "Overcast",
"45" = "Foggy", "48" = "Depositing rime fog",
"51" = "Light drizzle", "53" = "Moderate drizzle", "55" = "Dense drizzle",
"61" = "Slight rain", "63" = "Moderate rain", "65" = "Heavy rain",
"71" = "Slight snow", "73" = "Moderate snow", "75" = "Heavy snow",
"80" = "Slight rain showers", "81" = "Moderate rain showers", "82" = "Violent rain showers",
"95" = "Thunderstorm", "96" = "Thunderstorm with hail", "99" = "Thunderstorm with heavy hail"
)
code_str <- as.character(code)
if (code_str %in% names(weather_codes)) return(weather_codes[code_str])
return("Unknown")
}
weather_df <- data.frame(
complete_date = as.Date(weather_json$daily$time),
weather_temp_max_f = weather_json$daily$temperature_2m_max,
weather_temp_min_f = weather_json$daily$temperature_2m_min,
weather_temp_mean_f = weather_json$daily$temperature_2m_mean,
weather_precipitation_in = weather_json$daily$precipitation_sum,
weather_rain_in = weather_json$daily$rain_sum,
weather_wind_max_mph = weather_json$daily$windspeed_10m_max,
weather_code = weather_json$daily$weathercode
) %>%
mutate(weather_description = sapply(weather_code, get_weather_description))
puzzle_data <- puzzle_data %>%
left_join(weather_df, by = "complete_date")
}get_stock_performance <- function(symbol, start_date, end_date) {
tryCatch({
extended_start <- start_date - 7
stock_data <- getSymbols(symbol, src = "yahoo", from = extended_start,
to = end_date + 1, auto.assign = FALSE, warnings = FALSE)
stock_data <- stock_data[!is.na(Cl(stock_data)), ]
prices <- Cl(stock_data)
returns <- dailyReturn(prices)
data.frame(
date = index(prices),
close = as.numeric(prices),
daily_return_pct = as.numeric(returns) * 100
)
}, error = function(e) NULL)
}
# S&P 500
sp500_data <- get_stock_performance("^GSPC", date_range_start, date_range_end)
if (!is.null(sp500_data)) {
puzzle_data <- puzzle_data %>%
left_join(sp500_data %>% rename(complete_date = date, sp500_close = close,
sp500_daily_return_pct = daily_return_pct),
by = "complete_date")
}
# Dow Jones
djia_data <- get_stock_performance("^DJI", date_range_start, date_range_end)
if (!is.null(djia_data)) {
puzzle_data <- puzzle_data %>%
left_join(djia_data %>% rename(complete_date = date, djia_close = close,
djia_daily_return_pct = daily_return_pct),
by = "complete_date")
}
# NASDAQ
nasdaq_data <- get_stock_performance("^IXIC", date_range_start, date_range_end)
if (!is.null(nasdaq_data)) {
puzzle_data <- puzzle_data %>%
left_join(nasdaq_data %>% rename(complete_date = date, nasdaq_close = close,
nasdaq_daily_return_pct = daily_return_pct),
by = "complete_date")
}
# Gold
gold_data <- get_stock_performance("GC=F", date_range_start, date_range_end)
if (!is.null(gold_data)) {
puzzle_data <- puzzle_data %>%
left_join(gold_data %>% rename(complete_date = date, gold_close = close,
gold_daily_return_pct = daily_return_pct),
by = "complete_date")
}
# Oil
oil_data <- get_stock_performance("CL=F", date_range_start, date_range_end)
if (!is.null(oil_data)) {
puzzle_data <- puzzle_data %>%
left_join(oil_data %>% rename(complete_date = date, oil_wti_close = close,
oil_daily_return_pct = daily_return_pct),
by = "complete_date")
}get_us_holidays <- function(year) {
c(
as.Date(paste0(year, "-01-01")),
as.Date(paste0(year, "-01-01")) + (15 + (8 - wday(as.Date(paste0(year, "-01-01")))) %% 7),
as.Date(paste0(year, "-02-01")) + (15 + (8 - wday(as.Date(paste0(year, "-02-01")))) %% 7),
as.Date(paste0(year, "-05-31")) - ((wday(as.Date(paste0(year, "-05-31"))) - 2) %% 7),
as.Date(paste0(year, "-07-04")),
as.Date(paste0(year, "-09-01")) + ((8 - wday(as.Date(paste0(year, "-09-01")))) %% 7),
as.Date(paste0(year, "-11-01")) + (22 + (5 - wday(as.Date(paste0(year, "-11-01")))) %% 7),
as.Date(paste0(year, "-12-25"))
)
}
years <- unique(year(puzzle_data$complete_date))
all_holidays <- as.Date(unlist(lapply(years, get_us_holidays)), origin = "1970-01-01")
puzzle_data <- puzzle_data %>%
mutate(
days_to_nearest_holiday = sapply(complete_date, function(d) min(abs(as.numeric(d - all_holidays))))
)puzzle_data <- puzzle_data %>%
mutate(
day_of_week = wday(complete_date, label = TRUE, abbr = FALSE),
days_delay = as.numeric(complete_date - paper_date),
month = month(complete_date, label = TRUE, abbr = FALSE),
year = year(complete_date),
week_of_year = week(complete_date),
completed_same_day = days_delay == 0,
date_numeric = as.numeric(complete_date - min(complete_date)),
holiday_proximity = case_when(
days_to_nearest_holiday <= 3 ~ "Within 3 days",
days_to_nearest_holiday <= 7 ~ "4-7 days",
days_to_nearest_holiday <= 14 ~ "8-14 days",
TRUE ~ "More than 14 days"
),
holiday_proximity = factor(holiday_proximity,
levels = c("Within 3 days", "4-7 days", "8-14 days", "More than 14 days")),
temp_category = case_when(
weather_temp_mean_f < 50 ~ "Cold (<50F)",
weather_temp_mean_f < 70 ~ "Mild (50-70F)",
weather_temp_mean_f < 85 ~ "Warm (70-85F)",
weather_temp_mean_f >= 85 ~ "Hot (85F+)"
),
market_direction = case_when(
is.na(sp500_daily_return_pct) ~ NA_character_,
sp500_daily_return_pct < -1 ~ "Down >1%",
sp500_daily_return_pct < 0 ~ "Slightly Down",
sp500_daily_return_pct < 1 ~ "Slightly Up",
TRUE ~ "Up >1%"
)
)# Function to analyze sentiment of a text
analyze_sentiment <- function(text) {
if (is.na(text) || text == "") {
return(list(
syuzhet_score = NA_real_,
word_count = 0,
sentiment_label = NA_character_
))
}
# Get sentiment score using syuzhet
syuzhet_score <- get_sentiment(text, method = "syuzhet")
# Get word count
words_df <- tibble(text = text) %>%
unnest_tokens(word, text)
word_count <- nrow(words_df)
# Create sentiment label
if (syuzhet_score > 0.5) {
sentiment_label <- "Positive"
} else if (syuzhet_score < -0.5) {
sentiment_label <- "Negative"
} else {
sentiment_label <- "Neutral"
}
return(list(
syuzhet_score = syuzhet_score,
word_count = word_count,
sentiment_label = sentiment_label
))
}
# Apply sentiment analysis to each note
sentiment_results <- lapply(puzzle_data$notes, analyze_sentiment)
# Extract results into columns
puzzle_data <- puzzle_data %>%
mutate(
note_sentiment_score = sapply(sentiment_results, function(x) x$syuzhet_score),
note_word_count = sapply(sentiment_results, function(x) x$word_count),
note_sentiment_label = sapply(sentiment_results, function(x) x$sentiment_label),
has_note = !is.na(notes) & notes != ""
)
# Get NRC emotion scores
get_nrc_emotions <- function(text) {
if (is.na(text) || text == "") {
return(data.frame(
anger = NA, anticipation = NA, disgust = NA, fear = NA,
joy = NA, sadness = NA, surprise = NA, trust = NA
))
}
emotions <- get_nrc_sentiment(text)
return(emotions[, c("anger", "anticipation", "disgust", "fear",
"joy", "sadness", "surprise", "trust")])
}
nrc_emotions <- bind_rows(lapply(puzzle_data$notes, get_nrc_emotions))
puzzle_data <- puzzle_data %>%
mutate(
note_anger = nrc_emotions$anger,
note_anticipation = nrc_emotions$anticipation,
note_disgust = nrc_emotions$disgust,
note_fear = nrc_emotions$fear,
note_joy = nrc_emotions$joy,
note_sadness = nrc_emotions$sadness,
note_surprise = nrc_emotions$surprise,
note_trust = nrc_emotions$trust
)
# Summary
sentiment_summary <- puzzle_data %>%
filter(!is.na(note_sentiment_label)) %>%
count(note_sentiment_label)
cat("Sentiment distribution:\n")## Sentiment distribution:
## # A tibble: 3 × 2
## note_sentiment_label n
## <chr> <int>
## 1 Negative 29
## 2 Neutral 94
## 3 Positive 24
time_stats <- puzzle_data %>%
summarise(
N = n(),
Mean = round(mean(time_minutes), 2),
Median = round(median(time_minutes), 2),
SD = round(sd(time_minutes), 2),
Min = round(min(time_minutes), 2),
Max = round(max(time_minutes), 2),
Q1 = round(quantile(time_minutes, 0.25), 2),
Q3 = round(quantile(time_minutes, 0.75), 2)
)
kable(time_stats, caption = "Completion Time Statistics (minutes)") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| N | Mean | Median | SD | Min | Max | Q1 | Q3 |
|---|---|---|---|---|---|---|---|
| 157 | 42.75 | 41.92 | 10.47 | 23.45 | 84.98 | 34.9 | 48.37 |
ggplot(puzzle_data, aes(x = time_minutes)) +
geom_histogram(bins = 25, fill = "steelblue", color = "white", alpha = 0.8) +
geom_vline(aes(xintercept = mean(time_minutes)), color = "red", linetype = "dashed", size = 1) +
geom_vline(aes(xintercept = median(time_minutes)), color = "orange", linetype = "dashed", size = 1) +
labs(
title = "Distribution of Puzzle Completion Times",
subtitle = "Red = Mean, Orange = Median",
x = "Time (minutes)",
y = "Count"
) +
theme(plot.title = element_text(face = "bold", size = 14))The “Whose Paper” column indicates whose physical newspaper was used for that puzzle session. Both Branden and James work together as a team on all puzzles - this field simply tracks which person brought the paper that day. In some cases, using James’s paper resulted in accidentally breaking the Sudoku in an unrecoverable way.
paper_stats <- puzzle_data %>%
group_by(whose_paper) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
`SD (min)` = round(sd(time_minutes), 2),
`Min (min)` = round(min(time_minutes), 2),
`Max (min)` = round(max(time_minutes), 2),
.groups = "drop"
)
kable(paper_stats, caption = "Team Completion Time by Paper Source (B = Branden's paper, J = James's paper)") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| whose_paper | N | Mean (min) | Median (min) | SD (min) | Min (min) | Max (min) |
|---|---|---|---|---|---|---|
| B | 141 | 42.89 | 41.97 | 10.41 | 23.45 | 84.98 |
| J | 16 | 41.46 | 39.65 | 11.20 | 24.08 | 68.27 |
ggplot(puzzle_data, aes(x = whose_paper, y = time_minutes, fill = whose_paper)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.3) +
scale_fill_brewer(palette = "Set1") +
labs(
title = "Team Completion Time by Paper Source",
subtitle = "B = Branden's paper, J = James's paper",
x = "Paper Source",
y = "Time (minutes)"
) +
theme(legend.position = "none")trend_model <- lm(time_minutes ~ date_numeric, data = puzzle_data)
trend_summary <- summary(trend_model)
ggplot(puzzle_data, aes(x = complete_date, y = time_minutes, color = whose_paper)) +
geom_point(alpha = 0.6, size = 2) +
geom_smooth(method = "lm", se = TRUE, aes(group = 1), color = "black", linetype = "dashed") +
geom_smooth(method = "lm", se = FALSE) +
scale_color_brewer(palette = "Set1") +
labs(
title = "Team Completion Time Trend Over Time",
subtitle = paste0("Overall trend: ", round(coef(trend_model)[2] * 30, 2),
" minutes/month (p = ", format(trend_summary$coefficients[2, 4], digits = 3), ")"),
x = "Date",
y = "Time (minutes)",
color = "Paper Source"
) +
theme(plot.title = element_text(face = "bold", size = 14))trend_by_paper <- puzzle_data %>%
group_by(whose_paper) %>%
do({
model <- lm(time_minutes ~ date_numeric, data = .)
data.frame(
`Monthly Change (min)` = round(coef(model)[2] * 30, 3),
`p-value` = round(summary(model)$coefficients[2, 4], 4),
Direction = ifelse(coef(model)[2] < 0, "Improving", "Getting Slower")
)
}) %>%
ungroup()
kable(trend_by_paper, caption = "Time Trend by Paper Source") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| whose_paper | Monthly.Change..min. | p.value | Direction |
|---|---|---|---|
| B | -0.292 | 0.0003 | Improving |
| J | -0.398 | 0.1267 | Improving |
Key Finding: The team is improving over time regardless of whose paper is used, with times decreasing by approximately 0.3-0.4 minutes per month.
numeric_cols <- puzzle_data %>%
select(
time_minutes,
moon_illumination,
weather_temp_mean_f, weather_precipitation_in, weather_wind_max_mph,
sp500_daily_return_pct, djia_daily_return_pct, nasdaq_daily_return_pct,
gold_daily_return_pct, oil_daily_return_pct,
days_to_nearest_holiday, days_delay
) %>%
drop_na()
correlations <- cor(numeric_cols)[, "time_minutes"]
correlations <- correlations[names(correlations) != "time_minutes"]
correlations <- sort(correlations, decreasing = TRUE)
cor_df <- data.frame(
Variable = names(correlations),
Correlation = round(correlations, 4),
`Abs Correlation` = abs(round(correlations, 4))
) %>%
arrange(desc(`Abs.Correlation`))
kable(cor_df, caption = "Correlations with Completion Time") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(which(abs(cor_df$Correlation) > 0.1), bold = TRUE, background = "#ffffcc")| Variable | Correlation | Abs.Correlation | |
|---|---|---|---|
| weather_wind_max_mph | weather_wind_max_mph | 0.1629 | 0.1629 |
| weather_precipitation_in | weather_precipitation_in | 0.0975 | 0.0975 |
| gold_daily_return_pct | gold_daily_return_pct | -0.0698 | 0.0698 |
| moon_illumination | moon_illumination | -0.0698 | 0.0698 |
| djia_daily_return_pct | djia_daily_return_pct | -0.0551 | 0.0551 |
| days_delay | days_delay | 0.0502 | 0.0502 |
| sp500_daily_return_pct | sp500_daily_return_pct | -0.0460 | 0.0460 |
| nasdaq_daily_return_pct | nasdaq_daily_return_pct | -0.0244 | 0.0244 |
| oil_daily_return_pct | oil_daily_return_pct | 0.0122 | 0.0122 |
| days_to_nearest_holiday | days_to_nearest_holiday | -0.0112 | 0.0112 |
| weather_temp_mean_f | weather_temp_mean_f | -0.0099 | 0.0099 |
cor_matrix <- cor(numeric_cols, use = "complete.obs")
corrplot(cor_matrix, method = "color", type = "upper",
tl.cex = 0.8, tl.col = "black",
addCoef.col = "black", number.cex = 0.7,
col = colorRampPalette(c("#6D9EC1", "white", "#E46726"))(200))Key Finding: Wind speed shows the strongest correlation with completion time (r = 0.16). Windier days = slower puzzle times!
day_stats <- puzzle_data %>%
group_by(day_of_week) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
`SD (min)` = round(sd(time_minutes), 2),
.groups = "drop"
) %>%
arrange(`Mean (min)`)
kable(day_stats, caption = "Completion Time by Day of Week") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| day_of_week | N | Mean (min) | Median (min) | SD (min) |
|---|---|---|---|---|
| Saturday | 3 | 35.71 | 32.05 | 7.50 |
| Wednesday | 125 | 41.49 | 40.67 | 9.67 |
| Friday | 8 | 42.39 | 43.21 | 7.62 |
| Tuesday | 6 | 51.18 | 50.25 | 9.05 |
| Thursday | 10 | 51.38 | 46.52 | 15.35 |
| Monday | 5 | 51.65 | 50.10 | 11.48 |
day_order <- puzzle_data %>%
group_by(day_of_week) %>%
summarise(mean_time = mean(time_minutes)) %>%
arrange(mean_time) %>%
pull(day_of_week)
ggplot(puzzle_data, aes(x = factor(day_of_week, levels = day_order), y = time_minutes, fill = day_of_week)) +
geom_boxplot(alpha = 0.7) +
labs(
title = "Completion Time by Day of Week",
subtitle = "ANOVA p = 0.003 (Significant!)",
x = "Day",
y = "Time (minutes)"
) +
theme(legend.position = "none", axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(palette = "Set3")Key Finding: Day of the week significantly affects completion time (p = 0.003). Completing on the paper day (Wednesday) is faster than delaying!
same_day_stats <- puzzle_data %>%
group_by(completed_same_day) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
.groups = "drop"
) %>%
mutate(completed_same_day = ifelse(completed_same_day, "Same Day", "Delayed"))
kable(same_day_stats, caption = "Same Day vs Delayed Completion") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| completed_same_day | N | Mean (min) | Median (min) |
|---|---|---|---|
| Delayed | 36 | 46.72 | 44.33 |
| Same Day | 121 | 41.57 | 40.67 |
ggplot(puzzle_data, aes(x = completed_same_day, y = time_minutes, fill = completed_same_day)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.2) +
scale_fill_manual(values = c("TRUE" = "#2ecc71", "FALSE" = "#e74c3c")) +
labs(
title = "Same Day vs Delayed Completion",
subtitle = "ANOVA p = 0.009 (Significant!)",
x = "Completed Same Day",
y = "Time (minutes)"
) +
theme(legend.position = "none")Key Finding: Completing puzzles on the same day as the paper is significantly faster (41.6 min vs 46.7 min, p = 0.009).
zodiac_stats <- puzzle_data %>%
group_by(zodiac_sign) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
.groups = "drop"
) %>%
arrange(`Mean (min)`)
kable(zodiac_stats, caption = "Completion Time by Zodiac Sign") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| zodiac_sign | N | Mean (min) | Median (min) |
|---|---|---|---|
| Leo | 14 | 40.32 | 38.14 |
| Sagittarius | 13 | 40.33 | 41.63 |
| Capricorn | 13 | 40.85 | 34.67 |
| Libra | 13 | 41.39 | 41.92 |
| Taurus | 11 | 42.02 | 44.00 |
| Gemini | 13 | 42.15 | 40.05 |
| Aries | 13 | 42.44 | 40.07 |
| Pisces | 14 | 43.01 | 44.12 |
| Cancer | 14 | 43.24 | 42.56 |
| Scorpio | 13 | 44.01 | 42.08 |
| Virgo | 13 | 44.40 | 37.82 |
| Aquarius | 13 | 48.85 | 46.77 |
ggplot(puzzle_data, aes(x = reorder(zodiac_sign, time_minutes), y = time_minutes)) +
geom_boxplot(fill = "#9b59b6", alpha = 0.6) +
coord_flip() +
labs(
title = "Completion Time by Zodiac Sign",
subtitle = "ANOVA p = 0.80 (Not Significant)",
x = "Zodiac Sign",
y = "Time (minutes)"
)moon_stats <- puzzle_data %>%
group_by(moon_phase_name) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
.groups = "drop"
) %>%
arrange(`Mean (min)`)
kable(moon_stats, caption = "Completion Time by Moon Phase") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| moon_phase_name | N | Mean (min) | Median (min) |
|---|---|---|---|
| New Moon | 8 | 40.57 | 38.72 |
| Waxing Gibbous | 37 | 41.24 | 41.52 |
| Last Quarter | 6 | 41.40 | 38.40 |
| Waning Gibbous | 34 | 41.82 | 40.24 |
| Full Moon | 3 | 42.61 | 45.25 |
| Waning Crescent | 28 | 44.20 | 44.52 |
| Waxing Crescent | 40 | 44.40 | 43.13 |
| First Quarter | 1 | 49.37 | 49.37 |
ggplot(puzzle_data, aes(x = reorder(moon_phase_name, time_minutes), y = time_minutes)) +
geom_boxplot(fill = "#34495e", alpha = 0.6) +
coord_flip() +
labs(
title = "Completion Time by Moon Phase",
subtitle = "ANOVA p = 0.85 (Not Significant)",
x = "Moon Phase",
y = "Time (minutes)"
)temp_stats <- puzzle_data %>%
filter(!is.na(temp_category)) %>%
group_by(temp_category) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
.groups = "drop"
) %>%
arrange(`Mean (min)`)
kable(temp_stats, caption = "Completion Time by Temperature") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| temp_category | N | Mean (min) | Median (min) |
|---|---|---|---|
| Warm (70-85F) | 55 | 41.08 | 38.38 |
| Mild (50-70F) | 52 | 42.73 | 43.46 |
| Cold (<50F) | 23 | 43.70 | 40.25 |
| Hot (85F+) | 27 | 45.37 | 43.82 |
ggplot(puzzle_data %>% filter(!is.na(weather_temp_mean_f)),
aes(x = weather_temp_mean_f, y = time_minutes)) +
geom_point(alpha = 0.5, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(
title = "Completion Time vs Temperature",
x = "Mean Temperature (F)",
y = "Time (minutes)"
)ggplot(puzzle_data %>% filter(!is.na(weather_wind_max_mph)),
aes(x = weather_wind_max_mph, y = time_minutes)) +
geom_point(alpha = 0.5, color = "#27ae60") +
geom_smooth(method = "lm", se = TRUE, color = "red") +
labs(
title = "Completion Time vs Wind Speed",
subtitle = paste("Correlation r =", round(cor(puzzle_data$time_minutes,
puzzle_data$weather_wind_max_mph,
use = "complete.obs"), 3)),
x = "Max Wind Speed (mph)",
y = "Time (minutes)"
)Key Finding: Higher wind speeds are associated with slower completion times (r = 0.16).
holiday_stats <- puzzle_data %>%
group_by(holiday_proximity) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
.groups = "drop"
)
kable(holiday_stats, caption = "Completion Time by Holiday Proximity") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| holiday_proximity | N | Mean (min) | Median (min) |
|---|---|---|---|
| Within 3 days | 21 | 42.64 | 42.98 |
| 4-7 days | 22 | 42.67 | 40.88 |
| 8-14 days | 42 | 44.08 | 43.64 |
| More than 14 days | 72 | 42.03 | 40.59 |
ggplot(puzzle_data, aes(x = holiday_proximity, y = time_minutes, fill = holiday_proximity)) +
geom_boxplot(alpha = 0.7) +
labs(
title = "Completion Time by Holiday Proximity",
subtitle = "ANOVA p = 0.80 (Not Significant)",
x = "Days to Nearest Holiday",
y = "Time (minutes)"
) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Blues")Finding: Holiday proximity does not significantly affect completion time.
market_stats <- puzzle_data %>%
filter(!is.na(market_direction)) %>%
group_by(market_direction) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
.groups = "drop"
) %>%
arrange(`Mean (min)`)
kable(market_stats, caption = "Completion Time by S&P 500 Direction") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| market_direction | N | Mean (min) | Median (min) |
|---|---|---|---|
| Slightly Up | 69 | 41.40 | 41.33 |
| Down >1% | 15 | 42.64 | 42.08 |
| Up >1% | 20 | 44.36 | 43.97 |
| Slightly Down | 49 | 44.55 | 42.48 |
ggplot(puzzle_data %>% filter(!is.na(market_direction)),
aes(x = market_direction, y = time_minutes, fill = market_direction)) +
geom_boxplot(alpha = 0.7) +
labs(
title = "Completion Time by S&P 500 Performance",
subtitle = "ANOVA p = 0.39 (Not Significant)",
x = "Market Direction",
y = "Time (minutes)"
) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "RdYlGn")Finding: Stock market performance does not significantly affect completion time.
sentiment_dist <- puzzle_data %>%
filter(!is.na(note_sentiment_label)) %>%
count(note_sentiment_label) %>%
mutate(Percentage = round(n / sum(n) * 100, 1))
kable(sentiment_dist, col.names = c("Sentiment", "Count", "Percentage"),
caption = "Distribution of Note Sentiments") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Sentiment | Count | Percentage |
|---|---|---|
| Negative | 29 | 19.7 |
| Neutral | 94 | 63.9 |
| Positive | 24 | 16.3 |
ggplot(sentiment_dist, aes(x = "", y = n, fill = note_sentiment_label)) +
geom_bar(stat = "identity", width = 1) +
coord_polar("y") +
scale_fill_manual(values = c("Negative" = "#e74c3c", "Neutral" = "#95a5a6", "Positive" = "#2ecc71")) +
labs(title = "Sentiment Distribution in Notes", fill = "Sentiment") +
theme_void() +
theme(plot.title = element_text(hjust = 0.5, face = "bold"))sentiment_time <- puzzle_data %>%
filter(!is.na(note_sentiment_label)) %>%
group_by(note_sentiment_label) %>%
summarise(
N = n(),
`Mean (min)` = round(mean(time_minutes), 2),
`Median (min)` = round(median(time_minutes), 2),
`SD (min)` = round(sd(time_minutes), 2),
.groups = "drop"
) %>%
arrange(`Mean (min)`)
kable(sentiment_time, caption = "Completion Time by Note Sentiment") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| note_sentiment_label | N | Mean (min) | Median (min) | SD (min) |
|---|---|---|---|---|
| Positive | 24 | 40.72 | 40.39 | 10.77 |
| Neutral | 94 | 41.88 | 41.42 | 10.61 |
| Negative | 29 | 46.81 | 47.12 | 10.11 |
ggplot(puzzle_data %>% filter(!is.na(note_sentiment_label)),
aes(x = note_sentiment_label, y = time_minutes, fill = note_sentiment_label)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.3) +
scale_fill_manual(values = c("Negative" = "#e74c3c", "Neutral" = "#95a5a6", "Positive" = "#2ecc71")) +
labs(
title = "Completion Time by Note Sentiment",
subtitle = paste("ANOVA p =", round(summary(aov(time_minutes ~ note_sentiment_label,
data = puzzle_data %>%
filter(!is.na(note_sentiment_label))))[[1]][["Pr(>F)"]][1], 4)),
x = "Sentiment",
y = "Time (minutes)"
) +
theme(legend.position = "none")sentiment_cor <- cor(puzzle_data$time_minutes, puzzle_data$note_sentiment_score, use = "complete.obs")
ggplot(puzzle_data %>% filter(!is.na(note_sentiment_score)),
aes(x = note_sentiment_score, y = time_minutes)) +
geom_point(alpha = 0.5, aes(color = note_sentiment_label)) +
geom_smooth(method = "lm", se = TRUE, color = "black", linetype = "dashed") +
scale_color_manual(values = c("Negative" = "#e74c3c", "Neutral" = "#95a5a6", "Positive" = "#2ecc71")) +
labs(
title = "Completion Time vs Sentiment Score",
subtitle = paste("Correlation r =", round(sentiment_cor, 3)),
x = "Sentiment Score (negative = frustrated, positive = happy)",
y = "Time (minutes)",
color = "Sentiment"
)The NRC lexicon categorizes words into 8 emotions: anger, anticipation, disgust, fear, joy, sadness, surprise, and trust.
emotion_cols <- c("note_anger", "note_anticipation", "note_disgust", "note_fear",
"note_joy", "note_sadness", "note_surprise", "note_trust")
emotion_cors <- sapply(emotion_cols, function(col) {
cor(puzzle_data$time_minutes, puzzle_data[[col]], use = "complete.obs")
})
emotion_cor_df <- data.frame(
Emotion = gsub("note_", "", names(emotion_cors)),
Correlation = round(emotion_cors, 4)
) %>%
arrange(desc(abs(Correlation)))
kable(emotion_cor_df, caption = "Emotion Correlations with Completion Time") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(which(abs(emotion_cor_df$Correlation) > 0.05), bold = TRUE, background = "#ffffcc")| Emotion | Correlation | |
|---|---|---|
| note_anger | anger | 0.2270 |
| note_anticipation | anticipation | -0.1210 |
| note_fear | fear | 0.0831 |
| note_sadness | sadness | 0.0722 |
| note_joy | joy | -0.0412 |
| note_disgust | disgust | -0.0393 |
| note_trust | trust | 0.0226 |
| note_surprise | surprise | 0.0123 |
emotion_cor_df$Emotion <- factor(emotion_cor_df$Emotion, levels = emotion_cor_df$Emotion)
ggplot(emotion_cor_df, aes(x = reorder(Emotion, Correlation), y = Correlation,
fill = ifelse(Correlation > 0, "Positive", "Negative"))) +
geom_bar(stat = "identity", alpha = 0.8) +
coord_flip() +
scale_fill_manual(values = c("Positive" = "#e74c3c", "Negative" = "#2ecc71")) +
labs(
title = "Emotion Correlations with Completion Time",
subtitle = "Positive correlation = emotion associated with slower times",
x = "Emotion",
y = "Correlation with Time"
) +
theme(legend.position = "none") +
geom_hline(yintercept = 0, linetype = "dashed")Note. For the public usage of this, I have removed my notes because at times they can get quite colorful.
# Get all words from notes
all_words <- puzzle_data %>%
filter(!is.na(notes)) %>%
select(notes) %>%
unnest_tokens(word, notes) %>%
anti_join(stop_words, by = "word") %>%
count(word, sort = TRUE) %>%
filter(n > 1)
# Create word cloud
if (nrow(all_words) > 0) {
wordcloud(words = all_words$word, freq = all_words$n,
min.freq = 2, max.words = 100,
random.order = FALSE, rot.per = 0.35,
colors = brewer.pal(8, "Dark2"))
}# Note: whose_paper represents paper source (whose newspaper was used), not who completed the puzzles
# The team works together on all puzzles - this variable tests if paper source affects completion time
regression_data <- puzzle_data %>%
filter(!is.na(weather_temp_mean_f) & !is.na(sp500_daily_return_pct)) %>%
mutate(
paper_source = factor(whose_paper), # Renamed for clarity
day_of_week = factor(day_of_week)
)
full_model <- lm(
time_minutes ~
paper_source +
days_delay +
weather_temp_mean_f +
weather_precipitation_in +
weather_wind_max_mph +
moon_illumination +
sp500_daily_return_pct +
days_to_nearest_holiday +
date_numeric,
data = regression_data
)
model_summary <- summary(full_model)
cat("R-squared:", round(model_summary$r.squared, 4), "\n")## R-squared: 0.1127
## Adjusted R-squared: 0.0568
cat("F-statistic p-value:", format(pf(model_summary$fstatistic[1],
model_summary$fstatistic[2],
model_summary$fstatistic[3],
lower.tail = FALSE), digits = 4), "\n")## F-statistic p-value: 0.04138
coef_table <- tidy(full_model) %>%
mutate(
estimate = round(estimate, 4),
std.error = round(std.error, 4),
statistic = round(statistic, 3),
p.value = round(p.value, 4),
Significant = ifelse(p.value < 0.05, "**", ifelse(p.value < 0.1, "*", ""))
) %>%
arrange(p.value)
kable(coef_table, caption = "Multiple Regression Coefficients") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
row_spec(which(coef_table$p.value < 0.05), bold = TRUE, background = "#d4edda") %>%
row_spec(which(coef_table$p.value >= 0.05 & coef_table$p.value < 0.1), background = "#fff3cd")| term | estimate | std.error | statistic | p.value | Significant |
|---|---|---|---|---|---|
| (Intercept) | 47.3919 | 5.4921 | 8.629 | 0.0000 | ** |
| date_numeric | -0.0092 | 0.0027 | -3.349 | 0.0010 | ** |
| weather_precipitation_in | 1.9363 | 2.0644 | 0.938 | 0.3499 | |
| moon_illumination | -2.0632 | 2.4140 | -0.855 | 0.3942 | |
| paper_sourceJ | -2.0730 | 2.7446 | -0.755 | 0.4513 | |
| weather_wind_max_mph | 0.1402 | 0.2115 | 0.663 | 0.5084 | |
| sp500_daily_return_pct | -0.2932 | 0.7567 | -0.387 | 0.6990 | |
| days_delay | 0.0013 | 0.0041 | 0.330 | 0.7419 | |
| days_to_nearest_holiday | -0.0195 | 0.0706 | -0.275 | 0.7834 | |
| weather_temp_mean_f | 0.0014 | 0.0549 | 0.025 | 0.9800 |
# Note: paper_source represents whose newspaper was used, not who completed the puzzles
rf_data <- puzzle_data %>%
select(
time_minutes, whose_paper, days_delay,
weather_temp_mean_f, weather_precipitation_in, weather_wind_max_mph,
moon_illumination, sp500_daily_return_pct, djia_daily_return_pct,
nasdaq_daily_return_pct, days_to_nearest_holiday, date_numeric,
gold_daily_return_pct, oil_daily_return_pct
) %>%
rename(paper_source = whose_paper) %>% # Renamed for clarity
drop_na() %>%
mutate(paper_source = factor(paper_source))
set.seed(42)
rf_model <- randomForest(time_minutes ~ ., data = rf_data, ntree = 500, importance = TRUE)
importance_df <- importance(rf_model) %>%
as.data.frame() %>%
rownames_to_column("Variable") %>%
arrange(desc(`%IncMSE`)) %>%
mutate(
`%IncMSE` = round(`%IncMSE`, 2),
IncNodePurity = round(IncNodePurity, 2)
)
kable(importance_df, caption = "Random Forest Feature Importance") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| Variable | %IncMSE | IncNodePurity |
|---|---|---|
| date_numeric | 1.44 | 2359.46 |
| sp500_daily_return_pct | 0.60 | 1137.80 |
| paper_source | 0.16 | 132.00 |
| weather_wind_max_mph | 0.05 | 1526.89 |
| days_delay | -0.38 | 789.55 |
| moon_illumination | -0.53 | 1520.78 |
| djia_daily_return_pct | -0.82 | 1001.26 |
| days_to_nearest_holiday | -2.66 | 988.86 |
| weather_precipitation_in | -3.06 | 777.06 |
| oil_daily_return_pct | -3.22 | 1070.87 |
| nasdaq_daily_return_pct | -3.81 | 1133.81 |
| weather_temp_mean_f | -4.39 | 1575.64 |
| gold_daily_return_pct | -4.65 | 1191.18 |
Key Finding: The most important predictor is
date_numeric (time trend), confirming that completion times
are improving over time.
fastest <- puzzle_data %>% slice_min(time_minutes, n = 5)
slowest <- puzzle_data %>% slice_max(time_minutes, n = 5)
kable(
fastest %>%
select(complete_date, whose_paper, time_minutes, zodiac_sign, weather_description) %>%
rename(`Paper Source` = whose_paper),
caption = "Top 5 Fastest Team Times"
) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| complete_date | Paper Source | time_minutes | zodiac_sign | weather_description |
|---|---|---|---|---|
| 2024-06-05 | B | 23.45000 | Gemini | Slight rain |
| 2025-02-05 | J | 24.08333 | Aquarius | Overcast |
| 2025-09-10 | B | 24.40000 | Virgo | Overcast |
| 2024-08-21 | B | 24.85000 | Leo | Overcast |
| 2024-06-12 | B | 26.76667 | Gemini | Moderate drizzle |
kable(
slowest %>%
select(complete_date, whose_paper, time_minutes, zodiac_sign, weather_description, notes) %>%
rename(`Paper Source` = whose_paper),
caption = "Top 5 Slowest Team Times"
) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)| complete_date | Paper Source | time_minutes | zodiac_sign | weather_description | notes |
|---|---|---|---|---|---|
| 2023-01-26 | B | 84.98333 | Aquarius | Mainly clear | Bert showed up and distracted everyone. |
| 2023-09-20 | B | 68.86667 | Virgo | Light drizzle | Bert wasn’t even here. |
| 2023-05-24 | J | 68.26667 | Gemini | Slight rain | Fuckin Shortz |
| 2025-01-02 | B | 67.66667 | Capricorn | Clear sky | Yeah, not a great start. Sudoku and NYT killed us. |
| 2025-07-14 | B | 64.33333 | Cancer | Heavy rain | 5am flight from MIA. G left the paper on the lawn. |
| Factor | Effect | p-value |
|---|---|---|
| Time Trend | Improving ~0.3 min/month | < 0.001 |
| Day of Week | Wednesday is fastest | 0.003 |
| Same Day Completion | 5 minutes faster | 0.009 |
| Wind Speed | Higher wind = slower | r = 0.16 |
# Calculate sentiment stats for summary
sentiment_summary_stats <- puzzle_data %>%
filter(!is.na(note_sentiment_label)) %>%
group_by(note_sentiment_label) %>%
summarise(mean_time = mean(time_minutes), .groups = "drop")
sentiment_aov_p <- summary(aov(time_minutes ~ note_sentiment_label,
data = puzzle_data %>%
filter(!is.na(note_sentiment_label))))[[1]][["Pr(>F)"]][1]Note: “whose_paper” indicates whose physical newspaper was used (B = Branden’s paper, J = James’s paper). All puzzles were completed as a team effort by Branden and James working together.
DT::datatable(
puzzle_data %>%
# select(paper_date, complete_date, time_minutes, whose_paper,
# zodiac_sign, moon_phase_name, weather_description,
# sp500_daily_return_pct, notes) %>%
select(paper_date, complete_date, time_minutes, whose_paper,
zodiac_sign, moon_phase_name, weather_description,
sp500_daily_return_pct) %>%
mutate(time_minutes = round(time_minutes, 2),
sp500_daily_return_pct = round(sp500_daily_return_pct, 2)) %>%
rename(`Paper Source` = whose_paper),
options = list(pageLength = 10, scrollX = TRUE),
caption = "Full Puzzle Wednesday Dataset (Team Completion Times)"
)Report generated on 2026-01-24 10:45:54.601529