library(ggplot2)
# Per-draw total of the five main-number columns (main_1 .. main_5).
results[["main_sum"]] <- rowSums(results[, sprintf("main_%d", 1:5)])
# Per-draw mean of the main numbers: the total divided by the five columns.
results[["main_avg"]] <- results[["main_sum"]] / 5
# Line chart of the per-draw sum against draw date, overlaid with a LOESS
# smoother (red) so the longer-run trend is visible behind the noise.
ggplot(data = results, mapping = aes(x = draw_date, y = main_sum)) +
  geom_line(colour = "blue") +
  geom_smooth(colour = "red", method = "loess") +
  theme_minimal() +
  labs(
    y = "Sum of Main Numbers",
    x = "Date",
    title = "Sum of Main Numbers Over Time"
  )
# Monthly average of the per-draw mean.
# Tag every draw with a "YYYY-MM" key taken from its draw date.
results[["month_year"]] <- format(results[["draw_date"]], "%Y-%m")
# Mean of main_avg within each month key.
monthly_avg <- aggregate(main_avg ~ month_year, data = results, FUN = mean)
# Anchor each month on its first day so it can be placed on a date axis.
monthly_avg[["date"]] <- as.Date(
  paste(monthly_avg[["month_year"]], "01", sep = "-")
)
# One point per month, joined by a line; x labels are tilted for legibility.
ggplot(data = monthly_avg, mapping = aes(x = date, y = main_avg)) +
  geom_line(colour = "darkgreen") +
  geom_point() +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    y = "Average Value",
    x = "Month",
    title = "Monthly Average of Main Numbers"
  )
# Main numbers over time, one coloured series per ball position.
# Keep only the 2024-2025 draws and reshape to long form: one row per
# (draw_date, position) pair with the drawn value in `number`.
main_numbers_time <- results %>%
  filter(year >= 2024 & year <= 2025) %>%
  select(draw_date, main_1, main_2, main_3, main_4, main_5) %>%
  pivot_longer(
    cols = starts_with("main_"),
    names_to = "position",
    values_to = "number"
  )
# The title's year range must match the filter above (2024-2025); it
# previously advertised 2022-2025 while only 2024-2025 was plotted.
ggplot(main_numbers_time, aes(x = draw_date, y = number, color = position)) +
  geom_line(alpha = 0.7) +
  # geom_smooth(se = FALSE) +  # optional per-position trend, disabled
  geom_point(size = 1) +
  scale_color_brewer(palette = "Set1", name = "Ball Position") +
  scale_y_continuous(breaks = seq(0, 50, by = 5)) +
  theme_minimal() +
  labs(
    title = "Main Numbers Over Time (2024-2025)",
    subtitle = "Tracking the 5 main numbers drawn in each Eurojackpot draw",
    x = "Draw Date",
    y = "Number Value (1-50)"
  ) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  )
# Spread of the five main numbers within each draw, for 2024-2025 draws.
# rowwise() makes sd() see one draw's five values at a time; ungroup()
# drops the row-wise grouping again so later verbs behave normally.
std_dev_over_time <- results %>%
  filter(year >= 2024 & year <= 2025) %>%
  select(draw_date, main_1, main_2, main_3, main_4, main_5) %>%
  rowwise() %>%
  mutate(std_dev = sd(c(main_1, main_2, main_3, main_4, main_5))) %>%
  ungroup()
# The title's year range must match the filter above (2024-2025); it
# previously advertised 2022-2025 while only 2024-2025 was plotted.
ggplot(std_dev_over_time, aes(x = draw_date, y = std_dev)) +
  geom_line(color = "steelblue") +
  geom_point(size = 1, color = "steelblue") +
  geom_smooth(
    method = "loess", se = TRUE, color = "red", linetype = "dashed"
  ) +
  theme_minimal() +
  labs(
    title = "Standard Deviation of Main Numbers Over Time (2024-2025)",
    subtitle = "Higher values indicate more spread out numbers in a draw",
    x = "Draw Date",
    y = "Standard Deviation"
  ) +
  # Start the y axis at zero; the upper limit is left to the data.
  scale_y_continuous(limits = c(0, NA)) +
  theme(
    panel.grid.minor = element_blank()
  )
# Analyze if there are any seasonal patterns.
# The per-draw standard deviation has to be computed BEFORE grouping by
# month. Inside a grouped summarise(), c(main_1, ..., main_5) concatenates
# every number drawn in that month, so sd() of it is one pooled figure for
# the month rather than an average of per-draw spreads.
monthly_analysis <- results %>%
  rowwise() %>%
  mutate(draw_std_dev = sd(c(main_1, main_2, main_3, main_4, main_5))) %>%
  ungroup() %>%
  group_by(month) %>%
  summarise(
    draws = n(),
    avg_main_1 = mean(main_1),
    avg_main_5 = mean(main_5),
    avg_euro_1 = mean(euro_1),
    avg_euro_2 = mean(euro_2),
    # Mean of the per-draw standard deviations within the month.
    avg_std_dev = mean(draw_std_dev, na.rm = TRUE)
  ) %>%
  # Translate the month number into its English name for plotting.
  mutate(month_name = month.name[as.numeric(month)])
# Bar chart of avg_std_dev per month. The x factor is levelled by
# month.name so bars run January..December instead of alphabetically.
ggplot(
  data = monthly_analysis,
  mapping = aes(
    x = factor(month_name, levels = month.name),
    y = avg_std_dev
  )
) +
  geom_col(fill = "steelblue") +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45)) +
  labs(
    y = "Average Standard Deviation",
    x = "Month",
    title = "Average Standard Deviation of Main Numbers by Month"
  )