feat: add analysis and visualization of Olympic medals and disciplines in multiple R scripts
BIN
Projet JOs/Athletes_vs_Medals.png
Normal file
|
After Width: | Height: | Size: 426 KiB |
BIN
Projet JOs/Medal_Efficiency_by_Country.png
Normal file
|
After Width: | Height: | Size: 231 KiB |
722
Projet JOs/Nombre de d'athlètes par pays.R
Normal file
@@ -0,0 +1,722 @@
|
||||
# Charger les bibliothèques nécessaires
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(stringr)
|
||||
library(scales)
|
||||
library(vcd)
|
||||
library(ggrepel)
|
||||
|
||||
# Location of the project workbook (relative to the working directory)
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Medal worksheet, used further down for the athletes-vs-medals analysis
donnees_graphique <- read_excel(path = file_path, sheet = "Travail_medailles")
|
||||
|
||||
# Population reference table (2023 figures), entered manually.
#
# The data is written as explicit "country = population" pairs instead of two
# parallel vectors so that a name can never drift away from its value.
# Corrections compared with the previous parallel vectors (values taken from
# WorldPopulation2023.csv shipped with the project):
#   - American Samoa   : 43914   (previously carried Canada's 38781291)
#   - Dominica         : 73040   (previously carried the Dominican Republic's value)
#   - French Polynesia : 308872  (previously carried Poland's value)
#   - "Brunei " lost its trailing space.
# Result: data frame with columns Pays_Pop (country name) and Population.
population_data <- local({
  population_par_pays <- c(
    "Afghanistan" = 42239854,
    "Albania" = 2832439,
    "Algeria" = 45606480,
    "American Samoa" = 43914,
    "Andorra" = 80088,
    "Angola" = 36684202,
    "Anguilla" = 15899,
    "Antigua and Barbuda" = 94298,
    "Argentina" = 45773884,
    "Armenia" = 2777970,
    "Aruba" = 106277,
    "Australia" = 26439111,
    "Austria" = 8958960,
    "Azerbaijan" = 10412651,
    "Bahamas" = 412623,
    "Bahrain" = 1485509,
    "Bangladesh" = 172954319,
    "Barbados" = 281995,
    "Belarus" = 9498238,
    "Belgium" = 11686140,
    "Belize" = 410825,
    "Benin" = 13712828,
    "Bermuda" = 64069,
    "Bhutan" = 787424,
    "Bolivia" = 12388571,
    "Bosnia and Herzegovina" = 3210847,
    "Botswana" = 2675352,
    "Brazil" = 216422446,
    "British Virgin Islands" = 31538,
    "Brunei" = 452524,
    "Bulgaria" = 6687717,
    "Burkina Faso" = 23251485,
    "Burundi" = 13238559,
    "Cabo Verde" = 598682,
    "Cambodia" = 16944826,
    "Cameroon" = 28647293,
    "Canada" = 38781291,
    "Caribbean Netherlands" = 27148,
    "Cayman Islands" = 69310,
    "Central African Republic" = 5742315,
    "Chad" = 18278568,
    "Chile" = 19629590,
    "China" = 1425671352,
    "Colombia" = 52085168,
    "Comoros" = 852075,
    "Congo" = 6106869,
    "Cook Islands" = 17044,
    "Costa Rica" = 5212173,
    "Croatia" = 4008617,
    "Cuba" = 11194449,
    "Curaçao" = 192077,
    "Cyprus" = 1260138,
    "Czech Republic (Czechia)" = 10495295,
    "Côte d'Ivoire" = 28873034,
    "Denmark" = 5910913,
    "Djibouti" = 1136455,
    "Dominica" = 73040,
    "Dominican Republic" = 11332972,
    "DR Congo" = 102262808,
    "Ecuador" = 18190484,
    "Egypt" = 112716598,
    "El Salvador" = 6364943,
    "Equatorial Guinea" = 1714671,
    "Eritrea" = 3748901,
    "Estonia" = 1322765,
    "Eswatini" = 1210822,
    "Ethiopia" = 126527060,
    "Faeroe Islands" = 53270,
    "Falkland Islands" = 3791,
    "Fiji" = 936375,
    "Finland" = 5545475,
    "France" = 64756584,
    "French Guiana" = 312155,
    "French Polynesia" = 308872,
    "Gabon" = 2436566,
    "Gambia" = 2773168,
    "Georgia" = 3728282,
    "Germany" = 83294633,
    "Ghana" = 34121985,
    "Gibraltar" = 32688,
    "Greece" = 10341277,
    "Greenland" = 56643,
    "Grenada" = 126183,
    "Guadeloupe" = 395839,
    "Guam" = 172952,
    "Guatemala" = 18092026,
    "Guinea" = 14190612,
    "Guinea-Bissau" = 2150842,
    "Guyana" = 813834,
    "Haiti" = 11724763,
    "Holy See" = 518,
    "Honduras" = 10593798,
    "Hong Kong" = 7491609,
    "Hungary" = 10156239,
    "Iceland" = 375318,
    "India" = 1428627663,
    "Indonesia" = 277534122,
    "Iran" = 89172767,
    "Iraq" = 45504560,
    "Ireland" = 5056935,
    "Isle of Man" = 84710,
    "Israel" = 9174520,
    "Italy" = 58870762,
    "Jamaica" = 2825544,
    "Japan" = 123294513,
    "Jordan" = 11337052,
    "Kazakhstan" = 19606633,
    "Kenya" = 55100586,
    "Kiribati" = 133515,
    "Kuwait" = 4310108,
    "Kyrgyzstan" = 6735347,
    "Laos" = 7633779,
    "Latvia" = 1830211,
    "Lebanon" = 5353930,
    "Lesotho" = 2330318,
    "Liberia" = 5418377,
    "Libya" = 6888388,
    "Liechtenstein" = 39584,
    "Lithuania" = 2718352,
    "Luxembourg" = 654768,
    "Macao" = 704149,
    "Madagascar" = 30325732,
    "Malawi" = 20931751,
    "Malaysia" = 34308525,
    "Maldives" = 521021,
    "Mali" = 23293698,
    "Malta" = 535064,
    "Marshall Islands" = 41996,
    "Martinique" = 366981,
    "Mauritania" = 4862989,
    "Mauritius" = 1300557,
    "Mayotte" = 335995,
    "Mexico" = 128455567,
    "Micronesia" = 544321,
    "Moldova" = 3435931,
    "Monaco" = 36297,
    "Mongolia" = 3447157,
    "Montenegro" = 626485,
    "Montserrat" = 4386,
    "Morocco" = 37840044,
    "Mozambique" = 33897354,
    "Myanmar" = 54577997,
    "Namibia" = 2604172,
    "Nauru" = 12780,
    "Nepal" = 30896590,
    "Netherlands" = 17618299,
    "New Caledonia" = 292991,
    "New Zealand" = 5228100,
    "Nicaragua" = 7046310,
    "Niger" = 27202843,
    "Nigeria" = 223804632,
    "Niue" = 1935,
    "North Korea" = 26160821,
    "North Macedonia" = 2085679,
    "Northern Mariana Islands" = 49796,
    "Norway" = 5474360,
    "Oman" = 4644384,
    "Pakistan" = 240485658,
    "Palau" = 18058,
    "Panama" = 4468087,
    "Papua New Guinea" = 10329931,
    "Paraguay" = 6861524,
    "Peru" = 34352719,
    "Philippines" = 117337368,
    "Poland" = 41026067,
    "Portugal" = 10247605,
    "Puerto Rico" = 3260314,
    "Qatar" = 2716391,
    "Romania" = 19892812,
    "Russia" = 144444359,
    "Rwanda" = 14094683,
    "Réunion" = 981796,
    "Saint Barthelemy" = 10994,
    "Saint Helena" = 5314,
    "Saint Kitts & Nevis" = 47755,
    "Saint Lucia" = 180251,
    "Saint Martin" = 32077,
    "Saint Pierre & Miquelon" = 5840,
    "Samoa" = 225681,
    "San Marino" = 33642,
    "Sao Tome & Principe" = 231856,
    "Saudi Arabia" = 36947025,
    "Senegal" = 17763163,
    "Serbia" = 7149077,
    "Seychelles" = 107660,
    "Sierra Leone" = 8791092,
    "Singapore" = 6014723,
    "Sint Maarten" = 44222,
    "Slovakia" = 5795199,
    "Slovenia" = 2119675,
    "Solomon Islands" = 740424,
    "Somalia" = 18143378,
    "South Africa" = 60414495,
    "South Korea" = 51784059,
    "South Sudan" = 11088796,
    "Spain" = 47519628,
    "Sri Lanka" = 21893579,
    "St. Vincent & Grenadines" = 103698,
    "State of Palestine" = 5371230,
    "Sudan" = 48109006,
    "Suriname" = 623236,
    "Sweden" = 10612086,
    "Switzerland" = 8796669,
    "Syria" = 23227014,
    "Taiwan" = 23923276,
    "Tajikistan" = 10143543,
    "Tanzania" = 67438106,
    "Thailand" = 71801279,
    "Timor-Leste" = 1360596,
    "Togo" = 9053799,
    "Tokelau" = 1893,
    "Tonga" = 107773,
    "Trinidad and Tobago" = 1534937,
    "Tunisia" = 12458223,
    "Turkey" = 85816199,
    "Turkmenistan" = 6516100,
    "Turks and Caicos" = 46062,
    "Tuvalu" = 11396,
    "U.S. Virgin Islands" = 98750,
    "Uganda" = 48582334,
    "Ukraine" = 36744634,
    "United Arab Emirates" = 9516871,
    "United Kingdom" = 67736802,
    "United States" = 339996563,
    "Uruguay" = 3423108,
    "Uzbekistan" = 35163944,
    "Vanuatu" = 334506,
    "Venezuela" = 28838499,
    "Vietnam" = 98858950,
    "Wallis & Futuna" = 11502,
    "Western Sahara" = 587259,
    "Yemen" = 34449825,
    "Zambia" = 20569737,
    "Zimbabwe" = 16665409
  )

  data.frame(
    Pays_Pop = names(population_par_pays),
    Population = unname(population_par_pays)
  )
})
|
||||
|
||||
# Load the per-athlete worksheet from the workbook referenced by file_path
df_athletes <- read_excel(path = file_path, sheet = "Travail_athletes")

# Frequency table of the "National Olympic Committee" column, reshaped into a
# two-column data frame: Pays (committee name) and Nombre_Athletes (row count)
athletes_par_pays <- setNames(
  as.data.frame(table(df_athletes[["National Olympic Committee"]])),
  c("Pays", "Nombre_Athletes")
)
|
||||
|
||||
# Dictionary translating Olympic committee names (Pays_JO) into the names used
# in population_data (Pays_Pop).
#
# Written as one "committee name, population name" pair per line so that both
# sides stay aligned. Compared with the previous version, official long-form
# names were added: without an explicit entry they fall through to substring
# matching, which for instance resolves "Democratic Republic of the Congo" to
# "Congo" (a different country) and leaves names such as
# "Lao People's Democratic Republic" unmatched.
# NOTE(review): the added spellings are the usual IOC names; entries that do
# not occur in the workbook are simply never used. Confirm against the data.
correspondances <- as.data.frame(
  matrix(
    c(
      "United States of America", "United States",
      "People's Republic of China", "China",
      "Great Britain", "United Kingdom",
      "ROC", "Russia",
      "Chinese Taipei", "Taiwan",
      "Côte d'Ivoire", "Côte d'Ivoire",
      "Republic of Korea", "South Korea",
      "Hong Kong, China", "Hong Kong",
      "Islamic Republic of Iran", "Iran",
      "Democratic People's Republic of Korea", "North Korea",
      "Canada", "Canada",
      "Democratic Republic of the Congo", "DR Congo",
      "Lao People's Democratic Republic", "Laos",
      "Syrian Arab Republic", "Syria",
      "Czech Republic", "Czech Republic (Czechia)",
      "Cape Verde", "Cabo Verde",
      "Saint Kitts and Nevis", "Saint Kitts & Nevis",
      "Sao Tome and Principe", "Sao Tome & Principe",
      "St Vincent and the Grenadines", "St. Vincent & Grenadines",
      "Virgin Islands, US", "U.S. Virgin Islands",
      "Virgin Islands, British", "British Virgin Islands"
    ),
    ncol = 2,
    byrow = TRUE,
    dimnames = list(NULL, c("Pays_JO", "Pays_Pop"))
  ),
  stringsAsFactors = FALSE
)
|
||||
|
||||
# Build athletes_with_pop: one row per committee with
#   Pays_Original - the committee name as found in the athletes sheet
#   Pays_Match    - the name to look up in population_data
#
# athletes_par_pays comes from as.data.frame(table(...)), so its Pays column is
# a factor. Writing a dictionary value that is not an existing level (e.g.
# "United States") into a factor yields NA with an "invalid factor level"
# warning, so the names are converted to character first.
# The dictionary is applied in one vectorised lookup: committees present in
# correspondances$Pays_JO take the mapped name, all others keep their own name.
athletes_with_pop <- athletes_par_pays %>%
  rename(Pays_Original = Pays) %>%
  mutate(
    Pays_Original = as.character(Pays_Original),
    Pays_Match = coalesce(
      as.character(correspondances$Pays_Pop)[
        match(Pays_Original, as.character(correspondances$Pays_JO))
      ],
      Pays_Original
    )
  )
|
||||
|
||||
#' Find the reference country name corresponding to a committee name
#'
#' Matching is attempted in three passes, returning the first hit:
#'   1. exact equality;
#'   2. equality after lower-casing and removing punctuation and whitespace;
#'   3. case-insensitive substring containment in either direction
#'      (first reference name, in list order, that qualifies).
#'
#' Names are compared as literal text. They were previously passed to
#' `str_detect()` as regular expressions, so names containing `(`, `)` or `.`
#' (e.g. "Czech Republic (Czechia)", "U.S. Virgin Islands") could mis-match or
#' raise a regex error. Missing or empty input now yields `NA` instead of
#' failing inside `if`.
#'
#' @param nom_pays A single country name (character or factor).
#' @param liste_pays Vector of reference country names.
#' @return The matching element of `liste_pays`, or `NA_character_` when
#'   nothing matches.
match_countries <- function(nom_pays, liste_pays) {
  nom_pays <- as.character(nom_pays)
  liste_pays <- as.character(liste_pays)
  liste_pays <- liste_pays[!is.na(liste_pays)]

  if (length(nom_pays) != 1L || is.na(nom_pays) || !nzchar(trimws(nom_pays))) {
    return(NA_character_)
  }

  # Pass 1: exact match
  if (nom_pays %in% liste_pays) {
    return(nom_pays)
  }

  # Pass 2: ignore case, punctuation and whitespace
  simplifier <- function(x) tolower(gsub("[[:punct:][:space:]]", "", x))
  position <- match(simplifier(nom_pays), simplifier(liste_pays))
  if (!is.na(position)) {
    return(liste_pays[position])
  }

  # Pass 3: literal substring containment, in either direction
  nom_minuscule <- tolower(nom_pays)
  liste_minuscule <- tolower(liste_pays)
  reference_contient_nom <- grepl(nom_minuscule, liste_minuscule, fixed = TRUE)
  nom_contient_reference <- vapply(
    liste_minuscule,
    function(reference) {
      nzchar(reference) && grepl(reference, nom_minuscule, fixed = TRUE)
    },
    logical(1),
    USE.NAMES = FALSE
  )
  candidats <- which(reference_contient_nom | nom_contient_reference)
  if (length(candidats) > 0L) {
    return(liste_pays[candidats[1L]])
  }

  NA_character_
}
|
||||
|
||||
# Resolve every Pays_Match value to a name present in population_data.
# vapply() guarantees a plain, unnamed character vector whatever the input
# (sapply() returned a named vector and could change type when nothing
# matched). Missing names are passed through as NA instead of being handed to
# the matcher, and the inputs are converted to character in case they are
# stored as factors.
noms_reference <- as.character(population_data$Pays_Pop)
athletes_with_pop$Pays_Match_Final <- vapply(
  as.character(athletes_with_pop$Pays_Match),
  function(nom) {
    if (is.na(nom)) {
      return(NA_character_)
    }
    as.character(match_countries(nom, noms_reference))
  },
  character(1),
  USE.NAMES = FALSE
)

# Attach the population figures, then derive the two ratios:
#   Athletes_Par_Million   - athletes per million inhabitants
#   Pourcentage_Population - athletes as a percentage of the population
# Committees without a population match keep NA in both columns.
athletes_with_pop <- athletes_with_pop %>%
  left_join(population_data, by = c("Pays_Match_Final" = "Pays_Pop")) %>%
  mutate(
    Athletes_Par_Million = (Nombre_Athletes / Population) * 1000000,
    Pourcentage_Population = (Nombre_Athletes / Population) * 100
  )
|
||||
|
||||
# The 50 committees with the largest delegations (absolute athlete count)
top_50 <- head(arrange(athletes_with_pop, desc(Nombre_Athletes)), 50)

# Plot 1: absolute number of athletes, bars ordered from largest to smallest
p1 <- ggplot(
  data = top_50,
  mapping = aes(
    x = reorder(Pays_Original, -Nombre_Athletes),
    y = Nombre_Athletes
  )
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Top 50 des pays avec le plus d'athletes",
    x = "Pays",
    y = "Nombre d'athletes"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Plot 2: same 50 committees, athletes as a share of the population.
# Rows without a population figure are excluded.
p2 <- ggplot(
  data = filter(top_50, !is.na(Population)),
  mapping = aes(
    x = reorder(Pays_Original, -Pourcentage_Population),
    y = Pourcentage_Population
  )
) +
  geom_bar(stat = "identity", fill = "darkgreen") +
  labs(
    title = "Pourcentage d'athletes par rapport a la population totale",
    subtitle = "Pour les pays du top 50 en nombre absolu d'athletes",
    x = "Pays",
    y = "Pourcentage d'athletes (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Render both charts on the active graphics device
print(p1)
print(p2)
|
||||
|
||||
# Ranking over ALL committees by athletes-to-population percentage (up to 50
# rows), for comparison with the top 50 by absolute count plotted above.
top_percentages <- athletes_with_pop %>%
  filter(!is.na(Pourcentage_Population)) %>%
  arrange(desc(Pourcentage_Population)) %>%
  head(50)

# The header previously announced "Top 10" while 50 rows were printed; it is
# now derived from the number of rows actually shown.
print(sprintf("Top %d countries by percentage:", nrow(top_percentages)))
print(top_percentages[, c("Pays_Original", "Nombre_Athletes", "Population", "Pourcentage_Population")])

# Save the two bar charts (12 x 8 inches, 300 dpi)
ggsave("Projet JOs/Top_50_Athletes_by_Country.png", p1, width = 12, height = 8, units = "in", dpi = 300)
ggsave("Projet JOs/Top_50_Athletes_Percentage_by_Country.png", p2, width = 12, height = 8, units = "in", dpi = 300)
|
||||
|
||||
# Inspect the medal sheet: structure and column names
str(donnees_graphique)
print(names(donnees_graphique))

# Working copy that receives a "Total" (medal count) column
athletes_medals <- athletes_with_pop

# Placeholder medal counts derived from delegation size. Only used when the
# medal sheet exposes no usable column; seeded so the output is reproducible.
# These figures are NOT real results.
simuler_medailles <- function(nombre_athletes) {
  set.seed(123)
  n <- length(nombre_athletes)
  round(ifelse(nombre_athletes > 50,
               nombre_athletes / 5 * runif(n, 0.3, 0.7),
               nombre_athletes / 10 * runif(n, 0.2, 0.5)))
}

if (!"Total" %in% names(athletes_medals)) {
  # The country column is assumed to be the first column of the medal sheet
  colonne_pays <- names(donnees_graphique)[1]
  colonnes_medailles <- grep("Total|Or|Gold|Medal", names(donnees_graphique), value = TRUE)

  if (length(colonnes_medailles) > 0) {
    # Prefer the overall total: taking simply the first matching column could
    # select the gold-medal column ("Or"/"Gold") and label it as the total.
    est_total_exact <- tolower(trimws(colonnes_medailles)) == "total"
    contient_total <- grepl("Total", colonnes_medailles, fixed = TRUE)
    colonne_medailles <- if (any(est_total_exact)) {
      colonnes_medailles[est_total_exact][1]
    } else if (any(contient_total)) {
      colonnes_medailles[contient_total][1]
    } else {
      colonnes_medailles[1]
    }

    # Look up each committee's row by name. Unlike copying a column out of a
    # left join, this always returns exactly one value per committee, even if
    # a country appears several times in the medal sheet (first row wins).
    lignes <- match(
      as.character(athletes_medals$Pays_Original),
      as.character(donnees_graphique[[colonne_pays]])
    )
    if (all(is.na(lignes))) {
      warning(
        "No committee name was found in column '", colonne_pays,
        "' of the medal sheet; all medal totals will be 0.",
        call. = FALSE
      )
    }
    athletes_medals$Total <- donnees_graphique[[colonne_medailles]][lignes]
  } else {
    warning(
      "No medal column found in the medal sheet; using SIMULATED medal counts.",
      call. = FALSE
    )
    athletes_medals$Total <- simuler_medailles(athletes_medals$Nombre_Athletes)
  }
}

# Committees absent from the medal sheet won no medal
athletes_medals$Total[is.na(athletes_medals$Total)] <- 0
|
||||
|
||||
# Chi-square test of independence between delegation size and medal count.
# Both variables are first binned into ordered categories.
athletes_medals <- athletes_medals %>%
  mutate(
    Cat_Athletes = cut(Nombre_Athletes,
                       breaks = c(0, 10, 50, 100, 200, Inf),
                       labels = c("1-10", "11-50", "51-100", "101-200", "200+"),
                       include.lowest = TRUE),
    Cat_Medailles = cut(Total,
                        breaks = c(-0.1, 0, 5, 10, 20, Inf),
                        labels = c("0", "1-5", "6-10", "11-20", "20+"),
                        include.lowest = TRUE)
  )

# Contingency table; table() already labels rows and columns with the factor
# levels, so no renaming is needed.
contingency_table <- table(athletes_medals$Cat_Athletes, athletes_medals$Cat_Medailles)

# Categories with no observation have an expected count of 0 and turn the
# statistic into NaN, so they are left out of the test (the full table is
# still printed below).
table_test <- contingency_table[
  rowSums(contingency_table) > 0,
  colSums(contingency_table) > 0,
  drop = FALSE
]
chi_test <- chisq.test(table_test)

# Report
cat("\nTable de contingence:\n")
print(contingency_table)

cat("\nRésultats du test du chi-carré:\n")
print(chi_test)

# `stdres` holds the standardized residuals announced by the label (and to
# which the +/-2 rule of thumb applies); `residuals` holds Pearson residuals.
cat("\nRésidus standardisés (valeurs > 2 ou < -2 sont significatives):\n")
print(round(chi_test$stdres, 2))
|
||||
|
||||
# Subtitle built from the actual chi-square result. It was previously a fixed
# string claiming "p-value <0.001" whatever the test returned.
p_value_chi2 <- chi_test$p.value
if (is.na(p_value_chi2)) {
  sous_titre_chi2 <- "Test chi² : p-value non calculable"
} else {
  p_value_txt <- format.pval(p_value_chi2, digits = 2, eps = 0.001)
  if (!startsWith(p_value_txt, "<")) {
    p_value_txt <- paste("=", p_value_txt)
  }
  interpretation_chi2 <- if (p_value_chi2 < 0.001) {
    "relation fortement significative"
  } else if (p_value_chi2 < 0.05) {
    "relation significative"
  } else {
    "relation non significative"
  }
  sous_titre_chi2 <- paste0("Test chi² : p-value ", p_value_txt, ", ", interpretation_chi2)
}

# Scatter plot: athletes vs medals. Point size encodes population, colour
# encodes athletes per million inhabitants; a linear fit is overlaid.
p_medals <- ggplot(athletes_medals, aes(x = Nombre_Athletes, y = Total)) +
  geom_point(aes(size = Population, color = Athletes_Par_Million), alpha = 0.7) +
  geom_smooth(method = "lm", color = "red", fill = "pink", alpha = 0.3) +
  scale_color_viridis_c(option = "plasma") +
  labs(title = "Relation entre nombre d'athlètes et médailles obtenues",
       subtitle = sous_titre_chi2,
       x = "Nombre d'athlètes",
       y = "Nombre de médailles",
       color = "Athlètes par million",
       size = "Population") +
  theme_minimal() +
  # Label only the most prominent committees to keep the chart readable
  geom_text_repel(
    data = subset(athletes_medals, Total > 15 | Nombre_Athletes > 150),
    aes(label = Pays_Original),
    size = 3,
    max.overlaps = 15
  )

# Display the chart
print(p_medals)

# Save the chart
ggsave("Projet JOs/Athletes_vs_Medals.png", p_medals, width = 12, height = 8, dpi = 300)
|
||||
|
||||
# Simple linear regression: medal count explained by delegation size
model <- lm(formula = Total ~ Nombre_Athletes, data = athletes_medals)

# Keep the summary object so the coefficients and R² can be reused later
summary_model <- summary(object = model)

cat("\nRégression linéaire entre nombre d'athlètes et médailles:\n")
print(summary_model)
|
||||
|
||||
# Efficiency = medals won per athlete sent (0 when a row has no athlete)
athletes_medals <- mutate(
  athletes_medals,
  Efficacite = ifelse(Nombre_Athletes > 0, Total / Nombre_Athletes, 0)
)

# Keep committees with at least 10 athletes and 5 medals so that tiny
# delegations do not dominate the ranking, then take the 20 most efficient
pays_eligibles <- filter(athletes_medals, Nombre_Athletes >= 10, Total >= 5)
top_efficient <- head(arrange(pays_eligibles, desc(Efficacite)), 20)

# Horizontal bar chart, most efficient committee at the top
p_efficiency <- ggplot(
  data = top_efficient,
  mapping = aes(x = reorder(Pays_Original, Efficacite), y = Efficacite)
) +
  geom_bar(stat = "identity", fill = "darkgreen") +
  coord_flip() +
  labs(
    title = "Top 20 des pays les plus efficaces",
    subtitle = "Pays ayant au moins 10 athlètes et 5 médailles",
    x = "Pays",
    y = "Médailles par athlète"
  ) +
  theme_minimal()

# Write the efficiency chart to disk
ggsave("Projet JOs/Medal_Efficiency_by_Country.png", p_efficiency, width = 12, height = 8, dpi = 300)
|
||||
BIN
Projet JOs/Porjet_JO.xlsx
Normal file
BIN
Projet JOs/Projet JOs Amokrane_Amrani_Gislot_Marquet.zip
Normal file
BIN
Projet JOs/Rapport_JO_Amokrane_Amrani_Gislot_Marquet.pdf
Normal file
35
Projet JOs/Top 10 des pays avec le plus de disciplines.R
Normal file
@@ -0,0 +1,35 @@
|
||||
# Charger les bibliothèques nécessaires
|
||||
library(ggplot2)
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
|
||||
# Project workbook. A path relative to the project root is used (same location
# as in the athletes-per-country script) instead of a user-specific absolute
# path under C:/Users, which only worked on one machine.
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Read the "Travail_athletes" worksheet
df_athletes <- read_excel(file_path, sheet = "Travail_athletes")

# Number of distinct disciplines per committee, largest first.
# Resulting columns: Pays, Nombre_Disciplines.
disciplines_par_pays <- df_athletes %>%
  group_by(Pays = `National Olympic Committee`) %>%
  summarise(Nombre_Disciplines = n_distinct(Discipline)) %>%
  arrange(desc(Nombre_Disciplines))

# Keep the 10 committees entered in the most disciplines
top_10 <- head(disciplines_par_pays, 10)

# Committees left out of the chart. This previously referenced an undefined
# `top_100` object, which stopped the script with "object not found".
pays_ignores <- setdiff(disciplines_par_pays$Pays, top_10$Pays)

# Show the committees that are not plotted
print("Pays ignorés :")
print(pays_ignores)

# Bar chart of the top 10; print() makes it render when the script is source()d
print(
  ggplot(top_10, aes(x = reorder(Pays, -Nombre_Disciplines), y = Nombre_Disciplines)) +
    geom_bar(stat = "identity", fill = "darkorange") +
    labs(title = "Top 10 des pays avec le plus de disciplines", x = "Pays", y = "Nombre de disciplines") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
)
|
||||
BIN
Projet JOs/Top_50_Athletes_Percentage_by_Country.png
Normal file
|
After Width: | Height: | Size: 251 KiB |
BIN
Projet JOs/Top_50_Athletes_by_Country.png
Normal file
|
After Width: | Height: | Size: 256 KiB |
235
Projet JOs/WorldPopulation2023.csv
Normal file
@@ -0,0 +1,235 @@
|
||||
Rank,Country,Population2023,YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,MedianAge,UrbanPop%,WorldShare
|
||||
36,Afghanistan,42239854,2.70 %,1111083,65,652860,-65846,4.4,17,26 %,0.53 %
|
||||
138,Albania,2832439,-0.35 %,-9882,103,27400,-8000,1.4,38,67 %,0.04 %
|
||||
34,Algeria,45606480,1.57 %,703255,19,2381740,-9999,2.8,28,75 %,0.57 %
|
||||
212,American Samoa,43914,-0.81 %,-359,220,200,-790,2.2,29,N.A.,0.00 %
|
||||
202,Andorra,80088,0.33 %,264,170,470,200,1.1,43,85 %,0.00 %
|
||||
42,Angola,36684202,3.08 %,1095215,29,1246700,-1000,5.1,16,68 %,0.46 %
|
||||
223,Anguilla,15899,0.26 %,42,177,90,0,1.3,38,98 %,0.00 %
|
||||
200,Antigua and Barbuda,94298,0.57 %,535,214,440,0,1.6,36,28 %,0.00 %
|
||||
33,Argentina,45773884,0.58 %,263566,17,2736690,3718,1.9,32,94 %,0.57 %
|
||||
140,Armenia,2777970,-0.09 %,-2499,98,28470,-5000,1.6,35,67 %,0.03 %
|
||||
197,Aruba,106277,-0.16 %,-168,590,180,157,1.2,42,45 %,0.00 %
|
||||
55,Australia,26439111,1.00 %,261698,3,7682300,139991,1.6,38,86 %,0.33 %
|
||||
100,Austria,8958960,0.22 %,19343,109,82409,19999,1.5,43,59 %,0.11 %
|
||||
90,Azerbaijan,10412651,0.53 %,54577,126,82658,0,1.7,32,57 %,0.13 %
|
||||
177,Bahamas,412623,0.64 %,2639,41,10010,1000,1.4,33,85 %,0.01 %
|
||||
154,Bahrain,1485509,0.90 %,13276,1955,760,0,1.8,34,N.A.,0.02 %
|
||||
8,Bangladesh,172954319,1.03 %,1767947,1329,130170,-309977,1.9,27,41 %,2.15 %
|
||||
187,Barbados,281995,0.13 %,360,656,430,-80,1.6,40,32 %,0.00 %
|
||||
97,Belarus,9498238,-0.39 %,-36716,47,202910,-4282,1.5,41,80 %,0.12 %
|
||||
82,Belgium,11686140,0.26 %,30210,386,30280,23999,1.6,41,99 %,0.15 %
|
||||
178,Belize,410825,1.37 %,5553,18,22810,600,2,26,48 %,0.01 %
|
||||
77,Benin,13712828,2.70 %,359964,122,112760,-200,4.8,18,48 %,0.17 %
|
||||
205,Bermuda,64069,-0.18 %,-115,1281,50,0,1.4,46,94 %,0.00 %
|
||||
165,Bhutan,787424,0.64 %,4969,21,38117,300,1.4,29,49 %,0.01 %
|
||||
80,Bolivia,12388571,1.35 %,164461,11,1083300,-3000,2.5,24,69 %,0.15 %
|
||||
137,Bosnia and Herzegovina,3210847,-0.70 %,-22679,63,51000,-500,1.3,42,54 %,0.04 %
|
||||
144,Botswana,2675352,1.71 %,45056,5,566730,3000,2.7,24,69 %,0.03 %
|
||||
7,Brazil,216422446,0.52 %,1108948,26,8358140,6000,1.6,34,88 %,2.69 %
|
||||
219,British Virgin Islands,31538,0.74 %,233,210,150,200,1,39,53 %,0.00 %
|
||||
176,Brunei ,452524,0.78 %,3522,86,5270,0,1.7,33,80 %,0.01 %
|
||||
110,Bulgaria,6687717,-1.39 %,-94236,62,108560,-4800,1.6,45,78 %,0.08 %
|
||||
59,Burkina Faso,23251485,2.55 %,577723,85,273600,-24998,4.6,17,32 %,0.29 %
|
||||
78,Burundi,13238559,2.71 %,348983,516,25680,2000,4.9,16,15 %,0.16 %
|
||||
171,Cabo Verde,598682,0.93 %,5533,149,4030,-1227,1.9,27,67 %,0.01 %
|
||||
73,Cambodia,16944826,1.06 %,176984,96,176520,-29998,2.3,27,26 %,0.21 %
|
||||
53,Cameroon,28647293,2.63 %,732757,61,472710,-4800,4.3,18,58 %,0.36 %
|
||||
38,Canada,38781291,0.85 %,326964,4,9093510,249746,1.5,41,81 %,0.48 %
|
||||
220,Caribbean Netherlands,27148,0.45 %,122,83,328,100,1.6,40,74 %,0.00 %
|
||||
204,Cayman Islands,69310,0.88 %,604,289,240,400,1.2,38,95 %,0.00 %
|
||||
117,Central African Republic,5742315,2.92 %,163171,9,622980,-14716,5.8,15,40 %,0.07 %
|
||||
67,Chad,18278568,3.13 %,555253,15,1259200,-2000,6.1,15,24 %,0.23 %
|
||||
65,Chile,19629590,0.13 %,25857,26,743532,-71205,1.5,36,85 %,0.24 %
|
||||
2,China,1425671352,-0.02 %,-215985,152,9388211,-310220,1.2,39,65 %,17.72 %
|
||||
28,Colombia,52085168,0.41 %,211144,47,1109500,-175051,1.7,32,81 %,0.65 %
|
||||
163,Comoros,852075,1.83 %,15301,458,1861,-2000,3.8,20,33 %,0.01 %
|
||||
113,Congo,6106869,2.29 %,136445,18,341500,-1000,4,18,70 %,0.08 %
|
||||
222,Cook Islands,17044,0.19 %,33,71,240,-93,2.2,33,79 %,0.00 %
|
||||
124,Costa Rica,5212173,0.60 %,31344,102,51060,3750,1.5,34,82 %,0.06 %
|
||||
130,Croatia,4008617,-0.54 %,-21741,72,55960,-2000,1.4,44,59 %,0.05 %
|
||||
85,Cuba,11194449,-0.16 %,-17742,105,106440,-6000,1.5,41,80 %,0.14 %
|
||||
190,Curaçao,192077,0.48 %,914,433,444,515,1.6,36,77 %,0.00 %
|
||||
158,Cyprus,1260138,0.69 %,8650,136,9240,5000,1.3,39,65 %,0.02 %
|
||||
89,Czech Republic (Czechia),10495295,0.01 %,1309,136,77240,22011,1.7,43,75 %,0.13 %
|
||||
51,Côte d'Ivoire,28873034,2.53 %,712492,91,318000,6000,4.3,18,52 %,0.36 %
|
||||
115,Denmark,5910913,0.49 %,28652,139,42430,19999,1.7,41,88 %,0.07 %
|
||||
160,Djibouti,1136455,1.39 %,15606,49,23180,900,2.7,24,72 %,0.01 %
|
||||
203,Dominica,73040,0.42 %,303,97,750,-40,1.6,32,75 %,0.00 %
|
||||
84,Dominican Republic,11332972,0.93 %,104151,235,48320,-29099,2.2,28,85 %,0.14 %
|
||||
15,DR Congo,102262808,3.29 %,3252596,45,2267050,-14999,6.1,16,46 %,1.27 %
|
||||
68,Ecuador,18190484,1.05 %,189484,73,248360,-21525,2,28,64 %,0.23 %
|
||||
14,Egypt,112716598,1.56 %,1726495,113,995450,-29998,2.8,24,41 %,1.40 %
|
||||
112,El Salvador,6364943,0.45 %,28551,307,20720,-23249,1.8,27,78 %,0.08 %
|
||||
152,Equatorial Guinea,1714671,2.37 %,39763,61,28050,4000,4.1,21,67 %,0.02 %
|
||||
131,Eritrea,3748901,1.76 %,64869,37,101000,-15297,3.7,19,67 %,0.05 %
|
||||
156,Estonia,1322765,-0.25 %,-3297,31,42390,-1000,1.7,42,68 %,0.02 %
|
||||
159,Eswatini,1210822,0.76 %,9152,70,17200,-5268,2.8,22,31 %,0.02 %
|
||||
11,Ethiopia,126527060,2.55 %,3147136,127,1000000,-11999,4,19,22 %,1.57 %
|
||||
207,Faeroe Islands,53270,0.34 %,180,38,1396,0,2.7,38,41 %,0.00 %
|
||||
231,Falkland Islands,3791,0.29 %,11,0,12170,0,1.6,40,62 %,0.00 %
|
||||
162,Fiji,936375,0.71 %,6609,51,18270,-3289,2.4,27,59 %,0.01 %
|
||||
118,Finland,5545475,0.09 %,4730,18,303890,13999,1.4,43,87 %,0.07 %
|
||||
23,France,64756584,0.20 %,129956,118,547557,67761,1.8,42,84 %,0.80 %
|
||||
184,French Guiana,312155,2.49 %,7598,4,82200,1200,3.4,24,90 %,0.00 %
|
||||
185,French Polynesia,308872,0.85 %,2593,84,3660,-100,1.7,34,59 %,0.00 %
|
||||
146,Gabon,2436566,1.99 %,47574,9,257670,1000,3.4,22,85 %,0.03 %
|
||||
141,Gambia,2773168,2.48 %,67176,274,10120,-3000,4.5,17,58 %,0.03 %
|
||||
132,Georgia,3728282,-0.43 %,-16103,54,69490,-9999,2.1,37,63 %,0.05 %
|
||||
19,Germany,83294633,-0.09 %,-75210,239,348560,155751,1.5,45,77 %,1.04 %
|
||||
47,Ghana,34121985,1.93 %,646115,150,227540,-9999,3.5,21,57 %,0.42 %
|
||||
217,Gibraltar,32688,0.12 %,39,3269,10,-24,1.8,42,N.A.,0.00 %
|
||||
91,Greece,10341277,-0.42 %,-43694,80,128900,5000,1.4,45,86 %,0.13 %
|
||||
206,Greenland,56643,0.31 %,177,0,410450,-100,1.9,35,89 %,0.00 %
|
||||
194,Grenada,126183,0.59 %,745,371,340,-200,2,32,32 %,0.00 %
|
||||
179,Guadeloupe,395839,0.02 %,87,234,1690,-800,2,42,N.A.,0.00 %
|
||||
192,Guam,172952,0.69 %,1178,320,540,-500,2.5,30,95 %,0.00 %
|
||||
70,Guatemala,18092026,1.39 %,248118,169,107160,-9110,2.3,23,55 %,0.22 %
|
||||
75,Guinea,14190612,2.39 %,331271,58,245720,-4000,4.2,18,40 %,0.18 %
|
||||
148,Guinea-Bissau,2150842,2.15 %,45276,76,28120,-1400,3.8,19,45 %,0.03 %
|
||||
164,Guyana,813834,0.63 %,5108,4,196850,-3900,2.3,26,27 %,0.01 %
|
||||
81,Haiti,11724763,1.21 %,139767,425,27560,-31811,2.7,23,60 %,0.15 %
|
||||
234,Holy See,518,1.57 %,8,1295,0,0,,,N.A.,0.00 %
|
||||
88,Honduras,10593798,1.54 %,160938,95,111890,-5034,2.3,24,58 %,0.13 %
|
||||
104,Hong Kong,7491609,0.04 %,2744,7135,1050,19999,0.8,46,N.A.,0.09 %
|
||||
94,Hungary,10156239,1.90 %,188931,112,90530,-156677,1.6,42,68 %,0.13 %
|
||||
180,Iceland,375318,0.65 %,2419,4,100250,380,1.7,36,88 %,0.00 %
|
||||
1,India,1428627663,0.81 %,11454490,481,2973190,-486136,2,28,36 %,17.76 %
|
||||
4,Indonesia,277534122,0.74 %,2032783,153,1811570,-49997,2.1,30,59 %,3.45 %
|
||||
17,Iran,89172767,0.70 %,622197,55,1628550,-39998,1.7,33,74 %,1.11 %
|
||||
35,Iraq,45504560,2.27 %,1008438,105,434320,-6000,3.4,20,71 %,0.57 %
|
||||
125,Ireland,5056935,0.67 %,33826,73,68890,9999,1.8,38,64 %,0.06 %
|
||||
201,Isle of Man,84710,0.23 %,191,149,570,340,1.6,46,55 %,0.00 %
|
||||
98,Israel,9174520,1.51 %,136211,424,21640,9999,2.9,29,92 %,0.11 %
|
||||
25,Italy,58870762,-0.28 %,-166712,200,294140,58496,1.3,48,72 %,0.73 %
|
||||
139,Jamaica,2825544,-0.06 %,-1833,261,10830,-10999,1.3,32,59 %,0.04 %
|
||||
12,Japan,123294513,-0.53 %,-657179,338,364555,99994,1.3,49,94 %,1.53 %
|
||||
83,Jordan,11337052,0.45 %,51183,128,88780,-157392,2.7,24,85 %,0.14 %
|
||||
66,Kazakhstan,19606633,1.08 %,208635,7,2699700,0,3,30,57 %,0.24 %
|
||||
26,Kenya,55100586,1.99 %,1073099,97,569140,-10000,3.2,20,31 %,0.68 %
|
||||
193,Kiribati,133515,1.74 %,2283,165,810,-400,3.2,22,56 %,0.00 %
|
||||
129,Kuwait,4310108,0.97 %,41235,242,17820,11999,2.1,40,N.A.,0.05 %
|
||||
109,Kyrgyzstan,6735347,1.58 %,104724,35,191800,-9999,2.9,24,37 %,0.08 %
|
||||
103,Laos,7633779,1.39 %,104304,33,230800,-9999,2.4,24,37 %,0.09 %
|
||||
151,Latvia,1830211,-1.10 %,-20440,29,62200,-7630,1.6,44,69 %,0.02 %
|
||||
122,Lebanon,5353930,-2.47 %,-135809,523,10230,-177331,2.1,29,97 %,0.07 %
|
||||
147,Lesotho,2330318,1.06 %,24493,77,30360,-4000,2.9,22,31 %,0.03 %
|
||||
120,Liberia,5418377,2.18 %,115696,56,96320,-5000,4,18,54 %,0.07 %
|
||||
107,Libya,6888388,1.12 %,76047,4,1759540,-2000,2.4,27,82 %,0.09 %
|
||||
214,Liechtenstein,39584,0.65 %,257,247,160,150,1.5,44,15 %,0.00 %
|
||||
142,Lithuania,2718352,-1.15 %,-31703,43,62674,-13128,1.6,44,71 %,0.03 %
|
||||
168,Luxembourg,654768,1.11 %,7169,253,2590,4883,1.4,39,88 %,0.01 %
|
||||
167,Macao,704149,1.29 %,8981,23472,30,5000,1.1,39,97 %,0.01 %
|
||||
50,Madagascar,30325732,2.41 %,714018,52,581795,-1500,3.7,19,40 %,0.38 %
|
||||
62,Malawi,20931751,2.58 %,526434,222,94280,-6000,3.8,17,19 %,0.26 %
|
||||
46,Malaysia,34308525,1.09 %,370304,104,328550,48997,1.8,31,78 %,0.43 %
|
||||
175,Maldives,521021,-0.53 %,-2766,1737,300,-8652,1.7,32,39 %,0.01 %
|
||||
58,Mali,23293698,3.10 %,700108,19,1220190,-39998,5.8,15,44 %,0.29 %
|
||||
174,Malta,535064,0.33 %,1778,1672,320,850,1.2,40,78 %,0.01 %
|
||||
213,Marshall Islands,41996,1.03 %,427,233,180,0,2.6,26,N.A.,0.00 %
|
||||
181,Martinique,366981,-0.14 %,-526,346,1060,-650,1.9,47,94 %,0.00 %
|
||||
126,Mauritania,4862989,2.68 %,126850,5,1030700,3000,4.3,18,61 %,0.06 %
|
||||
157,Mauritius,1300557,0.08 %,1088,641,2030,0,1.4,37,40 %,0.02 %
|
||||
182,Mayotte,335995,3.03 %,9894,896,375,0,4.3,17,40 %,0.00 %
|
||||
10,Mexico,128455567,0.75 %,951442,66,1943950,-50239,1.8,30,88 %,1.60 %
|
||||
173,Micronesia,544321,0.98 %,5308,778,700,-1642,2.7,26,71 %,0.01 %
|
||||
134,Moldova,3435931,4.98 %,162935,105,32850,-125204,1.8,35,50 %,0.04 %
|
||||
215,Monaco,36297,-0.47 %,-172,24360,1,200,2.1,54,N.A.,0.00 %
|
||||
133,Mongolia,3447157,1.44 %,48791,2,1553560,-850,2.7,27,67 %,0.04 %
|
||||
169,Montenegro,626485,-0.10 %,-597,47,13450,-480,1.7,39,69 %,0.01 %
|
||||
230,Montserrat,4386,-0.09 %,-4,44,100,0,1.6,44,11 %,0.00 %
|
||||
39,Morocco,37840044,1.02 %,382073,85,446300,-39998,2.3,29,66 %,0.47 %
|
||||
48,Mozambique,33897354,2.81 %,927836,43,786380,-5000,4.5,17,40 %,0.42 %
|
||||
27,Myanmar,54577997,0.74 %,398691,84,653290,-34998,2.1,30,33 %,0.68 %
|
||||
145,Namibia,2604172,1.45 %,37160,3,823290,-3916,3.2,21,60 %,0.03 %
|
||||
224,Nauru,12780,0.88 %,112,639,20,-140,3.4,20,88 %,0.00 %
|
||||
49,Nepal,30896590,1.14 %,349010,216,143350,-62012,2,24,22 %,0.38 %
|
||||
72,Netherlands,17618299,0.31 %,54285,522,33720,29998,1.6,42,92 %,0.22 %
|
||||
186,New Caledonia,292991,1.05 %,3041,16,18280,500,2,34,74 %,0.00 %
|
||||
123,New Zealand,5228100,0.83 %,42812,20,263310,12999,1.8,37,82 %,0.06 %
|
||||
106,Nicaragua,7046310,1.41 %,97918,59,120340,-8000,2.3,25,56 %,0.09 %
|
||||
54,Niger,27202843,3.80 %,994866,21,1266700,1000,6.7,15,17 %,0.34 %
|
||||
6,Nigeria,223804632,2.41 %,5263420,246,910770,-59996,5.1,17,54 %,2.78 %
|
||||
232,Niue,1935,0.05 %,1,7,260,0,2.4,36,41 %,0.00 %
|
||||
56,North Korea,26160821,0.35 %,91405,217,120410,-2000,1.8,36,63 %,0.33 %
|
||||
150,North Macedonia,2085679,-0.38 %,-7920,83,25220,-1000,1.4,39,60 %,0.03 %
|
||||
208,Northern Mariana Islands,49796,0.49 %,245,108,460,-50,2.1,38,N.A.,0.00 %
|
||||
119,Norway,5474360,0.74 %,40041,15,365268,27998,1.5,40,86 %,0.07 %
|
||||
127,Oman,4644384,1.49 %,68086,15,309500,0,2.5,29,N.A.,0.06 %
|
||||
5,Pakistan,240485658,1.98 %,4660796,312,770880,-165988,3.3,21,35 %,2.99 %
|
||||
221,Palau,18058,0.02 %,3,39,460,-20,2.3,36,N.A.,0.00 %
|
||||
128,Panama,4468087,1.35 %,59506,60,74340,7262,2.3,29,70 %,0.06 %
|
||||
92,Papua New Guinea,10329931,1.85 %,187312,23,452860,-800,3.1,22,12 %,0.13 %
|
||||
108,Paraguay,6861524,1.19 %,80780,17,397300,-12499,2.4,26,67 %,0.09 %
|
||||
45,Peru,34352719,0.89 %,303131,27,1280000,-61442,2.1,29,79 %,0.43 %
|
||||
13,Philippines,117337368,1.54 %,1778359,394,298170,-69996,2.7,25,47 %,1.46 %
|
||||
37,Poland,41026067,2.93 %,1168922,134,306230,-910475,1.5,40,55 %,0.51 %
|
||||
93,Portugal,10247605,-0.23 %,-23260,112,91590,9999,1.4,46,67 %,0.13 %
|
||||
136,Puerto Rico,3260314,0.24 %,7907,368,8870,19835,1.3,44,N.A.,0.04 %
|
||||
143,Qatar,2716391,0.79 %,21269,234,11610,0,1.8,34,N.A.,0.03 %
|
||||
64,Romania,19892812,1.19 %,233545,86,230170,-254616,1.7,41,53 %,0.25 %
|
||||
9,Russia,144444359,-0.19 %,-268955,9,16376870,-136414,1.5,39,75 %,1.80 %
|
||||
76,Rwanda,14094683,2.31 %,317985,571,24670,-8999,3.7,19,18 %,0.18 %
|
||||
161,Réunion,981796,0.80 %,7744,393,2500,-630,2.2,34,93 %,0.01 %
|
||||
227,Saint Barthelemy,10994,0.25 %,27,524,21,0,1,40,0 %,0.00 %
|
||||
229,Saint Helena,5314,-1.12 %,-60,14,390,0,1.6,53,32 %,0.00 %
|
||||
209,Saint Kitts & Nevis,47755,0.21 %,98,184,260,20,1.5,35,38 %,0.00 %
|
||||
191,Saint Lucia,180251,0.22 %,394,295,610,0,1.4,34,19 %,0.00 %
|
||||
218,Saint Martin,32077,0.90 %,286,605,53,0,2.4,39,0 %,0.00 %
|
||||
228,Saint Pierre & Miquelon,5840,-0.38 %,-22,25,230,0,1.6,44,N.A.,0.00 %
|
||||
189,Samoa,225681,1.48 %,3299,80,2830,-1500,3.8,21,16 %,0.00 %
|
||||
216,San Marino,33642,-0.05 %,-18,561,60,100,1.1,47,99 %,0.00 %
|
||||
188,Sao Tome & Principe,231856,1.97 %,4476,242,960,-600,3.7,19,77 %,0.00 %
|
||||
40,Saudi Arabia,36947025,1.48 %,538205,17,2149690,28998,2.4,31,83 %,0.46 %
|
||||
71,Senegal,17763163,2.58 %,446714,92,192530,-19999,4.3,18,52 %,0.22 %
|
||||
105,Serbia,7149077,-1.00 %,-72288,82,87460,-9999,1.5,43,69 %,0.09 %
|
||||
196,Seychelles,107660,0.51 %,542,234,460,-200,2.3,33,53 %,0.00 %
|
||||
102,Sierra Leone,8791092,2.15 %,185374,122,72180,-4000,3.8,19,43 %,0.11 %
|
||||
114,Singapore,6014723,0.65 %,39034,8592,700,26998,1,43,N.A.,0.07 %
|
||||
211,Sint Maarten,44222,0.11 %,47,1301,34,0,1.6,48,97 %,0.00 %
|
||||
116,Slovakia,5795199,2.69 %,151746,121,48088,-112067,1.6,40,51 %,0.07 %
|
||||
149,Slovenia,2119675,-0.01 %,-169,105,20140,2000,1.6,44,55 %,0.03 %
|
||||
166,Solomon Islands,740424,2.23 %,16151,26,27990,-1600,3.9,19,24 %,0.01 %
|
||||
69,Somalia,18143378,3.10 %,545867,29,627340,-30000,6.1,15,46 %,0.23 %
|
||||
24,South Africa,60414495,0.87 %,520610,50,1213090,58496,2.3,28,69 %,0.75 %
|
||||
29,South Korea,51784059,-0.06 %,-31751,533,97230,29998,0.9,44,82 %,0.64 %
|
||||
86,South Sudan,11088796,1.61 %,175632,18,610952,-23291,4.3,17,28 %,0.14 %
|
||||
32,Spain,47519628,-0.08 %,-39002,95,498800,39998,1.3,45,80 %,0.59 %
|
||||
61,Sri Lanka,21893579,0.28 %,61436,349,62710,-77495,2,33,19 %,0.27 %
|
||||
198,St. Vincent & Grenadines,103698,-0.24 %,-250,266,390,-200,1.8,33,58 %,0.00 %
|
||||
121,State of Palestine,5371230,2.31 %,121158,892,6020,-5000,3.4,20,83 %,0.07 %
|
||||
31,Sudan,48109006,2.63 %,1234802,27,1765048,-9999,4.3,19,35 %,0.60 %
|
||||
170,Suriname,623236,0.84 %,5196,4,156000,-1000,2.3,28,63 %,0.01 %
|
||||
87,Sweden,10612086,0.59 %,62739,26,410340,39998,1.7,40,86 %,0.13 %
|
||||
101,Switzerland,8796669,0.64 %,56197,223,39516,39998,1.5,42,75 %,0.11 %
|
||||
60,Syria,23227014,4.98 %,1101765,126,183630,757103,2.7,22,53 %,0.29 %
|
||||
57,Taiwan,23923276,0.13 %,29882,676,35410,23999,1.2,42,80 %,0.30 %
|
||||
95,Tajikistan,10143543,1.92 %,190756,72,139960,-19999,3.1,22,28 %,0.13 %
|
||||
22,Tanzania,67438106,2.96 %,1940358,76,885800,-39997,4.6,17,38 %,0.84 %
|
||||
20,Thailand,71801279,0.15 %,104249,141,510890,18999,1.3,40,52 %,0.89 %
|
||||
155,Timor-Leste,1360596,1.44 %,19300,91,14870,-5000,3,21,35 %,0.02 %
|
||||
99,Togo,9053799,2.32 %,205100,166,54390,-2000,4.1,19,44 %,0.11 %
|
||||
233,Tokelau,1893,1.18 %,22,189,10,0,2.6,27,0 %,0.00 %
|
||||
195,Tonga,107773,0.86 %,915,150,720,-800,3.2,22,24 %,0.00 %
|
||||
153,Trinidad and Tobago,1534937,0.25 %,3893,299,5130,-800,1.6,36,48 %,0.02 %
|
||||
79,Tunisia,12458223,0.83 %,102106,80,155360,-4000,2,32,69 %,0.15 %
|
||||
18,Turkey,85816199,0.56 %,474958,112,769630,-318067,1.9,32,77 %,1.07 %
|
||||
111,Turkmenistan,6516100,1.33 %,85330,14,469930,-4000,2.6,26,52 %,0.08 %
|
||||
210,Turks and Caicos,46062,0.79 %,359,48,950,200,1.6,38,79 %,0.00 %
|
||||
226,Tuvalu,11396,0.74 %,84,380,30,-60,3.1,25,69 %,0.00 %
|
||||
199,U.S. Virgin Islands,98750,-0.72 %,-715,282,350,-450,2.1,43,N.A.,0.00 %
|
||||
30,Uganda,48582334,2.82 %,1332749,243,199810,-126181,4.4,16,29 %,0.60 %
|
||||
41,Ukraine,36744634,-7.45 %,-2957105,63,579320,1784718,1.3,45,82 %,0.46 %
|
||||
96,United Arab Emirates,9516871,0.80 %,75742,114,83600,0,1.4,34,94 %,0.12 %
|
||||
21,United Kingdom,67736802,0.34 %,227866,280,241930,165790,1.6,40,85 %,0.84 %
|
||||
3,United States,339996563,0.50 %,1706706,37,9147420,999700,1.7,38,83 %,4.23 %
|
||||
135,Uruguay,3423108,0.01 %,314,20,175020,-1500,1.5,36,99 %,0.04 %
|
||||
43,Uzbekistan,35163944,1.55 %,536292,83,425400,-19999,2.8,27,49 %,0.44 %
|
||||
183,Vanuatu,334506,2.38 %,7766,27,12190,0,3.7,20,24 %,0.00 %
|
||||
52,Venezuela,28838499,1.90 %,536803,33,882050,321106,2.2,28,N.A.,0.36 %
|
||||
16,Vietnam,98858950,0.68 %,672094,319,310070,-82700,1.9,33,40 %,1.23 %
|
||||
225,Wallis & Futuna,11502,-0.60 %,-70,82,140,-119,1.9,37,0 %,0.00 %
|
||||
172,Western Sahara,587259,1.96 %,11273,2,266000,5600,2.2,32,95 %,0.01 %
|
||||
44,Yemen,34449825,2.24 %,753211,65,527970,-29914,3.6,19,37 %,0.43 %
|
||||
63,Zambia,20569737,2.76 %,552062,28,743390,-5000,4.2,17,46 %,0.26 %
|
||||
74,Zimbabwe,16665409,2.11 %,344872,43,386850,-9999,3.4,18,37 %,0.21 %
|
||||
|
25
Projet JOs/discipline avec le plus d'athlètes.R
Normal file
@@ -0,0 +1,25 @@
|
||||
# Charger les bibliothèques nécessaires
|
||||
library(ggplot2)
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
|
||||
# Load the Excel workbook. The path is relative to the project root, like the
# paths used by the other scripts of this project, so the script no longer
# depends on one user's Downloads folder.
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Read the "Travail_athletes" sheet.
df_athletes <- read_excel(file_path, sheet = "Travail_athletes")

# Count the rows of each discipline. The column keeps its historical name, but
# it holds a plain count: the former mean(n()) was the mean of a single number,
# i.e. n() itself.
athletes_par_discipline <- df_athletes %>%
  group_by(Discipline) %>%
  summarise(Moyenne_Athletes = n())

# Keep the ten disciplines with the most rows.
top_10_disciplines <- athletes_par_discipline %>%
  arrange(desc(Moyenne_Athletes)) %>%
  head(10)

# Bar chart of the top 10, largest first. The labels describe a count because
# no average is computed anywhere.
p_top_disciplines <- ggplot(
  top_10_disciplines,
  aes(x = reorder(Discipline, -Moyenne_Athletes), y = Moyenne_Athletes)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 des disciplines avec le plus d'athlètes",
    x = "Discipline",
    y = "Nombre d'athlètes"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Print explicitly so the chart is also drawn when the file is source()d.
print(p_top_disciplines)
|
||||
94
Projet JOs/discipline_medal.R
Normal file
@@ -0,0 +1,94 @@
|
||||
# Installation et chargement des packages nécessaires
|
||||
if (!require("readxl")) install.packages("readxl")
|
||||
if (!require("dplyr")) install.packages("dplyr")
|
||||
if (!require("ggplot2")) install.packages("ggplot2")
|
||||
if (!require("ggrepel")) install.packages("ggrepel")
|
||||
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
library(ggplot2)
|
||||
library(ggrepel)
|
||||
|
||||
# Read the medal sheet of the project workbook.
donnees_medailles <- read_excel(
  "Projet JOs/Porjet_JO.xlsx",
  sheet = "Travail_medailles"
)

# Cross-tabulate medals by country code and medal type, then add the row total
# and the country code as regular columns, sorted by decreasing total.
tableau_croise <- table(
  donnees_medailles$Country_code,
  donnees_medailles$Medal_type
)
nombre_medailles <- as.data.frame.matrix(tableau_croise)
nombre_medailles$Total <- rowSums(nombre_medailles)
nombre_medailles$Pays <- rownames(nombre_medailles)
nombre_medailles <- nombre_medailles[order(-nombre_medailles$Total), ]

# Number of disciplines per country, entered by hand for ten countries.
disciplines_par_pays <- data.frame(
  Country = c(
    "FRA", "USA", "AUS", "CHN", "JPN",
    "CAN", "GER", "ESP", "BRA", "ITA"
  ),
  Nombre_Disciplines = c(
    45, 44, 42, 41, 41,
    40, 40, 38, 37, 36
  )
)

# Keep only the countries present in both tables; both sides use the same
# three-letter codes, so the join needs no recoding.
donnees_combinees <- inner_join(
  select(nombre_medailles, Pays, Total),
  disciplines_par_pays,
  by = c("Pays" = "Country")
)
|
||||
|
||||
# Le reste du code reste inchangé
|
||||
# Bin both variables into classes for the chi-squared analysis.
donnees_combinees <- donnees_combinees %>%
  mutate(
    Cat_Disciplines = cut(
      Nombre_Disciplines,
      breaks = c(0, 35, 40, 45, Inf),
      labels = c("30-35", "36-40", "41-45", "45+"),
      include.lowest = TRUE
    ),
    Cat_Medailles = cut(
      Total,
      breaks = c(0, 20, 40, 60, Inf),
      labels = c("1-20", "21-40", "41-60", "60+"),
      include.lowest = TRUE
    )
  )

# Contingency table for the chi-squared test. Classes that contain no country
# are dropped first: an all-zero row or column has an expected count of 0,
# which turns the statistic (and therefore the p-value) into NaN/NA.
contingence <- table(
  droplevels(donnees_combinees$Cat_Disciplines),
  droplevels(donnees_combinees$Cat_Medailles)
)
print("Table de contingence:")
print(contingence)

# Chi-squared independence test; the p-value is obtained by Monte Carlo
# simulation (simulate.p.value = TRUE).
test_chi2 <- chisq.test(contingence, simulate.p.value = TRUE)
print("Résultat du test Chi-carré:")
print(test_chi2)

# Standardized residuals: chisq.test() stores them in `stdres`; `residuals`
# holds the Pearson residuals, which is not what the heading announces.
print("Résidus standardisés:")
print(round(test_chi2$stdres, 2))
|
||||
|
||||
# Scatter plot of medals against disciplines, with a linear fit and one label
# per country. The subtitle reports the p-value of the chi-squared test.
sous_titre <- paste(
  "Test Chi² : p-value =",
  format.pval(test_chi2$p.value, digits = 3)
)

p1 <- ggplot(donnees_combinees, aes(x = Nombre_Disciplines, y = Total)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  geom_text_repel(aes(label = Pays), size = 3) +
  labs(
    title = "Relation entre disciplines et médailles par pays",
    subtitle = sous_titre,
    x = "Nombre de disciplines",
    y = "Nombre total de médailles"
  ) +
  theme_minimal()

# Show the plot, then save it next to the project files.
print(p1)
ggsave(
  "Projet JOs/disciplines_medailles_chi2.png",
  p1,
  width = 10,
  height = 8,
  dpi = 300
)
|
||||
|
||||
# Medals per discipline for each country.
donnees_combinees$Ratio <- with(donnees_combinees, Total / Nombre_Disciplines)

# Countries ranked by that ratio, best first (at most ten rows).
top_efficacite <- head(arrange(donnees_combinees, desc(Ratio)), 10)

# Horizontal bar chart of the ratio, bars ordered by value.
p3 <- ggplot(top_efficacite, aes(x = reorder(Pays, Ratio), y = Ratio)) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  labs(
    title = "Top pays avec le meilleur ratio médailles/disciplines",
    x = "",
    y = "Ratio médailles/disciplines"
  ) +
  theme_minimal()

# Show the plot, then save it next to the project files.
print(p3)
ggsave(
  "Projet JOs/top10_efficacite_pays.png",
  p3,
  width = 8,
  height = 6,
  dpi = 300
)
|
||||
BIN
Projet JOs/disciplines_medailles_chi2.png
Normal file
|
After Width: | Height: | Size: 290 KiB |
41
Projet JOs/extract_country.py
Normal file
@@ -0,0 +1,41 @@
|
||||
def extract_country_data(csv_path):
    """Print an R ``data.frame`` definition built from a population CSV.

    Each data row must have an integer in its first column, the country name
    in its second column and the population in its third column. Rows that do
    not fit (header, short rows, non-numeric rank or population) are skipped.

    Args:
        csv_path: Path of the CSV file to read (UTF-8, with or without BOM).

    Returns:
        None. The generated R code is written to standard output.
    """
    import csv  # local import so the function stays self-contained

    countries = []
    populations = []

    # The csv module handles quoted fields, so a name or a number containing a
    # comma is no longer split in the middle. The explicit encoding keeps
    # non-ASCII names intact whatever the platform default is.
    with open(csv_path, 'r', encoding='utf-8-sig', newline='') as file:
        for fields in csv.reader(file):
            if len(fields) < 3:
                continue
            try:
                # A data row starts with a numeric rank.
                int(fields[0])
                # Thousands separators are removed before validating the value.
                population = int(fields[2].replace(',', ''))
            except ValueError:
                # Skip header or invalid rows
                continue

            # Trim stray whitespace and escape the characters that would end
            # or corrupt an R double-quoted string.
            name = fields[1].strip().replace('\\', '\\\\').replace('"', '\\"')
            countries.append(f'"{name}"')
            populations.append(population)

    def _print_r_vector(items):
        # Every element but the last one is followed by a comma.
        last = len(items) - 1
        for i, item in enumerate(items):
            print(f" {item}," if i < last else f" {item}")

    # Print in R data.frame format
    print("# Population data manually entered")
    print("population_data <- data.frame(")
    print(" Pays_Pop = c(")
    _print_r_vector(countries)
    print(" ),")
    print(" Population = c(")
    _print_r_vector(populations)
    print(" )")
    print(")")
|
||||
|
||||
# Usage: run the extraction only when the file is executed as a script, so
# importing the module does not try to read the CSV file.
if __name__ == "__main__":
    extract_country_data("WorldPopulation2023.csv")
|
||||
26
Projet JOs/genre.R
Normal file
@@ -0,0 +1,26 @@
|
||||
# Charger les packages nécessaires
|
||||
library(readxl)
|
||||
library(ggplot2)
|
||||
|
||||
# Load the Excel workbook. The path is relative to the project root, like the
# paths used by the other scripts of this project, so the script no longer
# depends on one user's Downloads folder.
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Read the "Travail_athletes" sheet.
df_athletes <- read_excel(file_path, sheet = "Travail_athletes")

# Only build the chart when the "Gender" column exists (the test is
# case-sensitive).
if ("Gender" %in% names(df_athletes)) {
  # Count the occurrences of each gender value.
  genre_counts <- table(df_athletes$Gender)

  # Reshape the counts into a data frame for ggplot2.
  genre_df <- data.frame(
    genre = names(genre_counts),
    count = as.numeric(genre_counts)
  )

  # Bar chart of the number of rows per gender, with tilted x-axis labels.
  p_genre <- ggplot(genre_df, aes(x = genre, y = count)) +
    geom_col(fill = "skyblue") +
    labs(
      title = "Distribution des genres des athlètes",
      x = "Genre",
      y = "Nombre d'athlètes"
    ) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  # Print explicitly so the chart is also drawn when the file is source()d.
  print(p_genre)
} else {
  print("La colonne 'Gender' n'existe pas dans le fichier Excel.")
}
|
||||
44
Projet JOs/loiN_age_athlete.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import pandas as pd
|
||||
import scipy.stats as stats
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
|
||||
# Load the data
file_path = "Porjet_JO.xlsx"
data_medailles = pd.read_excel(file_path, sheet_name="Travail_medailles")

# Medallists' ages. Values are coerced to numbers first so that non-numeric
# cells (for instance spreadsheet error markers such as "#NOM?") become NaN
# and are dropped instead of breaking the histogram and the tests below.
medailles_age = pd.to_numeric(data_medailles["Age"], errors="coerce").dropna()

# Distribution of the medallists' ages
plt.figure(figsize=(10, 5))
sns.histplot(medailles_age, kde=True, color="red", bins=20, alpha=0.7)
plt.title("Distribution de l'âge des médaillés")
plt.xlabel("Âge")
plt.ylabel("Fréquence")
plt.show()

# Student's t-test: compare the mean age with a reference mean (25 years)
t_stat, t_pval = stats.ttest_1samp(medailles_age, 25)

# Chi-squared test on the ages grouped into classes. No expected frequencies
# are passed, so the observed counts are compared with equal counts per class.
age_bins = [0, 18, 25, 30, 35, 40, 100]  # age classes
age_groups = pd.cut(medailles_age, bins=age_bins).value_counts()
chi2_stat, chi2_pval = stats.chisquare(f_obs=age_groups)

# Kolmogorov-Smirnov test against a normal distribution whose mean and
# standard deviation are estimated from the sample itself.
# NOTE(review): estimating the parameters from the same sample presumably
# makes this p-value optimistic - confirm whether a Lilliefors-type
# correction is wanted.
ks_stat, ks_pval = stats.kstest(medailles_age, 'norm', args=(medailles_age.mean(), medailles_age.std()))

# Print the results
print(f"Test de Student : T-stat={t_stat:.3f}, p-value={t_pval:.5f}")
print(f"Test du Khi² : Chi²-stat={chi2_stat:.3f}, p-value={chi2_pval:.5f}")
print(f"Test de Kolmogorov-Smirnov : KS-stat={ks_stat:.3f}, p-value={ks_pval:.5f}")
|
||||
|
||||
# Interprétation des résultats
|
||||
def interpret_pvalue(pval, alpha=0.05):
    """Return the French verdict for a p-value at significance level ``alpha``.

    The difference is reported as significant only when ``pval`` is strictly
    below ``alpha``.
    """
    if pval < alpha:
        return "Différence significative"
    return "Pas de différence significative"
|
||||
|
||||
# Print one verdict line per test, in the order the tests were run.
print("\nInterprétation :")
for label, pval in (
    ("Test de Student", t_pval),
    ("Test du Khi²", chi2_pval),
    ("Test de KS", ks_pval),
):
    print(f"{label} : {interpret_pvalue(pval)}")
|
||||
54
Projet JOs/loiN_pays_athlete.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy.stats import ttest_ind, chi2_contingency, ks_2samp
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
def _effectifs(colonne, nom_effectif):
    """Count each value of ``colonne``; columns are Nationality and ``nom_effectif``."""
    table = colonne.value_counts().reset_index()
    table.columns = ["Nationality", nom_effectif]
    return table


# Load both sheets of the workbook
data_athletes = pd.read_excel("Porjet_JO.xlsx", sheet_name="Travail_athletes")
data_medailles = pd.read_excel("Porjet_JO.xlsx", sheet_name="Travail_medailles")

# Number of athletes and number of medals per country
athletes_par_pays = _effectifs(data_athletes["Nationality"], "Nb_Athletes")
medailles_par_pays = _effectifs(data_medailles["Country_code"], "Nb_Medailles")

# Outer merge keeps countries present on one side only; their missing count is
# replaced by 0.
resultats = athletes_par_pays.merge(medailles_par_pays, on="Nationality", how="outer").fillna(0)

# Linear regression of the medal count on the athlete count
X = resultats[["Nb_Athletes"]]
y = resultats["Nb_Medailles"]
reg = LinearRegression().fit(X, y)

# Scatter plot of the countries with the fitted line on top
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x="Nb_Athletes", y="Nb_Medailles", data=resultats, color="blue", ax=ax)
ax.plot(resultats["Nb_Athletes"], reg.predict(X), color="red")
ax.set_xlabel("Nombre d'athlètes")
ax.set_ylabel("Nombre de médailles")
ax.set_title("Relation entre le nombre d'athlètes et le nombre de médailles")
plt.show()

# Welch's t-test between the two count columns
t_stat, p_value_t = ttest_ind(resultats["Nb_Athletes"], resultats["Nb_Medailles"], equal_var=False)

# Chi-squared test on the cross-tabulation of the two count columns
table_contingence = pd.crosstab(resultats["Nb_Athletes"], resultats["Nb_Medailles"])
chi2_stat, p_value_chi, _, _ = chi2_contingency(table_contingence)

# Two-sample Kolmogorov-Smirnov test between the two count columns
ks_stat, p_value_ks = ks_2samp(resultats["Nb_Athletes"], resultats["Nb_Medailles"])
|
||||
|
||||
|
||||
# Fonction d'interprétation
|
||||
def interpret_pvalue(pval, alpha=0.05):
    """Return the French verdict for a p-value at significance level ``alpha``.

    The difference is reported as significant only when ``pval`` is strictly
    below ``alpha``.
    """
    significatif = pval < alpha
    if significatif:
        return "Différence significative"
    return "Pas de différence significative"
|
||||
|
||||
|
||||
# Print the statistic, the p-value and the verdict of each test
for label, stat, pval in (
    ("Test de Student", t_stat, p_value_t),
    ("Test du Khi²", chi2_stat, p_value_chi),
    ("Test de Kolmogorov-Smirnov", ks_stat, p_value_ks),
):
    print(f"{label}: Stat={stat:.3f}, p-value={pval:.5f} → {interpret_pvalue(pval)}")
|
||||
58
Projet JOs/map.R
Normal file
@@ -0,0 +1,58 @@
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2")
|
||||
}
|
||||
if (!require("rnaturalearth")) {
|
||||
install.packages("rnaturalearth")
|
||||
}
|
||||
|
||||
if (!require("rnaturalearthdata")) {
|
||||
install.packages("rnaturalearthdata")
|
||||
}
|
||||
if (!require("dplyr")) {
|
||||
install.packages("dplyr")
|
||||
}
|
||||
|
||||
library(ggplot2)
|
||||
library(rnaturalearth)
|
||||
library(rnaturalearthdata)
|
||||
library(dplyr)
|
||||
|
||||
|
||||
# Olympic (IOC) country codes that differ from the ISO 3166-1 alpha-3 codes
# used by the map data. Only a few pairs are listed; other IOC codes also
# differ from ISO (for example NED/NLD or SUI/CHE) and can be added here.
olympic_to_iso <- c(
  "GER" = "DEU", # Germany
  "RSA" = "ZAF", # South Africa
  "CHI" = "CHL"  # Chile
)

# NOTE(review): `test_R` is not created in this script; presumably a data
# frame imported into the session beforehand - confirm.
participants_iso <- test_R$Pays_unique

# Convert Olympic codes to ISO codes where a mapping exists; every other code
# is kept as is. The lookup is vectorized, so an empty input simply yields an
# empty result instead of failing inside a 1:length() loop.
participants_iso_fixed <- as.character(participants_iso)
position <- match(participants_iso_fixed, names(olympic_to_iso))
a_convertir <- !is.na(position)
participants_iso_fixed[a_convertir] <- olympic_to_iso[position[a_convertir]]
|
||||
|
||||
# World map as an sf object (medium resolution).
world <- ne_countries(scale = "medium", returnclass = "sf")

# Debug output: show the codes stored for Germany in the map data.
germany_data <- filter(world, admin == "Germany")
print("Germany data:")
print(select(germany_data, admin, iso_a3, iso_a3_eh))

# A country takes part when either of its two ISO columns is in the list.
est_participant <- world$iso_a3 %in% participants_iso_fixed |
  world$iso_a3_eh %in% participants_iso_fixed
world <- mutate(world, participant = ifelse(est_participant, "Oui", "Non"))

# Draw the map: participants in light green, all other countries in grey.
ggplot(world) +
  geom_sf(aes(fill = participant), color = "black") +
  scale_fill_manual(values = c("Oui" = "lightgreen", "Non" = "lightgrey")) +
  theme_minimal() +
  labs(
    title = "Pays participants aux Jeux Olympiques de Paris 2024",
    fill = "Participation"
  )
|
||||
201
Projet JOs/map_medaille.R
Normal file
@@ -0,0 +1,201 @@
|
||||
# Installation et chargement des packages nécessaires
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2")
|
||||
}
|
||||
if (!require("rnaturalearth")) {
|
||||
install.packages("rnaturalearth")
|
||||
}
|
||||
|
||||
if (!require("rnaturalearthdata")) {
|
||||
install.packages("rnaturalearthdata")
|
||||
}
|
||||
if (!require("dplyr")) {
|
||||
install.packages("dplyr")
|
||||
}
|
||||
|
||||
library(ggplot2)
|
||||
library(rnaturalearth)
|
||||
library(rnaturalearthdata)
|
||||
library(dplyr)
|
||||
#-----------------------RECUPERATION PAYS MEDAILLES ---------------------------
|
||||
|
||||
|
||||
# Take the medal data from the `Porjet_JO` object.
# NOTE(review): `Porjet_JO` is not created in this script; presumably the
# Excel workbook was imported into the session beforehand - confirm.
donnees_medailles <- Porjet_JO

# Show the structure of the data.
str(donnees_medailles)

# Cleaning: parse the medal dates (the format is assumed to be MM/DD/YYYY).
donnees_medailles$Medal_date <- as.Date(
  donnees_medailles$Medal_date,
  format = "%m/%d/%Y"
)

# Cleaning: blank out the two known bad Age values, then convert to numeric.
age_invalide <- donnees_medailles$Age == "#NOM?" |
  donnees_medailles$Age == "01/02/1900"
donnees_medailles$Age <- as.numeric(
  ifelse(age_invalide, NA, donnees_medailles$Age)
)

# Show the first rows to check the data.
head(donnees_medailles)

# Medal counts per country and medal type, as a data frame.
nombre_medailles <- as.data.frame.matrix(
  table(donnees_medailles$Country, donnees_medailles$Medal_type)
)

# Add the total per country and sort by decreasing total.
if (ncol(nombre_medailles) > 0) {
  nombre_medailles$Total <- rowSums(nombre_medailles)
  ordre <- order(-nombre_medailles$Total)
  nombre_medailles <- nombre_medailles[ordre, ]
}

# Print the medal table.
print(nombre_medailles)

# Two-column table (country, total) used by the rest of the script.
donnees_graphique <- data.frame(
  Pays = rownames(nombre_medailles),
  Total = nombre_medailles$Total
)
|
||||
|
||||
# Olympic delegation names forced to a given ISO 3166-1 alpha-3 code. An entry
# here always wins over the lookup in the map data.
custom_mapping <- c(
  "United States of America" = "USA",
  "United States" = "USA",
  "Great Britain" = "GBR",
  "People's Republic of China" = "CHN",
  "China" = "CHN",
  "Chinese Taipei" = "TWN", # ISO code; "TPE" is the IOC code, not an ISO code
  "Republic of Korea" = "KOR",
  "Korea" = "KOR",
  "Democratic People's Republic of Korea" = "PRK",
  "North Korea" = "PRK",
  "ROC" = "RUS", # Russian Olympic Committee
  "Hong Kong, China" = "HKG",
  "Iran, Islamic Republic of" = "IRN",
  "Netherlands" = "NLD",
  "New Zealand" = "NZL",
  "Republic of South Africa" = "ZAF",
  "Trinidad and Tobago" = "TTO",
  "Türkiye" = "TUR"
)

# Get ISO 3-letter codes from country names.
#
# country_names: vector of country names.
# overrides:     named character vector (name -> ISO code) applied on top of
#                the map lookup; defaults to `custom_mapping`.
#
# Returns a character vector as long as `country_names`, with NA where no
# code could be found.
get_iso_codes <- function(country_names, overrides = custom_mapping) {
  country_names <- as.character(country_names)

  # Load world data
  world <- ne_countries(scale = "medium", returnclass = "sf")

  # Code kept for each map row: iso_a3 when usable, otherwise iso_a3_eh.
  # Empty strings and "-99" are treated as missing codes.
  code_utilisable <- function(code) {
    !is.na(code) & code != "" & code != "-99"
  }
  map_iso <- ifelse(
    code_utilisable(world$iso_a3),
    world$iso_a3,
    ifelse(code_utilisable(world$iso_a3_eh), world$iso_a3_eh, NA_character_)
  )

  # Case-insensitive match on either the `name` or the `admin` column; when
  # both match, the first map row wins.
  wanted <- tolower(country_names)
  by_name <- match(wanted, tolower(world$name))
  by_admin <- match(wanted, tolower(world$admin))
  first_row <- pmin(by_name, by_admin, na.rm = TRUE)
  # A missing input name must never match a missing name in the map data.
  first_row[is.na(country_names)] <- NA_integer_
  iso_codes <- map_iso[first_row]

  # Overrides are matched on the exact name and take precedence.
  forced <- match(country_names, names(overrides))
  has_override <- !is.na(forced)
  iso_codes[has_override] <- unname(overrides[forced[has_override]])

  iso_codes
}

# Apply to the medal table; the overrides are applied inside get_iso_codes().
donnees_graphique$iso_code <- get_iso_codes(donnees_graphique$Pays)
|
||||
|
||||
# Show the first country / ISO code pairs.
print(head(donnees_graphique[, c("Pays", "iso_code")]))

# List the distinct countries that still have no ISO code.
sans_code <- filter(donnees_graphique, is.na(iso_code))
missing_codes <- distinct(select(sans_code, Pays))

if (nrow(missing_codes) > 0) {
  print("Countries still without ISO codes:")
  print(missing_codes)
}
|
||||
|
||||
#-----------------------RECUPERATION PAYS MEDAILLES ---------------------------
|
||||
|
||||
|
||||
# Olympic (IOC) country codes that differ from the ISO 3166-1 alpha-3 codes.
# Add other mappings here if needed.
olympic_to_iso <- c(
  "GER" = "DEU", # Germany
  "RSA" = "ZAF", # South Africa
  "CHI" = "CHL"  # Chile
)

# NOTE(review): `test_R` is not created in this script; presumably a data
# frame imported into the session beforehand - confirm.
participants_iso <- test_R$Pays_unique

# Convert Olympic codes to ISO codes where a mapping exists; every other code
# is kept as is. The lookup is vectorized, so an empty input simply yields an
# empty result instead of failing inside a 1:length() loop.
participants_iso_fixed <- as.character(participants_iso)
position <- match(participants_iso_fixed, names(olympic_to_iso))
a_convertir <- !is.na(position)
participants_iso_fixed[a_convertir] <- olympic_to_iso[position[a_convertir]]
|
||||
|
||||
# World map as an sf object (medium resolution).
world <- ne_countries(scale = "medium", returnclass = "sf")

# Debug output: show the codes stored for Germany in the map data.
germany_data <- filter(world, admin == "Germany")
print("Germany data:")
print(select(germany_data, admin, iso_a3, iso_a3_eh))

# A country is flagged when either of its two ISO columns appears among the
# ISO codes of the medal table.
a_une_medaille <- world$iso_a3 %in% donnees_graphique$iso_code |
  world$iso_a3_eh %in% donnees_graphique$iso_code
world <- mutate(world, participant = ifelse(a_une_medaille, "Oui", "Non"))

# Draw the map: flagged countries in yellow, all other countries in grey.
ggplot(world) +
  geom_sf(aes(fill = participant), color = "black") +
  scale_fill_manual(values = c("Oui" = "yellow", "Non" = "lightgrey")) +
  theme_minimal() +
  labs(
    title = "Pays ayant reçu au moins Une médaille aux Jeux Olympiques de Paris 2024",
    fill = "Médailles"
  )
|
||||
200
Projet JOs/map_medaille_participation.R
Normal file
@@ -0,0 +1,200 @@
|
||||
# Installation et chargement des packages nécessaires
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2")
|
||||
}
|
||||
if (!require("rnaturalearth")) {
|
||||
install.packages("rnaturalearth")
|
||||
}
|
||||
if (!require("rnaturalearthdata")) {
|
||||
install.packages("rnaturalearthdata")
|
||||
}
|
||||
if (!require("dplyr")) {
|
||||
install.packages("dplyr")
|
||||
}
|
||||
if (!require("readxl")) {
|
||||
install.packages("readxl")
|
||||
}
|
||||
|
||||
library(ggplot2)
|
||||
library(rnaturalearth)
|
||||
library(rnaturalearthdata)
|
||||
library(dplyr)
|
||||
library(readxl)
|
||||
|
||||
#-----------------------RECUPERATION PAYS MEDAILLES ---------------------------
|
||||
# Import the medal data from the Excel workbook.
# The previous version copied an object named `Porjet_JO` that this script
# never creates; read the sheet explicitly, as the sibling scripts do.
donnees_medailles <- read_excel("Projet JOs/Porjet_JO.xlsx",
                                sheet = "Travail_medailles")

# Inspect the structure of the imported data.
str(donnees_medailles)

# --- Data cleaning ---
# Parse medal dates (assumes MM/DD/YYYY text).
# NOTE(review): if read_excel already returns a date-time column, `format`
# is ignored by as.Date() -- confirm against the workbook.
donnees_medailles$Medal_date <- as.Date(donnees_medailles$Medal_date,
                                        format = "%m/%d/%Y")

# Age column: blank out known spreadsheet error values, then make it numeric.
valeurs_age_invalides <- c("#NOM?", "01/02/1900")
donnees_medailles$Age[donnees_medailles$Age %in% valeurs_age_invalides] <- NA
donnees_medailles$Age <- as.numeric(donnees_medailles$Age)

# Medal counts per country (rows) and medal type (columns).
nombre_medailles <- table(donnees_medailles$Country,
                          donnees_medailles$Medal_type)
nombre_medailles <- as.data.frame.matrix(nombre_medailles)

# Add a Total column and sort countries by it, highest first.
if (ncol(nombre_medailles) > 0) {
  nombre_medailles$Total <- rowSums(nombre_medailles)
  nombre_medailles <- nombre_medailles[order(-nombre_medailles$Total), ]
}

# Long-lived table used by the rest of the script: one row per country.
donnees_graphique <- data.frame(
  Pays = rownames(nombre_medailles),
  Total = nombre_medailles$Total
)
|
||||
|
||||
#-----------------------GESTION DES CODES ISO ---------------------------
|
||||
#' Look up ISO 3-letter codes for country names
#'
#' Each name is compared case-insensitively with the `name` and `admin`
#' columns of `world`; the first matching row (in table order) wins. The
#' row's `iso_a3` is returned when usable, otherwise its `iso_a3_eh`.
#' A code is unusable when it is NA, empty, or the "-99" placeholder. The
#' placeholder is now rejected for `iso_a3_eh` too, because returning it
#' would make every map row sharing that placeholder look like a match.
#'
#' @param country_names Character vector of country names.
#' @param world Table with columns `name`, `admin`, `iso_a3`, `iso_a3_eh`.
#'   Defaults to the rnaturalearth medium-scale country table.
#' @return Character vector, same length as `country_names`, with
#'   `NA_character_` where no usable code was found.
get_iso_codes <- function(country_names,
                          world = ne_countries(scale = "medium",
                                               returnclass = "sf")) {
  is_usable <- function(code) {
    !is.na(code) & code != "" & code != "-99"
  }

  wanted <- tolower(as.character(country_names))

  # First row matching on either column; NA inputs never match anything.
  by_name <- match(wanted, tolower(world$name), incomparables = NA)
  by_admin <- match(wanted, tolower(world$admin), incomparables = NA)
  row_idx <- pmin(by_name, by_admin, na.rm = TRUE)

  primary <- as.character(world$iso_a3)[row_idx]
  fallback <- as.character(world$iso_a3_eh)[row_idx]

  # Fill with the fallback first, then let a usable primary code override it.
  iso_codes <- rep(NA_character_, length(wanted))
  use_fallback <- is_usable(fallback)
  iso_codes[use_fallback] <- fallback[use_fallback]
  use_primary <- is_usable(primary)
  iso_codes[use_primary] <- primary[use_primary]

  iso_codes
}
|
||||
|
||||
# Resolve ISO codes for the medal table from the map's own country names.
donnees_graphique$iso_code <- get_iso_codes(donnees_graphique$Pays)

# Olympic-style country names that the map lookup cannot resolve, with the
# ISO 3166-1 alpha-3 code to use instead. Entries here override the lookup.
custom_mapping <- c(
  "United States" = "USA", "United States of America" = "USA",
  "Great Britain" = "GBR",
  "People's Republic of China" = "CHN", "China" = "CHN",
  # "TPE" is the IOC code; the map data is keyed by ISO codes, so Taiwan
  # must be "TWN" to be matched.
  "Chinese Taipei" = "TWN",
  "Republic of Korea" = "KOR", "Korea" = "KOR",
  "Democratic People's Republic of Korea" = "PRK", "North Korea" = "PRK",
  "ROC" = "RUS", # Russian Olympic Committee
  "Hong Kong, China" = "HKG",
  "Iran, Islamic Republic of" = "IRN",
  "Netherlands" = "NLD",
  "New Zealand" = "NZL",
  "Republic of South Africa" = "ZAF",
  "Trinidad and Tobago" = "TTO",
  "Türkiye" = "TUR"
)

# Apply the overrides in one vectorised step.
override_idx <- match(donnees_graphique$Pays, names(custom_mapping))
has_override <- !is.na(override_idx)
donnees_graphique$iso_code[has_override] <-
  unname(custom_mapping[override_idx[has_override]])

# Report medal countries that still have no ISO code.
missing_codes <- donnees_graphique %>%
  filter(is.na(iso_code)) %>%
  select(Pays) %>%
  distinct()

if (nrow(missing_codes) > 0) {
  print("Medal countries without ISO codes:")
  print(missing_codes)
}
|
||||
|
||||
#-----------------------TRAITEMENT DES PAYS PARTICIPANTS ---------------------------
|
||||
# IOC (Olympic) code -> ISO 3166-1 alpha-3 code. Codes missing from this
# table are kept as they are, so identity entries are informational only.
olympic_to_iso <- c(
  "GER" = "DEU", "RSA" = "ZAF", "CHI" = "CHL",
  "USA" = "USA", "GBR" = "GBR", "FRA" = "FRA",
  "JPN" = "JPN", "ITA" = "ITA", "CHN" = "CHN",
  "AUS" = "AUS", "NED" = "NLD", "KOR" = "KOR",
  "ESP" = "ESP", "BRA" = "BRA", "SUI" = "CHE",
  "CAN" = "CAN", "HUN" = "HUN", "NZL" = "NZL",
  "UKR" = "UKR", "SWE" = "SWE", "TUR" = "TUR",
  "ROU" = "ROU", "POL" = "POL", "NOR" = "NOR"
  # Add more mappings as needed
)

# NOTE(review): `test_R` is not created anywhere in this script; it
# presumably has to exist in the session already -- confirm.
participants_iso <- test_R$Pays_unique

# Convert participant Olympic codes to ISO codes.
# Vectorised lookup: unlike `for (i in 1:length(x))`, this is also correct
# when the participant vector is empty.
iso_lookup <- unname(
  olympic_to_iso[match(participants_iso, names(olympic_to_iso))]
)
needs_conversion <- !is.na(iso_lookup)
participants_iso_fixed <- participants_iso
participants_iso_fixed[needs_conversion] <- iso_lookup[needs_conversion]
|
||||
|
||||
#-----------------------CRÉATION DE LA CARTE ---------------------------
|
||||
# Load the world map (one sf row per country).
world <- ne_countries(scale = "medium", returnclass = "sf")

# Keep only usable codes. `%in%` matches NA to NA, and "-99" is the Natural
# Earth placeholder for "no code": leaving either in the sets would flag
# every map row without an ISO code.
codes_inutilisables <- c(NA_character_, "", "-99")
codes_participants <- setdiff(participants_iso_fixed, codes_inutilisables)
codes_medailles <- setdiff(donnees_graphique$iso_code, codes_inutilisables)

# A map row matches when either of its two ISO columns is in the set.
est_participant <- world$iso_a3 %in% codes_participants |
  world$iso_a3_eh %in% codes_participants
est_medaille <- world$iso_a3 %in% codes_medailles |
  world$iso_a3_eh %in% codes_medailles

# Three-level status; later assignments override earlier ones, so a medal
# country ends up as "Médaille" even though it is also a participant.
world$status <- "Non"
world$status[est_participant] <- "Participant"
world$status[est_medaille] <- "Médaille"
|
||||
|
||||
# Build the three-colour map.
# `world$status` holds "Médaille", "Participant" or "Non". The colour scale
# must be keyed by those exact values; the long wording is supplied through
# `labels`. Keying `values` by the display wording leaves no status value
# with an assigned colour.
map_jo <- ggplot(data = world) +
  geom_sf(aes(fill = status), color = "black") +
  scale_fill_manual(
    values = c(
      "Médaille" = "yellow",
      "Participant" = "lightgreen",
      "Non" = "lightgrey"
    ),
    breaks = c("Médaille", "Participant", "Non"),
    labels = c(
      "Participants médaillés",
      "Participants non médaillés",
      "Non participants"
    )
  ) +
  theme_minimal() +
  labs(
    title = "Pays aux Jeux Olympiques de Paris 2024",
    fill = "Statut",
    caption = "Jaune: Médaillés, Vert: Participants sans médaille, Gris: Non participants"
  )

# Display the map.
print(map_jo)
|
||||
12626
Projet JOs/paris-2024-liste-athletes-engages-olypara.csv
Normal file
1045
Projet JOs/paris-2024-results-medals-oly-eng.csv
Normal file
48
Projet JOs/projet_jo_age.R
Normal file
@@ -0,0 +1,48 @@
|
||||
# Installer et charger les packages nécessaires
|
||||
#install.packages("readxl")
|
||||
#install.packages("ggplot2")
|
||||
#install.packages("dplyr")
|
||||
library(readxl)
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
# Load both working sheets of the workbook.
data_athletes <- read_excel("Porjet_JO.xlsx", sheet = "Travail_athletes")
data_medailles <- read_excel("Porjet_JO.xlsx", sheet = "Travail_medailles")

# Bucket ages into ordered categories. case_when() stops at the first
# matching rule, so each rule only needs its upper bound.
data_medailles <- mutate(
  data_medailles,
  Age_Category = factor(
    case_when(
      Age < 18 ~ "Moins de 18 ans",
      Age < 25 ~ "18-24 ans",
      Age < 30 ~ "25-29 ans",
      Age < 35 ~ "30-34 ans",
      Age < 40 ~ "35-39 ans",
      Age >= 40 ~ "40 ans et plus"
    ),
    levels = c("Moins de 18 ans", "18-24 ans", "25-29 ans",
               "30-34 ans", "35-39 ans", "40 ans et plus")
  )
)

# Countries and disciplines with more than 20 rows in the medal sheet.
country_counts <- data_medailles %>%
  count(Country) %>%
  filter(n > 20)
discipline_counts <- data_medailles %>%
  count(Discipline) %>%
  filter(n > 20)

# Keep only rows whose country AND discipline both pass the threshold.
data_medailles_filtered <- data_medailles %>%
  filter(
    Country %in% country_counts$Country,
    Discipline %in% discipline_counts$Discipline
  )
|
||||
|
||||
# Bar chart of the age categories (all rows, unfiltered).
ggplot(data_medailles) +
  geom_bar(aes(x = Age_Category), fill = "blue", color = "black", alpha = 0.7) +
  labs(
    title = "Répartition des âges des athlètes",
    x = "Catégorie d'âge",
    y = "Nombre d'athlètes"
  ) +
  theme_minimal()

# Same categories, one dodged bar per country (filtered rows only).
ggplot(data_medailles_filtered) +
  geom_bar(
    aes(x = Age_Category, fill = Country),
    position = "dodge", color = "black", alpha = 0.7
  ) +
  labs(
    title = "Nombre d'athlètes par catégorie d'âge et pays",
    x = "Catégorie d'âge",
    y = "Nombre d'athlètes"
  ) +
  theme_minimal()

# Same categories, one dodged bar per discipline (filtered rows only).
ggplot(data_medailles_filtered) +
  geom_bar(
    aes(x = Age_Category, fill = Discipline),
    position = "dodge", color = "black", alpha = 0.7
  ) +
  labs(
    title = "Nombre d'athlètes par catégorie d'âge et discipline",
    x = "Catégorie d'âge",
    y = "Nombre d'athlètes"
  ) +
  theme_minimal()
|
||||
92
Projet JOs/projet_jo_agev2.R
Normal file
@@ -0,0 +1,92 @@
|
||||
# Installer et charger les packages nécessaires
|
||||
#install.packages("readxl")
|
||||
#install.packages("ggplot2")
|
||||
#install.packages("dplyr")
|
||||
# Charger les bibliothèques nécessaires
|
||||
library(readxl)
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
# Load both working sheets of the workbook.
data_athletes <- read_excel("Porjet_JO.xlsx", sheet = "Travail_athletes")
data_medailles <- read_excel("Porjet_JO.xlsx", sheet = "Travail_medailles")

# Bucket ages into ordered categories. case_when() stops at the first
# matching rule, so each rule only needs its upper bound.
data_medailles <- mutate(
  data_medailles,
  Age_Category = factor(
    case_when(
      Age < 18 ~ "Moins de 18 ans",
      Age < 25 ~ "18-24 ans",
      Age < 30 ~ "25-29 ans",
      Age < 35 ~ "30-34 ans",
      Age < 40 ~ "35-39 ans",
      Age >= 40 ~ "40 ans et plus"
    ),
    levels = c("Moins de 18 ans", "18-24 ans", "25-29 ans",
               "30-34 ans", "35-39 ans", "40 ans et plus")
  )
)

# Countries and disciplines with more than 20 rows in the medal sheet.
country_counts <- filter(count(data_medailles, Country), n > 20)
discipline_counts <- filter(count(data_medailles, Discipline), n > 20)

# Keep only rows whose country AND discipline both pass the threshold.
data_medailles_filtered <- data_medailles %>%
  filter(
    Country %in% country_counts$Country,
    Discipline %in% discipline_counts$Discipline
  )
|
||||
|
||||
# Row counts per (country, age category) and per (discipline, age category).
data_country <- count(data_medailles_filtered, Country, Age_Category)
data_discipline <- count(data_medailles_filtered, Discipline, Age_Category)

# Three disciplines with the most rows: sum the per-category counts,
# sort descending, keep the first three.
top_disciplines <- data_discipline %>%
  count(Discipline, wt = n, name = "Total_Athletes", sort = TRUE) %>%
  slice_head(n = 3)

# Three countries with the most rows.
top_pays <- data_medailles_filtered %>%
  count(Country, name = "Total_Athletes", sort = TRUE) %>%
  slice_head(n = 3)

# Console output: countries first, then disciplines.
print("Top 3 pays avec le plus d'athlètes :")
print(top_pays)

print("Top 3 disciplines avec le plus d'athlètes :")
print(top_disciplines)
|
||||
|
||||
# Disciplines: outlined bars for every discipline, coloured lines and points
# for the top three only.
ggplot(data_discipline) +
  aes(x = Age_Category, y = n, group = Discipline, color = Discipline) +
  geom_bar(stat = "identity", position = "dodge", fill = NA, color = "black") +
  geom_line(
    data = filter(data_discipline, Discipline %in% top_disciplines$Discipline),
    aes(group = Discipline),
    size = 1.5
  ) +
  geom_point(
    data = filter(data_discipline, Discipline %in% top_disciplines$Discipline),
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "Nombre d'athlètes par catégorie d'âge et discipline (Top 3)",
    x = "Catégorie d'âge",
    y = "Nombre d'athlètes"
  )

# Countries: outlined bars for every country, coloured lines and points for
# the top three only.
ggplot(data_country) +
  aes(x = Age_Category, y = n, group = Country) +
  geom_bar(stat = "identity", position = "dodge", fill = NA, color = "black") +
  geom_line(
    data = filter(data_country, Country %in% top_pays$Country),
    aes(y = n, color = Country),
    size = 1.5
  ) +
  geom_point(
    data = filter(data_country, Country %in% top_pays$Country),
    aes(y = n, color = Country),
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "Nombre d'athlètes par catégorie d'âge et pays (Top 3)",
    x = "Catégorie d'âge",
    y = "Nombre d'athlètes"
  )
|
||||
11
Projet JOs/repartition des médaile par genre.R
Normal file
@@ -0,0 +1,11 @@
|
||||
# This script used `%>%`, `ggplot()` and `df_medailles` without loading or
# creating any of them, so it only worked after another script had been run
# in the same session. Load what is needed here.
library(dplyr)
library(ggplot2)

# Reuse `df_medailles` when the session already has it; otherwise read the
# medal sheet from the project workbook.
if (!exists("df_medailles")) {
  df_medailles <- readxl::read_excel("Projet JOs/Porjet_JO.xlsx",
                                     sheet = "Travail_medailles")
}

# Count medals per gender.
medailles_par_genre <- df_medailles %>%
  group_by(Gender) %>%
  summarise(Nombre_Medailles = n())

# Bar chart of the counts (auto-printed at top level).
ggplot(medailles_par_genre,
       aes(x = Gender, y = Nombre_Medailles, fill = Gender)) +
  geom_bar(stat = "identity") +
  labs(title = "Répartition des médailles par genre",
       x = "Genre", y = "Nombre de médailles") +
  theme_minimal()
|
||||
80
Projet JOs/t.test loie normal.R
Normal file
@@ -0,0 +1,80 @@
|
||||
# Charger les bibliothèques nécessaires
|
||||
library(ggplot2)
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
|
||||
# Path to the Excel workbook, relative to the project root like the other
# scripts of the project (the previous value pointed into one user's
# Downloads folder and could not work on any other machine).
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Read the "Travail_medailles" sheet.
df_medailles <- read_excel(file_path, sheet = "Travail_medailles")

# Show the column names to check how the medal columns are called.
print(colnames(df_medailles))

# Keep only genders "W" and "M" (drops "O" and "X"), then turn the numeric
# medal codes into labels. Codes other than 1/2/3 are kept as text.
# NOTE(review): assumes the medal column is named "Medal_code" -- confirm
# against the printed column names.
df_medailles_filtrées <- df_medailles %>%
  filter(Gender %in% c("W", "M")) %>%
  mutate(Medal_code = case_when(
    Medal_code == 1 ~ "Gold",
    Medal_code == 2 ~ "Silver",
    Medal_code == 3 ~ "Bronze",
    TRUE ~ as.character(Medal_code)
  ))
|
||||
|
||||
# 1. Keep gold medals only.
df_gold <- df_medailles_filtrées %>%
  filter(Medal_code == "Gold")

# 2. Drop rows where Gender is missing.
df_gold_clean <- df_gold %>%
  filter(!is.na(Gender))

# Attach the per-gender gold count to every row (constant within a gender).
df_gold_clean <- df_gold_clean %>%
  group_by(Gender) %>%
  mutate(Nombre_Medailles = n()) %>%
  ungroup()

# Sanity checks: first rows, then gold medals per gender.
print(head(df_gold_clean))
print(table(df_gold_clean$Gender))

# 3. Compare the number of gold medals between the two genders.
# `Nombre_Medailles` is constant within each gender, so
# t.test(Nombre_Medailles ~ Gender) has zero variance in both groups and
# stops with "data are essentially constant". The question is really about
# two counts, so an exact binomial test of a 50/50 split is used instead.
# The result keeps the name `t_test_result` for backward compatibility.
genre_counts <- table(df_gold_clean$Gender)
if (length(genre_counts) != 2 || min(genre_counts) < 2) {
  cat("L'un des genres a trop peu d'observations pour effectuer un test t.\n")
} else {
  t_test_result <- binom.test(as.vector(genre_counts))
  print(t_test_result)
}
|
||||
|
||||
# 4. Chi-squared test of association between gender and medal type.
# Long-format counts (one row per gender x medal type), used by the plot.
medailles_par_genre <- df_medailles_filtrées %>%
  group_by(Gender, Medal_code) %>%
  summarise(Nombre_Medailles = n(), .groups = "drop")

# Gender x medal-type contingency table. Built with base table() because
# this script never loads tidyr, so the earlier spread() call failed with
# "could not find function". Absent combinations count as 0, as before.
tableau_contingence <- table(
  df_medailles_filtrées$Gender,
  df_medailles_filtrées$Medal_code
)

# Run and print the chi-squared test.
khi2_test <- chisq.test(tableau_contingence)
print(khi2_test)

# Dodged bar chart of medal counts by gender and medal type.
ggplot(medailles_par_genre,
       aes(x = Gender, y = Nombre_Medailles, fill = Medal_code)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(
    values = c("Gold" = "gold", "Silver" = "gray", "Bronze" = "chocolate")
  ) +
  labs(title = "Répartition des médailles par genre et type",
       x = "Genre", y = "Nombre de médailles", fill = "Type de médaille") +
  theme_minimal()
|
||||
66
Projet JOs/test kh² p value.R
Normal file
@@ -0,0 +1,66 @@
|
||||
# Charger les bibliothèques nécessaires
|
||||
library(ggplot2)
|
||||
library(readxl)
|
||||
library(dplyr)
|
||||
|
||||
# Path to the Excel workbook, relative to the project root like the other
# scripts of the project (the previous value pointed into one user's
# Downloads folder and could not work on any other machine).
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Read the "Travail_medailles" sheet.
df_medailles <- read_excel(file_path, sheet = "Travail_medailles")

# Show the column names to check how the medal columns are called.
print(colnames(df_medailles))

# Keep only genders "W" and "M" (drops "O" and "X"), then turn the numeric
# medal codes into labels. Codes other than 1/2/3 are kept as text.
# NOTE(review): assumes the medal column is named "Medal_code" -- confirm
# against the printed column names.
df_medailles_filtrées <- df_medailles %>%
  filter(Gender %in% c("W", "M")) %>%
  mutate(Medal_code = case_when(
    Medal_code == 1 ~ "Gold",
    Medal_code == 2 ~ "Silver",
    Medal_code == 3 ~ "Bronze",
    TRUE ~ as.character(Medal_code)
  ))

# 1. Keep gold medals only.
df_gold <- df_medailles_filtrées %>%
  filter(Medal_code == "Gold")

# 2. Drop rows where Gender is missing.
df_gold_clean <- df_gold %>%
  filter(!is.na(Gender))

# 3. Gold medals per gender.
print(table(df_gold_clean$Gender))

# Attach the per-gender gold count to every row (constant within a gender).
df_gold_clean <- df_gold_clean %>%
  group_by(Gender) %>%
  mutate(Nombre_Medailles = n()) %>%
  ungroup()
|
||||
|
||||
# 4. Long-format counts (one row per gender x medal type), used by the plot.
medailles_par_genre <- df_medailles_filtrées %>%
  group_by(Gender, Medal_code) %>%
  summarise(Nombre_Medailles = n(), .groups = "drop")

# Gender x medal-type contingency table. Built with base table() because
# this script never loads tidyr, so the earlier spread() call failed with
# "could not find function". Absent combinations count as 0, as before.
tableau_contingence <- table(
  df_medailles_filtrées$Gender,
  df_medailles_filtrées$Medal_code
)

# Run and print the chi-squared test.
khi2_test <- chisq.test(tableau_contingence)
print(khi2_test)

# Dodged bar chart of medal counts by gender and medal type.
ggplot(medailles_par_genre,
       aes(x = Gender, y = Nombre_Medailles, fill = Medal_code)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(
    values = c("Gold" = "gold", "Silver" = "gray", "Bronze" = "chocolate")
  ) +
  labs(title = "Répartition des médailles par genre et type",
       x = "Genre", y = "Nombre de médailles", fill = "Type de médaille") +
  theme_minimal()
|
||||
20
Projet JOs/top10 disciplie plus de médiale.R
Normal file
@@ -0,0 +1,20 @@
|
||||
library(readxl)
|
||||
|
||||
# This script used `%>%`, `ggplot()` and `file_path` without loading or
# defining them, so it only worked after another script had been run in the
# same session. Load the packages and fall back to the project workbook.
library(dplyr)
library(ggplot2)

if (!exists("file_path")) {
  file_path <- "Projet JOs/Porjet_JO.xlsx"
}

# Read the "Travail_medailles" sheet.
df_medailles <- read_excel(file_path, sheet = "Travail_medailles")

# Count medals per discipline, highest first.
medailles_par_discipline <- df_medailles %>%
  group_by(Discipline) %>%
  summarise(Nombre_Medailles = n()) %>%
  arrange(desc(Nombre_Medailles))

# Keep the ten disciplines with the most medals.
top_10_medailles <- head(medailles_par_discipline, 10)

# Bar chart, disciplines ordered by decreasing medal count.
ggplot(top_10_medailles,
       aes(x = reorder(Discipline, -Nombre_Medailles), y = Nombre_Medailles)) +
  geom_bar(stat = "identity", fill = "gold") +
  labs(title = "Top 10 des disciplines ayant attribué le plus de médailles",
       x = "Discipline", y = "Nombre de médailles") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
|
||||
BIN
Projet JOs/top10_disciplines.png
Normal file
|
After Width: | Height: | Size: 48 KiB |
63
Projet JOs/top10_disciplines_par_pays.R
Normal file
@@ -0,0 +1,63 @@
|
||||
# Installation et chargement des packages nécessaires
|
||||
if (!require("readxl")) {
|
||||
install.packages("readxl")
|
||||
}
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2")
|
||||
}
|
||||
if (!require("dplyr")) {
|
||||
install.packages("dplyr")
|
||||
}
|
||||
library(readxl)
|
||||
library(ggplot2)
|
||||
library(dplyr)
|
||||
|
||||
# Import the medal sheet from the Excel workbook.
donnees_medailles <- read_excel(
  "Projet JOs/Porjet_JO.xlsx",
  sheet = "Travail_medailles"
)

# Medal count per discipline, sorted from most to fewest.
if ("Discipline" %in% colnames(donnees_medailles)) {
  # The medal sheet already carries the discipline.
  medailles_par_discipline <- count(
    donnees_medailles, Discipline,
    name = "Total", sort = TRUE
  )
} else {
  # Otherwise take the discipline from the athlete sheet.
  df_athletes <- read_excel(
    "Projet JOs/Porjet_JO.xlsx",
    sheet = "Travail_athletes"
  )

  # NOTE(review): "Athlete_ID" is a guessed join key -- adapt it to the real
  # structure of the two sheets.
  donnees_jointes <- inner_join(donnees_medailles, df_athletes,
                                by = "Athlete_ID")

  medailles_par_discipline <- count(
    donnees_jointes, Discipline,
    name = "Total", sort = TRUE
  )
}
|
||||
|
||||
# Keep the ten disciplines with the most medals (table is already sorted).
top_10_disciplines <- slice_head(medailles_par_discipline, n = 10)

# Horizontal bar chart, largest total at the top.
graphique_disciplines <- ggplot(top_10_disciplines) +
  aes(x = reorder(Discipline, Total), y = Total) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 10 des disciplines avec le plus de medailles",
    x = "Discipline",
    y = "Nombre de medailles"
  )

# Show the chart.
print(graphique_disciplines)

# Save it as a PNG file.
ggsave(
  "Projet JOs/top10_disciplines.png",
  plot = graphique_disciplines,
  width = 8, height = 6, units = "in", dpi = 300
)

# Also print the underlying table.
print(top_10_disciplines)
|
||||
BIN
Projet JOs/top10_efficacite_pays.png
Normal file
|
After Width: | Height: | Size: 128 KiB |
139
Projet JOs/top10_medals.R
Normal file
@@ -0,0 +1,139 @@
|
||||
# Installation et chargement des packages nécessaires
|
||||
if (!require("readxl")) {
|
||||
install.packages("readxl")
|
||||
}
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2")
|
||||
}
|
||||
if (!require("tidyr")) {
|
||||
install.packages("tidyr")
|
||||
}
|
||||
library(ggplot2)
|
||||
library(readxl)
|
||||
library(tidyr)
|
||||
|
||||
# Import the medal sheet from the Excel workbook.
donnees_medailles <- read_excel(
  "Projet JOs/Porjet_JO.xlsx",
  sheet = "Travail_medailles"
)

# Inspect the structure of the imported data.
str(donnees_medailles)

# --- Data cleaning ---
# Parse medal dates (assumes MM/DD/YYYY text).
donnees_medailles$Medal_date <- as.Date(
  donnees_medailles$Medal_date,
  format = "%m/%d/%Y"
)

# Age column: blank out known spreadsheet error values, then make it numeric.
donnees_medailles$Age[
  donnees_medailles$Age %in% c("#NOM?", "01/02/1900")
] <- NA
donnees_medailles$Age <- as.numeric(donnees_medailles$Age)

# Show the first rows as a quick check.
head(donnees_medailles)

# Medal counts per country (rows) and medal type (columns).
nombre_medailles <- as.data.frame.matrix(
  with(donnees_medailles, table(Country, Medal_type))
)

# Add a Total column and sort countries by it, highest first.
if (ncol(nombre_medailles) > 0) {
  nombre_medailles$Total <- rowSums(nombre_medailles)
  classement <- order(-nombre_medailles$Total)
  nombre_medailles <- nombre_medailles[classement, ]
}

# Print the medal table.
print(nombre_medailles)

# Two-column version (country, total) for plotting.
donnees_graphique <- data.frame(
  Pays = rownames(nombre_medailles),
  Total = nombre_medailles$Total
)
|
||||
|
||||
# Horizontal bar chart of the ten countries with the most medals
# (`donnees_graphique` is already sorted, so the first ten rows are the top).
graphique_top10 <- ggplot(head(donnees_graphique, 10)) +
  aes(x = reorder(Pays, Total), y = Total) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Nombre total de medailles par pays - Top 10",
    x = "",
    y = "Nombre total de medailles"
  )

# Show the chart.
print(graphique_top10)

# Save it as a PNG file.
ggsave(
  "Projet JOs/top10_medals.png",
  plot = graphique_top10,
  width = 8, height = 6, units = "in", dpi = 300
)
|
||||
|
||||
# Ten countries with the most medals (nombre_medailles is already sorted).
pays_top10 <- head(rownames(nombre_medailles), 10)
donnees_top10 <- nombre_medailles[pays_top10, ]

# Names of the per-medal-type columns (everything except Total).
cols_medailles <- setdiff(colnames(donnees_top10), "Total")

# Show which medal columns were found.
print("Colonnes de médailles trouvées:")
print(cols_medailles)

# Locate the gold / silver / bronze columns whatever their exact spelling
# (French or English); the first match of each pattern is used.
col_or <- grep("or|gold", cols_medailles, ignore.case = TRUE, value = TRUE)[1]
col_argent <- grep("argent|silver", cols_medailles,
                   ignore.case = TRUE, value = TRUE)[1]
col_bronze <- grep("bronze", cols_medailles, ignore.case = TRUE, value = TRUE)[1]

types_medailles <- c(col_or, col_argent, col_bronze)
noms_affichage <- c("Or", "Argent", "Bronze")

cat("Utilisation des colonnes:", paste(types_medailles, collapse=", "), "\n")

# Fail early with a clear message when a medal column could not be located,
# instead of an obscure indexing error further down.
if (anyNA(types_medailles)) {
  stop(
    "Colonne de médailles introuvable pour : ",
    paste(noms_affichage[is.na(types_medailles)], collapse = ", "),
    call. = FALSE
  )
}

# Long format: one row per (country, medal type), countries in ranking order
# and types in Or/Argent/Bronze order. Built in one step instead of growing
# a data frame with rbind() inside a loop.
nb_types <- length(types_medailles)
donnees_graphique <- data.frame(
  Pays = rep(pays_top10, each = nb_types),
  Type_Medaille = rep(noms_affichage, times = length(pays_top10)),
  # t() makes as.numeric() read the counts country by country.
  Nombre = as.numeric(
    t(as.matrix(donnees_top10[, types_medailles, drop = FALSE]))
  ),
  Total = rep(donnees_top10$Total, each = nb_types)
)

# Order the factor so the best country ends up at the top after coord_flip().
donnees_graphique$Pays <- factor(donnees_graphique$Pays,
                                 levels = rev(pays_top10))
|
||||
|
||||
# Horizontal stacked bars: one bar per country, split by medal type, using
# gold / silver / bronze colours.
graphique_type_medailles <- ggplot(donnees_graphique) +
  aes(x = Pays, y = Nombre, fill = Type_Medaille) +
  geom_bar(stat = "identity", position = "stack") +
  coord_flip() +
  scale_fill_manual(
    values = c("Or" = "#FFD700", "Argent" = "#C0C0C0", "Bronze" = "#CD7F32")
  ) +
  labs(
    title = "Top 10 des pays par type de medailles",
    x = "",
    y = "Nombre de medailles"
  ) +
  theme_minimal() +
  guides(fill = guide_legend(title = "Type de medaille"))

# Show the chart.
print(graphique_type_medailles)

# Save it as a PNG file.
ggsave(
  "Projet JOs/top10_medals_by_type.png",
  plot = graphique_type_medailles,
  width = 8, height = 6, units = "in", dpi = 300
)
|
||||
|
||||
# Summary statistics of the medal totals of the ten leading countries.
moyenne_medailles <- mean(donnees_top10[["Total"]])
mediane_medailles <- median(donnees_top10[["Total"]])
variance_medailles <- var(donnees_top10[["Total"]])
ecart_type_medailles <- sd(donnees_top10[["Total"]])

# Print the results, one statistic per line.
cat("Statistiques des 10 premiers pays par nombre total de médailles:\n")
cat("Moyenne: ", moyenne_medailles, "\n")
cat("Médiane: ", mediane_medailles, "\n")
cat("Variance: ", variance_medailles, "\n")
cat("Écart type: ", ecart_type_medailles, "\n")

# Raw totals the statistics were computed from.
cat(
  "10 premiers pays par nombre total de médailles:",
  donnees_top10[["Total"]],
  "\n"
)
|
||||
BIN
Projet JOs/top10_medals.png
Normal file
|
After Width: | Height: | Size: 47 KiB |
BIN
Projet JOs/top10_medals_by_type.png
Normal file
|
After Width: | Height: | Size: 54 KiB |
30
Projet JOs/top10_sport.R
Normal file
@@ -0,0 +1,30 @@
|
||||
# Packages used below; the script previously loaded none of them.
library(readxl)
library(dplyr)
library(ggplot2)

# Path to the Excel workbook.
file_path <- "Projet JOs/Porjet_JO.xlsx"

# Read the "Travail_athletes" sheet.
df_athletes <- read_excel(file_path, sheet = "Travail_athletes")

# Number of distinct disciplines per country, highest first.
# The column name contains spaces and therefore needs backticks; written
# bare it is a syntax error.
# NOTE(review): assumes the sheet really has a column called
# "National Olympic Committee" -- confirm.
disciplines_par_pays <- df_athletes %>%
  group_by(`National Olympic Committee`) %>%
  summarise(Nombre_Disciplines = n_distinct(Discipline)) %>%
  arrange(desc(Nombre_Disciplines))

# Rename the columns.
colnames(disciplines_par_pays) <- c("Pays", "Nombre_Disciplines")

# Keep the ten countries with the most disciplines.
top_10 <- head(disciplines_par_pays, 10)

# Countries left out of the top 10 (the original referenced an undefined
# `top_100` here).
pays_ignores <- setdiff(disciplines_par_pays$Pays, top_10$Pays)

# Print the countries left out.
print("Pays ignorés :")
print(pays_ignores)

# Bar chart, countries ordered by decreasing number of disciplines.
ggplot(top_10,
       aes(x = reorder(Pays, -Nombre_Disciplines), y = Nombre_Disciplines)) +
  geom_bar(stat = "identity", fill = "darkorange") +
  labs(title = "Top 10 des pays avec le plus de disciplines",
       x = "Pays", y = "Nombre de disciplines") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
|
||||