I have been asked to split the Rate column into three columns, namely 'Rate', 'MaxCI' and 'MinCI'. I need to figure out how the average rate changes from 2011 to 2016, so I am guessing I would need the mean.
Every time I execute my own code, it nullifies the values in the MinCI and MaxCI columns, and the Rate column values don't seem correct. Please advise what I am doing wrong and how I can go about this.
# Asker's attempt (the code that misbehaves).
# No `sep` is supplied, so separate() falls back on its default separator,
# which splits at every run of non-alphanumeric characters. The decimal
# points in a value such as "4.2-2.6-6.2" are therefore treated as
# delimiters too, not just the hyphens, so the pieces land in the wrong
# columns.
df <- df%>%
separate (Rate,c('Rate', 'MinCI', 'MaxCI'), convert = TRUE)
I have pasted a link to the data I am using: https://docs.google.com/spreadsheets/d/1xzJ4vBvBdfp5vVj4Cleonhu-v1rTzGVrRp4LLPVRgR0/edit?usp=sharing
Use `separate` with the `sep` argument to specify the separator (`-`) explicitly, so that the `Rate` column is split into three different columns, and then get the mean `Rate` for each `Year`.
library(dplyr)
# Split each Rate string at the hyphens into Rate, MinCI and MaxCI
# (convert = TRUE lets separate() turn the pieces into numbers), then
# average Rate within every Year.
result <- df %>%
  tidyr::separate(
    col = Rate,
    into = c("Rate", "MinCI", "MaxCI"),
    sep = "-",
    convert = TRUE
  ) %>%
  group_by(Year) %>%
  summarise(avg_rate = mean(Rate, na.rm = TRUE))
Another approach would be to use `separate_rows()`, then keep only the first record per country and year (as it is the one that holds the rate), and then compute the average rate per country in order to see the average across all years. Here is the code:
library(dplyr)
library(tidyr)
#Code
# Put every hyphen-separated piece of Rate on its own row, so each
# Country/Year pair turns into consecutive rows holding the rate first and
# the two interval bounds after it. Keep only the first of those rows and
# average it per Country.
# Assumes each Country/Year pair occurs once in df1, as in the sample data.
df2 <- df1 %>%
  separate_rows(Rate, sep = "-") %>%
  group_by(Country, Year) %>%
  # Var numbers the pieces within each Country/Year pair
  mutate(Var = row_number(), Rate = as.numeric(trimws(Rate))) %>%
  # Var == 1 is the first piece, i.e. the rate itself
  filter(Var == 1) %>%
  # group_by() replaces the previous grouping, so no ungroup() is needed
  group_by(Country) %>%
  summarise(AvgRate = mean(Rate, na.rm = TRUE))
Output:
# A tibble: 68 x 2
Country AvgRate
<chr> <dbl>
1 Afghanistan 4.85
2 Albania 20.2
3 Algeria 25.7
4 Andorra 25.1
5 Angola 7.35
6 Antigua and Barbuda 17.8
7 Argentina 27.0
8 Armenia 19.0
9 Australia 27.6
10 Austria 19.0
# ... with 58 more rows
If you need the average by year instead, change the last `group_by()` to `Year`.
Some data used:
# Data
# Sample data frame used by the snippets in this file: 135 rows with the
# columns Country, Year (2011 or 2016) and Rate. Each Rate string packs
# three hyphen-separated numbers, which the snippets name Rate, MinCI and
# MaxCI, in that order.
# NOTE(review): the Bangladesh 2016 entry "3.6-24-5.1" has a middle value of
# 24, out of line with its neighbours -- possibly a typo for 2.4; confirm
# against the source spreadsheet.
df1 <- structure(list(Country = c("Afghanistan", "Afghanistan", "Albania",
"Albania", "Algeria", "Algeria", "Andorra", "Andorra", "Angola",
"Angola", "Antigua and Barbuda", "Antigua and Barbuda", "Argentina",
"Argentina", "Armenia", "Armenia", "Australia", "Australia",
"Austria", "Austria", "Azerbaijan", "Azerbaijan", "Bahamas",
"Bahamas", "Bahrain", "Bahrain", "Bangladesh", "Bangladesh",
"Barbados", "Barbados", "Belarus", "Belarus", "Belgium", "Belgium",
"Belize", "Belize", "Benin", "Benin", "Bhutan", "Bhutan", "Bolivia (Plurinational State of)",
"Bolivia (Plurinational State of)", "Bosnia and Herzegovina",
"Bosnia and Herzegovina", "Botswana", "Botswana", "Brazil", "Brazil",
"Brunei Darussalam", "Brunei Darussalam", "Bulgaria", "Bulgaria",
"Burkina Faso", "Burkina Faso", "Burundi", "Burundi", "Cabo Verde",
"Cabo Verde", "Cambodia", "Cambodia", "Cameroon", "Cameroon",
"Canada", "Canada", "Central African Republic", "Central African Republic",
"Chad", "Chad", "Chile", "Chile", "China", "China", "Colombia",
"Colombia", "Comoros", "Comoros", "Congo", "Congo", "Cook Islands",
"Cook Islands", "Costa Rica", "Costa Rica", "Côte d'Ivoire",
"Côte d'Ivoire", "Croatia", "Croatia", "Cuba", "Cuba", "Cyprus",
"Cyprus", "Czechia", "Czechia", "Democratic People's Republic of Korea",
"Democratic People's Republic of Korea", "Democratic Republic of the Congo",
"Democratic Republic of the Congo", "Denmark", "Denmark", "Djibouti",
"Djibouti", "Dominica", "Dominica", "Dominican Republic", "Dominican Republic",
"Ecuador", "Ecuador", "Egypt", "Egypt", "El Salvador", "El Salvador",
"Equatorial Guinea", "Equatorial Guinea", "Eritrea", "Eritrea",
"Estonia", "Estonia", "Eswatini", "Eswatini", "Ethiopia", "Ethiopia",
"Fiji", "Fiji", "Finland", "Finland", "France", "France", "Gabon",
"Gabon", "Gambia", "Gambia", "Georgia", "Georgia", "Germany",
"Germany", "Ghana"), Year = c(2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L,
2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L, 2016L, 2011L,
2016L, 2011L, 2016L, 2011L), Rate = c("4.2-2.6-6.2", "5.5-3.4-8.1",
"18.8-14.8-23.0", "21.7-17.0-26.7", "24.0-19.9-28.4", "27.4-22.5-32.7",
"24.6-19.8-29.8", "25.6-20.1-31.3", "6.5-4.0-9.7", "8.2-5.1-12.2",
"16.7-12.4-21.3", "18.9-13.8-24.6", "25.8-21.9-30.1", "28.3-23.4-33.5",
"17.9-14.2-21.9", "20.2-15.7-24.8", "26.2-23.4-29.2", "29.0-25.3-32.9",
"18.0-14.6-21.7", "20.1-15.9-24.6", "17.2-13.2-21.4", "19.9-15.3-24.9",
"29.0-23.7-34.4", "31.6-25.6-37.8", "27.1-22.7-31.6", "29.8-24.6-35.2",
"2.6-1.8-3.6", "3.6-24-5.1", "20.4-16.5-24.6", "23.1-18.3-28.4",
"22.7-18.2-27.6", "24.5-19.4-30.0", "20.4-17.7-23.2", "22.1-18.6-25.7",
"21.5-17.2-26.2", "24.1-19.0-29.8", "7.9-6.0-10.1", "9.6-7.0-12.6",
"4.9-3.3-6.9", "6.4-4.3-9.0", "17.9-13.9-22.2", "20.2-15.4-25.4",
"16.0-12.7-19.6", "17.9-13.9-22.3", "17.2-13.4-21.2", "18.9-14.6-23.5",
"19.6-17.1-22.2", "22.1-18.7-25.7", "11.6-8.6-14.9", "14.1-10.5-18.3",
"22.8-18.3-27.5", "25.0-19.7-30.5", "4.2-3.0-5.7", "5.6-3.9-7.7",
"4.2-2.7-6.0", "5.4-3.5-7.9", "9.9-7.2-13.0", "11.8-8.4-15.5",
"2.9-1.9-4.2", "3.9-2.5-5.6", "9.4-7.4-11.7", "11.4-8.6-14.5",
"26.6-23.6-29.6", "29.4-25.7-33.3", "6.1-4.0-8.9", "7.5-4.8-10.9",
"4.9-3.4-6.7", "6.1-4.2-8.5", "25.6-21.9-29.5", "28.0-23.4-32.9",
"4.7-3.8-5.7", "6.2-4.7-7.9", "20.0-16.9-23.3", "22.3-18.5-26.5",
"6.4-4.5-8.7", "7.8-5.4-10.7", "8.0-5.7-10.7", "9.6-6.6-13.1",
"53.2-48.1-58.4", "55.9-50.2-61.5", "22.2-18.4-26.1", "25.7-21.2-30.6",
"8.3-6.3-10.6", "10.3-7.6-13.4", "22.0-18.4-25.9", "24.4-20.1-29.0",
"22.2-18.4-26.0", "24.6-20.1-29.4", "20.1-15.7-24.8", "21.8-16.7-27.4",
"24.2-20.3-28.4", "26.0-21.3-31.1", "5.7-3.5-8.5", "6.8-4.0-10.1",
"5.3-3.5-7.6", "6.7-4.4-9.7", "17.8-15.1-20.6", "19.7-16.2-23.3",
"12.0-8.6-16.0", "13.5-9.5-18.3", "25.0-20.3-30.0", "27.9-22.4-33.7",
"23.8-19.8-28.0", "27.6-22.7-33.0", "17.6-13.8-21.9", "19.9-15.4-25.0",
"28.6-25.2-32.4", "32.0-27.6-36.6", "21.7-17.6-26.0", "24.6-19.6-29.7",
"6.5-4.2-9.4", "8.0-5.0-11.6", "3.9-2.6-5.5", "5.0-3.3-7.3",
"19.9-17.1-22.9", "21.2-17.7-25.2", "14.5-11.1-18.3", "16.5-12.5-20.9",
"3.4-2.3-4.9", "4.5-2.9-6.5", "27.1-22.1-32.1", "30.2-24.5-36.0",
"20.3-18.0-22.9", "22.2-19.0-25.7", "19.7-16.4-23.1", "21.6-17.4-25.9",
"13.2-9.5-17.4", "15.0-10.6-20.1", "8.4-6.2-10.9", "10.3-7.5-13.6",
"18.7-14.8-22.9", "21.7-17.2-26.7", "20.3-17.8-22.8", "22.3-18.8-25.9",
"9.1-7.3-11.2")), class = "data.frame", row.names = c(NA, -135L
))
An alternative would be to use `strsplit` and then bind the results to the first two columns of your original data frame.
library(dplyr)
# Split every Rate string at the hyphens, stack the pieces into a
# three-column table named Rate / MinCI / MaxCI, and attach it to the first
# two columns of df1 (Country and Year). Then average Rate per Year.
bind_cols(
  select(df1, 1:2),
  setNames(
    as.data.frame(
      # fixed = TRUE: "-" is a literal separator, not a regular expression
      do.call(rbind, strsplit(df1$Rate, split = "-", fixed = TRUE))
    ),
    c("Rate", "MinCI", "MaxCI")
  )
) %>%
  mutate(across(c(Rate, MinCI, MaxCI), as.numeric)) %>%
  group_by(Year) %>%
  # group_by(Country, Year) %>%
  # na.rm = TRUE skips only missing rates; a row whose MinCI or MaxCI failed
  # to parse still contributes its valid Rate to the average.
  summarise(avg_rate = mean(Rate, na.rm = TRUE))
We can use `parse_number` to retrieve the 'Rate' as a numeric value and then get the mean:
library(dplyr)
# parse_number() keeps the first number it finds in each string, which here
# is the rate that precedes the first hyphen. Average that value per Year.
df1 %>%
  mutate(rate_value = readr::parse_number(Rate)) %>%
  group_by(Year) %>%
  summarise(avg_rate = mean(rate_value, na.rm = TRUE))
If we need it by 'Country' as well:
# Average rate for every Country/Year pair. parse_number() keeps the first
# number in each Rate string, i.e. the rate before the first hyphen.
df1 %>%
  group_by(Country, Year) %>%
  summarise(
    avg_rate = mean(readr::parse_number(Rate), na.rm = TRUE),
    # Return an ungrouped result instead of one still grouped by Country
    .groups = "drop"
  )
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.