简体   繁体   中英

Select max and min in R

I have written this code:

# Start from the built-in iris data set.
db_dati <- iris

# Setosa: pull out its rows, then take the extremes of Sepal.Length.
setosa <- subset(db_dati, Species == "setosa")
max_setosa <- max(setosa[["Sepal.Length"]])
min_setosa <- min(setosa[["Sepal.Length"]])

# Virginica: same two steps.
virginica <- subset(db_dati, Species == "virginica")
max_virginica <- max(virginica[["Sepal.Length"]])
min_virginica <- min(virginica[["Sepal.Length"]])

# Versicolor: same two steps.
versicolor <- subset(db_dati, Species == "versicolor")
max_versicolor <- max(versicolor[["Sepal.Length"]])
min_versicolor <- min(versicolor[["Sepal.Length"]])

# One-row matrix; cbind() labels each column with the name of the variable
# passed in, so the variable names above double as column names.
result <- cbind(
  max_setosa, min_setosa,
  max_virginica, min_virginica,
  max_versicolor, min_versicolor
)

Is there a smarter way to select the species? I have a dataset with a lot of classes and I'm looking for a faster selector.

Thanks

Are you simply looking for this?

# group_by() and summarize() are dplyr verbs; tidyverse attaches dplyr.
library(tidyverse)
# Split iris by Species and collapse each group to a single row holding the
# largest and smallest Sepal.Length observed in that group.
iris |> 
  group_by(Species) |> 
  summarize(max = max(Sepal.Length),
            min = min(Sepal.Length))

# A tibble: 3 × 3
  Species      max   min
  <fct>      <dbl> <dbl>
1 setosa       5.8   4.3
2 versicolor   7     4.9
3 virginica    7.9   4.9

Maybe you want this:

library(dplyr)
# For each Species, apply both min() and max() to every non-grouping column.
# The names in the list become suffixes, giving columns such as
# Sepal.Length_min and Sepal.Length_max.
iris %>% 
  group_by(Species) %>% 
  summarise(across(everything(), list(min = min, max = max)))

Output:

# A tibble: 3 × 9
  Species    Sepal.Length_min Sepal.Length_max Sepal.Width_min Sepal.Width_max Petal.Length_min
  <fct>                 <dbl>            <dbl>           <dbl>           <dbl>            <dbl>
1 setosa                  4.3              5.8             2.3             4.4              1  
2 versicolor              4.9              7               2               3.4              3  
3 virginica               4.9              7.9             2.2             3.8              4.5
# … with 3 more variables: Petal.Length_max <dbl>, Petal.Width_min <dbl>, Petal.Width_max <dbl>

Expanding on the answer by @deschen

library(dplyr)

# Simply filter, preserves data structure: keep the complete rows whose
# Sepal.Length equals the maximum or the minimum within their Species.
# Every row tied for an extreme is kept, and the result is still grouped
# by Species.
iris %>% 
  group_by(Species) %>% 
  filter(Sepal.Length == max(Sepal.Length) | Sepal.Length == min(Sepal.Length))
#> # A tibble: 6 x 5
#> # Groups:   Species [3]
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species   
#>          <dbl>       <dbl>        <dbl>       <dbl> <fct>     
#> 1          4.3         3            1.1         0.1 setosa    
#> 2          5.8         4            1.2         0.2 setosa    
#> 3          7           3.2          4.7         1.4 versicolor
#> 4          4.9         2.4          3.3         1   versicolor
#> 5          4.9         2.5          4.5         1.7 virginica 
#> 6          7.9         3.8          6.4         2   virginica

# Summarise the values (changes data structure): each Species is reduced to
# one row with only the max and min of Sepal.Length; the other columns are
# dropped.
iris %>% 
  group_by(Species) %>% 
  summarise(max = max(Sepal.Length),
            min = min(Sepal.Length))
#> # A tibble: 3 x 3
#>   Species      max   min
#>   <fct>      <dbl> <dbl>
#> 1 setosa       5.8   4.3
#> 2 versicolor   7     4.9
#> 3 virginica    7.9   4.9

Another possible solution:

library(tidyverse)

# Compute the min and max Sepal.Length per Species, then spread the species
# across columns so the result is a single row with columns named
# min_<species> and max_<species>.
iris %>% 
  group_by(Species) %>% 
  summarise(min = min(Sepal.Length),  max = max(Sepal.Length)) %>% 
  pivot_wider(names_from = Species, values_from = c(min, max))

#> # A tibble: 1 × 6
#>   min_setosa min_versicolor min_virginica max_setosa max_versicolor
#>        <dbl>          <dbl>         <dbl>      <dbl>          <dbl>
#> 1        4.3            4.9           4.9        5.8              7
#> # … with 1 more variable: max_virginica <dbl>

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM