简体   繁体   中英

dplyr mutate across to create a new column and modify all columns in the data

I have some data which looks like:

   column1    column2 column3 column4 column5 column6 column7 column8  column9 column10
   <chr>      <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>    <chr>   <lgl>   
 1 company42  NA      NA      NA      NA      NA      NA      NA       NA      NA      
 2 company105 NA      €315k   NA      NA      NA      NA      Mar 2015 NA      NA      
 3 company23  NA      NA      NA      NA      NA      NA      NA       NA      NA      
 4 company70  NA      €570    NA      NA      NA      NA      Apr 2016 NA      NA

I want to do two things to it.

  1. Extract all of the EUR signs and put them into a column "currency" - each row contains currency data where the currency is unique across the columns but it can change down the rows.
  2. Convert all of the "K" to "000" and all of the "M" to "000000".

Expected output:

   column1    column2 column3 column4 column5 column6 column7 column8  column9 column10  column 11
   <chr>      <chr>   <chr>   <chr>   <chr>   <chr>   <chr>   <chr>    <chr>   <lgl>       <chr>
 1 company42  NA      NA      NA      NA      NA      NA      NA       NA      NA            NA
 2 company105 NA      315000  NA      NA      NA      NA      Mar 2015 NA      NA            €
 3 company23  NA      NA      NA      NA      NA      NA      NA       NA      NA            NA
 4 company70  NA      570    NA      NA      NA      NA      Apr 2016 NA      NA             €

Where the new column 11 has been added and the € removed from column 3 and finally the "K" has been converted to "000".

Data:

# Example data: 20 companies (column1), amount strings such as "€315k" or
# "$1.05M" scattered over column2..column7, month/year strings in column8,
# and the all-NA columns column9 (character) and column10 (logical).
#
# The euro sign is written as the Unicode escape "\u20ac" rather than the raw
# byte "\200": that byte is a euro sign only in Windows-1252, so in a UTF-8
# session the strings would be invalid and would not match "€" / "\u20AC".
data <- structure(
  list(
    column1 = c(
      "company42", "company105", "company23", "company70", "company77",
      "company51", "company20", "company17", "company78", "company80",
      "company39", "company37", "company101", "company61", "company104",
      "company41", "company88", "company131", "company102", "company45"
    ),
    column2 = c(
      NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
      NA, NA, "\u20ac60k", NA, NA, NA, NA, NA, NA, NA
    ),
    column3 = c(
      NA, "\u20ac315k", NA, "\u20ac570", NA, NA, NA, NA, NA, "$1.05M",
      NA, NA, "\u20ac177k", NA, NA, NA, "\u20ac70k", NA, NA, "\u20ac223k"
    ),
    column4 = rep(NA_character_, 20L),
    column5 = rep(NA_character_, 20L),
    column6 = c(
      NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
      NA, NA, NA, "\u20ac653k", NA, NA, NA, NA, NA, NA
    ),
    column7 = rep(NA_character_, 20L),
    column8 = c(
      NA, "Mar 2015", NA, "Apr 2016", NA, NA, NA, NA, NA, "Sep 2012",
      NA, NA, NA, NA, NA, NA, "Jul 2014", NA, NA, "May 2016"
    ),
    column9 = rep(NA_character_, 20L),
    column10 = rep(NA, 20L)
  ),
  row.names = c(NA, -20L),
  class = c("tbl_df", "tbl", "data.frame")
)

Try this approach. It may not be optimal, but it can point you in a new direction that you can adapt:

library(tidyverse)

# NOTE: the output printed below was produced by an earlier version of this
# snippet. With the code as written here, row 10 / column3 is 1050000 (not
# 105000000), and Currency holds a single symbol per row ("€" instead of
# "€,€") or NA (instead of "") when the row has no amount.

# Convert amount strings such as "€315k", "$1.05M" or "€570" to numbers.
# parse_number() keeps the numeric part (decimals included) and drops the
# currency symbol and suffix; the suffix then selects the multiplier.
# Pasting zeros onto the text instead would turn "1.05M" into 105000000.
to_amount <- function(x) {
  multiplier <- case_when(
    str_detect(x, "k$") ~ 1e3,
    str_detect(x, "M$") ~ 1e6,
    TRUE ~ 1 # no suffix, or NA (NA * 1 stays NA)
  )
  parse_number(x) * multiplier
}

# Leading non-numeric part of an amount string, i.e. its currency symbol.
# Returns NA for NA cells and for cells that start with a digit.
to_currency <- function(x) {
  str_extract(x, "^[^0-9.]+")
}

# Code 1: one currency symbol per row -- the first non-missing symbol found
# in column2:column7 (the currency is unique within a row). data2 keeps the
# original amount strings and only gains the Currency column.
currency <- data %>%
  select(column2:column7) %>%
  map(to_currency) %>%
  reduce(coalesce)
data2 <- data %>%
  mutate(Currency = currency)

# Code 2: turn the amount columns into numbers. column8 holds dates, so only
# column2:column7 are converted.
data3 <- data2 %>%
  mutate(across(column2:column7, to_amount))

Output:

# A tibble: 20 x 11
   column1    column2   column3 column4 column5 column6 column7 column8  column9 column10 Currency
   <chr>        <dbl>     <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>    <chr>   <lgl>    <chr>   
 1 company42       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
 2 company105      NA    315000      NA      NA      NA      NA Mar 2015 NA      NA       "€"     
 3 company23       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
 4 company70       NA       570      NA      NA      NA      NA Apr 2016 NA      NA       "€"     
 5 company77       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
 6 company51       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
 7 company20       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
 8 company17       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
 9 company78       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
10 company80       NA 105000000      NA      NA      NA      NA Sep 2012 NA      NA       "$"     
11 company39       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
12 company37       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
13 company101   60000    177000      NA      NA      NA      NA NA       NA      NA       "€,€"   
14 company61       NA        NA      NA      NA  653000      NA NA       NA      NA       "€"     
15 company104      NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
16 company41       NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
17 company88       NA     70000      NA      NA      NA      NA Jul 2014 NA      NA       "€"     
18 company131      NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
19 company102      NA        NA      NA      NA      NA      NA NA       NA      NA       ""      
20 company45       NA    223000      NA      NA      NA      NA May 2016 NA      NA       "€"     

Does this work:

# Adds column11 with the currency symbol found in each row (NA when the row
# has none) and converts the amount strings in column2:column7 to numbers.
#
# NOTE: the output printed below was produced by an earlier version of this
# snippet. With the code as written here, row 4 / column3 is 570 (not NA),
# and column8, column9 and column10 keep their original values and types
# instead of being turned into all-NA doubles.
data %>%
  rowwise() %>%
  mutate(column11 = case_when(
    any(grepl("\u20AC", c_across(column2:column10))) ~ "\u20AC",
    any(grepl("\\$", c_across(column2:column10))) ~ "$",
    TRUE ~ NA_character_
  )) %>%
  # Drop the rowwise grouping so the conversion below runs once per column.
  ungroup() %>%
  # parse_number() ignores the currency symbol and the k/M suffix; the suffix
  # picks the multiplier, and plain amounts such as "€570" are kept (x 1).
  # Only column2:column7 are converted so the dates in column8 survive.
  mutate(across(
    column2:column7,
    ~ parse_number(.x) * case_when(
      grepl("k$", .x) ~ 1e3,
      grepl("M$", .x) ~ 1e6,
      TRUE ~ 1
    )
  ))
# A tibble: 20 x 11
# Rowwise: 
   column1    column2 column3 column4 column5 column6 column7 column8 column9 column10 column11
   <chr>        <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl> <chr>   
 1 company42       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
 2 company105      NA  315000      NA      NA      NA      NA      NA      NA       NA €       
 3 company23       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
 4 company70       NA      NA      NA      NA      NA      NA      NA      NA       NA €       
 5 company77       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
 6 company51       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
 7 company20       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
 8 company17       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
 9 company78       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
10 company80       NA 1050000      NA      NA      NA      NA      NA      NA       NA $       
11 company39       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
12 company37       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
13 company101   60000  177000      NA      NA      NA      NA      NA      NA       NA €       
14 company61       NA      NA      NA      NA  653000      NA      NA      NA       NA €       
15 company104      NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
16 company41       NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
17 company88       NA   70000      NA      NA      NA      NA      NA      NA       NA €       
18 company131      NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
19 company102      NA      NA      NA      NA      NA      NA      NA      NA       NA NA      
20 company45       NA  223000      NA      NA      NA      NA      NA      NA       NA €       
> 

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM