簡體   English   中英

如何在 R 中使用 dplyr 來消除一行並重命名因子變量

[英]How to use dplyr in R to both eliminate a row and rename a factor variable

我正在將一些 pdfs 表轉換為涉及美國各州數據的數據框,其中一個表有一個小插曲,哥倫比亞特區被分成兩行。 這是數據的一瞥

$ State <fct> Alabama, Alaska, Arkansas, Arizona, California, Colorado, Connecticut, District of, Columbia, Delaw…
$ Any   <fct> "1,133", "311", "591", "1,080", "3,698", "910", "643", "", "69", "32", "3,113", "1,759", "34", "876"…
$ DD    <fct> "47,421", "3,317", "41,962", "67,185", "212,800", "37,444", "9,886", "", "34,841", "2,183", "101,41"…
$ MG    <fct> "15,653", "1,617", "4,827", "16,879", "28,774", "6,040", "22,023", "", "4,278", "569", "29,128", "2"…
$ Sil   <fct> "8,550", "2,085", "7,218", "15,860", "7,303", "6,475", "5,170", "", "160", "302", "28,312", "29,259"…
$ SBR   <fct> "1,327", "508", "1,091", "5,492", "3,738", "2,217", "1,068", "", "440", "94", "7,432", "3,563", "55"…
$ SBS   <fct> "2,075", "1,101", "1,017", "1,851", "12,166", "1,389", "960", "", "1,048", "507", "6,125", "9,422",…
$ Total <fct> "76,159", "8,939", "56,706", "108,347", "268,479", "54,475", "39,750", "", "40,836", "3,687", "175",…

我試圖找到一種方法來使用 dplyr 來消除狀態為“District of”的空行,並將狀態列中的“Colombia”單元格重命名為“District of Colombia”

我在嘗試

data_table %>%
  filter(as.character(State) != "District of")

過濾掉空列,但這不起作用

我對 State 列重新編碼的嘗試是

  atf_data_table %>% mutate(State = recode_factor(State, `Colombia` = "District of Colombia"))

但這也沒有結果。

編輯:附上一段數據的dput

structure(list(State = structure(c(1L, 2L, 4L, 3L, 5L, 6L, 8L, 
10L, 7L), .Label = c("Alabama", "Alaska", "Arizona", "Arkansas", 
"California", "Colorado", "Columbia", "Connecticut", "Delaware", 
"District of", "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", 
"Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Maine", 
"Maryland", "Massachusetts", "Michigan", "Minnesota", "Mississippi", 
"Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire", 
"New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota", 
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", 
"South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", 
"Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin", 
"Wyoming"), class = "factor"), Any = structure(c(6L, 24L, 38L, 
4L, 23L, 50L, 40L, 1L, 44L), .Label = c("", "1,019", "1,061", 
"1,080", "1,110", "1,133", "1,330", "1,430", "1,487", "1,502", 
"1,726", "1,759", "1,776", "199", "2,012", "2,043", "2,514", 
"2,633", "221", "277", "286", "3,113", "3,698", "311", "32", 
"34", "344", "403", "404", "410", "42", "425", "430", "431", 
"517", "570", "588", "591", "6,169", "643", "670", "674", "679", 
"69", "717", "747", "820", "837", "876", "910", "933", "958"), class = "factor"), 
    DD = structure(c(41L, 26L, 38L, 47L, 21L, 36L, 52L, 1L, 33L
    ), .Label = c("", "1,605", "101,418", "102,723", "11,737", 
    "12,878", "13,004", "13,960", "14,049", "148,700", "159,805", 
    "161,841", "17,492", "19,379", "2,183", "2,216", "2,517", 
    "2,710", "2,852", "21,221", "212,800", "22,239", "24,986", 
    "25,316", "26,358", "3,317", "3,352", "3,459", "30,778", 
    "32,848", "34,628", "34,693", "34,841", "35,707", "36,066", 
    "37,444", "37,700", "41,962", "44,821", "45,037", "47,421", 
    "48,026", "5,068", "5,503", "6,776", "60,764", "67,185", 
    "69,005", "71,814", "8,318", "83,363", "9,886"), class = "factor"), 
    MG = structure(c(11L, 5L, 34L, 12L, 23L, 39L, 18L, 1L, 31L
    ), .Label = c("", "1,070", "1,480", "1,495", "1,617", "1,659", 
    "1,892", "10,703", "11,234", "13,442", "15,653", "16,879", 
    "17,019", "17,384", "18,013", "2,049", "2,438", "22,023", 
    "22,081", "23,709", "24,651", "28,690", "28,774", "29,128", 
    "3,262", "3,289", "3,709", "3,805", "3,886", "30,220", "4,278", 
    "4,342", "4,664", "4,827", "429", "569", "595", "6,010", 
    "6,040", "6,076", "6,182", "6,385", "6,442", "6,555", "6,828", 
    "6,843", "6,895", "7,521", "7,972", "8,230", "9,090", "9,863"
    ), class = "factor"), Sil = structure(c(47L, 17L, 43L, 13L, 
    44L, 38L, 36L, 1L, 15L), .Label = c("", "1,071", "1,103", 
    "1,153", "1,290", "1,826", "10,407", "105", "11,964", "12,046", 
    "12,914", "15,736", "15,860", "16,735", "160", "2,022", "2,085", 
    "2,266", "2,273", "2,303", "2,325", "2,782", "2,887", "2,920", 
    "27", "28,312", "29,259", "3,368", "3,679", "301", "302", 
    "4,359", "4,416", "4,913", "47,712", "5,170", "5,857", "6,475", 
    "6,671", "619", "68", "7,024", "7,218", "7,303", "7,457", 
    "8,400", "8,550", "8,609", "9,571", "9,839", "9,841", "965"
    ), class = "factor"), SBR = structure(c(11L, 42L, 3L, 41L, 
    35L, 25L, 2L, 1L, 39L), .Label = c("", "1,068", "1,091", 
    "1,094", "1,095", "1,103", "1,174", "1,183", "1,214", "1,315", 
    "1,327", "1,370", "1,379", "1,380", "1,419", "1,420", "1,457", 
    "1,577", "1,713", "1,787", "1,864", "1,927", "106", "134", 
    "2,217", "2,343", "2,346", "2,621", "2,623", "2,842", "2,850", 
    "260", "264", "3,563", "3,738", "3,853", "343", "369", "440", 
    "497", "5,492", "508", "55", "572", "6,720", "682", "688", 
    "7,432", "700", "895", "9,271", "94"), class = "factor"), 
    SBS = structure(c(20L, 5L, 3L, 15L, 17L, 10L, 52L, 1L, 4L
    ), .Label = c("", "1,011", "1,017", "1,048", "1,101", "1,106", 
    "1,121", "1,160", "1,309", "1,389", "1,470", "1,508", "1,638", 
    "1,674", "1,851", "112", "12,166", "12,323", "175", "2,075", 
    "2,266", "2,316", "2,542", "211", "3,655", "3,771", "3,822", 
    "345", "373", "385", "398", "410", "5,538", "507", "548", 
    "59", "6,125", "6,272", "6,557", "607", "7,342", "712", "752", 
    "767", "768", "8,581", "850", "887", "9,422", "936", "94", 
    "960"), class = "factor"), Total = structure(c(47L, 49L, 
    41L, 4L, 18L, 39L, 29L, 1L, 30L), .Label = c("", "10,871", 
    "106,513", "108,347", "108,574", "11,770", "112,931", "114,110", 
    "14,742", "15,724", "17,455", "175,528", "18,376", "197,217", 
    "223,303", "258,204", "26,151", "268,479", "27,553", "29,383", 
    "3,592", "3,687", "3,803", "30,319", "36,080", "37,307", 
    "38,917", "39,389", "39,750", "40,836", "43,589", "45,530", 
    "48,340", "48,359", "48,989", "49,024", "5,581", "50,550", 
    "54,475", "55,105", "56,706", "58,964", "6,185", "65,524", 
    "69,371", "7,652", "76,159", "8,293", "8,939", "80,736", 
    "82,187", "95,060"), class = "factor")), row.names = c(NA, 
9L), class = "data.frame")

任何幫助,將不勝感激

我真的沒有看到這里的問題。 這段代碼對我來說很好用

my_data %>%
  filter(State != "District of") %>% 
  mutate(State = recode_factor(State, 
                               "Columbia" = "District of Columbia"))

你在“Colombia”中有一個錯字,但過濾沒問題

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM