[英]Keep only letters in all rows of specific column - remove all other characters
這是示例數據的樣子:
# Console preview of the sample data frame `exp_data`; the object itself is
# constructed further down with structure(). The commented lines are the
# printed output: two rows with columns Name, Greg and Matt.
exp_data
# Name Greg Matt
# 1 Y.L[+12,000]STISKDLITY.M NA L[+12]
# 2 Y.L[+12,000]STISKDLITY.M NA L[+12]
輸入:
# Reproducible sample data (looks like dput() output): a two-row data frame.
#   Name - factor; both rows point at level 71, "Y.L[+12,000]STISKDLITY.M".
#   Greg - NA in both rows.
#   Matt - factor; both rows point at level 6, "L[+12]".
# Both factors carry their complete level sets, so most of the labels listed
# below are not used by any row in this sample.
exp_data <- structure(list(Name = structure(c(71L,71L), .Label = c("F.AM[+15,995]KTKAAL.A", "F.AMKTKAAL.A", "F.EKIKAAY.L",
"F.EKIKAAYL.S", "F.NPTAGC[+58,005]ASL[+12,000]AKEM[+12,000]F[+1151,607].A",
"F.QGRVTM[+15,995].T", "F.SGSNSGNTATL.T", "F.TGYY.M", "F.TNC[+58,005]DF[+1151,607]EKIKAAY.L",
"L.DKSITSL[+370,222]Y.A", "L.DY[+12,000]WGQGTL.V", "L.DYWGQGTL.V",
"L.EQVSQL.Q", "L.EQVSQLQGLW.R", "L.EWMGW.I", "L.ITY[+1151,607]M[+15,995]SGTKSTEF.N",
"L.KQQGGGLEVL.F", "L.KQQGGGLEVLF.Q", "L.L[+504,270]KQQGGGLEVL.F",
"L.LKQQGGGL.E", "L.LKQQGGGLEVL.F", "L.QGLW.R", "L.RSDDTAVY.Y",
"L.RSDDTAVYY.C", "L.SRLRSDDTAVY.Y", "L.SRLRSDDTAVYY.C", "L.STISKDL[+12,000]ITY.M",
"L.STISKDLITY.M", "L.STISKDLITY[+1012,607]M[+15,995].S", "L.STISKDLITY[+12,000].M",
"L.STISKDLITY[+12,000]M[+386,228].S", "L.STISKDLITY[+2918,448].M",
"L.STISKDLITY[+762,322]M[+15,995].S", "L.STISKDLITYM.S", "L.STISKDLITYM[+1282,648].S",
"L.STISKDLITYM[+1456,695].S", "L.STISKDLITYM[+1490,759].S", "L.STISKDLITYM[+371,206].S",
"L.TEIQSL.T", "L.TISRVEAGDEADY.Y", "L.TISRVEAGDEADY[+12,000]Y.C",
"L.TISRVEAGDEADYY.C", "L.TISRVEAGDEADYY[+12,000].C", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY.V",
"L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[+1239,661].V", "L.VTVSSGGGSEGGGSEGGGSEGGGSGSY[+1987,847].V",
"L.VVY[+1501,680]DDSDRPSGIPERF.S", "L.VVYDDSDRPSGIPERF.S", "M.KKARKSKVTTNKC[+58,005]L[+2909,467]EQVSQLQGL.W",
"M.SGTKSTEF.N", "M.TELDYW.G", "M.TRDTSISTAY.M", "M.TRDTSISTAY[+12,000].M",
"M.TRDTSISTAYM.E", "M.TRDTSISTAYMEL.S", "W.GQGTL.V", "W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY.V",
"W.GQGTLVTVSSGGGSEGGGSEGGGSEGGGSGSY[+1239,661].V", "W.INPNSGGTNY.A",
"W.INPNSGGTNY[+12,000].A", "W.VRQAPGQGL.E", "W.VRQAPGQGLEW.M",
"W.VRQAPGQGLEW[+12,000]M[+486,244].G", "W.VRQAPGQGLEWM.G", "W.Y[+12,000]QQKPGQAPVLVVY.D",
"W.YQQKPGQAPVL.V", "W.YQQKPGQAPVL[+12,000]VVY.D", "W.YQQKPGQAPVLVVY.D",
"Y.AQKF.Q", "Y.DDSDRPSGIPERF.S", "Y.L[+12,000]STISKDLITY.M",
"Y.LSTISKDL.I", "Y.LSTISKDL[+12,000]ITY.M", "Y.LSTISKDLITY.M",
"Y.M[+12,000]SGTKSTEF.N", "Y.M[+15,995]EL.S", "Y.M[+15,995]ELSRL.R",
"Y.M[+15,995]SGTKSTEF.N", "Y.MELSRL.R", "Y.MSGTKSTEF.N", "Y.QQKPGQAPVL.V",
"Y.QQKPGQAPVL[+12,000]VVY.D", "Y.QQKPGQAPVL[+12,000]VVYDDSDRPSGIPERF.S",
"Y.QQKPGQAPVLVVY.D", "Y.QQKPGQAPVLVVYDDSDRPSGIPERF.S", "Y.TFTGY.Y",
"Y.TFTGYY.M", "Y.TILDKSITSL.Y", "Y.VLTQPPSVSVAPGQTARITC[+58,005]GGNNIGSKSVHW.Y",
"Y.WGQGTL.V", "Y.YMHW.V"), class = "factor"), Greg = c(NA,
NA), Matt = structure(c(6L, 6L), .Label = c("","C[+58]", "C[+58], F[+1152]", "C[+58], F[+1152], L[+12], M[+12]",
"C[+58], L[+2909]", "L[+12]", "L[+370]", "L[+504]", "M[+12]",
"M[+1283]", "M[+1457]", "M[+1491]", "M[+16]", "M[+16], Y[+1013]",
"M[+16], Y[+1152]", "M[+16], Y[+762]", "M[+371]", "M[+386], Y[+12]",
"M[+486], W[+12]", "Y[+12]", "Y[+1240]", "Y[+1502]", "Y[+1988]",
"Y[+2918]"), class = "factor")), row.names = 1:2, class = "data.frame")
我想專注於名為 `Name` 的列,並在該列的所有行中只保留字母。數據框非常長,各行在字符串的開頭、中間、結尾以及特定字母之間包含各種非字母字符(數字、點、問號等)。我想把這些字符全部刪除,只保留字母。
使用 `gsub`(以下兩條語句是兩個獨立的替代方案,擇一執行即可:第一條直接覆蓋 `Name` 列,第二條保留原列並新增 `clean_name` 列):
# Overwrite Name with a letters-only version: every run of characters outside
# A-Z / a-z is deleted. gsub() returns a character vector, so the column is
# no longer a factor afterwards.
exp_data[["Name"]] <- gsub(
  pattern = "[^A-Za-z]+",
  replacement = "",
  x = exp_data[["Name"]]
)
# Keep Name untouched and store the letters-only version in a new column,
# clean_name: each single character outside a-z / A-Z is removed.
exp_data[["clean_name"]] <- gsub("[^a-zA-Z]", "", exp_data[["Name"]])
exp_data
# Name Greg Matt clean_name
# 1 Y.L[+12,000]STISKDLITY.M NA L[+12] YLSTISKDLITYM
# 2 Y.L[+12,000]STISKDLITY.M NA L[+12] YLSTISKDLITYM
Gregor 和 Tim 解決方案的另一個變體:
# Same idea using the POSIX character class: delete every run of characters
# that are not alphabetic ([:alpha:]) and overwrite Name with the result.
exp_data[["Name"]] <- gsub(
  pattern = "[^[:alpha:]]+",
  replacement = "",
  x = exp_data[["Name"]]
)
head(exp_data)
# Name Greg Matt
# 1 YLSTISKDLITYM NA L[+12]
# 2 YLSTISKDLITYM NA L[+12]
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.