提示:本站收集StackOverFlow近2千万问答,支持中英文搜索,鼠标放在语句上弹窗显示对应的参考中文或英文, 本站还提供 中文繁体 英文版本 中英对照 版本,有任何建议请联系yoyou2525@163.com。
我有超过 100 个包含此类数据的 csv 文件...
> dput(head(hobo.temp))
structure(list(Serial = c("Plot Title: 20461693", "#", "1",
"2", "3", "4"), Date = c("", "Date Time, GMT-05:00", "02/14/20 10:14:50 AM",
"02/14/20 10:14:57 AM", "02/14/20 11:14:50 AM", "02/14/20 12:14:50 PM"
), Temp = c("", "Temp, °C (LGR S/N: 20461693, SEN S/N: 20461693)",
"18.866", "", "20.817", "20.913"), X1 = c("", "Coupler Detached (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "Logged", "", ""), X2 = c("", "Coupler Attached (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "", "", ""), X3 = c("", "Host Connected (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "", "", ""), X4 = c("", "End Of File (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "", "", "")), row.names = c(NA, 6L), class = "data.frame")
这很讨厌,所以我写了代码来清理它......
hobo.temp <- read.csv("20461693_suw_main_01_19_2021.csv",
colClasses = c(rep("character", 3), rep("NULL", 4)),
col.names = c("Serial", "Date", "Temp", 1, 2, 3, 4),
header = FALSE, fill = TRUE, stringsAsFactors = FALSE)
hobo.temp$Date = as.POSIXct(hobo.temp$Date, format="%m/%d/%y %H:%M")
hobo.temp[,1] <- hobo.temp[1,1]
hobo.temp <- hobo.temp[-c(1:4),]
hobo.temp <- na.omit(hobo.temp)
hobo.temp <- arrange(hobo.temp, Date)
row.names(hobo.temp) <- NULL
hobo.temp$Serial <- gsub("Plot Title: ", "", hobo.temp$Serial, fixed = TRUE)
hobo.temp$Temp <- as.numeric(hobo.temp$Temp)
return(hobo.temp)
但是当我尝试将其转换为 function 并使用此代码对其进行迭代时。
filenames <- list.files(path = ".", pattern='^.*\\.csv$')
hobo.read <- function(fnam) {
hobo.temp <- read.csv(fnam, colClasses = c(rep("character", 3), rep("NULL", 4)),
col.names = c("Serial", "Date", "Temp", 1, 2, 3, 4),
header = FALSE, fill = TRUE, stringsAsFactors = FALSE)
hobo.temp$Date = as.POSIXct(hobo.temp$Date, format="%m/%d/%y %H:%M")
hobo.temp[,1] <- hobo.temp[1,1]
hobo.temp <- hobo.temp[-c(1:4),]
hobo.temp <- na.omit(hobo.temp)
hobo.temp <- arrange(hobo.temp, Date)
row.names(hobo.temp) <- NULL
hobo.temp$Serial <- gsub("Plot Title: ", "", hobo.temp$Serial, fixed = TRUE)
hobo.temp$Temp <- as.numeric(hobo.temp$Temp)
return(hobo.temp)
}
my.df <- do.call("rbind", lapply(filenames, hobo.read))
我收到这个错误
Error in read.table(file = file, header = header, sep = sep, quote = quote, :
more columns than column names
我在编写函数方面很糟糕,所以我提前道歉。
我意识到其中一些文件有 8 列。 我认为这可能是这种情况,并试图用我原来的问题代码colClasses = c(rep("character", 3), rep("NULL", 4))
当我将 4 切换为 5 in ```rep("NULL", 5)),它正确地取消了第 8 列。 我修改了我原来的问题代码,使其更具可读性(也许)。 这是我的第一个真正的 function,它嵌套启动。 这很草率,但我为此感到自豪。
#reads filenames from
filenames <- list.files(path = ".", pattern='^.*\\.csv$')
#first function imports data
hobo.read <- function(x) {
#in... rep("NULL", 5)... 5 has to be larger than the number for columns in the csv with the largest number of columns
df1 <- read.csv(x, colClasses = c(rep("character", 3), rep("NULL", 5)),
col.names = c("Serial", "Date", "Temp", 1, 2, 3, 4),
header = FALSE, fill = TRUE, stringsAsFactors = FALSE)
# line applies the action function below
df2 <- hobo.fix(df1)
}
#function of actions to apply within 1st function
hobo.fix <- function(hobo.temp) {
hobo.temp[,1] <- hobo.temp[1,1]
hobo.temp <- hobo.temp[-c(1:4),]
hobo.temp$Serial <- gsub("Plot Title: ", "", hobo.temp$Serial, fixed = TRUE)
hobo.temp$Temp <- as.numeric(hobo.temp$Temp)
hobo.temp$Date = as.POSIXct(hobo.temp$Date, format="%m/%d/%y %H:%M")
hobo.temp <- na.omit(hobo.temp)
hobo.temp <- dplyr::arrange(hobo.temp, Date)
row.names(hobo.temp) <- NULL
return(hobo.temp)
}
hobo <- do.call("rbind", lapply(filenames, hobo.read))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.