简体   繁体   中英

Fill rows of columns of a DataFrame with 0s based on the column name

I have a DataFrame that looks like so:

dput(head(f))

structure(list(`2021-04-01` = c(18.75, 18.8047425327496, 19.044178996207, 
19.1751973213021, 19.1689792828825, 19.5836158822698), `2021-05-01` = c(18.68, 
18.6054569055353, 18.8313151379181, 19.0204171560939, 19.0789341312536, 
19.5202197400974), `2021-06-01` = c(18.7, 18.6167441350011, 18.8507363020792, 
19.0240964484369, 19.0811699993501, 19.5312231113571), `2021-07-01` = c(18.74, 
18.6232367708829, 18.8951394692142, 19.0857033425782, 19.1976392153446, 
19.6658807214903), `2021-08-01` = c(18.77, 18.644593579267, 18.9246150091022, 
19.1032894987568, 19.2156342675263, 19.6849727955065), `2021-09-01` = c(18.84, 
18.7266061665193, 19.0136981423636, 19.2080462372435, 19.3252661741782, 
19.7976573506291), `2021-10-01` = c(19.16, 19.0429325189963, 
19.2868666432121, 19.5302122400335, 19.6246896164923, 20.0574681959085
), `2021-11-01` = c(19.61, 19.4885636931401, 19.7281581945936, 
19.9833632850547, 20.1125365664892, 20.5253519746824), `2021-12-01` = c(20.49, 
20.3817436029844, 20.615343551369, 20.8552877862877, 20.9919455460469, 
21.3954384388117), `2022-01-01` = c(20.48, 20.3740323095769, 
20.6359913586072, 20.8916278650574, 21.0735209988655, 21.4729020165058
), `2022-02-01` = c(20.61, 20.5005105056777, 20.7692639443274, 
21.0278974843904, 21.2088372694747, 21.6274189250732), `2022-03-01` = c(20.01, 
19.8989327908198, 20.160435596222, 20.4165374436547, 20.5841189948464, 
20.9775903556065), `2022-04-01` = c(17.33, 17.2371485625473, 
17.4547245416883, 17.5005672257715, 17.6708376845902, 18.1363806857805
), `2022-05-01` = c(16.98, 16.8871485729785, 17.1047245517176, 
17.1505672343433, 17.320837688241, 17.7863806850577), `2022-06-01` = c(16.93, 
16.8371485710296, 17.0547245439537, 17.100567227402, 17.270837681622, 
17.7363806838834), `2022-07-01` = c(16.55, 16.4602929774512, 
16.679186983992, 16.72087791148, 16.902375852915, 17.3788582533972
), `2022-08-01` = c(16.55, 16.4602929762459, 16.6791869839035, 
16.7208779112979, 16.902375853404, 17.3788582563241), `2022-09-01` = c(16.63, 
16.5371485669301, 16.7547245454607, 16.8005672316863, 16.9708376873097, 
17.4363806848043), `2022-10-01` = c(17.55, 17.4565144059899, 
17.6932969707138, 17.7610505877454, 17.9658483820753, 18.3789383274342
), `2022-11-01` = c(17.95, 17.8565144105445, 18.0932969766868, 
18.1610505955585, 18.3658483847519, 18.7789383271028), `2022-12-01` = c(18.83, 
18.746506350021, 18.9775786073436, 19.0456532330963, 19.247708755873, 
19.6618961429746), `2023-01-01` = c(18.85, 18.7565144062112, 
18.9932969711003, 19.0610505896545, 19.2658483811053, 19.6789383251607
), `2023-02-01` = c(18.9, 18.8065144004764, 19.0432969669948, 
19.1110505845328, 19.3158483743015, 19.7289383179684), `2023-03-01` = c(18.48, 
18.3965063458801, 18.6275786028996, 18.695653229726, 18.8977087489342, 
19.3118961401579), `2023-04-01` = c(16.45, 16.3946104511309, 
16.6014635966272, 16.5836894226683, 16.7780479423448, 17.1192896129417
), `2023-05-01` = c(16.1, 16.0446104494817, 16.2514635937641, 
16.2336894172376, 16.4280479394591, 16.7692896149847), `2023-06-01` = c(16.05, 
15.9946104513846, 16.2014635962793, 16.1836894234373, 16.3780479477376, 
16.7192896151668), `2023-07-01` = c(15.68, 15.6196427933565, 
15.8264798722289, 15.8067013586247, 15.9929523803558, 16.3360049618321
), `2023-08-01` = c(15.68, 15.6196427956788, 15.8264798738901, 
15.8067013650241, 15.9929523892458, 16.3360049695611), `2023-09-01` = c(15.75, 
15.6946104485846, 15.9014635974283, 15.8836894236607, 16.0780479442838, 
16.4192896139878), `2023-10-01` = c(16.55, 16.4859287021049, 
16.6972792014036, 16.6652734109894, 16.9082634547469, 17.2600362319714
), `2023-11-01` = c(16.95, 16.88772137434, 17.1001895977296, 
17.0660375905701, 17.3119906079332, 17.6598028036498), `2023-12-01` = c(17.83, 
17.7629779733904, 17.9769203281466, 17.9457670813362, 18.1919876279738, 
18.5322466212441), `2024-01-01` = c(17.85, 17.7877213736662, 
18.0001895966288, 17.9660375873591, 18.21199060611, 18.5598028001363
), `2024-02-01` = c(17.9, 17.8377213696917, 18.0501895967882, 
18.0160375825238, 18.2619906037143, 18.609802799337), `2024-03-01` = c(17.48, 
17.412977978269, 17.6269203306822, 17.5957670842671, 17.8419876327419, 
18.182246625778), `2024-04-01` = c(15.58, 15.6320820495314, 15.9040941973079, 
16.0011191104772, 16.1344022617966, 16.2335337117727), `2024-05-01` = c(15.23, 
15.282082046754, 15.5540941922692, 15.6511191067781, 15.7844022615365, 
15.8835337091116), `2024-06-01` = c(15.18, 15.2320820475369, 
15.5040941967701, 15.6011191055654, 15.7344022621257, 15.8335337059841
), `2024-07-01` = c(14.8, 14.8510913081696, 15.1150995970647, 
15.2182582392206, 15.34485846568, 15.4485814541912), `2024-08-01` = c(14.8, 
14.8510913086602, 15.1150995968972, 15.2182582375734, 15.3448584628037, 
15.4485814528923), `2024-09-01` = c(14.88, 14.9320820482739, 
15.2040941957414, 15.3011191040817, 15.434402262001, 15.5335337090296
), `2024-10-01` = c(15.48, 15.4841139854442, 15.5870431340295, 
15.6417314142646, 15.8049227959112, 16.098072929477), `2024-11-01` = c(15.88, 
15.8841139866634, 15.9870431368691, 16.0417314145131, 16.2049227962264, 
16.4980729322541), `2024-12-01` = c(16.75, 16.7489599312631, 
16.8369242831981, 16.8966205174003, 17.071500700515, 17.3712327127006
), `2025-01-01` = c(16.78, 16.6562480763831, 16.6057902890351, 
16.4748384688119, 16.6420566625675, 16.8092903223808), `2025-02-01` = c(16.83, 
16.7062480758168, 16.6557902869362, 16.5248384644428, 16.69205666146, 
16.8592903220325), `2025-03-01` = c(16.4, 16.2779097817421, 16.2307811943544, 
16.1021665690717, 16.2687265649343, 16.4311314730022), `2025-04-01` = c(15.73, 
15.7087537504016, 15.6075842903385, 15.5299411837982, 15.7125519597493, 
15.8306868005935), `2025-05-01` = c(15.38, 15.3587537511295, 
15.257584289102, 15.1799411833166, 15.3625519604448, 15.480686801248
), `2025-06-01` = c(15.33, 15.3087537532081, 15.2075842896311, 
15.129941184956, 15.312551964791, 15.4306868025988), `2025-07-01` = c(14.95, 
14.9219729698033, 14.8130638959243, 14.7435847446705, 14.9181912547184, 
15.0389778390791), `2025-08-01` = c(14.95, 14.9219729696512, 
14.8130638953738, 14.7435847412341, 14.9181912530557, 15.0389778362589
), `2025-09-01` = c(15.03, 15.0087537477099, 14.9075842830852, 
14.8299411740066, 15.0125519532892, 15.1306867930961), `2025-10-01` = c(15.75, 
15.7563564789682, 15.6174493875532, 15.5606783126084, 15.7533723756061, 
15.8531515873088), `2025-11-01` = c(16.15, 16.1563564821382, 
16.0174493913382, 15.9606783194065, 16.1533723777501, 16.2531515879125
), `2025-12-01` = c(17.03, 17.0272458979667, 16.8737455455617, 
16.8203287006477, 17.0225669443998, 17.1255973103374), Date = c("2021-04-01", 
"2021-04-02", "2021-04-03", "2021-04-04", "2021-04-05", "2021-04-06"
)), row.names = c(NA, 6L), class = "data.frame")

As you can see I have a column that is called Date with Dates from 2021-04-01 until 2025-12-31 . I also have columns, one for each month between these Dates. So a column for April 2021, May 2021, ..., December 2025.

What I want is to optimize the nested for loop that I am using (maybe with dplyr ) below:

  for(i in  1:nrow(f)):
    for(j in 1:ncol(f)):
      if(as.character(f$Date[i]) != colnames(f)[j]):
        f[i, j] <- 0
   

I tried to do it using the comment's suggestions and it worked, but somehow it is slower:

  require(tidyr)
  long <- f %>%
    gather(Month_Product, Price, -c(Date)) %>%
    filter(Month_Product == as.character(floor_date(ymd(Date), unit = "month")))%>%
    spread(Month_Product, Price) %>%
    mutate_all(~replace(., is.na(.), 0)) %>%
    mutate(SPOT = rowSums(across(where(is.numeric)))) %>%
    dplyr::select(Date, SPOT)

So if I were to take only the head of the result DataFrame it would look have the column of 2021-04-01 with values since the first 6 Date Values are in the month of 2021-04-01 and the other columns values would all be equal to 0.

I hope this is clear I could provide further clarification if needed.

Thank you.

Base solution, with the help of lubridate package to get the floor date.

library(lubridate) # floor_date

# get index where floor date of "Date" doesn't match column name
ix <- matrix(
  rep(head(colnames(f), -1), each = nrow(f)),
  nrow = nrow(f)) != as.character(floor_date(ymd(f$Date), unit = "month"))

# assign 0, based on index, excluding the last "Date" column
f[, -ncol(f)][ ix ] <- 0

# then row sums
data.frame(f$Date,
           SPOT = rowSums(f[, -ncol(f)]))
#         Date     SPOT
# 1 2021-04-01 18.75000
# 2 2021-04-02 18.80474
# 3 2021-04-03 19.04418
# 4 2021-04-04 19.17520
# 5 2021-04-05 19.16898
# 6 2021-04-06 19.58362

A bit more clarification: get column names excluding last "Date" column, then repeat each as many as number of rows, then convert to matrix. Then compare this matrix's each column with the floor of "Date" column. This will give us logical (TRUE/FALSE) matrix, which we then use as index to assign zero. And lastly, sum by rows, excluding last "Date" column.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM