繁体   English   中英

如何在 ggplot 中处理 NA 值(连接 geom_line 折线图中的间隙)

[英]How to deal with NA values in ggplot (connect gaps in a geom_line plot)

我在用 ggplot 的 geom_line 绘制含有 NA 值的数据时遇到了问题。我的图中有很多条线(每条线对应一个 y 变量),我想把每条线上的间隙连接起来。

这是我的数据:

structure(list(date = structure(c(18607, 18608, 18609, 18610, 
18611, 18612, 18613, 18614, 18615, 18616, 18617, 18618, 18619, 
18620, 18621, 18622, 18623, 18624, 18625, 18626, 18627, 18628, 
18629, 18630, 18631, 18632, 18633, 18634, 18635, 18636, 18637, 
18638, 18639, 18640, 18641, 18642, 18643, 18644, 18645, 18646, 
18647, 18648, 18649, 18650, 18651, 18652, 18653, 18654, 18655, 
18656, 18657, 18658, 18659, 18660, 18661, 18662, 18663, 18664, 
18665, 18666, 18667), class = c("IDate", "Date")), Chi_totalvacc = c(NA, 
NA, NA, NA, 1500000, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 4500000, NA, NA, NA, NA, NA, NA, NA, NA, 
9e+06, NA, NA, NA, 1e+07, NA, NA, NA, NA, NA, NA, 1.5e+07, NA, 
NA, NA, NA, NA, NA, 22767000, NA, NA, NA, 2.4e+07, NA, NA, 31200000, 
NA, NA, NA, NA, NA, 40520000), Fra_totalvacc = c(NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13, 52, 138, 
237, 316, 325, 367, 441, 1800, 6744, 17064, 43282, 76022, 86315, 
91629, 125412, 179119, 235786, 306206, 364846, 382326, 391910, 
468073, 585735, 708973, 850380, 959716, 995147, 1003968, 1075045, 
1171264, 1274711, 1396881, 1494643, 1525377, 1533630, 1609072, 
1717385, 1825982, 1962126, 2077037, 2109641, 2120218, 2216826, 
NA), Ger_totalvacc = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 23901, 43735, 87147, 144956, 183014, 
207472, 258752, 283996, 332881, 385010, 443307, 501885, 561825, 
618352, 651592, 717160, 799049, 897524, 998032, 1100992, 1164666, 
1209742, 1290992, 1394793, 1527791, 1623663, 1735441, 1815811, 
1878262, 1972918, 2071243, 2169433, 2266658, 2365891, 2441074, 
2500998, 2622076, 2754182, 2883466, 3017627, 3138376, 3232341, 
3287632, 3369433, NA), Ita_totalvacc = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7012, 7984, 8911, 
13577, 38802, 49839, 88324, 123425, 191734, 271115, 336183, 427838, 
522977, 609744, 669967, 750455, 831407, 924254, 1016354, 1104250, 
1168365, 1204380, 1246052, 1277808, 1307096, 1341483, 1367115, 
1405360, 1438903, 1507134, 1586484, 1656548, 1748991, 1845153, 
1934175, 1993189, 2077226, 2163136, 2259925, 2362756, 2463197, 
2537186, 2576746, 2640017, 2699495), SPA_totalvacc = c(NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 82834, 139339, NA, 207323, 277976, NA, 
NA, 406091, 488122, 581638, 676186, 768950, NA, NA, 897942, 966097, 
1025937, 1103301, 1165825, NA, 1237593, 1291216, 1356461, 1395618, 
1474189, NA, NA, 1609261, 1673054, 1764778, 1865342, 1988160, 
NA, NA, 2105033, 2167241, NA), UK_totalvacc = c(NA, NA, 86095, 
NA, NA, NA, NA, NA, NA, 673216, NA, NA, NA, NA, NA, NA, 1001985, 
NA, NA, NA, NA, NA, NA, 1397251, NA, NA, NA, NA, NA, NA, 2677971, 
2843815, 3067541, 3356229, 3678180, 4006440, 4286830, 4514802, 
4723443, 5070365, 5437284, 5849899, 6329968, 6822981, 7044048, 
7325773, 7638543, 7953250, 8369438, 8859372, 9468382, 9790576, 
10143511, 10520433, 10992444, 11477040, 11975267, 12526737, 12806587, 
13162878, NA), US_totalvacc = c(NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 556208, 614117, NA, 1008025, NA, NA, 1944585, NA, 2127143, 
NA, 2794588, NA, NA, 4225756, NA, 4563260, 4836469, 5306797, 
5919418, 6688231, NA, NA, 8987322, 9327138, 10278462, 11148991, 
12279180, NA, NA, NA, 15707588, 16525281, 17546374, 19107959, 
20537990, 21848655, 22734243, 23540994, 24652634, 26193682, 27884661, 
29577902, 31123299, 32222402, 32780860, 33878254, 35203710, 36819212, 
39037964, 41210937, 42417617, 43206190), Chi_newvacc = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_), Fra_newvacc = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
39, 86, 99, 79, 9, 42, 74, 1359, 4944, 10320, 26218, 32740, 10293, 
5314, 33783, 53707, 56667, 70420, 58640, 17480, 9584, 76163, 
117662, 123238, 141407, 109336, 35431, 8821, 71077, 96219, 103447, 
122170, 97762, 30734, 8253, 75442, 108313, 108597, 136144, 114911, 
32604, 10577, 96608, NA), Ger_newvacc = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 19834, 43412, 
57809, 38058, 24458, 51280, 25244, 48885, 52129, 58297, 58578, 
59940, 56527, 33240, 65568, 81889, 98475, 100508, 102960, 63674, 
45076, 81250, 103801, 132998, 95872, 111778, 80370, 62451, 94656, 
98325, 98190, 97225, 99233, 75183, 59924, 121078, 132106, 129284, 
134161, 120749, 93965, 55291, 81801, NA), Ita_newvacc = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
972, 927, 4666, 25225, 11037, 38485, 35101, 68309, 79381, 65068, 
91655, 95139, 86767, 60223, 80488, 80952, 92847, 92100, 87896, 
64115, 36015, 41672, 31756, 29288, 34387, 25632, 38245, 33543, 
68231, 79350, 70064, 92443, 96162, 89022, 59014, 84037, 85910, 
96789, 102831, 100441, 73989, 39560, 63271, 59478), Spa_newvacc = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 56505, NA, NA, 70653, NA, NA, 
NA, 82031, 93516, 94548, 92764, NA, NA, NA, 68155, 59840, 77364, 
62524, NA, NA, 53623, 65245, 39157, 78571, NA, NA, NA, 63793, 
91724, 100564, 122818, NA, NA, NA, 62208, NA), UK_newvacc = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 165844, 
223726, 288688, 321951, 328260, 280390, 227972, 208641, 346922, 
366919, 412615, 480069, 493013, 221067, 281725, 312770, 314707, 
416188, 489934, 609010, 322194, 352935, 376922, 472011, 484596, 
498227, 551470, 279850, 356291, NA), US_newvacc = c(NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 57909, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 273209, 470328, 612621, 768813, NA, 
NA, NA, 339816, 951324, 870529, 1130189, NA, NA, NA, NA, 817693, 
1021093, 1561585, 1430031, 1310665, 885588, 806751, 1111640, 
1541048, 1690979, 1693241, 1545397, 1099103, 558458, 1097394, 
1325456, 1615502, 2218752, 2172973, 1206680, 788573), percentChi = c(NA, 
NA, NA, NA, 0.00104215606460204, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 0.00312646819380613, NA, NA, 
NA, NA, NA, NA, NA, NA, 0.00625293638761226, NA, NA, NA, 0.00694770709734695, 
NA, NA, NA, NA, NA, NA, 0.0104215606460204, NA, NA, NA, NA, NA, 
NA, 0.0158178447485298, NA, NA, NA, 0.0166744970336327, NA, NA, 
0.0216768461437225, NA, NA, NA, NA, NA, 0.0281521091584498), 
    percentFra = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, 1.99161951022338e-07, 7.96647804089353e-07, 
    2.11418071085251e-06, 3.63087556863801e-06, 4.84116742485068e-06, 
    4.97904877555845e-06, 5.62249507886139e-06, 6.75618618468085e-06, 
    2.75762701415545e-05, 0.000103319092130358, 0.000261423040941937, 
    0.000663086735703757, 0.00116466844927848, 0.00132235875403793, 
    0.00140377003155583, 0.00192133066166257, 0.00274412996193617, 
    0.00361227690644254, 0.00469112187498047, 0.005589495475592, 
    0.00585729169896665, 0.00600412001732035, 0.00717094860775991, 
    0.00897354810631302, 0.0108615727617046, 0.0130279492238751, 
    0.0147029931528734, 0.0152458013903097, 0.0153809404341534, 
    0.016469850741293, 0.0179439402617098, 0.0195287638268951, 
    0.0214004265620027, 0.0228981550739908, 0.0233690045665078, 
    0.0234954417651068, 0.0246512245273397, 0.0263105959428075, 
    0.0279743182808978, 0.0300600647932043, 0.0318205185588911, 
    0.0323200167320551, 0.0324820579594369, 0.0339621070182343, 
    NA), percentGer = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, 0.000285269451086363, 0.000521997382672778, 
    0.0010401396114733, 0.00173011667092066, 0.00218435644203672, 
    0.00247627394484707, 0.0030883243800468, 0.00338962315512835, 
    0.00397308816146101, 0.00459527180296893, 0.00529107336733786, 
    0.00599022879622104, 0.0067056403228566, 0.00738031612142398, 
    0.00777705084190056, 0.00855963514250851, 0.0095370181005442, 
    0.0107123626131474, 0.0119119719177702, 0.0131408469725316, 
    0.0139008255101858, 0.0144388283459319, 0.0154085845444494, 
    0.0166474973218318, 0.0182348897512525, 0.0193791662591204, 
    0.0207132882081406, 0.0216725412010619, 0.0224179226700294, 
    0.0235476856574371, 0.0247212398509046, 0.02589318275715, 
    0.0270536079436221, 0.0282379995355912, 0.0291353432927991, 
    0.0298505638520602, 0.0312956855874953, 0.0328724315857889, 
    0.0344154957134091, 0.0360167690838621, 0.0374579640526595, 
    0.0385794796365819, 0.0392394032054709, 0.040215735842947, 
    NA), percentIta = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, 0.000115973999330619, 0.000132050258222428, 
    0.000147382245869245, 0.000224554904294326, 0.000641760285514358, 
    0.000824305212869184, 0.0014608225209466, 0.00204137063140069, 
    0.00317115784193624, 0.0044840688574616, 0.00556025199899679, 
    0.00707616713143374, 0.008649705397594, 0.0100847761334639, 
    0.011080826070955, 0.0124120461591072, 0.0137509405107633, 
    0.015286570561512, 0.0168098457095938, 0.0182635893840325, 
    0.0193240105145349, 0.0199196756009428, 0.0206089038525266, 
    0.0211341278004363, 0.0216185326053986, 0.0221872716120988, 
    0.0226112085132457, 0.0232437563746832, 0.0237985361606996, 
    0.0249270333010772, 0.0262394315964115, 0.0273982453855017, 
    0.0289271935344065, 0.0305176515668696, 0.0319900185617941, 
    0.0329660724118364, 0.0343559906921769, 0.03577688719567, 
    0.0373777154074799, 0.0390784744384507, 0.0407397043966319, 
    0.0419634351776463, 0.0426177322987985, 0.043664194208617, 
    0.0446479223221633), percentUK = c(NA, NA, 0.00126822901521792, 
    NA, NA, NA, NA, NA, NA, 0.00991686003494918, NA, NA, NA, 
    NA, NA, NA, 0.0147598170603767, NA, NA, NA, NA, NA, NA, 0.020582313255616, 
    NA, NA, NA, NA, NA, NA, 0.0394480576585418, 0.0418910354481905, 
    0.0451866484879564, 0.0494391892620458, 0.0541817132144057, 
    0.0590171723762088, 0.06314747882347, 0.066505637892606, 
    0.0695790401803588, 0.0746894013676221, 0.0800943299004608, 
    0.0861723868737362, 0.0932440801788834, 0.100506446070975, 
    0.103762890506856, 0.107912862274233, 0.112520144800392, 
    0.11715596045394, 0.123286649778355, 0.130503660224278, 0.139474728841014, 
    0.144220832323552, 0.149419768469507, 0.154972046962729, 
    0.161925041279496, 0.169063419906112, 0.176402591025979, 
    0.184526062249886, 0.18864841418564, 0.193896786147554, NA
    ), percentUS = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.00168037326903914, 
    0.00185532353159701, NA, 0.00304536839549806, NA, NA, 0.0058748321731699, 
    NA, 0.00642636250579591, NA, 0.00844279653147306, NA, NA, 
    0.0127665323473984, NA, 0.0137861737401756, 0.0146115719733202, 
    0.016032491123855, 0.0178832950541329, 0.020205974364912, 
    NA, NA, 0.0271518130790054, 0.0281784393101847, 0.0310525069607676, 
    0.0336824829077575, 0.0370969238804909, NA, NA, NA, 0.0474545691472975, 
    0.0499249209931545, 0.0530097694354692, 0.0577275111639817, 
    0.0620478119620596, 0.0660074932875084, 0.0686829643389529, 
    0.0711202590473544, 0.0744786611933046, 0.0791343581007677, 
    0.0842430151321418, 0.0893585059457244, 0.0940273417209259, 
    0.0973478680368378, 0.0990350388345988, 0.102350402049806, 
    0.10635476881851, 0.111235400483066, 0.117938525126054, 0.124503345739105, 
    0.128148875498268, 0.130531252216844), percentSPA = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0.00177166900763928, 0.00298020846337796, 
    NA, 0.00443426290739067, 0.00594540242011176, NA, NA, 0.0086855498826719, 
    0.0104400441768706, 0.01244018178846, 0.0144623920081075, 
    0.0164464457037476, NA, NA, 0.0192053506055199, 0.0206630624293562, 
    0.0219429314857477, 0.0235976071154046, 0.0249348820632961, 
    NA, 0.0264698694035218, 0.0276167681069122, 0.0290122403091893, 
    0.0298497375124166, 0.0315302286826997, NA, NA, 0.034419173755977, 
    0.0357835903120329, 0.0377454002941261, 0.039896281841368, 
    0.0425231360821416, NA, NA, 0.0450228375565341, 0.046353353837617, 
    NA)), row.names = c(NA, -61L), class = "data.frame")

这是我的代码:

# One geom_line layer per percent* column of jointdataset, all sharing the
# date column on the x axis; the layers differ only in the y column and colour.
# The data is plotted as-is, NA values included.
ggplot(jointdataset, aes(x = date)) +
  geom_line(
    aes(y = percentFra),
    color = "lightgreen", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentGer),
    color = "steelblue", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentIta),
    color = "darkgreen", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentUS),
    color = "darkblue", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentSPA),
    color = "orange", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentUK),
    color = "darkred", linetype = "solid", size = 1.5
  )

我的输出 plot: 在此处输入图像描述

所以我尝试连接间隙(使用 na.omit),但是当我使用 na.omit 时,图表变为空白

# Same six line layers, but drawn from na.omit(jointdataset).
# na.omit() drops every row that has an NA in ANY column; in the data shown
# above Chi_newvacc is NA in every row, so no rows are left to plot and the
# chart comes out empty.
ggplot(na.omit(jointdataset), aes(x = date)) +
  geom_line(
    aes(y = percentFra),
    color = "lightgreen", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentGer),
    color = "steelblue", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentIta),
    color = "darkgreen", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentUS),
    color = "darkblue", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentSPA),
    color = "orange", linetype = "solid", size = 1.5
  ) +
  geom_line(
    aes(y = percentUK),
    color = "darkred", linetype = "solid", size = 1.5
  )

在此处输入图像描述

还有其他方法可以连接间隙吗?

一种方法是用每一列中前一个非缺失值来填充 NA 值。另外,最好先把数据转换成长格式(tidy 格式),这样绘图会更容易。

library(tidyverse)

# Plot every percent* series except percentChi as one coloured line per
# country, after filling NA gaps with the previous observed value.
jointdataset %>%
  # Keep the date plus the percent* columns, leaving out percentChi.
  select(date, starts_with("percent"), -percentChi) %>%
  # Carry the last non-NA value forward in each column (fill()'s default
  # direction is "down"); NAs before a column's first value stay NA.
  fill(everything()) %>%
  # Long format: pivot_longer()'s default output columns are `name`/`value`.
  pivot_longer(cols = -date) %>%
  ggplot(aes(x = date, y = value, color = name)) +
  geom_line(linetype = "solid", size = 1.5) +
  # `values` and `labels` are given in the same order as `breaks`.
  scale_color_manual(
    values = c(
      "lightgreen", "steelblue", "darkgreen",
      "darkblue", "orange", "darkred"
    ),
    # "Germany" (was "German") so every legend entry is a country name.
    labels = c("France", "Germany", "Italy", "US", "Spain", "UK"),
    breaks = c(
      "percentFra", "percentGer", "percentIta",
      "percentUS", "percentSPA", "percentUK"
    )
  )

在此处输入图像描述

我找到了解决方案:首先,我从完整的数据集中选取了一部分变量。

这是我现在的数据集(df4):

structure(list(date = structure(c(18607, 18608, 18609, 18610, 
18611, 18612, 18613, 18614, 18615, 18616, 18617, 18618, 18619, 
18620, 18621, 18622, 18623, 18624, 18625, 18626, 18627, 18628, 
18629, 18630, 18631, 18632, 18633, 18634, 18635, 18636, 18637, 
18638, 18639, 18640, 18641, 18642, 18643, 18644, 18645, 18646, 
18647, 18648, 18649, 18650, 18651, 18652, 18653, 18654, 18655, 
18656, 18657, 18658, 18659, 18660, 18661, 18662, 18663, 18664, 
18665, 18666, 18667), class = c("IDate", "Date")), Fra_totalvacc = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13, 
52, 138, 237, 316, 325, 367, 441, 1800, 6744, 17064, 43282, 76022, 
86315, 91629, 125412, 179119, 235786, 306206, 364846, 382326, 
391910, 468073, 585735, 708973, 850380, 959716, 995147, 1003968, 
1075045, 1171264, 1274711, 1396881, 1494643, 1525377, 1533630, 
1609072, 1717385, 1825982, 1962126, 2077037, 2109641, 2120218, 
2216826, NA), Ger_totalvacc = c(NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, 23901, 43735, 87147, 144956, 
183014, 207472, 258752, 283996, 332881, 385010, 443307, 501885, 
561825, 618352, 651592, 717160, 799049, 897524, 998032, 1100992, 
1164666, 1209742, 1290992, 1394793, 1527791, 1623663, 1735441, 
1815811, 1878262, 1972918, 2071243, 2169433, 2266658, 2365891, 
2441074, 2500998, 2622076, 2754182, 2883466, 3017627, 3138376, 
3232341, 3287632, 3369433, NA), UK_totalvacc = c(NA, NA, 86095, 
NA, NA, NA, NA, NA, NA, 673216, NA, NA, NA, NA, NA, NA, 1001985, 
NA, NA, NA, NA, NA, NA, 1397251, NA, NA, NA, NA, NA, NA, 2677971, 
2843815, 3067541, 3356229, 3678180, 4006440, 4286830, 4514802, 
4723443, 5070365, 5437284, 5849899, 6329968, 6822981, 7044048, 
7325773, 7638543, 7953250, 8369438, 8859372, 9468382, 9790576, 
10143511, 10520433, 10992444, 11477040, 11975267, 12526737, 12806587, 
13162878, NA), US_totalvacc = c(NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 556208, 614117, NA, 1008025, NA, NA, 1944585, NA, 2127143, 
NA, 2794588, NA, NA, 4225756, NA, 4563260, 4836469, 5306797, 
5919418, 6688231, NA, NA, 8987322, 9327138, 10278462, 11148991, 
12279180, NA, NA, NA, 15707588, 16525281, 17546374, 19107959, 
20537990, 21848655, 22734243, 23540994, 24652634, 26193682, 27884661, 
29577902, 31123299, 32222402, 32780860, 33878254, 35203710, 36819212, 
39037964, 41210937, 42417617, 43206190), SPA_totalvacc = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 82834, 139339, NA, 207323, 277976, 
NA, NA, 406091, 488122, 581638, 676186, 768950, NA, NA, 897942, 
966097, 1025937, 1103301, 1165825, NA, 1237593, 1291216, 1356461, 
1395618, 1474189, NA, NA, 1609261, 1673054, 1764778, 1865342, 
1988160, NA, NA, 2105033, 2167241, NA)), row.names = c(NA, -61L
), class = "data.frame")

然后我将 data.frame 对象转换为 zoo 对象:

 # Attach the packages this snippet relies on: zoo provides read.zoo() and
 # na.fill(); ggplot2 provides the autoplot() generic used for plotting.
 library(zoo)
 library(ggplot2)

 # Convert the data frame to a zoo series.
 # NOTE(review): read.zoo() is expected to use the first column (date) as
 # the time index by default -- confirm against the zoo documentation.
 z <- read.zoo(df4)

我使用了 zoo 包中的这个函数:

# Fill NA values in the zoo object z.
# NOTE(review): per zoo's na.fill() documentation, the three list entries are
# the fill for NAs to the left of the data, in its interior, and to its right;
# "extend" in the interior slot should linearly interpolate between the
# neighbouring non-NA values, while NA keeps the leading/trailing NAs.
# Confirm against the installed zoo version.
filled <- na.fill(z, list(NA, "extend", NA))

下面是我的绘图代码:

# Plot the gap-filled series.
# NOTE(review): with zoo's autoplot method, facets = NULL is documented to
# draw all series in one panel instead of one facet per series -- confirm.
# autoplot() itself is a ggplot2 generic, so ggplot2 must be attached.
autoplot(filled, facets = NULL) 

结果:

在此处输入图像描述

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM