简体   繁体   中英

ggplot normalising each facet_wrap

I am trying to use ggplot to plot a number of graphs.

The code is:

# Points at ave_Networth with error bars spanning ave_Networth -/+ sd_Networth,
# drawn per decile, with one panel per AGE_bin.
ggplot(df, aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth
    )
  ) +
  # scales = "free" lets every panel choose its own x and y axis ranges.
  facet_wrap(vars(AGE_bin), scales = "free")

However, one decile (decile 10) is so much larger than the rest that it distorts the scale for all the other deciles. How can I normalise each facet so that decile 10 can be better compared with the other deciles?

Data:

# Example data (dput() output): 100 rows = 10 age bins (AGE_bin) x deciles 1-10.
# Columns: AGE_bin (character), decile (integer), ave_Networth and
# sd_Networth (numeric). The object carries the "grouped_df" class and a
# `groups` attribute keyed on AGE_bin (rows 1:10, 11:20, ... per bin).
df <- structure(list(AGE_bin = c("bin_18_24", "bin_18_24", "bin_18_24", 
"bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", "bin_18_24", 
"bin_18_24", "bin_18_24", "bin_25_29", "bin_25_29", "bin_25_29", 
"bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", "bin_25_29", 
"bin_25_29", "bin_25_29", "bin_30_34", "bin_30_34", "bin_30_34", 
"bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", "bin_30_34", 
"bin_30_34", "bin_30_34", "bin_35_39", "bin_35_39", "bin_35_39", 
"bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", "bin_35_39", 
"bin_35_39", "bin_35_39", "bin_40_44", "bin_40_44", "bin_40_44", 
"bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", "bin_40_44", 
"bin_40_44", "bin_40_44", "bin_45_49", "bin_45_49", "bin_45_49", 
"bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", "bin_45_49", 
"bin_45_49", "bin_45_49", "bin_50_54", "bin_50_54", "bin_50_54", 
"bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", "bin_50_54", 
"bin_50_54", "bin_50_54", "bin_55_59", "bin_55_59", "bin_55_59", 
"bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", "bin_55_59", 
"bin_55_59", "bin_55_59", "bin_60_64", "bin_60_64", "bin_60_64", 
"bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", "bin_60_64", 
"bin_60_64", "bin_60_64", "bin_65_90", "bin_65_90", "bin_65_90", 
"bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", "bin_65_90", 
"bin_65_90", "bin_65_90"), decile = c(1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L), ave_Networth = c(-42799.9514563107, -13326.7549019608, 
-3804.56310679612, 182.656862745098, 2367.56310679612, 5490.49019607843, 
10219.3786407767, 15573.6666666667, 32942.0873786408, 1215195, 
-88543.938547486, -24661.6629213483, -6073.77653631285, 1068.8595505618, 
4448.57541899441, 9035.94943820225, 18357.6983240223, 38919.7191011236, 
75015.1117318436, 360729.747191011, -83359.7058823529, -10225.6590909091, 
645.52036199095, 6519.20454545455, 16950.520361991, 32910.3090909091, 
59947, 99614.9363636364, 193918.529411765, 4559636.43636364, 
-43682.3646341463, -2316.69918699187, 3812.26016260163, 14740.2073170732, 
31149.2520325203, 61549.8536585366, 111223.390243902, 220390.162601626, 
431319.044715447, 39707858.5813008, -47304.6305970149, 908.816479400749, 
9145.00746268657, 31857.7191011236, 76431.6940298507, 136961.548689139, 
239810.029850746, 409516.632958802, 799214.123134328, 23437733.3146067, 
-23534.0347222222, 5107.85069444444, 19872.9652777778, 62279.3229166667, 
141796.600694444, 254556.736111111, 481769.951388889, 928658.868055556, 
2675194.20138889, 43276345.59375, -18186.3404255319, 7353.51671732523, 
40047.4984802432, 106741.513677812, 201771.617021277, 349972.689969605, 
632600.917933131, 1395636.24620061, 7467362.75075988, 100107189.018237, 
-12872.2715053763, 12844.7601078167, 64099.5698924731, 162562.291105121, 
324875.215053763, 672549.58490566, 1464271.26344086, 3975236.26684636, 
15764846.3172043, 133524703.185984, -2200.25284090909, 29720.5982905983, 
108347.6875, 241293.168091168, 464230.673295455, 805069.43019943, 
1655039.94318182, 4463594.18803419, 15625284.53125, 129427645.128205, 
-12766.4538361508, 46987.2743823147, 140018.637191157, 279912.055006502, 
506132.146423927, 860558.538361508, 1646383.63328999, 4331098.02275683, 
20196055.0429129, 222960808.747984), sd_Networth = c(19952.4205187352, 
4009.59002234056, 1755.86187802571, 354.750993275092, 875.657161288449, 
983.36347182754, 1154.42818471179, 2673.13307234081, 9814.53960254566, 
3229068.69348881, 80763.4718472531, 6641.16873426075, 3948.88255496786, 
823.703039739828, 1163.67857190816, 1530.58811957344, 4659.8855411689, 
7753.20657773506, 14988.8062067764, 391183.089014281, 79403.7440792276, 
6771.38822139849, 1076.50097149467, 2085.51169306169, 3538.12985729517, 
6773.87617091665, 8136.08653692693, 15575.370906716, 43499.5408140372, 
16251487.9059923, 35314.6097298394, 2791.20649192616, 2213.05829515479, 
3292.91147796933, 6575.35932388955, 11179.6713837163, 22475.5169477255, 
39758.4468521584, 119334.223663411, 174371376.396929, 49949.5306903766, 
1799.05664503879, 4185.55767385215, 9485.99298648255, 15351.1273951797, 
23735.7079084032, 38061.3561426131, 60915.0623003272, 222652.345949324, 
94889492.5724926, 40634.4838428703, 3486.55103511871, 6022.01536051466, 
18357.8033065045, 30008.4145616776, 43065.4085235003, 91012.8666376759, 
203097.385703473, 1053542.62119673, 58091928.9133239, 31388.6889295018, 
5191.21573011365, 14192.8835953361, 22709.198055496, 33034.8868226208, 
54945.0489348437, 119298.977766417, 450266.641660294, 4096090.77500322, 
156293273.663792, 33679.4592685038, 7624.74535501237, 24662.3647632881, 
29814.2874815741, 66391.9192226496, 123491.617620793, 406935.703862311, 
1212704.00461397, 7023794.80821185, 141166857.287318, 16746.6945744379, 
14991.3779599531, 26718.8686094867, 49599.5165232508, 69555.903370777, 
142279.335735688, 350387.632009764, 1529856.10479949, 6307011.85646724, 
166820992.513686, 165289.391214998, 21461.3316797954, 33730.6952915096, 
51158.8410213337, 78696.8069684297, 138373.125085833, 394345.528508884, 
1597491.31445124, 10026567.8512041, 269598766.17565)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    AGE_bin = c("bin_18_24", "bin_25_29", "bin_30_34", "bin_35_39", 
    "bin_40_44", "bin_45_49", "bin_50_54", "bin_55_59", "bin_60_64", 
    "bin_65_90"), .rows = list(1:10, 11:20, 21:30, 31:40, 41:50, 
        51:60, 61:70, 71:80, 81:90, 91:100)), row.names = c(NA, 
-10L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE))

Edited to include the pseudo-log transform, which is more appropriate than a log10 scale for data that contain negative numbers.

Perhaps using a log transform would help?

# Same faceted plot, but with a log10 y axis and comma-formatted labels.
# A log10 scale cannot represent values <= 0, so negative averages and
# error-bar ends are not drawn on it.
ggplot(df, aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  scale_y_log10(labels = scales::comma) +
  # Free x and y scales per panel, panels laid out in two rows.
  facet_wrap(vars(AGE_bin), scales = "free", nrow = 2)

[Image: plot produced by the code above]


Edit: A problem with the standard log transform is that it cannot represent negative numbers, which distorts the visual in two ways: it drops the low deciles whose averages are negative, and it cannot show the error bars that extend far below zero, as they do in decile 10, where the standard deviation exceeds the average.

So another related approach that might be more appropriate here is `scales::pseudo_log_trans`, which combines a signed log transform (which can represent negative numbers) with a linear transformation near zero. By controlling the first argument, `sigma`, we can shift how much of the space is treated linearly vs. logarithmically. In this case, a sigma of about 1000 seemed to spread the visual space best for this dataset, but that's pretty subjective. At the cost of some uneven space distortion, the pseudo-log transform allows a nice balance between precision and breadth.

# Faceted plot on a pseudo-log y axis (sigma = 1000), which can represent
# negative values as well as positive ones.
ggplot(df, aes(x = decile, y = ave_Networth)) +
  geom_point() +
  geom_errorbar(
    aes(
      ymin = ave_Networth - sd_Networth,
      ymax = ave_Networth + sd_Networth,
      group = decile
    )
  ) +
  scale_x_continuous(breaks = 1:10, minor_breaks = NULL) +
  scale_y_continuous(
    trans = scales::pseudo_log_trans(sigma = 1000),
    labels = scales::comma,
    # Powers of ten from 1e3 to 1e8 on both sides of zero; no break at 0.
    breaks = c(
      -1e8, -1e7, -1e6, -1e5, -1e4, -1e3,
      1e3, 1e4, 1e5, 1e6, 1e7, 1e8
    ),
    minor_breaks = NULL
  ) +
  # Only the y axis is free per panel; the x axis is shared across panels.
  facet_wrap(vars(AGE_bin), scales = "free_y", nrow = 2)

[Image: plot produced by the code above]

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM