简体   繁体   中英

Why is serde_json in Rust so slow when deserializing untagged enums?

When running code:

#![allow(unused)]
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use std::time::Instant;

/// A JSON scalar that is either a string, a signed integer, or a float.
///
/// `#[serde(untagged)]` means the data carries no variant tag; the derived
/// `Deserialize` chooses a variant from the value itself, so the declaration
/// order of the variants below is significant.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[serde(untagged)]
enum NumberOrString {
    /// A string value such as `"5"`.
    String(String),
    /// An integer value stored as `i64`.
    Int(i64),
    /// A floating-point value stored as `f64`.
    Float(f64),
}

/// Times two deserializations of the same JSON document: first into
/// `serde_json::Value` elements, then into `NumberOrString` elements, and
/// prints the elapsed wall-clock time of each.
fn main() {
    // Sample document: one long array of integers plus two short arrays of strings.
    let payload = r#"{
        "17594136111": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 
417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499],
        "0000000017704043101": ["5", "7"],
        "features": ["a1"]
    }"#;

    // Baseline: every array element becomes a `serde_json::Value`.
    let value_timer = Instant::now();
    let as_values: HashMap<&str, Vec<serde_json::Value>> =
        serde_json::from_str(payload).expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", value_timer.elapsed());

    // Candidate: every array element becomes a `NumberOrString`.
    let enum_timer = Instant::now();
    let as_enums: HashMap<&str, Vec<NumberOrString>> =
        serde_json::from_str(payload).expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", enum_timer.elapsed());
}

And the output comes as:

$ cargo run 
Compiling rust_tutorial v0.1.0 (/Users/sandeep.yadav/code/codetest/rust/rust_tutorial)
Finished dev [unoptimized + debuginfo] target(s) in 2.26s
Running `target/debug/rust_tutorial`
Elapsed time: 360.78µs
Elapsed time: 2.22ms

$ cargo run --release
Compiling rust_tutorial v0.1.0 (/Users/sandeep.yadav/code/codetest/rust/rust_tutorial)
Finished release [optimized] target(s) in 2.47s
Running `target/release/rust_tutorial`
Elapsed time: 74.82µs
Elapsed time: 439.90µs

$ cargo run --release
Finished release [optimized] target(s) in 0.03s
Running `target/release/rust_tutorial`
Elapsed time: 63.13µs
Elapsed time: 354.89µs

Why is deserializing into the untagged enum so slow compared to deserializing into serde_json::Value, which is itself an enum?

serde_json::Value covers much more than String, i64 and f64: it also contains Null, Bool, Array and Object. I'm actually reducing the set of acceptable values, yet the time taken still increases by at least 5 times.

Are there any alternatives I can use to achieve the same result?

After implementing a custom Visitor for the NumberOrString enum -- which, as @Chayim correctly mentions, is how serde_json implements Deserialize for Value -- and removing the default #[derive(Deserialize)], the deserialization times are much improved, as shown below.

#![allow(unused)]

use std::collections::HashMap;
use std::fmt;
use std::time::Instant;

use serde::de::{Error, Visitor};
use serde::{Deserialize, Deserializer, Serialize};

#[derive(Serialize, Debug, Clone, PartialEq)]
// note: it appears that an "untagged enum" is not needed anymore
// #[serde(untagged)]
enum NumberOrString {
    String(String),
    Int(i64),
    Float(f64),
}

impl<'de> Deserialize<'de> for NumberOrString {
    #[inline]
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        use crate::NumberOrString::*;

        struct NumberOrStringVisitor;

        impl<'de> Visitor<'de> for NumberOrStringVisitor {
            type Value = NumberOrString;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("a number or string")
            }

            #[inline]
            fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Int(v))
            }

            #[inline]
            fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Int(v as i64))
            }

            #[inline]
            fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(Float(v))
            }

            #[inline]
            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(String(v.to_owned()))
            }

            #[inline]
            fn visit_string<E>(self, v: std::string::String) -> Result<Self::Value, E>
            where
                E: Error,
            {
                Ok(String(v))
            }
        }

        deserializer.deserialize_any(NumberOrStringVisitor)
    }
}

/// Runs the same comparison with the hand-written deserializer: parses one
/// JSON document into `serde_json::Value` elements and then into
/// `NumberOrString` elements, printing the elapsed time of each parse.
fn main() {
    // Input document shared by both measurements.
    let input = r#"{
        "17594136111": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 
417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499],
        "0000000017704043101": ["5", "7"],
        "features": ["a1"]
    }"#;

    // First measurement: generic `serde_json::Value` elements.
    let clock = Instant::now();
    let generic = serde_json::from_str::<HashMap<&str, Vec<serde_json::Value>>>(input)
        .expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", clock.elapsed());

    // Second measurement: `NumberOrString` elements.
    let clock = Instant::now();
    let typed = serde_json::from_str::<HashMap<&str, Vec<NumberOrString>>>(input)
        .expect("panicking !!! ");
    println!("Elapsed time: {:.2?}", clock.elapsed());
}

My times (on my Windows 11 PC) were as follows:

$ cargo run --release
Finished release [optimized] target(s) in 0.03s
Running `target/release/rust_tutorial`
Elapsed time: 286.00µs
Elapsed time: 20.50µs

$ cargo run --release
Finished release [optimized] target(s) in 0.03s
Running `target/release/rust_tutorial`
Elapsed time: 303.90µs
Elapsed time: 24.00µs

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM