简体   繁体   中英

JSON.parse not working with json from twitter streaming api

I'm using node.js to use the twitter streaming api. Everything works fine except when I try to parse the json I get back. Here is an example of what I try to parse :

{
    "text": "NEWS Nº2559 (use google translator to read it): http://t.co/dF3ClUC",
    "in_reply_to_user_id": null,
    "in_reply_to_status_id": null,
    "favorited": false,
    "in_reply_to_status_id_str": null,
    "id_str": "93748566299918337",
    "in_reply_to_screen_name": null,
    "in_reply_to_user_id_str": null,
    "geo": null,
    "source": "web",
    "contributors": null,
    "retweeted": false,
    "retweet_count": 0,
    "entities": {
        "user_mentions": [],
        "hashtags": [],
        "urls": [
            {
                "display_url": "luxatenealibros.blogspot.com/2011/07/lux-at…",
                "indices": [
                    48,
                    67
                ],
                "expanded_url": "http://luxatenealibros.blogspot.com/2011/07/lux-atenea-news-n2559-cinderella-fables.html",
                "url": "http://t.co/dF3ClUC"
            }
        ]
    },
    "place": null,
    "coordinates": null,
    "user": {
        "favourites_count": 0,
        "profile_sidebar_fill_color": "efefef",
        "profile_image_url": "http://a0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG",
        "default_profile_image": false,
        "show_all_inline_media": false,
        "geo_enabled": false,
        "profile_background_tile": true,
        "screen_name": "LUXATENEAWEBZIN",
        "id_str": "112305851",
        "profile_link_color": "009999",
        "url": null,
        "description": "LUX ATENEA WEBZINE\u000d\u000aREVISTA CULTURAL GÓTICA ATIS&NYD\u000d\u000a",
        "follow_request_sent": null,
        "statuses_count": 3027,
        "verified": false,
        "profile_sidebar_border_color": "eeeeee",
        "time_zone": null,
        "contributors_enabled": false,
        "profile_use_background_image": true,
        "location": "",
        "is_translator": false,
        "lang": "es",
        "profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif",
        "profile_background_color": "131516",
        "protected": false,
        "listed_count": 2,
        "profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif",
        "friends_count": 3,
        "followers_count": 55,
        "name": "LUX ATENEA WEBZINE",
        "notifications": null,
        "created_at": "Mon Feb 08 00:53:45 +0000 2010",
        "id": 112305851,
        "default_profile": false,
        "following": null,
        "utc_offset": null,
        "profile_text_color": "333333",
        "profile_image_url_https": "https://si0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG"
    },
    "truncated": false,
    "id": 93748566299918340,
    "created_at": "Wed Jul 20 18:26:14 +0000 2011"
}

jsonlint.com tells me that it is valid json but it's impossible to parse it from node.js. Any idea why ?

I noticed that

"id_str": "93748566299918337",

and

"id":      93748566299918340,

seem to be two different representations of the same data, but the number form seems to have lost some precision.

Is it possible that the JSON number parser is detecting a loss of precision due to the ID number literal being right up against the limit of the mantissa and bails on that?

JSON doesn't actually specify any semantics for numbers, and doesn't specify how lossy number parsers can be, but implementations might bail on numbers they can't represent.

E.g., only a JSON parser that can use a good bigint/bigdecimal representation, like Python's, will be able to do something reasonable with { "foo": 1e500 }, whereas a JavaScript JSON parser (that represents numbers using its native number type) would probably turn that number into Infinity, which is not round-trippable via JSON.

Section 4 of RFC 4627 says

4 Parsers

... An implementation may set limits on the range of numbers.

EDIT:

The other clue I notice is in

"text": "NEWS Nº2559 ...",
               ^

which contains a non-ASCII character. If you're using Node.js and you're opening a file without specifying the correct encoding, the JSON parser might be assuming UTF-8 since RFC 4627 says

3 Encoding

JSON text SHALL be encoded in Unicode. The default encoding is UTF-8.

and if your file is not UTF-8 then that might lead to a byte sequence that is not valid in UTF-8 which would have to be rejected by the decoder.

I have found the problem: it comes from the user.description part and the \u000d and \u000a (\r and \n) characters it contains. Here is what I did to make it work:

// The tweet is embedded in a JavaScript string literal, so the engine decodes
// the \u000d and \u000a escapes in user.description into real CR/LF characters
// before JSON.parse ever sees them. JSON does not allow raw control characters
// inside strings, which is why parsing this literal directly throws.
// The literal is joined with `+` because a quoted string literal may not
// contain a raw line break.
const rawTweet = '{"text": "NEWS Nº2559 (use google translator to read it): http://t.co/dF3ClUC","in_reply_to_user_id": null,"in_reply_to_status_id": null,"favorited": false,"in_reply_to_status_id_str": null,"id_str": "93748566299918337","in_reply_to_screen_name": null,"in_reply_to_user_id_str": null,"geo": null,"source": "web","contributors": null,"retweeted": false,"retweet_count": 0,"entities": {"user_mentions": [],"hashtags": [],"urls": [{"display_url": "luxatenealibros.blogspot.com/2011/07/lux-at…","indices": [48,67],"expanded_url": "http://luxatenealibros.blogspot.com/2011/07/lux-atenea-news-n2559-cinderella-fables.html","url": "http://t.co/dF3ClUC"}]},"place": null,"coordinates": null,"user": {"favourites_count": 0,"profile_sidebar_fill_color": "efefef","profile_image_url": "http://a0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG","default_profile_image": false,"show_all_inline_media": false,"geo_enabled": false,"profile_background_tile": true,"screen_name": "LUXATENEAWEBZIN","id_str": "112305851","profile_link_color": "009999","url": null,"description": "LUX ATENEA WEBZINE\u000d\u000aREVISTA CULTURAL GÓTICA ATIS&NYD\u000d\u000a","follow_request_sent": null,"statuses_count": 3027,"verified": false,"profile_sidebar_border_color": "eeeeee","time_zone": null,"contributors_enabled": false,"profile_use_background_image": true,"location": "","is_translator": false,"lang": "es","profile_background_image_url_https": "https://si0.twimg.com/images/themes/theme14/bg.gif","profile_background_color": "131516","protected": false,"listed_count": 2,"profile_background_image_url": "http://a1.twimg.com/images/themes/theme14/bg.gif","friends_count": 3,"followers_count": 55,"name": "LUX ATENEA WEBZINE","notifications": null,"created_at": "Mon Feb 08 00:53:45 +0000 2010","id": 112305851,"default_profile": false,"following": null,"utc_offset": null,"profile_text_color": "333333","profile_image_url_https": ' +
  '"https://si0.twimg.com/profile_images/983835547/logo_LUX_ATENEA_WEBZINE_normal.JPG"},"truncated": false,"id": 93748566299918340,"created_at": "Wed Jul 20 18:26:14 +0000 2011"}';

// Remove every decoded CR/LF in one pass so the text is valid JSON again.
// Note: this also drops the line breaks that were part of user.description.
const test = rawTweet.replace(/[\r\n]/g, '');

console.log(JSON.parse(test));

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM