简体   繁体   中英

How to deserialise big JSON file (~300Mb)

I want to parse a large JSON file (about 300 MB). I use the Jackson library and ObjectMapper. Is it normal to run into memory problems?

At first I used a BufferedReader, which crashed the application. Then I switched to this library. How long should it take to parse the file and save it into an SQLite database? For me it is very slow.

Jackson

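You can mix the Streaming API with a regular ObjectMapper. Combining the two, we can implement a simple Iterator class that decodes one object at a time instead of loading the whole document into memory. From a URL we can open a stream and pass it to our implementation. Example code could look like this: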

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.math.BigDecimal;
import java.net.URL;
import java.util.Iterator;

/**
 * Demo entry point: downloads a large JSON document and handles it one record at a time
 * through {@link FieldsJsonIterator}, so the whole document is never held in memory.
 */
public class JsonPathApp {

    /**
     * Opens the dataset URL, iterates over the decoded {@link Fields} records and prints each one.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the connection cannot be opened or the stream cannot be read
     */
    public static void main(String[] args) throws Exception {
        // WARNING: demo-only shortcut. Judging by their names these helpers switch off hostname
        // and certificate verification (SSLUtilities is not shown here - confirm). Never ship
        // this; configure a proper trust store instead.
        SSLUtilities.trustAllHostnames();
        SSLUtilities.trustAllHttpsCertificates();

        URL url = new URL("https://data.opendatasoft.com/explore/dataset/vehicules-commercialises@public/download/?format=json&timezone=Europe/Berlin");
        // Decode explicitly as UTF-8: without a charset argument InputStreamReader falls back to
        // the platform default, which can corrupt non-ASCII characters in the JSON text.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                url.openConnection().getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            FieldsJsonIterator fieldsJsonIterator = new FieldsJsonIterator(reader);
            while (fieldsJsonIterator.hasNext()) {
                Fields fields = fieldsJsonIterator.next();
                System.out.println(fields);
                // Save object to DB
            }
        }
    }
}

/**
 * Streams the {@code fields} member of each JSON object read from a {@link Reader}.
 *
 * <p>Objects are decoded one at a time with Jackson's streaming parser, so memory use does not
 * grow with the size of the document. The iterator does not close the supplied reader; that
 * stays the caller's responsibility.
 */
class FieldsJsonIterator implements Iterator<Fields> {

    private final ObjectMapper mapper;
    private final JsonParser parser;

    /**
     * Creates the iterator and positions the parser on the first JSON object, if there is one.
     *
     * @param reader source of the JSON text
     * @throws IOException if the reader fails or the JSON is malformed before the first object
     */
    public FieldsJsonIterator(Reader reader) throws IOException {
        mapper = new ObjectMapper();
        // Records carry more properties than Fields declares; ignore the extras.
        mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);

        parser = mapper.getFactory().createParser(reader);
        skipStart();
    }

    /**
     * Advances to the first START_OBJECT token. Stops quietly at end of input so that a document
     * without any object (empty input, {@code []}) yields an empty iteration instead of looping
     * forever.
     */
    private void skipStart() throws IOException {
        JsonToken token = parser.currentToken();
        while (token != JsonToken.START_OBJECT) {
            token = parser.nextToken();
            if (token == null) {
                // nextToken() returns null once the input is exhausted.
                return;
            }
        }
    }

    /**
     * Reports whether another object is available.
     *
     * @return {@code true} if the parser is positioned on the start of an object
     * @throws IllegalStateException if reading the next token fails
     */
    @Override
    public boolean hasNext() {
        try {
            // A null current token means the previous value was fully consumed: move on by one
            // token. A null result from nextToken() means end of input, so there is nothing left.
            if (parser.currentToken() == null && parser.nextToken() == null) {
                return false;
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }

        return parser.currentToken() == JsonToken.START_OBJECT;
    }

    /**
     * Decodes the next object and returns its {@code fields} member.
     *
     * @return the decoded record; {@code null} if the object has no {@code fields} member
     * @throws java.util.NoSuchElementException if no object is left
     * @throws IllegalStateException if the object cannot be read or mapped
     */
    @Override
    public Fields next() {
        if (!hasNext()) {
            throw new java.util.NoSuchElementException("No more JSON objects to read");
        }
        try {
            return mapper.readValue(parser, FieldsWrapper.class).fields;
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /** Shape of one array element: only the nested {@code fields} object is of interest. */
    private static final class FieldsWrapper {
        public Fields fields;
    }
}

/**
 * One vehicle record, as found under the {@code fields} member of each JSON object.
 * JSON property names are mapped to English field names with {@link JsonProperty}.
 */
class Fields {

    private String cnit;

    @JsonProperty("puissance_maximale")
    private BigDecimal maximumPower;

    @JsonProperty("champ_v9")
    private String fieldV9;

    @JsonProperty("boite_de_vitesse")
    private String gearbox;

    // add other required properties

    /** @return the {@code cnit} value */
    public String getCnit() {
        return cnit;
    }

    /** @param cnit the {@code cnit} value */
    public void setCnit(String cnit) {
        this.cnit = cnit;
    }

    /** @return the value of {@code puissance_maximale} */
    public BigDecimal getMaximumPower() {
        return maximumPower;
    }

    /** @param maximumPower the value of {@code puissance_maximale} */
    public void setMaximumPower(BigDecimal maximumPower) {
        this.maximumPower = maximumPower;
    }

    /** @return the value of {@code champ_v9} */
    public String getFieldV9() {
        return fieldV9;
    }

    /** @param fieldV9 the value of {@code champ_v9} */
    public void setFieldV9(String fieldV9) {
        this.fieldV9 = fieldV9;
    }

    /** @return the value of {@code boite_de_vitesse} */
    public String getGearbox() {
        return gearbox;
    }

    /** @param gearbox the value of {@code boite_de_vitesse} */
    public void setGearbox(String gearbox) {
        this.gearbox = gearbox;
    }

    /**
     * Renders the record as
     * {@code Fields{cnit='..', maximumPower=.., fieldV9='..', gearbox='..'}}; a missing string
     * shows up as {@code 'null'}.
     */
    @Override
    public String toString() {
        return "Fields{"
                + "cnit='" + cnit + '\''
                + ", maximumPower=" + maximumPower
                + ", fieldV9='" + fieldV9 + '\''
                + ", gearbox='" + gearbox + '\''
                + '}';
    }
}

Above code prints:

Fields{cnit='MMB76K3BQJ41', maximumPower=110.0, fieldV9='70/220*2006/96EURO4', gearbox='A 5'}
Fields{cnit='M10MCDVPF15Z219', maximumPower=95.0, fieldV9='"715/2007*566/2011EURO5', gearbox='A 7'}
Fields{cnit='M10MCDVP027V654', maximumPower=150.0, fieldV9='715/2007*692/2008EURO5', gearbox='A 7'}
Fields{cnit='M10MCDVPG137264', maximumPower=120.0, fieldV9='715/2007*692/2008EURO5', gearbox='M 6'}
Fields{cnit='MVV4912QN718', maximumPower=210.0, fieldV9='null', gearbox='A 6'}
Fields{cnit='MMB76K3B2K88', maximumPower=110.0, fieldV9='null', gearbox='A 5'}
Fields{cnit='M10MCDVP012N140', maximumPower=80.0, fieldV9='70/220*2006/96EURO4', gearbox='M 6'}
Fields{cnit='MJN5423PU123', maximumPower=88.0, fieldV9='null', gearbox='M 6'}
Fields{cnit='M10MCDVP376T303', maximumPower=120.0, fieldV9='"715/2007*692/2008EURO5', gearbox='M 6'}
Fields{cnit='MMB53H3B5Z93', maximumPower=80.0, fieldV9='70/220*2006/96EURO4', gearbox='M 6'}
Fields{cnit='MPE1403E4834', maximumPower=81.0, fieldV9='null', gearbox='M 5'}
Fields{cnit='M10MCDVP018J905', maximumPower=110.0, fieldV9='70/220*2006/96EURO4', gearbox='M 6'}
Fields{cnit='M10MCDVPG112904', maximumPower=100.0, fieldV9='"715/2007*692/2008EURO5', gearbox='M 6'}
Fields{cnit='M10MCDVP015R723', maximumPower=110.0, fieldV9='70/220*2006/96EURO4', gearbox='A 5'}
...

Gson

We can do the same using Gson. An example implementation could look like this:

/**
 * Streams the {@code fields} member of each element of a top-level JSON array, using Gson's
 * {@link JsonReader} so that only one element is decoded at a time.
 */
class FieldsJsonIterator implements Iterator<Fields> {

    private final Gson mapper;
    private final JsonReader parser;

    /**
     * Creates the iterator and consumes the opening bracket of the top-level array.
     *
     * @param reader source of the JSON text
     * @throws IOException if the reader fails or the document does not start with an array
     */
    public FieldsJsonIterator(Reader reader) throws IOException {
        mapper = new GsonBuilder().create();

        parser = mapper.newJsonReader(reader);
        skipStart();
    }

    /** Enters the top-level array so the reader is positioned before its first element. */
    private void skipStart() throws IOException {
        parser.beginArray();
    }

    /**
     * Reports whether the array has another element.
     *
     * @return {@code true} if another element can be read
     * @throws IllegalStateException if peeking at the input fails
     */
    @Override
    public boolean hasNext() {
        try {
            return parser.hasNext();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    /**
     * Decodes the next array element and returns its {@code fields} member.
     *
     * @return the decoded record; {@code null} if the element has no {@code fields} member
     * @throws java.util.NoSuchElementException if the array has no more elements
     */
    @Override
    public Fields next() {
        // Honour the Iterator contract instead of failing inside Gson at the end of the array.
        if (!hasNext()) {
            throw new java.util.NoSuchElementException("No more JSON objects to read");
        }
        FieldsWrapper wrapper = mapper.fromJson(parser, FieldsWrapper.class);
        return wrapper.fields;
    }

    /** Shape of one array element: only the nested {@code fields} object is of interest. */
    private static final class FieldsWrapper {
        public Fields fields;
    }
}

/**
 * One vehicle record, as found under the {@code fields} member of each JSON object.
 * JSON property names are mapped to English field names with {@link SerializedName}.
 */
class Fields {

    private String cnit;

    @SerializedName("puissance_maximale")
    private BigDecimal maximumPower;

    @SerializedName("champ_v9")
    private String fieldV9;

    @SerializedName("boite_de_vitesse")
    private String gearbox;

    /** @return the {@code cnit} value */
    public String getCnit() {
        return cnit;
    }

    /** @param cnit the {@code cnit} value */
    public void setCnit(String cnit) {
        this.cnit = cnit;
    }

    /** @return the value of {@code puissance_maximale} */
    public BigDecimal getMaximumPower() {
        return maximumPower;
    }

    /** @param maximumPower the value of {@code puissance_maximale} */
    public void setMaximumPower(BigDecimal maximumPower) {
        this.maximumPower = maximumPower;
    }

    /** @return the value of {@code champ_v9} */
    public String getFieldV9() {
        return fieldV9;
    }

    /** @param fieldV9 the value of {@code champ_v9} */
    public void setFieldV9(String fieldV9) {
        this.fieldV9 = fieldV9;
    }

    /** @return the value of {@code boite_de_vitesse} */
    public String getGearbox() {
        return gearbox;
    }

    /** @param gearbox the value of {@code boite_de_vitesse} */
    public void setGearbox(String gearbox) {
        this.gearbox = gearbox;
    }

    /**
     * Renders the record as
     * {@code Fields{cnit='..', maximumPower=.., fieldV9='..', gearbox='..'}}; a missing string
     * shows up as {@code 'null'}.
     */
    @Override
    public String toString() {
        return "Fields{"
                + "cnit='" + cnit + '\''
                + ", maximumPower=" + maximumPower
                + ", fieldV9='" + fieldV9 + '\''
                + ", gearbox='" + gearbox + '\''
                + '}';
    }
}

Usage and output should be the same as for Jackson.

See also:

Thank you for your code; it works fine and fast. I use the Jackson library.

I see this class in your code and I'm interested: where did you find this library for Android?

   //Just to make it work. Probably you should not do that!
    SSLUtilities.trustAllHostnames();
    SSLUtilities.trustAllHttpsCertificates();

In addition, I would like to know whether it is normal that the JSON parser does not return the objects in the same order as they appear in the JSON file (here, the object with "designation_commerciale"="LaFerrari" and year="2014" is the first element).

Thank you for your help.

You can read the JSON file token by token. Of course, you need to know the JSON structure and define a matching object. To avoid running out of memory, store the data (and clear the list) inside the while loop rather than collecting everything first. I hope this helps. I tried it with a file of about 170 MB. Can you provide your JSON structure?

JSON structure

[{
  "id":1,
  "content":"Jan"
},
{
  "id":2,
  "content":"Feb"
}]

Object based on the JSON

/**
 * Holder for one element of the JSON array, mirroring its {@code id} and {@code content}
 * members. Constructors, setters and the nested {@code Fields} name constants are generated
 * by the Lombok annotations below.
 */
@NoArgsConstructor
@AllArgsConstructor
@FieldNameConstants
@Setter
public class MyObject {

    /** Value of the JSON {@code id} member. */
    private int id;

    /** Value of the JSON {@code content} member. */
    private String content;
}

Process file

/**
 * Reads a classpath resource containing a JSON array of objects with {@code id} and
 * {@code content} members and converts each object into a {@link MyObject}, token by token.
 *
 * <p>The resource is opened as a stream, so it also works when it lives inside a jar or its
 * path contains characters that are escaped in a URL. Parsing stops at the end of the
 * top-level array or at end of input, whichever comes first.
 *
 * @param configFile classpath-relative name of the JSON resource
 * @return the decoded objects in document order; never {@code null}
 * @throws IOException if the resource does not exist, cannot be read or is not valid JSON
 */
public List<MyObject> process(String configFile) throws IOException {
    var resourceStream = getClass().getClassLoader().getResourceAsStream(configFile);
    if (resourceStream == null) {
        throw new java.io.FileNotFoundException("Classpath resource not found: " + configFile);
    }

    List<MyObject> data = new ArrayList<>();
    // try-with-resources releases the stream and the parser even when parsing fails.
    try (resourceStream; JsonParser jsonParser = new JsonFactory().createParser(resourceStream)) {
        MyObject object = new MyObject();
        JsonToken jsonToken = jsonParser.nextToken();

        // nextToken() returns null at end of input; checking for it prevents an endless loop
        // when the document is empty or its root is not an array.
        while (jsonToken != null && jsonToken != JsonToken.END_ARRAY) {
            String fieldName = jsonParser.getCurrentName();
            if (MyObject.Fields.id.equals(fieldName)) {
                // Step from the field name to its value before reading it.
                jsonToken = jsonParser.nextToken();
                object.setId(jsonParser.getIntValue());
            } else if (MyObject.Fields.content.equals(fieldName)) {
                jsonToken = jsonParser.nextToken();
                object.setContent(jsonParser.getText());
            }

            if (jsonToken == JsonToken.END_OBJECT) {
                data.add(object);
                object = new MyObject();
                //TODO: Should store and clear data after have big list to avoid out of memory
            }
            jsonToken = jsonParser.nextToken();
        }
    }
    return data;
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM