[英]What is the best/fastest way to load these .csv files?
我一直在尝试加载 6 个 .csv 文件。我分别试过 CSVReader 库和 BufferedReader 类,发现使用 BufferedReader 时文件加载得更快。但这种方式会导致 OutOfMemoryError,所以我不得不在 Eclipse 中把最大堆内存设置为 1024 MB;而使用 CSVReader 库时则没有这个问题。我想知道我的代码是否有问题,以及在兼顾速度和内存的前提下,加载这些文件的最佳方法是什么。这是我的代码(这里使用的是 BufferedReader):
/**
 * Loads the six monthly moving-violation CSV files of one semester of 2018.
 *
 * For every data row (the first line of each file is skipped as a header) a
 * VOMovingViolations is built and enqueued in movingViolationsQueue. After all
 * files are read, the method prints the number of rows per month, the semester
 * total and the bounding box (min/max) of the X/Y coordinates.
 *
 * Any failure (missing file, malformed number, short row, invalid semester) is
 * reported on the console and ends the load; rows enqueued before the failure
 * stay in the queue.
 *
 * NOTE(review): rows are split on every comma, so a quoted field that contains
 * a comma will shift the columns. Use a real CSV parser if the data can contain
 * such fields.
 *
 * @param semestre 1 for January-June, 2 for July-December
 */
public void loadMovingViolations(int semestre)
{
    final String path = ".\\data\\Moving_Violations_Issued_in_";
    final String[][] monthsBySemester = {
        {"January", "February", "March", "April", "May", "June"},
        {"July", "August", "September", "October", "November", "December"}
    };
    try
    {
        if (semestre != 1 && semestre != 2)
        {
            // Reported through the same catch block as an unreadable file.
            throw new IllegalArgumentException("semestre must be 1 or 2 but was " + semestre);
        }
        String[] months = monthsBySemester[semestre - 1];
        int numInfracs = 0;
        int[] infracs = new int[months.length];
        double xMin = Double.MAX_VALUE, yMin = Double.MAX_VALUE;
        // Start below every finite value so negative coordinates are handled too.
        double xMax = -Double.MAX_VALUE, yMax = -Double.MAX_VALUE;

        for (int i = 0; i < months.length; i++)
        {
            String cPath = path + months[i] + "_2018.csv";
            int tempInfrac = 0;
            // try-with-resources closes the file even when a row fails to parse.
            try (BufferedReader br = new BufferedReader(new FileReader(cPath)))
            {
                br.readLine(); // skip the header line
                String row;
                while ((row = br.readLine()) != null)
                {
                    // Limit -1 keeps trailing empty fields, so a row whose last
                    // columns are empty still has all of its indexes.
                    String[] columns = row.split(",", -1);

                    // Columns 1 and 11 are not used.
                    int objId = Integer.parseInt(columns[0]);
                    String location = columns[2];
                    int adressId = columns[3].isEmpty() ? 0 : Integer.parseInt(columns[3]);
                    double streetId = columns[4].isEmpty() ? 0 : Double.parseDouble(columns[4]);
                    double xCord = Double.parseDouble(columns[5]);
                    double yCord = Double.parseDouble(columns[6]);
                    String ticketType = columns[7];
                    int fineAmt = Integer.parseInt(columns[8]);
                    double totalPaid = Double.parseDouble(columns[9]);
                    int penalty1 = Integer.parseInt(columns[10]);
                    String accident = columns[12];
                    String date = columns[13];
                    String vioCode = columns[14];
                    String vioDesc = columns[15];

                    VOMovingViolations vomv = new VOMovingViolations(objId, location, adressId, streetId, xCord, yCord, ticketType, fineAmt, totalPaid, penalty1, accident, date, vioCode, vioDesc);
                    movingViolationsQueue.enqueue(vomv);
                    tempInfrac++;

                    if (xCord > xMax)
                        xMax = xCord;
                    if (yCord > yMax)
                        yMax = yCord;
                    if (xCord < xMin)
                        xMin = xCord;
                    if (yCord < yMin)
                        yMin = yCord;
                }
            }
            numInfracs += tempInfrac;
            infracs[i] = tempInfrac;
        }

        System.out.println();
        for (int j = 0; j < months.length; j++)
        {
            System.out.println("En el mes " + months[j] + " se encontraron " +
                    infracs[j] + " infracciones");
        }
        System.out.println();
        System.out.println("Se encontraron " + numInfracs + " infracciones en el semestre.");
        System.out.println();
        System.out.println("Minimax: " + "("+xMin+", "+yMin+"), " + "("+xMax+", "+yMax+")");
        System.out.println();
    }
    catch (Exception e)
    {
        e.printStackTrace();
        System.out.println();
        System.out.println("No se pudieron cargar los datos");
        System.out.println();
    }
}
至于哪种方式“更好”,和往常一样,要视情况而定。
你正在重新发明轮子。 编写一个功能齐全的 csv 解析器来处理任意输入数据是非常困难的。 您的解析器对“,”进行了简单的拆分,这意味着只要其中一列包含带有逗号的字符串,它就会失败! 当分隔符更改时,您也可能会遇到麻烦。
您的代码速度更快,因为它省略了 csv 解析器可以执行的大量操作。 因此,您的代码适用于您的表,但如果其他人为您提供了有效的 csv 文件,您的解析器将向您抛出异常。 真正的 csv 解析器会接受任何格式正确的输入!
因此:如果您的代码的唯一目的是读取具有给定结构的文件,当然,您可以使用更快的解决方案。 但是,如果您希望输入数据格式会随着时间而改变,那么每次更新都会使您更改代码。 更糟糕的是,随着时间的推移,此类更新可能会使您的代码变得更加复杂。 因此,您必须仔细权衡开发效率与运行时性能。
谢谢您的回答。我试过使用另一个库,现在加载文件只需要大约 1.2 秒(我加载了大约 600k 个对象)。但是,如果不在 Eclipse 的启动参数中加上 -Xms512m 和 -Xmx1024m,我仍然会得到 OutOfMemoryError: Java heap space 异常。有什么办法可以让我的加载方法使用更少的内存吗?
/**
 * Loads the six monthly moving-violation CSV files of one semester of 2018
 * using a CsvParser configured with "\n" as line separator.
 *
 * For every data row (the first parsed row of each file is discarded as a
 * header) a VOMovingViolations is built and enqueued in queue. After all files
 * are read, the method prints the number of rows per month, the semester total
 * and the bounding box (min/max) of the X/Y coordinates.
 *
 * A missing file, or a semester other than 1 or 2, is reported on the console.
 * Runtime failures while converting a row (for example a malformed number) are
 * not caught here and propagate to the caller; the parser is stopped first.
 *
 * @param semestre 1 for January-June, 2 for July-December
 */
public void loadMovingViolations(int semestre)
{
    CsvParserSettings settings = new CsvParserSettings();
    settings.getFormat().setLineSeparator("\n");
    CsvParser parser = new CsvParser(settings);

    final String path = ".\\data\\Moving_Violations_Issued_in_";
    final String[][] monthsBySemester = {
        {"January", "February", "March", "April", "May", "June"},
        {"July", "August", "September", "October", "November", "December"}
    };
    try
    {
        if (semestre != 1 && semestre != 2)
        {
            // There is no file for such a semester; report it through the
            // same handler as any other missing file.
            throw new FileNotFoundException("No data files for semestre " + semestre);
        }
        String[] months = monthsBySemester[semestre - 1];
        int numInfracs = 0;
        int[] infracs = new int[months.length];
        double xMin = Double.MAX_VALUE, yMin = Double.MAX_VALUE;
        // Start below every finite value so negative coordinates are handled too.
        double xMax = -Double.MAX_VALUE, yMax = -Double.MAX_VALUE;

        for (int i = 0; i < months.length; i++)
        {
            String cPath = path + months[i] + "_2018.csv";
            int tempInfrac = 0;
            parser.beginParsing(new FileReader(cPath));
            try
            {
                parser.parseNext(); // discard the header row
                String[] row;
                while ((row = parser.parseNext()) != null)
                {
                    // Columns 1 and 11 are not used. The null checks below
                    // guard against fields the parser returns as null.
                    int objId = Integer.parseInt(row[0]);
                    String location = emptyIfNull(row[2]);
                    int addressId = 0;
                    if (row[3] != null)
                    {
                        addressId = Integer.parseInt(row[3]);
                    }
                    double streetId = 0;
                    if (row[4] != null)
                    {
                        streetId = Double.parseDouble(row[4]);
                    }
                    double xCord = Double.parseDouble(row[5]);
                    double yCord = Double.parseDouble(row[6]);
                    String ticketType = emptyIfNull(row[7]);
                    int fineAmt = Integer.parseInt(row[8]);
                    double totalPaid = Double.parseDouble(row[9]);
                    int penalty1 = Integer.parseInt(row[10]);
                    String accident = emptyIfNull(row[12]);
                    String date = emptyIfNull(row[13]);
                    String vioCode = emptyIfNull(row[14]);
                    String vioDesc = emptyIfNull(row[15]);

                    VOMovingViolations vomv = new VOMovingViolations(objId, location, addressId, streetId, xCord, yCord, ticketType, fineAmt, totalPaid, penalty1, accident, date, vioCode, vioDesc);
                    queue.enqueue(vomv);
                    tempInfrac++;

                    if (xCord > xMax)
                        xMax = xCord;
                    if (yCord > yMax)
                        yMax = yCord;
                    if (xCord < xMin)
                        xMin = xCord;
                    if (yCord < yMin)
                        yMin = yCord;
                }
            }
            finally
            {
                // Always stop the parser, also when a row fails to convert,
                // so the current input is not left open.
                parser.stopParsing();
            }
            numInfracs += tempInfrac;
            infracs[i] = tempInfrac;
        }

        System.out.println();
        for (int j = 0; j < months.length; j++)
        {
            System.out.println("En el mes " + months[j] + " se encontraron " +
                    infracs[j] + " infracciones");
        }
        System.out.println();
        System.out.println("Se encontraron " + numInfracs + " infracciones en el semestre.");
        System.out.println();
        System.out.println("Minimax: " + "("+xMin+", "+yMin+"), " + "("+xMax+", "+yMax+")");
        System.out.println();
    }
    catch (FileNotFoundException e)
    {
        e.printStackTrace();
        System.out.println();
        System.out.println("No se encontró el archivo");
        System.out.println();
    }
}

/** Returns the given text, or the empty string when it is null. */
private static String emptyIfNull(String value)
{
    return value == null ? "" : value;
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.