I have an RTF file that gets uploaded, and once it's uploaded I want it to be parsed and converted to a JSON file. I'm using mostly Angular for this. The upload works just fine; I'm only having an issue with the parsing. The file does get cleaned up, but it's not properly formatted as a JSON file. Does anyone have any suggestions as to the best thing to do?
// Reads the given file as text and passes the loaded contents to setUpData().
// uploadUrl is accepted but not used by this method.
this.parseFile = function (file, uploadUrl) {
  // Pulls the finished read result off the shared reader and starts parsing.
  function handOff() {
    var contents = reader.result;
    setUpData(contents);
  }

  reader.onload = function (onLoadEvent) {
    // Parsing runs inside $apply, as in the original handler.
    $rootScope.$apply(handOff);
  };

  reader.readAsText(file);
};
/**
 * Posts the given content to includes/loadFile.php as multipart form data and
 * reports the outcome to the user through popUp().
 *
 * @param {*} file - Value sent under the "save_file" form field.
 */
function saveFile(file) {
  var fd = new FormData();
  fd.append('save_file', file);

  // Builds the popUp payload locally; the previous version assigned to an
  // undeclared `msg`, which leaks a global (and throws in strict mode).
  function notify(title, cls, response) {
    popUp({
      title: title,
      cls: cls,
      msg: response.data
    });
  }

  $http({
    method: 'POST',
    url: "includes/loadFile.php",
    data: fd,
    // Pass the FormData through untouched and leave Content-Type unset so the
    // browser can supply the multipart boundary itself.
    transformRequest: angular.identity,
    headers: {'Content-Type': undefined}
  }).then(
    // .then() replaces .success()/.error(), which were removed in AngularJS 1.6;
    // the body now arrives as response.data.
    function (response) {
      notify("success", "success", response);
    },
    function (response) {
      notify("error", "danger", response);
    }
  );
}
/**
 * Extracts the plain text from raw RTF content, strips report boilerplate from
 * it, and passes the cleaned text to saveFile().
 *
 * Works through the variables text, j, len and c, which are not declared in
 * this function and which gettag() and getpgraph() also read and advance.
 *
 * @param {string} data - RTF source text to parse.
 */
function setUpData(data)
{
text = data;
// NOTE(review): the pattern below is a quoted string, not a regex literal, so
// replace() looks for that literal text; this line most likely changes nothing.
text = text.replace('/[\p{Z}\s]{2,}/u', ' ');
// skip over the heading stuff
j= text.indexOf('{',1); // skip ahead to the first part of the header
// NOTE(review): loc and t are never read again in this function.
var loc = 1;
var t ="";
// ansa accumulates the extracted plain text
var ansa="";
len = text.length;
getpgraph(); // skip past the first paragraph (group)
// Walk the input one character at a time; j is also advanced by gettag()/getpgraph().
while(j<len) {
c = text.substr(j,1);
if (c=="\\") {
// have a tag
var tag = gettag();
if (tag.length > 0) {
// process known tags
switch (tag) {
case 'par':
// paragraph break becomes a CRLF in the output
ansa+="\r\n";
break;
// a list of common tags
// parameter tags: read the token that follows and discard it
case 'spriority1':
case 'fprq2':
case 'author':
case 'operator':
case 'sqformat':
case 'company':
case 'xmlns1':
case 'wgrffmtfilter':
case 'pnhang':
case 'themedata':
case 'colorschememapping':
var tt = gettag();
break;
case '*':
case 'info':
case 'stylesheet':
// gets to end of paragraph
j--;
getpgraph();
// no break here: falls through to the empty default below
default:
// ignore the tag
}
}
} else {
// ordinary character: copy it to the output
ansa += c;
}
j++;
};
// Strip braces, double quotes and a blank directly before a carriage return.
ansa = ansa.replace(/{|}|"|( | )\r/g,'');
// Strip known report boilerplate phrases.
ansa = ansa.replace(/Amount After|Amt After/g,'');
ansa = ansa.replace(/negetive amount means Refund NOTE: Layaways are not in Sales Report till they're paid off/g,'');
// String pattern: only the first occurrence of the title is removed.
ansa = ansa.replace("On Line Vendor Sales Quick Report by Month",'');
ansa = ansa.replace(/ ( ) /g,'');
// NOTE(review): the results of trim() and split() are discarded, so ansa is
// unchanged by the next two lines.
ansa.trim();
ansa.split("\n");
console.log(ansa);
// NOTE(review): newData and v are never filled in; v stays 0.
var newData = new Array();
var vendor = new Array();
var orders = new Array();
var v = 0;
//Lets look for the line that has the word vendor
// NOTE(review): these matches run against the whole text, not a single line.
vendorLine = ansa.match(/Vendor/gi);
//Look for each order for the vendor
orderLine = ansa.match(/date/gi);
figuresLine = ansa.match(/figures/g);
var start;
var end;
// NOTE(review): empty loop body; it iterates over ansa but does no work.
for(arr = 0; arr <=ansa.length; arr++)
{
}
if(vendorLine)
{
// NOTE(review): split()'s second argument is a result limit, not the text to
// split. Passing the replace() result there presumably coerces to 0 and leaves
// vendorInfo empty; the inner pattern is also a quoted string, not a regex.
vendorInfo = ansa.split(' ',ansa.replace('/Vendor|\r|\s/','') );
vendorFName = vendorInfo[0];
vendorLName = vendorInfo[1];
vendorID = vendorInfo[2];
vendor = {vendorid: vendorID, vendorInfo:{fname: vendorFName,lname:vendorLName} , orders:orders};
}
if( orderLine )
{
start = 1;
}
if( figuresLine )
{
end = 0;
}
// end is only ever undefined or 0 here, so !end is always true.
if( !end || !start)
{
if( end < start )
{
// NOTE(review): explode, unset, count and array_values are not defined anywhere
// in the visible code (they are PHP function names), and end is a local
// variable, not a function. Reaching this branch presumably throws; confirm.
ordersLine = explode(" ",v);
orderDesc = new Array();
//Remove the stuff we dont need. Like Qty ,Subtotal and Inv#. Once we remove them we reindex them
unset(ordersLine[0],ordersLine[2],ordersLine[count(ordersLine)]); // Removes Subtotal and Inv#
unset(ordersLine[count(ordersLine)]); // Removes QTY
ordersLine = array_values(ordersLine);
orderDate = ordersLine[0];
orderAmt = end(ordersLine);
/*foreach (ordersLine as key=>value)
{
if(key != 0)
{
if(key != count(ordersLine)-1)
{
array_push(orderDesc,value);
orderLines = array("vendorid"=>vendorID,"order"=>array("date"=> orderDate,"desc"=> implode(" " ,orderDesc), "amount"=>orderAmt) );
}
};
}*/
}
}
vendor['orders'] = orders;
// NOTE(review): the cleaned text is what gets saved; the vendor object built
// above is never passed on or serialized.
saveFile(ansa);
}
/**
 * Reads the control word that follows the backslash at the current position.
 *
 * Uses the shared variables text, len, j and c. On return j points at the
 * terminating ';', '{' or '}', or at the last character of the tag when the tag
 * was ended by a backslash or by any other character.
 *
 * @returns {string} The tag name (may be empty).
 */
function gettag() {
  var name = '';
  for (j++; j < len; j++) {
    c = text.charAt(j);
    // hard terminators: stop on them without stepping back
    if (c === ';' || c === '}' || c === '{') {
      break;
    }
    // a new backslash or any character outside the tag alphabet ends the tag;
    // step back so the caller's own increment lands on that character
    if (c === "\\" || !/[0-9A-Za-z'*-]/.test(c)) {
      j--;
      break;
    }
    name += c;
  }
  return name;
}
/**
 * Skips forward to the '}' that closes the current group, stepping over any
 * nested '{' ... '}' pairs on the way.
 *
 * Uses the shared variables text, len and j. On return j points at the closing
 * '}', or is >= len when the input ran out first.
 */
function getpgraph() {
  var depth = 0;
  for (j++; j < len; j++) {
    var ch = text.charAt(j);
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      // an unmatched closing brace ends the group being skipped
      if (depth === 0) {
        return;
      }
      depth--;
    }
  }
}
What I expect from this is a clean, well-formatted JSON file that can then be displayed back to the user on the page they are on. Basically, the page should refresh with the newly uploaded, parsed data.
There is a JS RTF parser that may help. You would then need to json encode the result. https://github.com/lazygyu/RTF-parser
Not sure if this answers your question, but a little more information might help lock down the fix.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.