简体   繁体   中英

Posting a pdf to Solr using Ajax

I am trying to push (Post) pdf files to Solr/Tika for text extraction and indexing using Ajax/js. I've gotten the following curl command to work:

curl 'http://localhost:8983/solr/techproducts/update/extract?literal.id=doc1&commit=true' -F "myfile=@/PathToFile/SomeDoc.pdf"

This command puts the desired pdf into the Solr index, and I can retrieve it just fine. However, I need to be able to do this from a web browser. After much googling and a little experimentation, I've got the following js code ALMOST working. It returns a 0 status code and a status of Success, but nothing gets committed to the index:

   /*
    * Click handler for the #solrPost button: reads the first file selected in
    * #upload_file into memory and hands the bytes to sendToSolr(), which
    * issues the request to Solr's /update/extract handler.
    */
   $("#solrPost").click(function(event) {
        // Keep the click from bubbling and from triggering the default action.
        event.stopPropagation();
        event.preventDefault();

        /* Read the selected local file into an ArrayBuffer */
        let fileAsBlob = null;
        let file = $('#upload_file')[0].files[0];
        let myReader = new FileReader();

        // The read is asynchronous; the upload is started from this callback
        // once myReader.result has been populated.
        myReader.onloadend = function() {
            fileAsBlob = myReader.result;
            sendToSolr(fileAsBlob); 
        };
        // readAsArrayBuffer() only starts the read and has no return value,
        // so this assignment does not store the file content.
        fileAsBlob = myReader.readAsArrayBuffer(file);

        /*
         * Sends the given buffer to Solr. Declared as a function declaration,
         * so it is hoisted and already callable from the onloadend callback.
         */
        function sendToSolr(fileAsBlob) {
            $.ajax({ 
                // Document id is hard-coded to doc2 here.
                url:"http://localhost:8983/solr/techproducts/update/extract?literal.id=doc2&commit=true",
                type: 'POST',
                data: fileAsBlob,
                cache: false,
                // NOTE(review): crossOrigin is not a documented $.ajax setting
                // (the documented one is crossDomain) - verify it has any effect.
                crossOrigin: true,
                // NOTE(review): jQuery performs 'jsonp' requests as GET script
                // loads, so the 'POST' type and the binary body above are
                // presumably not sent as intended.
                dataType: 'jsonp',
                // Name of the query parameter that carries the JSONP callback name.
                jsonp: 'json.wrf',
                // Pass `data` through untouched instead of serializing it.
                processData: false,
                // Do not let jQuery set a Content-Type header.
                contentType: false, 

                success: function(data, status) {
                    console.log("Ajax.post successful, status: " + data.responseHeader.status + "\t status text: " + status);
                    console.log("debug");
                },
                // First argument of the error callback is the jqXHR object.
                error: function(data, status) {
                    console.log("Ajax.post error, status: " + data.status + "\t status text:" + data.statusText);
                },
                // NOTE(review): `done` is not a $.ajax setting (the completion
                // hook is `complete`, or .done() on the returned jqXHR), so this
                // callback is presumably never invoked.
                done: function(data, status) {
                    console.log("Ajax.post Done");
                }
            });
        }
        // NOTE: the closing `});` of the click handler is not shown in this snippet.

This is SO close to working, but I just can't figure out what's going wrong. All indications (from the client side) are good, but nothing is added to the index. Note:

  1. The fileReader is working, I see an Array of the same size as the source pdf.
  2. Even though I specify POST, when I examine the network tab in the browser/debugger, it says GET.
  3. I've hardcoded the literal.id=doc2 for simplicity, not a long term strategy...

I know there are similar posts, but none address the issue of extracting pdf's using Solr/Tika outside of the provided post script. Thanks in advance.

Well, it took some searching, but thanks to a post by "tonejac" I found the solution. If you look at "JQuery Ajax is sending GET instead of POST", the VERY last comment states that if you use dataType: 'jsonp', "POST" gets converted to "GET". I deleted the jsonp, installed a plugin to handle the CORS issue I was trying to avoid by using jsonp, and voilà, it worked. For those interested, the working code is posted below. It's not fancy or robust, but it allows me to post or get documents (.pdf, .docx...) to Solr from a web app. I've only posted the js code, but the html is simple and provides an input of type "file", as well as inputs to set the id for posting docs or searching by id. There are two buttons, solrPost and solrGet, which call the listeners in the js. The connectSolr() function is called from the html onLoad.

/**
 * Wires up the Solr demo page. Meant to be called once (e.g. from the page's
 * onLoad) after jQuery is available.
 *
 * Registers two click listeners:
 *  - #solrPost: reads the file chosen in #upload_file and POSTs its raw bytes
 *    to the techproducts /update/extract handler, using the value of
 *    #userSetId as the document id.
 *  - #solrGet: looks up the document whose id is in #docId via /select (JSONP)
 *    and renders its extracted text into #textArea.
 */
function connectSolr() {
    const SOLR_CORE_URL = "http://localhost:8983/solr/techproducts";

    /**
     * POSTs a file's bytes to Solr for text extraction and indexing.
     *
     * @param {ArrayBuffer} fileAsBlob - raw file content.
     * @param {string} extractUrl - fully built /update/extract URL.
     */
    function sendToSolr(fileAsBlob, extractUrl) {
        $.ajax({
            url: extractUrl,
            type: 'POST',
            data: fileAsBlob,
            cache: false,
            // Send the buffer untouched: no query-string serialization and no
            // jQuery-supplied Content-Type header.
            processData: false,
            contentType: false,

            success: function(data, status) {
                // The response may not be parsed JSON, so do not assume that
                // responseHeader is present.
                const header = data && data.responseHeader;
                const solrStatus = header ? header.status : "unknown";
                console.log("Ajax.post successful, status: " + solrStatus + "\t status text: " + status);
            },
            error: function(jqXHR) {
                console.log("Ajax.post error, status: " + jqXHR.status + "\t status text:" + jqXHR.statusText);
            },
            // `complete` (not `done`) is the $.ajax setting that runs after
            // success/error.
            complete: function() {
                console.log("Ajax.post Done");
            }
        });
    }

    /**
     * Renders the extracted text of the first matching document into
     * #textArea, one <span> per line.
     *
     * @param {Object} theText - Solr select response.
     * @param {string} statusCode - jQuery status text (unused).
     */
    function renderDoc(theText, statusCode) {
        const $textArea = $("#textArea");
        $textArea.empty();

        const docs = theText && theText.response && theText.response.docs;
        if (!docs || docs.length === 0) {
            console.log("Ajax.get: no matching document");
            return;
        }

        // `content` may be multi-valued (array) or a single string.
        const content = docs[0].content;
        const extractedText = Array.isArray(content) ? content[0] : content;
        if (typeof extractedText !== "string") {
            console.log("Ajax.get: document has no extracted content");
            return;
        }

        extractedText.split('\n').forEach(function(line) {
            // .text() rather than .html(): the extracted text is plain text,
            // so '<' and '&' must be shown literally, not parsed as markup.
            $textArea.append($("<span />").text(line).append("<br/>"));
        });
    }

    $("#solrPost").click(function(event) {
        event.stopPropagation();
        event.preventDefault();

        const file = $('#upload_file')[0].files[0];
        if (!file) {
            console.log("Ajax.post error, no file selected");
            return;
        }

        /* Get the unique id for the doc and append it to the extract url */
        const docId = $("#userSetId").val();
        if (!docId) {
            console.log("Ajax.post error, no document id given");
            return;
        }
        // Encode the id so characters such as '&', '=' or spaces cannot alter
        // the query string.
        const extractUrl = SOLR_CORE_URL + "/update/extract/?commit=true&literal.id=" + encodeURIComponent(docId);

        /* Read the local file into memory, then send it */
        const myReader = new FileReader();
        // `load` fires only on a successful read (unlike `loadend`).
        myReader.onload = function() {
            sendToSolr(myReader.result, extractUrl);
        };
        myReader.onerror = function() {
            console.log("Ajax.post error, could not read file: " + myReader.error);
        };
        myReader.readAsArrayBuffer(file);
    });

    $("#solrGet").click(function(event) {
        event.stopPropagation();
        event.preventDefault();

        const docId = "id:" + $("#docId").val();
        $.ajax({
            url: SOLR_CORE_URL + "/select/",
            type: "get",
            dataType: "jsonp",
            data: {
                q: docId
            },
            // Solr takes the JSONP callback name in the json.wrf parameter.
            jsonp: "json.wrf",
            success: function(data, status) {
                renderDoc(data, status);
            },
            error: function(jqXHR, status) {
                console.log("Ajax.get error, Error: " + status);
            },
            complete: function() {
                console.log("Ajax.get Done");
            }
        });
    });
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM