New to JS and very new to Node. Running Tesseract.js (text recognition software: http://tesseract.projectnaptha.com ) in Safari takes about 10 sec and begins outputting progress immediately.
In Node (v6.9.1), whether run from the terminal or through Electron, the CPU runs at 100% for 4 min 20 sec before any output reaches the console. It then finishes in about the same time.
What troubleshooting steps are recommended? Is this common for Node?
The only difference I see in the logs is that Safari reports "found in cache eng.traineddata". Clearing and disabling the cache only minimally affects the time. I have tried a few .JPG and .PNG files (300-600 KB) with the same result, but a BMP (3.7 MB) gave a fast 17 sec response, then produced errors and didn't finish. (Is this a 'next tick' problem?)
const Tesseract = require('tesseract.js');
const image = "./images/sample.jpg";

/**
 * Runs text recognition on `image`, logging every progress message as it
 * arrives and, once recognition completes, the recognized text.
 *
 * Failures are logged to stderr instead of being dropped silently.
 *
 * @returns {object} The value returned by the recognize chain, so a caller can
 *   attach further handlers. NOTE(review): presumably the tesseract.js job
 *   object — confirm against the installed tesseract.js version.
 */
function tesseract() {
  return Tesseract.recognize(image)
    .progress((message) => console.log(message))
    .then((result) => console.log(result.text))
    // Without a rejection handler a failed recognition produces no output at
    // all, which is indistinguishable from a job that is still running.
    .catch((err) => console.error(err));
}

tesseract();
(the editor is forcing the output to be formatted as code)
NODE console.log
>Bash-3.2$ node JustTess.js
*Waits 4+ min and Then*
{ status: 'loading tesseract core' }
{ status: 'loaded tesseract core' }
{ status: 'initializing tesseract', progress: 0 }
pre-main prep time:108 ms
{ status: 'initializing tesseract', progress: 1 }
{ status: 'loading eng.traineddata', progress: 0 }
{ status: 'loading eng.traineddata', progress: 1 }
{ status: 'initializing api', progress: 0 }
{ status: 'initializing api', progress: 0.3 }
{ status: 'initializing api', progress: 0.6 }
{ status: 'initializing api', progress: 1 }
{ status: 'recognizing text', progress: 0 }
{ status: 'recognizing text', progress: 0.014285714 }...
SAFARI console.log
>[Log] – {status: "loading tesseract core"}
[Log] – {status: "loaded tesseract core"}
[Log] – {status: "initializing tesseract api"}
[Log] pre-main prep time: 115 ms (index.js, line 10)
[Log] – {status: "initialized tesseract api"}
[Log] – {status: "found in cache eng.traineddata"}
[Log] – {status: "loaded eng.traineddata"}
[Log] – {status: "initialized with language"}
[Log] – {status: "recognizing text", progress: 0}
[Log] – {status: "recognizing text", progress: 0.0142}...
NODE with BMP
bash-3.2$ node JustTess.js
*After 17 sec*
{ status: 'initializing tesseract', progress: 0 }
pre-main prep time: 118 ms
{ status: 'initializing tesseract', progress: 1 }
{ status: 'loading eng.traineddata', progress: 0 }
{ status: 'loading eng.traineddata', progress: 1 }
{ status: 'initializing api', progress: 0 }
{ status: 'initializing api', progress: 0.3 }
{ status: 'initializing api', progress: 0.6 }
Error in pixRemoveColormap: pixs must be {1,2,4,8} bpp
Error in pixGetDepth: pix not defined
Error in pixGetWpl: pix not defined
Error in pixCreateHeader: depth must be {1, 2, 4, 8, 16, 24, 32}
Error in pixCreateNoInit: pixd not made
Error in pixCreateTemplateNoInit: pixd not made
Error in pixCreateTemplate: pixd not made
Error in pixCopy: pixd not made
{ status: 'initializing api', progress: 1 }
3
3
/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_modules/tesser
act.js-core/index.js:4
function f(a){throw a;}var h=void 0,i=!0,j=null,k=!1;function aa(){return function(){}}function ba(
a){return function(){return a}}var n,Module;Module||(Module=eval("(function() { try { return Tesser
actCore || {} } catch(e) { return {} } })()"));var ca={},da;for(da in Module)Module.hasOwnProperty(
da)&&(ca[da]=Module[da]);var ea=i,fa=!ea&&i;
^
abort(3) at Error
at Error (native)
at Na (/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_mod
ules/tesseract.js-core/index.js:32:26)
at ka (/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_mod
ules/tesseract.js-core/index.js:507:108)
at Array.JHa (/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/n
ode_modules/tesseract.js-core/index.js:402:25808)
at xd (/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_mod
ules/tesseract.js-core/index.js:382:924)
at R.TesseractCore.V.Begin (/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programmi
ng/GitHub/ba/node_modules/tesseract.js-core/index.js:511:288)
at DumpLiterallyEverything (/Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programmi
ng/GitHub/ba/node_modules/tesseract.js/src/common/dump.js:13:8)
at /Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_modules
/tesseract.js/src/common/worker.js:121:22
at /Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_modules
/tesseract.js/src/common/worker.js:92:9
at /Users/brent/Library/Mobile Documents/com~apple~CloudDocs/Programming/GitHub/ba/node_modules
/tesseract.js/src/node/lang.js:14:25
If this abort() is unexpected, build with -s ASSERTIONS=1 which can give more information.
I can't answer the question; however, the other answers don't shed much light on the matter. See http://www.jsbenchmarks.com/?anywhichway/lookup/master/benchmark.js/ for an example of how NodeJS and browsers differ dramatically across a number of functions. Note that although the browser results on this site are from multiple visitors and the Node results are from a single server, tests in an isolated environment show the same thing.
Tesseract.Js软件的更新已解决了这个问题。
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.