简体   繁体   English

如何让tesseract-ocr从屏幕上的指定坐标读取文本?

[英]How to make tesseract-ocr read from coordinates on a screen?

I have been trying to find an example of how to make a class/function that would attempt to read text from the screen at specified coordinates. 我一直在寻找一个示例,说明如何创建一个类/函数,用于从屏幕上的指定坐标读取文本。

Something simple that would use BitBlt to capture the specified section of the screen and run Tesseract on it. 一些简单的东西,可以使用BitBlt捕获屏幕的指定部分并在其上运行Tesseract。 All done in memory without having to write image files to disk. 所有操作都在内存中完成,无需将图像文件写入磁盘。

Tesseract seems to have a really poor API and requires a TIF image, of all things; as far as I can see, it can't even be made to accept an in-memory bitmap without extensive delving into its code. Tesseract的API似乎非常糟糕,而且竟然要求输入TIF图像;据我所知,如果不深入研究其代码,它甚至无法接受内存中的位图图像。

Any help would be appreciated, an actual example would be ideal. 任何帮助将不胜感激,一个实际的例子将是理想的。

http://i.imgur.com/HaJ2zOI.png http://i.imgur.com/HaJ2zOI.png 在此输入图像描述

Read the code below to see how to use Tesseract-OCR with images from memory. 阅读以下代码,了解如何将Tesseract-OCR与内存中的图像一起使用。

#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

#if defined _WIN32 || defined _WIN64
#include <windows.h>
#endif

/// Holds a buffer of raw pixel bytes together with its dimensions and bit depth.
///
/// On Windows the only way to fill the buffer is the capturing constructor
/// declared below; its definition lives outside this class body. All accessors
/// are const so that a const Image can be passed to read-only consumers.
class Image
{
    private:
        std::vector<std::uint8_t> Pixels;      // raw pixel bytes
        std::uint32_t width = 0, height = 0;   // dimensions in pixels
        std::uint16_t BitsPerPixel = 0;        // bit depth of a single pixel

        // Row-flipping helper; declared here, defined out of line.
        void Flip(void* In, void* Out, int width, int height, unsigned int Bpp);

    public:
        #if defined _WIN32 || defined _WIN64
        /// Builds an image from the Width x Height region at (X, Y) of DC.
        explicit Image(HDC DC, int X, int Y, int Width, int Height);
        #endif

        /// Bit depth of one pixel.
        inline std::uint16_t GetBitsPerPixel() const {return this->BitsPerPixel;}
        /// Whole bytes per pixel (BitsPerPixel / 8, rounded down).
        inline std::uint16_t GetBytesPerPixel() const {return this->BitsPerPixel / 8;}
        /// Bytes per row without any padding: (BitsPerPixel / 8) * width.
        /// NOTE: the result is narrowed to 16 bits by the return type.
        inline std::uint16_t GetBytesPerScanLine() const {return (this->BitsPerPixel / 8) * this->width;}
        /// Width in pixels.
        inline int GetWidth() const {return this->width;}
        /// Height in pixels.
        inline int GetHeight() const {return this->height;}
        /// Read-only pointer to the first byte of the pixel buffer.
        inline const std::uint8_t* GetPixels() const {return this->Pixels.data();}
};

/// Copies the rows of In into Out in reverse vertical order.
///
/// @param In     Source buffer holding `height` rows.
/// @param Out    Destination buffer of at least the same size; must not overlap In.
/// @param width  Row width in pixels.
/// @param height Number of rows.
/// @param Bpp    Bits per pixel; values above 24 are treated as 4 bytes per
///               pixel, everything else as 3 bytes per pixel plus row padding.
///
/// Does nothing when either pointer is null or a dimension is not positive.
void Image::Flip(void* In, void* Out, int width, int height, unsigned int Bpp)
{
   if (In == nullptr || Out == nullptr || width <= 0 || height <= 0)
      return;

   // Bytes per row. For 3-byte pixels, (width % 4) is exactly the padding
   // needed to round width * 3 up to a multiple of four.
   const std::size_t Columns = static_cast<std::size_t>(width);
   const std::size_t Chunk = (Bpp > 24 ? Columns * 4 : Columns * 3 + Columns % 4);
   const std::size_t Rows = static_cast<std::size_t>(height);

   const unsigned char* Source = static_cast<const unsigned char*>(In);
   unsigned char* Destination = static_cast<unsigned char*>(Out);

   // Copy every row, including source row 0. The previous pointer-comparison
   // loop stopped before that row, leaving the last output row unwritten.
   for (std::size_t Row = 0; Row < Rows; ++Row)
   {
      std::memcpy(Destination + Row * Chunk, Source + (Rows - 1 - Row) * Chunk, Chunk);
   }
}

#if defined _WIN32 || defined _WIN64
/// Captures the Width x Height region whose top-left corner is (X, Y) in DC
/// and stores it as 32-bit pixels with the first stored row being the top row.
///
/// @param DC     Device context to copy from.
/// @param X      Left edge of the region in DC coordinates.
/// @param Y      Top edge of the region in DC coordinates.
/// @param Width  Region width in pixels; must be positive.
/// @param Height Region height in pixels; must be positive.
/// @throws std::runtime_error if the dimensions are not positive, if DC has no
///         bitmap object, or if any GDI call needed for the capture fails.
Image::Image(HDC DC, int X, int Y, int Width, int Height) : Pixels(), width(Width), height(Height), BitsPerPixel(32)
{
    // Reject sizes that would otherwise wrap around in the unsigned members.
    if (Width <= 0 || Height <= 0)
        throw std::runtime_error("IMAGE DIMENSIONS MUST BE POSITIVE.");

    BITMAP Bmp = {0};
    HBITMAP hBmp = reinterpret_cast<HBITMAP>(GetCurrentObject(DC, OBJ_BITMAP));

    if (GetObject(hBmp, sizeof(BITMAP), &Bmp) == 0)
        throw std::runtime_error("BITMAP DC NOT FOUND.");

    // Allocate both buffers before acquiring any GDI handle so that an
    // allocation failure cannot leak a handle.
    const std::size_t stride = ((static_cast<std::size_t>(Width) * BitsPerPixel + 31) / 32) * 4;
    const std::size_t data_size = stride * static_cast<std::size_t>(Height);
    std::vector<std::uint8_t> Data(data_size);
    this->Pixels.resize(data_size);

    HDC ScreenDC = GetDC(nullptr);
    HDC SDC = (ScreenDC != nullptr) ? CreateCompatibleDC(ScreenDC) : nullptr;
    HBITMAP hSBmp = (SDC != nullptr) ? CreateCompatibleBitmap(ScreenDC, Width, Height) : nullptr;

    bool Captured = false;
    if (hSBmp != nullptr)
    {
        HGDIOBJ Previous = SelectObject(SDC, hSBmp);
        const bool Copied = BitBlt(SDC, 0, 0, Width, Height, DC, X, Y, SRCCOPY) != 0;

        // GetDIBits requires that the bitmap is not selected into a device
        // context, so put the DC's original bitmap back first.
        SelectObject(SDC, Previous);

        if (Copied)
        {
            // A positive biHeight requests a bottom-up DIB; Flip() below
            // reverses the row order.
            BITMAPINFO Info = {};
            Info.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
            Info.bmiHeader.biWidth = Width;
            Info.bmiHeader.biHeight = Height;
            Info.bmiHeader.biPlanes = 1;
            Info.bmiHeader.biBitCount = BitsPerPixel;
            Info.bmiHeader.biCompression = BI_RGB;
            Info.bmiHeader.biSizeImage = static_cast<DWORD>(data_size);

            Captured = GetDIBits(SDC, hSBmp, 0, static_cast<UINT>(Height), Data.data(), &Info, DIB_RGB_COLORS) != 0;
        }
    }

    // Release every handle that was acquired, on success and on failure alike.
    if (hSBmp != nullptr)
        DeleteObject(hSBmp);
    if (SDC != nullptr)
        DeleteDC(SDC);
    if (ScreenDC != nullptr)
        ReleaseDC(nullptr, ScreenDC);

    if (!Captured)
        throw std::runtime_error("SCREEN CAPTURE FAILED.");

    this->Flip(Data.data(), Pixels.data(), Width, Height, BitsPerPixel);
}
#endif

/// Captures a 200x200 region of the desktop, runs Tesseract on it and prints
/// the recognised text to standard output.
///
/// @return 0 on success, 1 if Tesseract cannot be initialised or yields no text.
int main()
{
    #if defined _WIN32 || defined _WIN64
    HWND SomeWindowHandle = GetDesktopWindow();
    HDC DC = GetDC(SomeWindowHandle);

    Image Img = Image(DC, 0, 0, 200, 200); //screenshot of 0, 0, 200, 200..

    ReleaseDC(SomeWindowHandle, DC);
    #else
    // Placeholder from the original snippet: neither `some_pixel_pointer` nor a
    // matching Image constructor is declared in the visible code.
    Image Img = Image(some_pixel_pointer, 200, 200); //pointer to pixels..
    #endif

    std::unique_ptr<tesseract::TessBaseAPI> tesseract_ptr(new tesseract::TessBaseAPI());

    // Init takes the data path and the language as two separate arguments and
    // returns non-zero on failure.
    if (tesseract_ptr->Init("/tesseract/tessdata", "eng") != 0)
    {
        std::cerr<<"Could not initialise tesseract.\n";
        return 1;
    }

    tesseract_ptr->SetImage(Img.GetPixels(), Img.GetWidth(), Img.GetHeight(), Img.GetBytesPerPixel(), Img.GetBytesPerScanLine());

    // GetUTF8Text hands back a new[]-allocated buffer, hence unique_ptr<char[]>.
    std::unique_ptr<char[]> utf8_text_ptr(tesseract_ptr->GetUTF8Text());

    // Streaming a null char* is undefined behaviour, so check first.
    if (!utf8_text_ptr)
    {
        std::cerr<<"Tesseract returned no text.\n";
        return 1;
    }

    std::cout<<utf8_text_ptr.get()<<"\n";

    return 0;
}

You can do it like this on Windows. 你可以在Windows上这样做。

#include <tesseract/capi.h>
#include <windows.h>

/// Copies the desktop region described by rc into a 32-bit top-down DIB
/// section and runs Tesseract recognition on the copied pixels.
///
/// @param rc Region to capture, in desktop coordinates. Nothing happens when
///           its width or height is not positive.
///
/// NOTE(review): pTessBaseAPI is not declared in this block — presumably a
/// file-scope, already initialised TessBaseAPI handle; confirm.
/// Every GDI handle acquired here is released on all paths.
void ReadFromScreen(RECT rc)
{
    const int kBitsPerPixel = 32;
    const int kBytesPerPixel = kBitsPerPixel / 8;

    const int nWidth = rc.right - rc.left;
    const int nHeight = rc.bottom - rc.top;
    if (nWidth <= 0 || nHeight <= 0)
    {
        return;
    }

    BITMAPINFO bi = {};
    bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bi.bmiHeader.biWidth = nWidth;
    bi.bmiHeader.biHeight = -nHeight;   // negative height requests a top-down DIB
    bi.bmiHeader.biPlanes = 1;
    bi.bmiHeader.biBitCount = kBitsPerPixel;
    bi.bmiHeader.biCompression = BI_RGB;

    HWND hWndDesktop = GetDesktopWindow();
    HDC hDC = GetDC(hWndDesktop);
    if (hDC == NULL)
    {
        return;
    }

    void* pixels = NULL;
    HBITMAP hBitmap = ::CreateDIBSection(0, &bi, DIB_RGB_COLORS, &pixels, NULL, 0);
    HDC hMemDC = CreateCompatibleDC(NULL);

    if (hBitmap != NULL && pixels != NULL && hMemDC != NULL)
    {
        HGDIOBJ hOldBitmap = SelectObject(hMemDC, hBitmap);
        const BOOL bCopied = BitBlt(hMemDC, 0, 0, nWidth, nHeight, hDC, rc.left, rc.top, SRCCOPY);

        // Make sure GDI has finished writing before the DIB bits are read directly.
        GdiFlush();

        // Deselect the bitmap so it is not deleted while still selected into the DC.
        SelectObject(hMemDC, hOldBitmap);

        if (bCopied)
        {
            TessBaseAPISetImage(pTessBaseAPI, static_cast<const unsigned char*>(pixels), nWidth, nHeight, kBytesPerPixel, kBytesPerPixel * nWidth);
            if (TessBaseAPIRecognize(pTessBaseAPI, NULL) == 0)
            {
                char* szText = TessBaseAPIGetUTF8Text(pTessBaseAPI);
                // Todo something with szText

                if (szText != NULL)
                {
                    TessDeleteText(szText);
                }
            }
        }
    }

    if (hMemDC != NULL)
    {
        DeleteDC(hMemDC);
    }
    if (hBitmap != NULL)
    {
        DeleteObject(hBitmap);
    }
    ReleaseDC(hWndDesktop, hDC);
}

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM