简体   繁体   中英

How to make tesseract-ocr read from coordinates on a screen?

I have been trying to find an example of how to write a class/function that reads text from the screen at specified coordinates.

Something simple that would use bitblt to capture the specified section of the screen and run tesseract on it. All done in memory without having to create image files to disk.

Tesseract seems to have a really poor API and requires a TIF image, of all things. As far as I can see, it can't even be made to accept an in-memory bitmap without extensive delving into its code.

Any help would be appreciated, an actual example would be ideal.

http://i.imgur.com/HaJ2zOI.png 在此输入图像描述

The example below shows how to use Tesseract-OCR with images held in memory.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

#if defined _WIN32 || defined _WIN64
#include <windows.h>
#endif

/**
 * Owns a block of raw pixel bytes together with its dimensions and bit depth.
 *
 * On Windows an Image is built from a device context (see the HDC
 * constructor); the accessors expose exactly the values that
 * tesseract::TessBaseAPI::SetImage needs (pixel pointer, width, height,
 * bytes per pixel, bytes per scan line).
 */
class Image
{
    private:
        std::vector<std::uint8_t> Pixels;  // raw pixel bytes, row after row
        std::uint32_t width = 0;           // width in pixels
        std::uint32_t height = 0;          // height in pixels
        std::uint16_t BitsPerPixel = 0;    // bit depth of a single pixel

        // Copies the rows of In into Out in reverse row order.
        void Flip(void* In, void* Out, int width, int height, unsigned int Bpp);

    public:
        #if defined _WIN32 || defined _WIN64
        // Captures the Width x Height area whose top-left corner is (X, Y) in DC.
        explicit Image(HDC DC, int X, int Y, int Width, int Height);
        #endif

        // All accessors are const so that they can be used on a const Image.
        inline std::uint16_t GetBitsPerPixel() const {return this->BitsPerPixel;}
        inline std::uint16_t GetBytesPerPixel() const {return this->BitsPerPixel / 8;}

        // Bytes in one row, without any row padding. Returned as a 32-bit value:
        // a 16-bit result would wrap once width * bytes-per-pixel exceeds 65535.
        inline std::uint32_t GetBytesPerScanLine() const {return static_cast<std::uint32_t>(this->BitsPerPixel / 8) * this->width;}

        inline int GetWidth() const {return this->width;}
        inline int GetHeight() const {return this->height;}

        // Pointer to the first pixel byte; stays valid for as long as this Image lives.
        inline const std::uint8_t* GetPixels() const {return this->Pixels.data();}
};

/**
 * Copies a pixel buffer into Out with the order of its rows reversed
 * (last input row becomes the first output row, and so on).
 *
 * @param In     source buffer holding `height` rows
 * @param Out    destination buffer of the same size; must not overlap In
 * @param width  row width in pixels
 * @param height number of rows
 * @param Bpp    bits per pixel; above 24 a row is width * 4 bytes, otherwise
 *               width * 3 bytes plus width % 4 bytes of row padding
 *
 * Does nothing when a pointer is null or a dimension is not positive.
 */
void Image::Flip(void* In, void* Out, int width, int height, unsigned int Bpp)
{
   if (In == nullptr || Out == nullptr || width <= 0 || height <= 0)
      return;

   const std::size_t Columns = static_cast<std::size_t>(width);
   const std::size_t Rows = static_cast<std::size_t>(height);
   const std::size_t Chunk = (Bpp > 24 ? Columns * 4 : Columns * 3 + Columns % 4);

   const unsigned char* First = static_cast<const unsigned char*>(In);
   unsigned char* Destination = static_cast<unsigned char*>(Out);

   // Walk every row, including input row 0. The previous pointer-comparison
   // loop stopped before copying row 0, leaving the last output row unset,
   // and never terminated validly for height == 0.
   for (std::size_t Row = 0; Row < Rows; ++Row)
   {
      std::memcpy(Destination, First + Chunk * (Rows - 1 - Row), Chunk);
      Destination += Chunk;
   }
}

#if defined _WIN32 || defined _WIN64
/**
 * Captures a rectangle of a device context into memory as 32-bit pixels.
 *
 * The area is copied with BitBlt into a temporary compatible bitmap and then
 * read back with GetDIBits as a top-down DIB, so the first bytes of the pixel
 * buffer belong to the top row of the captured area.
 *
 * @param DC     device context to copy from
 * @param X      left edge of the area inside DC
 * @param Y      top edge of the area inside DC
 * @param Width  width of the area in pixels, must be positive
 * @param Height height of the area in pixels, must be positive
 *
 * @throws std::runtime_error if the size is invalid, DC has no bitmap, or any
 *         GDI call fails. All GDI objects created here are released on every
 *         path, including when an exception is thrown.
 */
Image::Image(HDC DC, int X, int Y, int Width, int Height) : Pixels(), width(Width), height(Height), BitsPerPixel(32)
{
    if (Width <= 0 || Height <= 0)
        throw std::runtime_error("INVALID CAPTURE SIZE.");

    BITMAP Bmp = {0};
    HBITMAP hBmp = reinterpret_cast<HBITMAP>(GetCurrentObject(DC, OBJ_BITMAP));

    if (GetObject(hBmp, sizeof(BITMAP), &Bmp) == 0)
        throw std::runtime_error("BITMAP DC NOT FOUND.");

    // Small scope guards: destructors run in reverse declaration order, i.e.
    // bitmap first, then the memory DC, then the screen DC.
    struct ScreenDC
    {
        HDC Handle;
        ~ScreenDC() {if (Handle) ReleaseDC(nullptr, Handle);}
    };
    struct MemoryDC
    {
        HDC Handle;
        ~MemoryDC() {if (Handle) DeleteDC(Handle);}
    };
    struct OwnedBitmap
    {
        HBITMAP Handle;
        ~OwnedBitmap() {if (Handle) DeleteObject(Handle);}
    };

    ScreenDC Screen = {GetDC(nullptr)};
    if (!Screen.Handle)
        throw std::runtime_error("COULD NOT GET SCREEN DC.");

    MemoryDC Memory = {CreateCompatibleDC(Screen.Handle)};
    if (!Memory.Handle)
        throw std::runtime_error("COULD NOT CREATE MEMORY DC.");

    OwnedBitmap Target = {CreateCompatibleBitmap(Screen.Handle, Width, Height)};
    if (!Target.Handle)
        throw std::runtime_error("COULD NOT CREATE BITMAP.");

    // Keep the DC's previous bitmap so it can be put back; it must not be deleted.
    HGDIOBJ Previous = SelectObject(Memory.Handle, Target.Handle);
    if (!Previous)
        throw std::runtime_error("COULD NOT SELECT BITMAP.");

    const BOOL Copied = BitBlt(Memory.Handle, 0, 0, Width, Height, DC, X, Y, SRCCOPY);

    // GetDIBits requires that the bitmap is not selected into a DC, and a
    // bitmap should not be deleted while selected, so deselect it right away.
    SelectObject(Memory.Handle, Previous);

    if (!Copied)
        throw std::runtime_error("COULD NOT COPY PIXELS.");

    // Rows of a DIB are padded to a multiple of four bytes.
    const std::size_t Stride = ((static_cast<std::size_t>(width) * BitsPerPixel + 31) / 32) * 4;
    this->Pixels.resize(Stride * height);

    BITMAPINFO Info = {};
    Info.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    Info.bmiHeader.biWidth = static_cast<LONG>(Width);
    Info.bmiHeader.biHeight = -static_cast<LONG>(Height); // negative height requests top-down rows
    Info.bmiHeader.biPlanes = 1;
    Info.bmiHeader.biBitCount = BitsPerPixel;
    Info.bmiHeader.biCompression = BI_RGB;
    Info.bmiHeader.biSizeImage = static_cast<DWORD>(this->Pixels.size());

    if (GetDIBits(Memory.Handle, Target.Handle, 0, static_cast<UINT>(Height), this->Pixels.data(), &Info, DIB_RGB_COLORS) == 0)
        throw std::runtime_error("COULD NOT READ PIXELS.");
}
#endif

/**
 * Demo: grabs the 200x200 area at the top-left corner of the desktop, runs
 * Tesseract on it and prints the recognised text to standard output.
 *
 * @return 0 on success, 1 if capturing or recognition failed or the platform
 *         is not supported (the reason is written to standard error).
 */
int main()
{
    #if defined _WIN32 || defined _WIN64
    try
    {
        // Releases the window DC when it goes out of scope, including when
        // the Image constructor throws.
        struct WindowDC
        {
            HWND Window;
            HDC Handle;
            ~WindowDC() {if (Handle) ReleaseDC(Window, Handle);}
        };

        // The DC is only needed while the screenshot is taken, so it lives
        // inside the lambda and is released before OCR starts.
        Image Img = []() -> Image
        {
            HWND SomeWindowHandle = GetDesktopWindow();
            WindowDC Screen = {SomeWindowHandle, GetDC(SomeWindowHandle)};

            if (!Screen.Handle)
                throw std::runtime_error("Could not get the desktop device context.");

            return Image(Screen.Handle, 0, 0, 200, 200); //screenshot of 0, 0, 200, 200..
        }();

        std::unique_ptr<tesseract::TessBaseAPI> tesseract_ptr(new tesseract::TessBaseAPI());

        // Data path and language are two separate arguments; Init returns 0 on success.
        // NOTE(review): adjust the data path to where the traineddata files are installed.
        if (tesseract_ptr->Init("/tesseract/tessdata", "eng") != 0)
            throw std::runtime_error("Could not initialise tesseract.");

        tesseract_ptr->SetImage(Img.GetPixels(), Img.GetWidth(), Img.GetHeight(), Img.GetBytesPerPixel(), Img.GetBytesPerScanLine());

        // GetUTF8Text hands over a new[]-allocated string; it may be null on failure.
        std::unique_ptr<char[]> utf8_text_ptr(tesseract_ptr->GetUTF8Text());

        if (!utf8_text_ptr)
            throw std::runtime_error("Tesseract returned no text.");

        std::cout<<utf8_text_ptr.get()<<"\n";

        return 0;
    }
    catch (const std::exception& Error)
    {
        std::cerr<<Error.what()<<"\n";
        return 1;
    }
    #else
    // Image only provides a constructor for Windows device contexts.
    std::cerr<<"Screen capture is only implemented for Windows.\n";
    return 1;
    #endif
}

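You can do it like this on Windows, using the Tesseract C API. (The snippet assumes `pTessBaseAPI` is a `TessBaseAPI*` that has already been created and initialized elsewhere.)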

#include <tesseract/capi.h>
#include <windows.h>

/**
 * Captures the screen rectangle rc into a 32-bit top-down DIB section and runs
 * Tesseract recognition on it through the C API handle pTessBaseAPI.
 *
 * @param rc screen rectangle to read; nothing is done if it is empty or inverted
 *
 * Returns silently if a GDI call or the recognition fails. Every GDI object
 * acquired here is released on all paths.
 */
void ReadFromScreen(RECT rc)
{
#define BITS_PER_PIXEL   32
#define BYTES_PER_PIXEL  (BITS_PER_PIXEL / 8)
    const int nWidth = rc.right - rc.left;
    const int nHeight = rc.bottom - rc.top;
    if (nWidth <= 0 || nHeight <= 0)
    {
        return;
    }

    HWND hWndDesktop = GetDesktopWindow();
    HDC hDC = GetDC(hWndDesktop);
    if (hDC == NULL)
    {
        return;
    }

    BITMAPINFO bi = {};
    bi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
    bi.bmiHeader.biWidth = nWidth;
    bi.bmiHeader.biHeight = -nHeight; // negative height: rows are stored top-down
    bi.bmiHeader.biPlanes = 1;
    bi.bmiHeader.biBitCount = BITS_PER_PIXEL;
    bi.bmiHeader.biCompression = BI_RGB;

    void* pixels = NULL;
    HBITMAP hBitmap = ::CreateDIBSection(NULL, &bi, DIB_RGB_COLORS, &pixels, NULL, 0);
    HDC hMemDC = CreateCompatibleDC(NULL);

    if (hBitmap != NULL && pixels != NULL && hMemDC != NULL)
    {
        // Remember the DC's original bitmap so it can be restored before cleanup.
        HBITMAP hOldBitmap = static_cast<HBITMAP>(SelectObject(hMemDC, hBitmap));

        if (BitBlt(hMemDC, 0, 0, nWidth, nHeight, hDC, rc.left, rc.top, SRCCOPY))
        {
            // Make sure GDI has finished writing before the bits are read directly.
            GdiFlush();

            // At 32 bits per pixel a row is exactly nWidth * 4 bytes, no padding.
            TessBaseAPISetImage(pTessBaseAPI, static_cast<const unsigned char*>(pixels), nWidth, nHeight, BYTES_PER_PIXEL, BYTES_PER_PIXEL * nWidth);
            if (TessBaseAPIRecognize(pTessBaseAPI, NULL) == 0)
            {
                char* szText = TessBaseAPIGetUTF8Text(pTessBaseAPI);
                // Todo something with szText

                if (szText != NULL)
                {
                    TessDeleteText(szText);
                }
            }
        }

        // A bitmap must not be deleted while it is still selected into a DC.
        if (hOldBitmap != NULL)
        {
            SelectObject(hMemDC, hOldBitmap);
        }
    }

    if (hMemDC != NULL)
    {
        DeleteDC(hMemDC);
    }
    if (hBitmap != NULL)
    {
        DeleteObject(hBitmap);
    }
    ReleaseDC(hWndDesktop, hDC); // the DC obtained with GetDC was never released before
}

The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM