简体   繁体   中英

Draw tbitmap with scale and alpha channel faster

The following code copies a large bitmap, blends it with the correct background, and then draws a semi-transparent image with a clipped region to save draw time... The images are stored in an array and are prescaled...

This has been through several levels of optimization based on my limited knowledge of C++ and the Builder graphics...

Edit: Updated code... blend();

// Alpha-blends src into dst, in place, with src's top-left corner at (x,y).
// Per colour byte: dst = (src*alpha + dst*(255-alpha)) >> 8.
//   dst   - destination bitmap, modified in place
//   x,y   - position of src inside dst (may be negative or partly outside)
//   src   - source bitmap
//   alpha - weight of src (0 = keep dst, 255 = mostly src)
// The >>8 divides by 256 instead of 255, so results never exceed 254.
// NOTE(review): assumes both bitmaps are already bmDIB/pf24bit (n=3 bytes per
// pixel); this version does not set the format itself - confirm at the caller.
void blend(Graphics::TBitmap *dst,int x,int y,Graphics::TBitmap *src,BYTE alpha)
{
const int n=3;          // pixel align [Bytes]
int dx0,dy0,dx1,dy1,    // dst BBOX
    sx0,sy0,            // first src pixel that lands inside dst
    dx,dy,sy,i;
BYTE *dp,*sp;
WORD a,_a,da[256],sa[256];

// compute BBOX (handle clipping so no ScanLine row is accessed out of range)
dx=src->Width; dy=src->Height;
dx0=x; sx0=0; dx1=x+dx;
dy0=y; sy0=0; dy1=y+dy;
if (dx0<0){ sx0-=dx0; dx0=0; }
if (dy0<0){ sy0-=dy0; dy0=0; }
dx=dst->Width; dy=dst->Height;
if (dx1>dx) dx1=dx;
if (dy1>dy) dy1=dy;
if ((dx0>=dx1)||(dy0>=dy1)) return;             // src lies completely outside dst

// blend
a=alpha; _a=255-a;
for (i=0;i<256;i++){ da[i]=_a*i; sa[i]=a*i; }   // precompute BYTE*a and BYTE*_a LUTs

for (dy=dy0,sy=sy0;dy<dy1;dy++,sy++)        // ScanLines
    {
    dp=(BYTE*)dst->ScanLine[dy]+(n*dx0);
    sp=(BYTE*)src->ScanLine[sy]+(n*sx0);
    for (dx=dx0;dx<dx1;dx++)                // single ScanLine
     for (i=0;i<n;i++,dp++,sp++)            // RGB
      *dp=BYTE((sa[*sp]+da[*dp])>>8);       // blend function
    }
}

//--------------------------------------------------------------------------

    // Pre-scale Det1_bmp into a list of square bitmaps.
    // The first pushed bitmap is left empty: nothing here sizes or draws into it.
    det1maps.push_back( new Graphics::TBitmap() );
    // 175 more bitmaps, each t x t pixels, with t growing by 24 per step.
    // NOTE(review): det1maps[i] is the bitmap just pushed only if det1maps was
    // empty before this snippet, and t must be initialised earlier - confirm.
    for (int i = 1; i < 176; i++)
    {
        det1maps.push_back( new Graphics::TBitmap() );
        det1maps[i]->SetSize(t,t);
        det1maps[i]->Canvas->StretchDraw(Rect(0, 0, t, t), Det1_bmp.get()); // scale
        t = t + 24;
    }

//------------------EDIT 3 Current version 1/18

det1maps[ss]->Transparent = true;
Form1->imgTemp->Picture->Assign(layer0_bmap.get()); //why background first?
HRGN MyRgn;
MyRgn = ::CreateRectRgn(0,0,Sw,Sh);
::SelectClipRgn(Form1->imgTemp->Canvas->Handle,MyRgn); //clip

Form1->imgTemp->Canvas->Draw(X3,Y3,det1maps[ss]); // draw det

blend(layer0_bmap.get(),0,0,Form1->imgTemp->Picture->Bitmap,int(obj[index]));

Here is a small, simple C++/VCL ScanLine alpha blend example I just put together:

//---------------------------------------------------------------------------
// Alpha-blends src into dst, in place, with src's top-left corner at (x,y).
// Per colour byte: dst = (src*alpha + dst*(255-alpha)) >> 8, so alpha is the
// weight of src. The >>8 divides by 256, not 255: results never exceed 254.
// The rectangle is clipped to dst, and both bitmaps are switched to
// bmDIB / pf24bit before their ScanLine rows are touched.
void blend(Graphics::TBitmap *dst,int x,int y,Graphics::TBitmap *src,BYTE alpha)
    {
    const int bpp=3;                            // bytes per pixel (pf24bit)
    // rectangle of dst covered by src, before clipping
    int left=x,top=y;
    int right =x+src->Width;
    int bottom=y+src->Height;
    // first src pixel that ends up inside dst
    int srcLeft=0,srcTop=0;
    if (left<0){ srcLeft=-left; left=0; }
    if (top <0){ srcTop =-top;  top =0; }
    // clip right/bottom edges to dst
    const int dstW=dst->Width;
    const int dstH=dst->Height;
    if (right >dstW) right =dstW;
    if (bottom>dstH) bottom=dstH;
    // ScanLine[] rows must be 24 bit DIB rows for the byte walk below
    dst->HandleType=bmDIB; dst->PixelFormat=pf24bit;
    src->HandleType=bmDIB; src->PixelFormat=pf24bit;
    // weight tables: srcLut[v]=v*alpha, dstLut[v]=v*(255-alpha)
    WORD srcLut[256],dstLut[256];
    const WORD wSrc=alpha;
    const WORD wDst=255-wSrc;
    for (int v=0;v<256;v++)
        {
        dstLut[v]=wDst*v;
        srcLut[v]=wSrc*v;
        }
    // walk the clipped rows byte by byte (all three channels use the same formula)
    const int rowBytes=bpp*(right-left);
    for (int row=top,srcRow=srcTop;row<bottom;row++,srcRow++)
        {
        BYTE *out=(BYTE*)dst->ScanLine[row]+(bpp*left);
        BYTE *in =(BYTE*)src->ScanLine[srcRow]+(bpp*srcLeft);
        for (int k=0;k<rowBytes;k++,out++,in++)
         *out=BYTE((srcLut[*in]+dstLut[*out])>>8);
        }
    }
//---------------------------------------------------------------------------

I just process the image on per pixel/channel basis and for each channel (R,G,B) compute:

dst_pixel =  ( src_pixel*alpha + dst_pixel*(255-alpha) )/255

where channels and alpha are 8 bit unsigned integers... For speed I used 24 bit pixel format (usually I use 32bit instead).

To avoid *,/ in the blending I precomputed 2 LUTs with all possible combinations of number*alpha and number*(255-alpha). The division is done by the bit shift >>8 (which divides by 256, a fast approximation of the /255 in the formula above).

To improve speed you can store all ScanLine[] pointers of the dst image in your own array once and then use that array, as the target image will be used many times ...

When I tested this by blending two 1024x768 images together it took <=9ms on my setup. The slowest operation is the ScanLine[] access, and the images were formatted to the pixel format prior to blending...

Here GIF preview (scaled down 1/4 and dithered by my capturer so it fits to imgur 2MByte limit):

[image: animation]

This is the code I used for this (single timer VCL App):

//$$---- Form CPP ----
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include "win_main.h"
#include <math.h>
#include <jpeg.hpp>
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TMain *Main;                        // the form instance; draw() paints the result on its Canvas
Graphics::TBitmap *bmp,*bmp0,*bmp1; // back buffer, image0, image1, ...
//---------------------------------------------------------------------------
// Alpha-blends src into dst, in place, with src's top-left corner at (x,y).
// Per colour byte: dst = (src*alpha + dst*(255-alpha)) >> 8, so alpha is the
// weight of src. The >>8 divides by 256, not 255: results never exceed 254.
// The rectangle is clipped to dst, and both bitmaps are switched to
// bmDIB / pf24bit before their ScanLine rows are touched.
void blend(Graphics::TBitmap *dst,int x,int y,Graphics::TBitmap *src,BYTE alpha)
    {
    const int bpp=3;                            // bytes per pixel (pf24bit)
    // rectangle of dst covered by src, before clipping
    int left=x,top=y;
    int right =x+src->Width;
    int bottom=y+src->Height;
    // first src pixel that ends up inside dst
    int srcLeft=0,srcTop=0;
    if (left<0){ srcLeft=-left; left=0; }
    if (top <0){ srcTop =-top;  top =0; }
    // clip right/bottom edges to dst
    const int dstW=dst->Width;
    const int dstH=dst->Height;
    if (right >dstW) right =dstW;
    if (bottom>dstH) bottom=dstH;
    // ScanLine[] rows must be 24 bit DIB rows for the byte walk below
    dst->HandleType=bmDIB; dst->PixelFormat=pf24bit;
    src->HandleType=bmDIB; src->PixelFormat=pf24bit;
    // weight tables: srcLut[v]=v*alpha, dstLut[v]=v*(255-alpha)
    WORD srcLut[256],dstLut[256];
    const WORD wSrc=alpha;
    const WORD wDst=255-wSrc;
    for (int v=0;v<256;v++)
        {
        dstLut[v]=wDst*v;
        srcLut[v]=wSrc*v;
        }
    // walk the clipped rows byte by byte (all three channels use the same formula)
    const int rowBytes=bpp*(right-left);
    for (int row=top,srcRow=srcTop;row<bottom;row++,srcRow++)
        {
        BYTE *out=(BYTE*)dst->ScanLine[row]+(bpp*left);
        BYTE *in =(BYTE*)src->ScanLine[srcRow]+(bpp*srcLeft);
        for (int k=0;k<rowBytes;k++,out++,in++)
         *out=BYTE((srcLut[*in]+dstLut[*out])>>8);
        }
    }
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
void TMain::draw()
    {
    bmp->Canvas->Draw(0,0,bmp0);            // render background bmp0
    static float a=0.0; a+=0.025*M_PI;
    blend(bmp,0,0,bmp1,fabs(255.0*sin(a))); // alfa blend in bmp1
    Main->Canvas->Draw(0,0,bmp);            // show result on screen
    }
//---------------------------------------------------------------------------
// Creates the back buffer and the two images, then decodes the JPEG files into them.
__fastcall TMain::TMain(TComponent* Owner) : TForm(Owner)
    {
    // allocate the bitmaps (released again in FormDestroy)
    bmp =new Graphics::TBitmap;
    bmp0=new Graphics::TBitmap;
    bmp1=new Graphics::TBitmap;
    // decode each file with one shared temporary JPEG object
    Graphics::TBitmap *target[2]={ bmp0,bmp1 };
    const char *file[2]={ "img0.jpg","img1.jpg" };
    TJPEGImage *decoder=new TJPEGImage;
    for (int i=0;i<2;i++)
        {
        decoder->LoadFromFile(file[i]);
        target[i]->Assign(decoder);
        }
    delete decoder;
    }
//---------------------------------------------------------------------------
// Releases the bitmaps allocated in the constructor.
void __fastcall TMain::FormDestroy(TObject *Sender)
    {
    // same order as before: image0, image1, back buffer
    Graphics::TBitmap *owned[3]={ bmp0,bmp1,bmp };
    for (int i=0;i<3;i++) delete owned[i];
    }
//---------------------------------------------------------------------------
// Keeps the back buffer the same size as the form's client area.
void __fastcall TMain::FormResize(TObject *Sender)
    {
    const int w=ClientWidth;
    const int h=ClientHeight;
    bmp->Width =w;
    bmp->Height=h;
    }
//---------------------------------------------------------------------------
// Paint handler: renders a frame through draw().
void __fastcall TMain::FormPaint(TObject *Sender)
    {
    draw();
    }
//---------------------------------------------------------------------------
// Timer handler: renders the next frame through draw(), which also advances the animation.
void __fastcall TMain::tim_redrawTimer(TObject *Sender)
    {
    draw();
    }
//---------------------------------------------------------------------------

And here the images (first nice 1024x768 images I found on Google images):

img0

img1

Here preview of Blending result:

[image: blending result]

For more info about the ScanLine see:

If you need even more speed then you should go for GPU Blending ( OpenGL or DirectX ).

[Edit2] array + rectangle example

After you edited your question it is now obvious:

  1. your array of bitmaps is not an array at all

    It is rather some kind of list template like vector<Graphics::TBitmap*> or similar... So you do not have access to a linear array of the bitmaps like I do. To make your life easier I used my own template with similar properties so you can see how to handle those (sorry, I cannot share the template code, but you just need to change List<T> into vector<T> or whatever you are using) ...

    This is the reason why the array pointer did not work for you: you do not have one. Possibly your template exposes it through some member. Mine does it as map.dat, so yours might have something similar, or nothing at all if the elements are not stored linearly.

  2. You are blending just 2 images not the whole array

    so you can use the first example and add the ScanLine preloading as your images are static... Do the same for backbuffer image as that changes only after resize.

When I put all together here the result:

//$$---- Form CPP ----
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include "win_main.h"
#include <math.h>
#include <jpeg.hpp>
#include "list.h"           // mine list<T> template you got probably vector<> or something similar instead
#include "performance.h"    // this is mine tbeg/tend/tstr time measurement
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TMain *Main;
//---------------------------------------------------------------------------
// [back buffer]
Graphics::TBitmap *bmp;             // bitmap the frame is composed into before it is drawn on the form
BYTE **bmp_pyx=NULL;                // preloaded ScanLines [y][x]; built by bmp_init(), freed by bmp_exit()
void bmp_init()                     // cache the ScanLine pointer of every back buffer row in bmp_pyx
    {
    const int rows=bmp->Height;
    bmp_pyx=new BYTE*[rows];
    for (int row=0;row<rows;row++)
     bmp_pyx[row]=(BYTE*)bmp->ScanLine[row];
    }
void bmp_exit()                     // release preloaded ScanLines
    {
    delete[] bmp_pyx;
    }
//---------------------------------------------------------------------------
// [array of images]
const AnsiString filename[]=        // filenames; the empty string marks the end of the list for map_init()
    {
    "img0.jpg",
    "img1.jpg",
    "img2.jpg",
    "img3.jpg",
    "img4.jpg",
    "img5.jpg",
    "img6.jpg",
    "img7.jpg",
    "img8.jpg",
    "img9.jpg",
    ""
    };
List<Graphics::TBitmap*> map;       // your "array" of bitmaps (filled by map_init())
int maps=0;                         // number of images loaded by map_init()
BYTE ***map_pyx=NULL;               // preloaded ScanLines [ix][y][x] (ix = image index in map)
//---------------------------------------------------------------------------
void map_init()                     // alocate and prepare data
    {
    int i,y;
    Graphics::TBitmap *bmp;
    TJPEGImage *jpg=new TJPEGImage;
    // create "array" of bmp (you already got this)
    for (maps=0;filename[maps]!="";maps++)
        {
        map.add(new Graphics::TBitmap); // this is like your push_back(new Graphics::TBitmap)
        jpg->LoadFromFile(filename[maps]);  // filename[] -> jpg -> bmp -> map[]
        map[maps]->Assign(jpg);             // here you can also rescale or whatever you want to do...
        map[maps]->HandleType=bmDIB;
        map[maps]->PixelFormat=pf24bit;
        }
    // create preloaded ScanLines (you need to add this into your app init)
    map_pyx=new BYTE**[maps];                   // **map_pyx[]
    for (i=0;i<maps;i++)
        {
        map_pyx[i]=new BYTE*[map[i]->Height];   // *map_pyx[][]
        for (y=0;y<map[i]->Height;y++)          // map_pyx[][]]
         map_pyx[i][y]=(BYTE*)map[i]->ScanLine[y];
        }
    delete jpg;
    }
//---------------------------------------------------------------------------
void map_exit()                     // release data (you need to add this in app exit)
    {
    int i;
    for (i=0;i<maps;i++)
        {
        delete   map[i];
        delete[] map_pyx[i];
        }
    delete[] map_pyx;
    }
//---------------------------------------------------------------------------
// Alpha-blends the pixel rectangle [x0,x1) x [y0,y1) of sp into dp, in place.
// dp and sp are tables of row pointers to 3-byte pixels; the same coordinates
// are used for both. Per byte: dst = (src*alpha + dst*(255-alpha)) >> 8.
// No range checks: the rectangle must lie inside both images.
void blend_rec(BYTE **dp,int x0,int y0,int x1,int y1,BYTE **sp,BYTE alpha)
    {
    const int bpp=3;                    // bytes per pixel
    // weight tables: srcLut[v]=v*alpha, dstLut[v]=v*(255-alpha)
    WORD srcLut[256],dstLut[256];
    const int wSrc=alpha;
    const int wDst=255-alpha;
    for (int v=0;v<256;v++)
        {
        srcLut[v]=v*wSrc;
        dstLut[v]=v*wDst;
        }
    // pixel columns -> byte offsets inside a row
    const int first=x0*bpp;
    const int count=(x1*bpp)-first;
    for (int row=y0;row<y1;row++)
        {
        BYTE *out=dp[row]+first;
        const BYTE *in=sp[row]+first;
        for (int k=0;k<count;k++)
         out[k]=BYTE((srcLut[in[k]]+dstLut[out[k]])>>8);
        }
    }
//---------------------------------------------------------------------------
void TMain::draw()
    {
    bmp->Canvas->Draw(0,0,map[0]);              // render background bmp[0]
    static float a=0.0; a+=0.025*M_PI;          // animation ...
    BYTE alpha=128+float(127.0*sin(a));
    tbeg();
    blend_rec(bmp_pyx,200,500,400,600,map_pyx[1],alpha);    // add the blended rectangle (except background which is bmp[0]
    tend(); Caption=tstr();
    Canvas->Draw(0,0,bmp);                      // show on screen
//  bmp->SaveToFile("out.bmp");
    }
//---------------------------------------------------------------------------
// Creates the back buffer, then builds its ScanLine table and the image list.
__fastcall TMain::TMain(TComponent* Owner) : TForm(Owner)
    {
    // create bitmaps
    bmp=new Graphics::TBitmap;
    bmp_init();     // reads bmp->Height, so bmp must exist first
    map_init();     // loads the images named in filename[] and their ScanLine tables
    }
//---------------------------------------------------------------------------
// Releases the back buffer, its ScanLine table and the image list data.
void __fastcall TMain::FormDestroy(TObject *Sender)
    {
    // delete bitmaps
    delete bmp;
    bmp_exit();     // frees only the pointer table (bmp_pyx), it does not touch bmp
    map_exit();     // deletes the images in map and their ScanLine tables
    }
//---------------------------------------------------------------------------
// Resizes the back buffer to the client area and rebuilds its ScanLine table.
void __fastcall TMain::FormResize(TObject *Sender)
    {
    const int w=ClientWidth;
    const int h=ClientHeight;
    bmp->Width =w;
    bmp->Height=h;
    // blend_rec() walks 3-byte pixels, so keep the buffer a 24 bit DIB
    bmp->HandleType=bmDIB;
    bmp->PixelFormat=pf24bit;
    // the cached row pointers belong to the old size: drop and rebuild them
    bmp_exit();
    bmp_init();
    }
//---------------------------------------------------------------------------
// Paint handler: renders a frame through draw().
void __fastcall TMain::FormPaint(TObject *Sender)
    {
    draw();
    }
//---------------------------------------------------------------------------
// Timer handler: renders the next frame through draw(), which also advances the animation.
void __fastcall TMain::tim_redrawTimer(TObject *Sender)
    {
    draw();
    }
//---------------------------------------------------------------------------

The blending is done in less than 0.5ms on my setup for the rectangle I chose. As you can see, it is way faster than the original 9ms ... That is because with a clipping region you still blend the whole image and just do not copy the result. This approach blends and copies only what is needed.

Beware: I removed the range checks, so make sure the rectangle lies inside both images ...

If you want to measure the time the same way I do, this is the code I use:

Performance.h:

//---------------------------------------------------------------------------
//--- Performance counter time measurement: 2.01 ----------------------------
//---------------------------------------------------------------------------
#ifndef _performance_h
#define _performance_h
//---------------------------------------------------------------------------
const int   performance_max=64;                 // number of start-time slots (nesting levels of tbeg())
double      performance_Tms=-1.0,               // counter period [ms]; <=0 means not measured yet
            performance_tms=0.0,                // time measured by the last tend()/tper() [ms]
            performance_t0[performance_max];    // measured start times [ms]
int         performance_ix=-1;                  // index of the current start time
//---------------------------------------------------------------------------
void tbeg(double *t0=NULL)  // mesure start time
    {
    double t;
    LARGE_INTEGER i;
    if (performance_Tms<=0.0)
        {
        for (int j=0;j<performance_max;j++) performance_t0[j]=0.0;
        QueryPerformanceFrequency(&i); performance_Tms=1000.0/double(i.QuadPart);
        }
    QueryPerformanceCounter(&i); t=double(i.QuadPart); t*=performance_Tms;
    if (t0) { t0[0]=t; return; }
    performance_ix++;
    if ((performance_ix>=0)&&(performance_ix<performance_max)) performance_t0[performance_ix]=t;
    }
//---------------------------------------------------------------------------
void tpause(double *t0=NULL)    // stop counting time between tbeg()..tend() calls
    {
    double t;
    LARGE_INTEGER i;
    QueryPerformanceCounter(&i); t=double(i.QuadPart); t*=performance_Tms;
    if (t0) { t0[0]=t-t0[0]; return; }
    if ((performance_ix>=0)&&(performance_ix<performance_max)) performance_t0[performance_ix]=t-performance_t0[performance_ix];
    }
//---------------------------------------------------------------------------
void tresume(double *t0=NULL)   // resume counting time between tbeg()..tend() calls
    {
    double t;
    LARGE_INTEGER i;
    QueryPerformanceCounter(&i); t=double(i.QuadPart); t*=performance_Tms;
    if (t0) { t0[0]=t-t0[0]; return; }
    if ((performance_ix>=0)&&(performance_ix<performance_max)) performance_t0[performance_ix]=t-performance_t0[performance_ix];
    }
//---------------------------------------------------------------------------
double tend(double *t0=NULL)    // return duration [ms] between matching tbeg()..tend() calls
    {
    double t;
    LARGE_INTEGER i;
    QueryPerformanceCounter(&i); t=double(i.QuadPart); t*=performance_Tms;
    if (t0) { t-=t0[0]; performance_tms=t; return t; }
    if ((performance_ix>=0)&&(performance_ix<performance_max)) t-=performance_t0[performance_ix]; else t=0.0;
    performance_ix--;
    performance_tms=t;
    return t;
    }
//---------------------------------------------------------------------------
double tper(double *t0=NULL)    // return duration [ms] between tper() calls
    {
    double t,tt;
    LARGE_INTEGER i;
    if (performance_Tms<=0.0)
        {
        for (int j=0;j<performance_max;j++) performance_t0[j]=0.0;
        QueryPerformanceFrequency(&i); performance_Tms=1000.0/double(i.QuadPart);
        }
    QueryPerformanceCounter(&i); t=double(i.QuadPart); t*=performance_Tms;
    if (t0) { tt=t-t0[0]; t0[0]=t; performance_tms=tt; return tt; }
    performance_ix++;
    if ((performance_ix>=0)&&(performance_ix<performance_max))
        {
        tt=t-performance_t0[performance_ix];
        performance_t0[performance_ix]=t;
        }
    else { t=0.0; tt=0.0; };
    performance_ix--;
    performance_tms=tt;
    return tt;
    }
//---------------------------------------------------------------------------
AnsiString tstr()               // last measured time formatted as text in brackets with " ms"
    {
    AnsiString text;
    text=text.sprintf("%8.3lf",performance_tms);
    while (text.Length()<8) text=" "+text;  // left-pad to at least 8 characters
    text="["+text+" ms]";
    return text;
    }
//---------------------------------------------------------------------------
AnsiString tstr(int N)          // last measured time divided by N, formatted like tstr()
    {
    const double average=performance_tms/double(N);
    AnsiString text;
    text=text.sprintf("%8.3lf",average);
    while (text.Length()<8) text=" "+text;  // left-pad to at least 8 characters
    text="["+text+" ms]";
    return text;
    }
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------
#endif
//---------------------------------------------------------------------------
//---------------------------------------------------------------------------

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM