简体   繁体   中英

How to add background image in pdf using Pymupdf module in python

I am trying to add a background image to a PDF using PyMuPDF, but it creates a layer between the PDF content and the image, as you can see in the output.

How can I bypass (remove) the layer between the PDF and the background image? Please help me regarding this.

This is how I am adding the background image in the pdf here:

import fitz

# Insert background.png behind the existing content of the first page of a PDF.
# (The original post attached a screenshot of the resulting output here.)
pdf_name = '3_giberish template.pdf'
# Destination file; the original snippet used this name without defining it,
# so adjust the path as needed.
output_file_path = 'output.pdf'

# Open the document once (the second open from an undefined name was dropped).
doc = fitz.open(pdf_name)
# open page first
page = doc.load_page(0)
background_img_filename = 'background.png'
# insert background image to the full page
full_img_rect = fitz.Rect(0, 0, 650, 792)
# overlay=False places the image behind the page's existing content
page.insert_image(full_img_rect, filename=background_img_filename, overlay=False)
# save doc
doc.save(output_file_path, garbage=4, deflate=True, clean=True)
doc.close()
print("completed")

Hi, this answer might not be optimal, but I think it will help you.
First, you need to convert the PDF page to an RGBA image. Then every white pixel is made transparent so that it disappears when the text is overlaid onto the background.
After that, if the background is smaller than the text image, scale it up so that the text fits inside it.
Finally, put the text image on top of the background and export the result to PDF.
Below are the images I tested.
text.jpg
文字.jpg
background.jpg 背景.jpg res.png
res.png

"""
pip install opencv-python
pip install pymupdf
pip install Pillow
"""

import fitz
import cv2
import numpy as np
from PIL import Image


def pix2np(pix):
    """Turn a pixmap's raw sample buffer into a contiguous 3-channel array.

    ``pix.samples`` is viewed as a ``(pix.h, pix.w, pix.n)`` uint8 array and
    channels 2, 1, 0 are selected in that order, which reverses the colour
    order of the first three channels (RGB samples come out as BGR).
    Any channel past the third is not carried over.
    """
    flat = np.frombuffer(pix.samples, dtype=np.uint8)
    pixels = flat.reshape(pix.h, pix.w, pix.n)
    # Pick channels in reversed order (rgb to bgr); np.take copies the data.
    reordered = np.take(pixels, (2, 1, 0), axis=2)
    return np.ascontiguousarray(reordered)

def resize(img, scale_percent):
    """Return ``img`` rescaled to ``scale_percent`` percent of its size.

    ``scale_percent`` is a percentage (100 keeps the size unchanged). Both
    target dimensions are truncated to integers, and the image is resampled
    with ``cv2.INTER_AREA``.
    """
    rows, cols = img.shape[0], img.shape[1]
    # cv2.resize takes the target size as (width, height).
    target_size = (
        int(cols * scale_percent / 100),
        int(rows * scale_percent / 100),
    )
    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

# Render every page of text.pdf, make its near-white pixels transparent, and
# composite the cropped text centred onto background.jpg. Each page is saved
# as '<page_num>_res.pdf'; 'res.png' holds the composite of the last page.
# fitz to opencv image
# https://study.marearts.com/2020/04/pdf-to-opencv-as-page-by-page-using.html
with fitz.open('text.pdf') as doc:
    # Read the background once instead of once per page. cv2.imread returns
    # None rather than raising when the file is missing or unreadable.
    background_src = cv2.imread('background.jpg')
    if background_src is None:
        raise FileNotFoundError('background.jpg')
    background_src = cv2.cvtColor(background_src, cv2.COLOR_BGR2BGRA)

    for page_num, page in enumerate(doc.pages()):
        mat = fitz.Matrix(1, 1)
        pix = page.get_pixmap(matrix=mat)
        im = pix2np(pix)

        # white border removed and keep the text
        # https://stackoverflow.com/a/49907762/7828101
        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        gray = 255 * (gray < 128).astype(np.uint8)  # invert the text to white
        coords = cv2.findNonZero(gray)  # Find all non-zero points (text)
        if coords is None:
            # No dark pixels at all (blank page): keep the whole page rather
            # than crashing in boundingRect.
            x, y, w, h = 0, 0, im.shape[1], im.shape[0]
        else:
            # Minimum bounding box spanning all text pixels.
            x, y, w, h = cv2.boundingRect(coords)
        # Crop the original (colour) image, not the thresholded one.
        rect = im[y:y + h, x:x + w]

        # convert white background to transparent background: a pixel whose
        # B, G and R values are all >= 250 gets alpha 0.
        new_img = cv2.cvtColor(rect, cv2.COLOR_BGR2BGRA)
        near_white = (new_img[:, :, :3] >= 250).all(axis=2)
        new_img[near_white, 3] = 0

        fg_h, fg_w = new_img.shape[:2]

        # paste result image to background; work on a copy because the blend
        # below writes into the array in place.
        background = background_src.copy()
        bg_h, bg_w = background.shape[:2]
        if bg_h < fg_h or bg_w < fg_w:
            # resize() expects a percentage, so use the smallest whole percent
            # (integer ceiling division) that makes both dimensions large
            # enough to hold the text image.
            scale_percent = max(-(-100 * fg_h // bg_h), -(-100 * fg_w // bg_w))
            background = resize(background, scale_percent)
            bg_h, bg_w = background.shape[:2]

        # Top-left corner that centres the text image on the background.
        y_position = (bg_h - fg_h) // 2
        x_position = (bg_w - fg_w) // 2

        # Merge two image
        # https://stackoverflow.com/a/14102014/7828101
        alpha_s = new_img[:, :, 3:] / 255.0  # (h, w, 1), broadcasts over B, G, R
        alpha_l = 1.0 - alpha_s
        region = background[y_position:y_position + fg_h,
                            x_position:x_position + fg_w]
        # Alpha-blend the colour channels; region is a view, so this updates
        # background itself.
        region[:, :, :3] = (alpha_s * new_img[:, :, :3] +
                            alpha_l * region[:, :, :3])

        cv2.imwrite('res.png', background)
        background = cv2.cvtColor(background, cv2.COLOR_BGRA2RGB)
        im_pil = Image.fromarray(background)
        im_pil.save('{}_res.pdf'.format(page_num))

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM