简体   繁体   中英

How to draw a single bounding box on each line, crop the bounding box, and save the image in a folder with OpenCV in Python

I am using the repo https://github.com/mindee/doctr for OCR. I ran inference on the following image.

输入图像

The following predicted image is shown below预测图像

import json

from doctr.io import DocumentFile

from doctr.models import ocr_predictor



# Build the docTR OCR predictor, requesting pretrained weights.
model = ocr_predictor(pretrained=True)

# Load the input as an image: from_images is given a JPEG path here, not a PDF.

doc = DocumentFile.from_images("/content/passbook_82_0.jpeg")

# Run the predictor on the loaded document.

result = model(doc)

# Write the exported predictions to a JSON file.
# NOTE(review): assumes result.export() returns a JSON-serializable structure — confirm against docTR docs.

with open("/content/preds.json", "w") as f:

    json.dump(result.export(), f)

The following JSON file stores the coordinates and confidence values. Each word has a geometry given as [[xmin, ymin], [xmax, ymax]] in relative coordinates.

{"pages": [{"page_idx": 0, "dimensions": [294, 632], "orientation": {"value": null, "confidence": null}, "language": {"value": null, "confidence": null}, "blocks": [{"geometry": [[0.0341796875, 0.169921875], [0.9775390625, 0.8896484375]], "lines": [{"geometry": [[0.0341796875, 0.169921875], [0.9775390625, 0.8896484375]], "words": [{"value": "anrdla", "confidence": 0.2445085644721985, "geometry": [[0.11328125, 0.1708984375], [0.1923828125, 0.212890625]]}, {"value": "veedas", "confidence": 0.16714346408843994, "geometry": [[0.1845703125, 0.169921875], [0.2744140625, 0.2158203125]]}, {"value": "State", "confidence": 0.9990315437316895, "geometry": [[0.642578125, 0.1748046875], [0.6982421875, 0.2236328125]]}, {"value": "Bank", "confidence": 0.999847412109375, "geometry": [[0.6982421875, 0.1767578125], [0.7529296875, 0.2236328125]]}, {"value": "of", "confidence": 0.9996934533119202, "geometry": [[0.7548828125, 0.1796875], [0.783203125, 0.22265625]]}, {"value": "India", "confidence": 0.9772151708602905, "geometry": [[0.78125, 0.177734375], [0.8388671875, 0.2236328125]]}, {"value": "REGULAR", "confidence": 0.8577464818954468, "geometry": [[0.0380859375, 0.2744140625], [0.1318359375, 0.310546875]]}, {"value": "SAVINGS", "confidence": 0.994958758354187, "geometry": [[0.140625, 0.27734375], [0.2314453125, 0.310546875]]}, {"value": "BANK", "confidence": 0.9993945956230164, "geometry": [[0.240234375, 0.27734375], [0.296875, 0.3134765625]]}, {"value": "ACCOUNT", "confidence": 0.9985705018043518, "geometry": [[0.3056640625, 0.279296875], [0.39453125, 0.3125]]}, {"value": "DARANGAMELA", "confidence": 0.4595634639263153, "geometry": [[0.73046875, 0.283203125], [0.8720703125, 0.314453125]]}, {"value": "No", "confidence": 0.9307771921157837, "geometry": [[0.0869140625, 0.322265625], [0.1201171875, 0.3564453125]]}, {"value": "CIF", "confidence": 0.9934298992156982, "geometry": [[0.03515625, 0.3212890625], [0.083984375, 0.3583984375]]}, {"value": "88537466750", "confidence": 
0.9332917332649231, "geometry": [[0.3037109375, 0.3232421875], [0.4443359375, 0.361328125]]}, {"value": "DARANGAMELA", "confidence": 0.5046369433403015, "geometry": [[0.7294921875, 0.328125], [0.87109375, 0.359375]]}, {"value": "-", "confidence": 0.9411769509315491, "geometry": [[0.1826171875, 0.361328125], [0.1923828125, 0.3740234375]]}, {"value": "Account", "confidence": 0.9907050728797913, "geometry": [[0.0390625, 0.3564453125], [0.130859375, 0.3896484375]]}, {"value": "No", "confidence": 0.9819487929344177, "geometry": [[0.138671875, 0.3544921875], [0.1796875, 0.390625]]}, {"value": "20306950151", "confidence": 0.23563283681869507, "geometry": [[0.3076171875, 0.35546875], [0.4404296875, 0.3876953125]]}, {"value": "Customer", "confidence": 0.9558802843093872, "geometry": [[0.0380859375, 0.38671875], [0.1435546875, 0.4189453125]]}, {"value": "Name:", "confidence": 0.9982724785804749, "geometry": [[0.15234375, 0.3876953125], [0.2177734375, 0.421875]]}, {"value": "RASHO", "confidence": 0.6698806285858154, "geometry": [[0.2783203125, 0.388671875], [0.3466796875, 0.4228515625]]}, {"value": "Ms.", "confidence": 0.9627761840820312, "geometry": [[0.2265625, 0.388671875], [0.2685546875, 0.423828125]]}, {"value": "MANDAL", "confidence": 0.998543381690979, "geometry": [[0.353515625, 0.3916015625], [0.4326171875, 0.4248046875]]}, {"value": "S/D/W/H/O:BISWAMITRA", "confidence": 0.8553118705749512, "geometry": [[0.04296875, 0.4755859375], [0.296875, 0.5126953125]]}, {"value": "MANDAL", "confidence": 0.9796141386032104, "geometry": [[0.3056640625, 0.4794921875], [0.3828125, 0.51171875]]}, {"value": "Address:VILL-", "confidence": 0.7488418221473694, "geometry": [[0.041015625, 0.517578125], [0.2041015625, 0.5517578125]]}, {"value": "-", "confidence": 0.9209287166595459, "geometry": [[0.21484375, 0.5244140625], [0.236328125, 0.5498046875]]}, {"value": "CHUCHUNGJULI", "confidence": 0.5298648476600647, "geometry": [[0.2841796875, 0.5244140625], [0.4267578125, 0.552734375]]}, 
{"value": "NO", "confidence": 0.9970822334289551, "geometry": [[0.2392578125, 0.51953125], [0.2724609375, 0.5546875]]}, {"value": "Phone:2:284238", "confidence": 0.8328949809074402, "geometry": [[0.7275390625, 0.5283203125], [0.8818359375, 0.5634765625]]}, {"value": "SIDDHINATHPUR", "confidence": 0.5255517959594727, "geometry": [[0.208984375, 0.5703125], [0.3681640625, 0.5986328125]]}, {"value": "P.0-", "confidence": 0.7063982486724854, "geometry": [[0.138671875, 0.5654296875], [0.19921875, 0.6044921875]]}, {"value": "TAMULPUR", "confidence": 0.992978572845459, "geometry": [[0.44140625, 0.5703125], [0.54296875, 0.6025390625]]}, {"value": "P.S-", "confidence": 0.9322847127914429, "geometry": [[0.3779296875, 0.568359375], [0.4345703125, 0.6044921875]]}, {"value": "Email:sbi.091470sb", "confidence": 0.41425037384033203, "geometry": [[0.7275390625, 0.5732421875], [0.9580078125, 0.611328125]]}, {"value": "DIST-", "confidence": 0.9592916369438171, "geometry": [[0.138671875, 0.61328125], [0.2109375, 0.650390625]]}, {"value": "BAKSA", "confidence": 0.9974198341369629, "geometry": [[0.2158203125, 0.61328125], [0.28515625, 0.650390625]]}, {"value": "ASSAM", "confidence": 0.9992387890815735, "geometry": [[0.2900390625, 0.61328125], [0.3603515625, 0.650390625]]}, {"value": "781360", "confidence": 0.9988314509391785, "geometry": [[0.3671875, 0.6123046875], [0.4453125, 0.6494140625]]}, {"value": "Branch", "confidence": 0.9992311596870422, "geometry": [[0.7255859375, 0.6201171875], [0.806640625, 0.6572265625]]}, {"value": "Code:91471", "confidence": 0.6900002956390381, "geometry": [[0.814453125, 0.6220703125], [0.939453125, 0.6611328125]]}, {"value": "Phone:", "confidence": 0.9992220997810364, "geometry": [[0.0390625, 0.65625], [0.1171875, 0.6943359375]]}, {"value": "Date", "confidence": 0.9955922961235046, "geometry": [[0.7236328125, 0.6630859375], [0.7822265625, 0.7021484375]]}, {"value": "of", "confidence": 0.9997377991676331, "geometry": [[0.787109375, 0.6630859375], 
[0.8212890625, 0.703125]]}, {"value": "Issue:30/06", "confidence": 0.8237747550010681, "geometry": [[0.8310546875, 0.66796875], [0.9716796875, 0.70703125]]}, {"value": "Email:", "confidence": 0.9961839914321899, "geometry": [[0.0380859375, 0.701171875], [0.115234375, 0.7392578125]]}, {"value": "40/06/2020", "confidence": 0.2974379360675812, "geometry": [[0.70703125, 0.6982421875], [0.8583984375, 0.7548828125]]}, {"value": "5529182", "confidence": 0.9492482542991638, "geometry": [[0.8798828125, 0.712890625], [0.9775390625, 0.75390625]]}, {"value": "D.0.B.", "confidence": 0.8802379369735718, "geometry": [[0.0380859375, 0.75], [0.1171875, 0.787109375]]}, {"value": "Minor):", "confidence": 0.9770660996437073, "geometry": [[0.177734375, 0.7451171875], [0.2705078125, 0.78515625]]}, {"value": "(If", "confidence": 0.4771254062652588, "geometry": [[0.1279296875, 0.7431640625], [0.173828125, 0.7880859375]]}, {"value": "IFSC:SBINOOO009147", "confidence": 0.5092097520828247, "geometry": [[0.7314453125, 0.7568359375], [0.93359375, 0.7919921875]]}, {"value": "MOP.:", "confidence": 0.9868963360786438, "geometry": [[0.0341796875, 0.7919921875], [0.103515625, 0.830078125]]}, {"value": "RIMLON1781002512", "confidence": 0.2585943341255188, "geometry": [[0.705078125, 0.798828125], [0.9072265625, 0.83984375]]}, {"value": "No.:0000000203836166", "confidence": 0.8759520649909973, "geometry": [[0.1669921875, 0.8408203125], [0.4189453125, 0.87890625]]}, {"value": "Nom.", "confidence": 0.9955904483795166, "geometry": [[0.0361328125, 0.84765625], [0.091796875, 0.8837890625]]}, {"value": "Reg.", "confidence": 0.9835382699966431, "geometry": [[0.0986328125, 0.8447265625], [0.1572265625, 0.8896484375]]}, {"value": "BranchoMana", "confidence": 0.3897829055786133, "geometry": [[0.6943359375, 0.8486328125], [0.7919921875, 0.8857421875]]}, {"value": "DATION", "confidence": 0.5874566435813904, "geometry": [[0.796875, 0.8515625], [0.8818359375, 0.888671875]]}]}], "artefacts": []}]}]}

I want to draw a bounding box on each line and save each cropped line image in a folder using OpenCV, as shown below. How can I do that?

我要的图

You could grab a random bounding box, select, for example, its upper right corner, and find all bounding boxes whose upper right corner lies within a set number of pixels of it. From this subset of bounding boxes, take the coordinates from the upper-leftmost pixel to the lower-rightmost pixel.

Remove the sub set from the original set of bounding boxes and repeat until set is empty.

I'm sure there are smarter solutions, but this should work if all documents are as well formatted as this one.

Good luck!

Pseudo code:

line_boxes = []
while items left in bounding_boxes:
    box = bounding_boxes.pop()
    line = []
    for other_box in bounding_boxes:
        if (other_box.upper_right_corner - box.upper_right_corner) < tolerance:
            line.append(other_box)
    
    line_boxes.append(line)

final_lines = []
for line in line_boxes:
    upper_right = get_upper_right_coordinate(line)
    lower_left = get_lower_left_coordinate(line)
    cropped_line = crop_box_from_original_image(upper_right, lower_left)
    final_lines.append(cropped_line)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM