[英]How can I rotate an image based on object position?
首先,很抱歉帖子的長度。
我正在開展一個基於葉子圖像對植物進行分類的項目。 為了減少數據的方差,我需要旋轉圖像,使莖在圖像底部水平對齊(270 度)。
到目前為止我在哪里...
到目前為止,我所做的是創建一個閾值圖像,然后從中找到輪廓,並在目標物體周圍擬合一個橢圓(在許多情況下,橢圓並沒有包住整個物體,因此漏掉了莖……)。之后,我在橢圓長軸和短軸的端點處創建 4 個區域,並找出像素值總和最小的區域。這是基於這樣的假設:莖必定位於這些端點之一,因此該區域的前景像素最少(主要是因為它周圍都是 0)。但這顯然沒有像我想的那樣工作。
之后,我以兩種不同的方式計算旋轉角度。第一種使用 atan2,它只需要我想要移動的點(像素最少區域的質心)以及目標點,其中 x = image width / 2、y = height。這種方法在某些情況下有效,但在大多數情況下,我沒有得到所需的角度:有時需要負角度,它卻給出正角度,結果莖跑到了頂部。在其他一些情況下,它更是徹底失敗。
我的第二種方法是嘗試基於 3 個點計算角度:圖像中心、像素最少區域的質心和 270º 參考點,然后使用 arccos function,並將其結果轉換為度數。
這兩種方法對我來說都失敗了。
問題
這是一些示例和我得到的結果(二進制掩碼)。 矩形表示我正在比較的區域,橢圓上的紅線是橢圓的長軸,粉紅色圓圈是最小區域內的質心,紅色圓圈表示 270º 參考點(角度) ,白點代表圖像的中心。
我目前的解決方案
def brightness_distortion(I, mu, sigma):
    """Return the per-pixel brightness scale of ``I`` relative to ``mu``.

    Computes ``sum(I * mu / sigma**2) / sum((mu / sigma)**2)`` over the last
    axis, i.e. the sigma-weighted least-squares factor ``alpha`` such that
    ``alpha * mu`` best matches ``I``.

    Args:
        I: Array-like whose last axis holds the channels.
        mu: Array-like broadcastable against ``I`` (per-channel reference).
        sigma: Array-like broadcastable against ``I`` (per-channel spread).
            Zero entries are not guarded against and yield inf/NaN.

    Returns:
        ``numpy.ndarray`` with the last axis of ``I`` reduced away.
    """
    # Work in float64: with integer inputs (e.g. uint8 images) the product
    # I * mu would silently wrap around, and plain lists would not multiply.
    I = np.asarray(I, dtype=np.float64)
    mu = np.asarray(mu, dtype=np.float64)
    sigma = np.asarray(sigma, dtype=np.float64)
    weighted_projection = np.sum(I * mu / sigma**2, axis=-1)
    weighted_norm = np.sum((mu / sigma) ** 2, axis=-1)
    return weighted_projection / weighted_norm
def chromacity_distortion(I, mu, sigma):
    """Return the per-pixel chromaticity distortion of ``I`` relative to ``mu``.

    The brightness scale ``alpha`` is obtained from ``brightness_distortion``;
    the result is the sigma-normalised Euclidean distance between ``I`` and
    ``alpha * mu`` taken over the last (channel) axis.

    Args:
        I: Array-like whose last axis holds the channels.
        mu: Array-like broadcastable against ``I`` (per-channel reference).
        sigma: Array-like broadcastable against ``I`` (per-channel spread).

    Returns:
        ``numpy.ndarray`` with the last axis of ``I`` reduced away.
    """
    # Float64 avoids integer wrap-around when all inputs are integer arrays.
    I = np.asarray(I, dtype=np.float64)
    mu = np.asarray(mu, dtype=np.float64)
    sigma = np.asarray(sigma, dtype=np.float64)
    # Re-append the channel axis so alpha broadcasts against mu.
    alpha = brightness_distortion(I, mu, sigma)[..., None]
    residual = (I - alpha * mu) / sigma
    return np.sqrt(np.sum(residual**2, axis=-1))
def bwareafilt(image):
    """Keep only the largest 4-connected foreground component of ``image``.

    Args:
        image: Array convertible to ``uint8``; non-zero pixels are foreground.

    Returns:
        A float array (``np.zeros`` default dtype) shaped like the label map,
        with 255 on the largest component and 0 elsewhere. When the image has
        no foreground component the result is all zeros.
    """
    image = image.astype(np.uint8)
    nb_components, labels, stats, _centroids = cv2.connectedComponentsWithStats(
        image, connectivity=4
    )
    largest = np.zeros(labels.shape)
    # Label 0 is the background; with fewer than two labels there is no
    # foreground at all (the original indexed sizes[1] and raised IndexError).
    if nb_components < 2:
        return largest
    # The last stats column is the component area. argmax returns the first
    # maximum, matching the strict '>' comparison of a manual scan.
    areas = stats[1:, -1]
    max_label = 1 + int(np.argmax(areas))
    largest[labels == max_label] = 255
    return largest
def _chroma_threshold(img, channel):
    """Adaptive-threshold the chromaticity distortion of ``img`` derived from ``channel``."""
    coarse = cv2.adaptiveThreshold(
        channel, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 401, 10
    )
    # Colour statistics are taken over the pixels where the coarse threshold is 0.
    mean, stdev = cv2.meanStdDev(img, mask=255 - coarse)
    chrom = chromacity_distortion(img, mean.ravel(), stdev.ravel())
    chrom255 = cv2.normalize(
        chrom, chrom, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX
    ).astype(np.uint8)[:, :, None]
    return cv2.adaptiveThreshold(
        chrom255, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 401, 10
    )


def _axis_endpoints(xc, yc, angle_deg, radius):
    """Return both ends of the axis through (xc, yc); the end with the larger x comes first."""
    first = (
        xc + math.cos(math.radians(angle_deg)) * radius,
        yc + math.sin(math.radians(angle_deg)) * radius,
    )
    second = (
        xc + math.cos(math.radians(angle_deg + 180)) * radius,
        yc + math.sin(math.radians(angle_deg + 180)) * radius,
    )
    return (first, second) if first[0] >= second[0] else (second, first)


def _clamped_window(x, y, half_size, shape):
    """Return (x_min, y_min, x_max, y_max) of the square around (x, y), clamped to ``shape``."""
    height, width = shape[0], shape[1]
    x_min = int(0 if x - half_size < 0 else x - half_size)
    x_max = int(width - 1 if x + half_size > width else x + half_size)
    y_min = int(0 if y - half_size < 0 else y - half_size)
    y_max = int(height - 1 if y + half_size > height else y + half_size)
    return x_min, y_min, x_max, y_max


def get_thresholded_rotated(im_path):
    """Segment the object in the image at ``im_path`` and rotate its mask.

    Steps: read and resize the image to 600x800, build a binary mask from the
    chromaticity distortion of the saturation and value channels, fit an
    ellipse to the largest contour, sum the mask inside a square window at
    each end of the ellipse's major and minor axes, and take the window with
    the smallest sum as the stem candidate. The mask is then rotated about
    its centre so that the direction from the image centre to that candidate
    points straight down.

    Side effects: prints debug values and draws two matplotlib figures
    (``plt.imshow`` / ``plt.figure``).

    Args:
        im_path: Path passed to ``cv2.imread``.

    Returns:
        ``PIL.Image.Image`` holding the rotated binary mask.

    Raises:
        ValueError: If no contour is found in the mask.
    """
    # read image
    img = cv2.imread(im_path)
    # The original passed PIL's Image.BILINEAR (integer 2) here, which OpenCV
    # interprets as bicubic; use OpenCV's own bilinear flag instead.
    img = cv2.resize(img, (600, 800), interpolation=cv2.INTER_LINEAR)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    sat = cv2.medianBlur(hsv[:, :, 1], 11)
    val = cv2.medianBlur(hsv[:, :, 2], 11)

    # thresholded image
    thresh = cv2.bitwise_and(
        _chroma_threshold(img, sat), cv2.bitwise_not(_chroma_threshold(img, val))
    )

    # find contours and keep the largest (findContours returns 2 or 3 values
    # depending on the OpenCV version)
    found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    if len(contours) == 0:
        raise ValueError("no contours found in the thresholded image")
    big_contour = max(contours, key=cv2.contourArea)

    # fit ellipse to the largest contour
    ellipse = cv2.fitEllipse(big_contour)
    (xc, yc), (d1, d2), angle = ellipse
    print('thresh shape: ', thresh.shape)
    rmajor = max(d1, d2) / 2
    rminor = min(d1, d2) / 2
    origi_angle = angle
    angle = angle - 90 if angle > 90 else angle + 90

    # end points of the line drawn along the major axis
    xtop = xc + math.cos(math.radians(angle)) * rmajor
    ytop = yc + math.sin(math.radians(angle)) * rmajor
    xbot = xc + math.cos(math.radians(angle + 180)) * rmajor
    ybot = yc + math.sin(math.radians(angle + 180)) * rmajor

    # Candidate points in the order the region sums are compared:
    # major axis (larger x, smaller x), then minor axis (larger x, smaller x).
    major_first, major_second = _axis_endpoints(xc, yc, angle, rmajor)
    minor_first, minor_second = _axis_endpoints(xc, yc, origi_angle, rminor)
    print('-----')
    print(major_first[0], major_first[1])

    # Square windows of half-size rect_size centred on each candidate point.
    rect_size = 100
    windows = [
        _clamped_window(px, py, rect_size, thresh.shape)
        for px, py in (major_first, major_second, minor_first, minor_second)
    ]
    region_sums = [
        np.sum(thresh[y_min:y_max, x_min:x_max])
        for x_min, y_min, x_max, y_max in windows
    ]
    min_arg = np.argmin(np.array(region_sums))
    print('min: ', min_arg)

    # Point of interest: the column and row with the highest mean inside the
    # least-populated window.
    x_min, y_min, x_max, y_max = windows[int(min_arg)]
    region = thresh[y_min:y_max, x_min:x_max]
    center_x = x_min + np.argmax(region.mean(axis=0))
    center_y = y_min + np.argmax(region.mean(axis=1))

    height, width = thresh.shape[0], thresh.shape[1]

    # draw ellipse, axis, reference points and windows on a copy of the input
    result = img.copy()
    cv2.ellipse(result, ellipse, (0, 0, 255), 1)
    cv2.line(result, (int(xtop), int(ytop)), (int(xbot), int(ybot)), (255, 0, 0), 1)
    cv2.circle(result, (int(xc), int(yc)), 10, (255, 255, 255), -1)
    cv2.circle(result, (int(center_x), int(center_y)), 10, (255, 0, 255), 5)
    cv2.circle(result, (int(width / 2), int(height - 1)), 10, (255, 0, 0), 5)
    window_colors = ((255, 0, 0), (255, 255, 0), (255, 0, 0), (255, 255, 0))
    for (wx_min, wy_min, wx_max, wy_max), color in zip(windows, window_colors):
        cv2.rectangle(result, (wx_min, wy_min), (wx_max, wy_max), color, 3)
    plt.imshow(result)
    plt.figure()

    # rotate the image
    rot_img = Image.fromarray(thresh)
    bot_point_x = int(width / 2)
    poi_x = int(center_x)
    poi_y = int(center_y)
    im_center_x = int(width / 2)
    im_center_y = int(height - 1) / 2
    print('print: ', abs(poi_x - bot_point_x))

    # Signed angle that brings the centre->poi direction onto "straight down".
    # Image y grows downwards and PIL rotates counter-clockwise for positive
    # angles, hence atan2(-dx, dy): 0 when the point is already below the
    # centre, +90 when it is to the left, -90 when it is to the right.
    # The original used arccos, which drops the sign, measured against the
    # upward direction, and so turned a bottom stem to the top.
    theta = math.atan2(-(poi_x - im_center_x), poi_y - im_center_y)
    print('theta: ', theta)
    rot_angle = math.degrees(theta)

    result = Image.fromarray(result)
    result = result.rotate(rot_angle)
    plt.imshow(result)
    plt.figure()
    rot_img = rot_img.rotate(rot_angle)
    return rot_img
# Run the pipeline on im_path (not defined in this excerpt) and show the result.
rot_img = get_thresholded_rotated(im_path)
plt.imshow(rot_img)
在此先感謝。
---編輯---
旋轉圖像。 找到最大的輪廓。 使用矩,找到該輪廓的中心。 將圖像分成左右部分(注意:先應用 cv2.blur(img, (5, 5)) 會產生更好的結果):
翻轉右側。 覆蓋左右部分:
使用 cv2.absdiff() 測量左半部分和(翻轉后的)右半部分之間的差異。 由於葉子具有左右對稱性,當葉子的莖(或脊柱)垂直時差異最小。
注意:會有兩個最小值; 當莖向上時一次,當莖向下時一次......
這適用於大多數葉子,只要它們有莖。 所以這里是檢測和旋轉一張葉子圖像的旋轉的概念:
找到葉子的近似輪廓。 由於莖的尖端通常屬於葉子的凸包(外部點) ,因此找到輪廓的凸包。
遍歷屬於葉子凸包的輪廓索引。 對於每個索引,計算 3 個點之間的角度:索引之前的輪廓中的點、索引處的輪廓中的點和索引之后的輪廓中的點。
計算出的最小角度所在的點就是莖的尖端。 每次循環找到一個更小的角度時,將這三個點存儲在一個元組中;當找到最小的角度后,使用尖端兩側兩個坐標的中點和莖的尖端來計算莖所指向的角度。
隨着檢測到的莖的角度,我們可以相應地旋轉圖像。
import cv2
import numpy as np
def process(img):
    """Turn a BGR image into a binary edge map for contour detection.

    The image is converted to grayscale, Gaussian-blurred, passed through
    Canny edge detection, dilated twice and eroded once with a 5x5 kernel.
    """
    kernel = np.ones((5, 5))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 2)
    edges = cv2.Canny(blurred, 127, 47)
    # Dilate then erode so that broken edge segments join into closed shapes.
    dilated = cv2.dilate(edges, kernel, iterations=2)
    return cv2.erode(dilated, kernel, iterations=1)
def get_contours(img):
    """Return a polygonal approximation of the largest contour in ``img``.

    The contour is searched in the binary image produced by ``process`` and
    simplified with a tolerance of 1% of its perimeter.

    Raises:
        ValueError: If no contour is found.
    """
    found = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # findContours returns (contours, hierarchy) or (image, contours,
    # hierarchy) depending on the OpenCV version; accept both.
    contours = found[0] if len(found) == 2 else found[1]
    if len(contours) == 0:
        raise ValueError("no contours found in the image")
    cnt = max(contours, key=cv2.contourArea)
    peri = cv2.arcLength(cnt, True)
    return cv2.approxPolyDP(cnt, 0.01 * peri, True)
def get_angle(a, b, c):
    """Return the angle in degrees at vertex ``b`` formed by points ``a`` and ``c``.

    Args:
        a, b, c: NumPy arrays of equal shape holding point coordinates.

    Returns:
        Angle in the range [0, 180]. NaN if ``a`` or ``c`` coincides with ``b``.
    """
    ba, bc = a - b, c - b
    cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    # Rounding can push the cosine of (anti)parallel vectors marginally
    # outside [-1, 1], which would make arccos return NaN.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)
    return np.degrees(np.arccos(cos_angle))
def get_rot_angle(img):
    """Return the rotation angle in degrees derived from the sharpest hull corner.

    For every convex-hull vertex of the approximated contour, the angle formed
    with its two neighbouring contour points is measured; the vertex with the
    smallest angle is taken as the tip. The returned value is 180 minus the
    ``arctan2`` direction from the tip to the midpoint of its two neighbours.

    Raises:
        ValueError: If no hull vertex has an angle below 180 degrees.
    """
    contours = get_contours(img)
    length = len(contours)
    min_angle = 180
    tip = None  # previously left unbound when no angle beat the initial 180
    for i in cv2.convexHull(contours, returnPoints=False).ravel():
        # i - 1 wraps to the last point for i == 0; the modulo wraps the other end.
        a, b, c = contours[[i - 1, i, (i + 1) % length], 0]
        angle = get_angle(a, b, c)
        if angle < min_angle:
            min_angle = angle
            tip = a, b, c
    if tip is None:
        raise ValueError("no convex-hull corner with an angle below 180 degrees")
    a, b, c = tip
    return 180 - np.degrees(np.arctan2(*(np.mean((a, c), 0) - b)))
def rotate(img):
    """Rotate a 3-dimensional image array about its centre by ``get_rot_angle(img)``.

    The output keeps the input's width and height.
    """
    height, width, _channels = img.shape
    center = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(center, get_rot_angle(img), 1)
    return cv2.warpAffine(img, matrix, (width, height), flags=cv2.INTER_LINEAR)
# Load the sample image, rotate it and display the result until a key is pressed.
img = cv2.imread("leaf.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("leaf.jpg")
cv2.imshow("Image", rotate(img))
cv2.waitKey(0)
以下是您提供的每個示例圖像的輸出:
分解代碼:
import cv2
import numpy as np
process
# process: turn the image into a binary image so that the program can detect
# the contour of the leaf accurately:
def process(img):
    """Turn a BGR image into a binary edge map for contour detection.

    The image is converted to grayscale, Gaussian-blurred, passed through
    Canny edge detection, dilated twice and eroded once with a 5x5 kernel.
    """
    kernel = np.ones((5, 5))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 2)
    edges = cv2.Canny(blurred, 127, 47)
    # Dilate then erode so that broken edge segments join into closed shapes.
    dilated = cv2.dilate(edges, kernel, iterations=2)
    return cv2.erode(dilated, kernel, iterations=1)
get_contours
,得到圖像中最大輪廓的近似輪廓,使用前面定義的process
# get_contours: get the approximated contour of the largest contour in the
# image, using the process function:
def get_contours(img):
    """Return a polygonal approximation of the largest contour in ``img``.

    The contour is searched in the binary image produced by ``process`` and
    simplified with a tolerance of 1% of its perimeter.

    Raises:
        ValueError: If no contour is found.
    """
    found = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # findContours returns (contours, hierarchy) or (image, contours,
    # hierarchy) depending on the OpenCV version; accept both.
    contours = found[0] if len(found) == 2 else found[1]
    if len(contours) == 0:
        raise ValueError("no contours found in the image")
    cnt = max(contours, key=cv2.contourArea)
    peri = cv2.arcLength(cnt, True)
    return cv2.approxPolyDP(cnt, 0.01 * peri, True)
get_angle
# get_angle: get the angle between 3 points:
def get_angle(a, b, c):
    """Return the angle in degrees at vertex ``b`` formed by points ``a`` and ``c``.

    Args:
        a, b, c: NumPy arrays of equal shape holding point coordinates.

    Returns:
        Angle in the range [0, 180]. NaN if ``a`` or ``c`` coincides with ``b``.
    """
    ba, bc = a - b, c - b
    cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    # Rounding can push the cosine of (anti)parallel vectors marginally
    # outside [-1, 1], which would make arccos return NaN.
    cos_angle = np.clip(cos_angle, -1.0, 1.0)
    return np.degrees(np.arccos(cos_angle))
get_rot_angle
,以獲取圖像需要旋轉的度數。 它通過使用之前定義的get_angle
# get_rot_angle determines the angle with the get_angle function: among the
# convex-hull points of the leaf, it finds the one whose angle with its 2
# surrounding contour points is the smallest:
def get_rot_angle(img):
    """Return the rotation angle in degrees derived from the sharpest hull corner.

    For every convex-hull vertex of the approximated contour, the angle formed
    with its two neighbouring contour points is measured; the vertex with the
    smallest angle is taken as the tip. The returned value is 180 minus the
    ``arctan2`` direction from the tip to the midpoint of its two neighbours.

    Raises:
        ValueError: If no hull vertex has an angle below 180 degrees.
    """
    contours = get_contours(img)
    length = len(contours)
    min_angle = 180
    tip = None  # previously left unbound when no angle beat the initial 180
    for i in cv2.convexHull(contours, returnPoints=False).ravel():
        # i - 1 wraps to the last point for i == 0; the modulo wraps the other end.
        a, b, c = contours[[i - 1, i, (i + 1) % length], 0]
        angle = get_angle(a, b, c)
        if angle < min_angle:
            min_angle = angle
            tip = a, b, c
    if tip is None:
        raise ValueError("no convex-hull corner with an angle below 180 degrees")
    a, b, c = tip
    return 180 - np.degrees(np.arctan2(*(np.mean((a, c), 0) - b)))
get_rot_angle
function 定義一個 function, rotate
# rotate: rotate the image about its centre:
def rotate(img):
    """Rotate a 3-dimensional image array about its centre by ``get_rot_angle(img)``.

    The output keeps the input's width and height.
    """
    height, width, _channels = img.shape
    center = (width / 2, height / 2)
    matrix = cv2.getRotationMatrix2D(center, get_rot_angle(img), 1)
    return cv2.warpAffine(img, matrix, (width, height), flags=cv2.INTER_LINEAR)
rotate
# Read the image, apply the rotate function and display the rotated image:
img = cv2.imread("leaf.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("leaf.jpg")
cv2.imshow("Image", rotate(img))
cv2.waitKey(0)
這就是我的意思,這需要改進。 這會沿着頂部邊緣每 5 個像素繪制一條穿過圖像中心的假想線,然后沿着左邊緣每 5 個像素繪制一條假想線,將線兩側的像素值相加,並打印最小和最大比率。 元組的第四個值應該是旋轉角度。
from PIL import Image
import numpy as np
import math
from pprint import pprint
def rads(degs):
    """Convert an angle from degrees to radians."""
    return degs * math.pi / 180.0
# Open the photo, resize it to 640x480 and convert it to PIL mode 'L' (grayscale).
clr = Image.open('20210210_155311.jpg').resize((640,480)).convert('L')
# Array view of the image; the ratio helpers index it as data[row, column].
data = np.asarray(clr)
def ratio_lr(data, left, right):
    """Return the ratio of pixel sums left and right of a slanted dividing line.

    The line crosses the first row at column ``left`` and moves linearly
    towards column ``right`` over the rows of ``data``. Each row is split at
    the truncated column position; pixels before it count as "left", the
    rest as "right".

    Args:
        data: 2-D NumPy array indexed as ``data[row, column]``.
        left: Column where the line starts on the first row.
        right: Column the line heads towards over the height of the image.

    Returns:
        ``left_sum / right_sum`` as a float. When the right sum is zero the
        result is ``inf``/``-inf`` by the sign of the left sum, or NaN when
        both sums are zero.
    """
    step = (right - left) / data.shape[0]
    boundary = left
    lsum = 0
    rsum = 0
    for row in data:
        split = int(boundary)
        # .item() yields plain Python numbers, so the totals do not depend on
        # NumPy's scalar promotion rules and integer sums stay exact.
        lsum += row[:split].sum().item()
        rsum += row[split:].sum().item()
        boundary += step
    if rsum == 0:
        # Avoid ZeroDivisionError while keeping IEEE-style results.
        if lsum == 0:
            return float("nan")
        return float("inf") if lsum > 0 else float("-inf")
    return lsum / rsum
def ratio_tb(data, top, bottom):
    """Return the ratio of pixel sums above and below a slanted dividing line.

    The line crosses the first column at row ``top`` and moves linearly
    towards row ``bottom`` over the columns of ``data``. Each column is split
    at the truncated row position; pixels before it count as "top", the rest
    as "bottom".

    Args:
        data: 2-D NumPy array indexed as ``data[row, column]``.
        top: Row where the line starts on the first column.
        bottom: Row the line heads towards over the width of the image.

    Returns:
        ``top_sum / bottom_sum`` as a float. When the bottom sum is zero the
        result is ``inf``/``-inf`` by the sign of the top sum, or NaN when
        both sums are zero.
    """
    step = (bottom - top) / data.shape[1]
    boundary = top
    tsum = 0
    bsum = 0
    for col in range(data.shape[1]):
        split = int(boundary)
        # .item() yields plain Python numbers, so the totals do not depend on
        # NumPy's scalar promotion rules and integer sums stay exact.
        tsum += data[:split, col].sum().item()
        bsum += data[split:, col].sum().item()
        boundary += step
    if bsum == 0:
        # Avoid ZeroDivisionError while keeping IEEE-style results.
        if tsum == 0:
            return float("nan")
        return float("inf") if tsum > 0 else float("-inf")
    return tsum / bsum
# Image centre in array coordinates (data is indexed as [row, column]).
midx = data.shape[1] // 2
midy = data.shape[0] // 2

# Each entry is (ratio, dx, dy, angle in degrees).
track = []

# Lines through the centre whose end on the top edge moves in 5-pixel steps.
for dx in range(-midx, midx, 5):
    # dx == 0 would divide by zero; that line is given 90 degrees directly.
    angle = 90 if dx == 0 else math.atan(midy / dx) * 180 / math.pi
    track.append((ratio_lr(data, midx + dx, midx - dx), dx, 0, angle))

# Lines through the centre whose end on the left edge moves in 5-pixel steps.
for dy in range(-midy, midy, 5):
    angle = math.atan(dy / midx) * 180 / math.pi
    track.append((ratio_tb(data, midy + dy, midy - dy), 0, dy, angle))

pprint(track)
print(min(track))
print(max(track))
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.