[英]Wrong offsets when displaying multiple VNRecognizedObjectObservation boundingBoxes using SwiftUI
我正在使用 Vision 来检测对象,在获得[VNRecognizedObjectObservation]
之后,我在显示它们之前转换了规范化的矩形:
let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -CGFloat(height))
VNImageRectForNormalizedRect(normalizedRect, width, height) // Displayed with SwiftUI, that's why I'm applying transform
.applying(transform)
宽高来自SwiftUI GeometryReader:
Image(...)
.resizable()
.scaledToFit()
.overlay {
GeometryReader { geometry in // ZStack and ForEach([VNRecognizedObjectObservation], id: \.uuid), then:
let calculatedRect = calculateRect(boundingBox, geometry)
Rectangle()
.frame(width: calculatedRect.width, height: calculatedRect.height)
.offset(x: calculatedRect.origin.x, y: calculatedRect.origin.y)
}
}
但问题是即使在正方形图像上,许多框的位置也不正确(而有些是准确的)。
这与 model 无关,因为当我在 Xcode Model 预览部分尝试时,相同的图像(使用相同的 MLModel)具有非常准确的 BB。
我的应用程序中的示例图片:
Xcode 中的示例图片预览:
将此代码作为macOS SwiftUI
项目放在ContentView.swift
中,同时将YOLOv3Tiny.mlmodel放在项目包中将产生相同的结果。
import SwiftUI
import Vision
import CoreML
// MARK: - Question code (asker's original version)
// Runs a YOLOv3Tiny CoreML model through Vision and publishes the raw
// observations. The mis-placed boxes discussed in this post come from how the
// SwiftUI view consumes `deNormalize`'s output, not from Vision itself.
class Detection: ObservableObject {
// Hard-coded sample image; the force-unwrap is tolerable for a known-valid literal URL.
let imgURL = URL(string: "https://i.imgur.com/EqsxxTc.jpg")! // Xcode preview generates this: https://i.imgur.com/6IPNQ8b.png
// Latest detections; updated on the main queue by the request's completion handler.
@Published var objects: [VNRecognizedObjectObservation] = []
// Loads the compiled model ("mlmodelc") from the app bundle and wraps it for Vision.
// Returns nil on any failure; note the `try?` calls silently discard the underlying
// error, which makes a missing or broken model hard to diagnose.
func getModel() -> VNCoreMLModel? {
if let modelURL = Bundle.main.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") {
if let mlModel = try? MLModel(contentsOf: modelURL, configuration: MLModelConfiguration()) {
return try? VNCoreMLModel(for: mlModel)
}
}
return nil
}
// Downloads the image, runs the Vision request, and publishes the results.
// NOTE(review): `fatalError` turns two recoverable failures (missing model,
// failed image load) into a crash, and the final `try?` swallows any error
// from `perform` — the answer below replaces both with `throws`.
func detect() async {
guard let model = getModel(), let tiff = NSImage(contentsOf: imgURL)?.tiffRepresentation else {
fatalError("Either YOLOv3Tiny.mlmodel is not in project bundle, or image failed to load.")
// YOLOv3Tiny: https://ml-assets.apple.com/coreml/models/Image/ObjectDetection/YOLOv3Tiny/YOLOv3Tiny.mlmodel
}
// Completion hops to the main queue because `objects` drives the UI.
let request = VNCoreMLRequest(model: model) { (request, error) in
DispatchQueue.main.async {
self.objects = (request.results as? [VNRecognizedObjectObservation]) ?? []
}
}
// `perform` runs the request synchronously on the calling thread.
try? VNImageRequestHandler(data: tiff).perform([request])
}
// Converts a Vision normalized rect (origin at bottom-left, 0...1 range) into
// view coordinates: scale up to the view's size, then flip the Y axis (scale
// by -1 and translate by the height) so the origin is top-left as SwiftUI
// expects. `Int(...)` truncates fractional geometry sizes — up to 1pt of error.
func deNormalize(_ rect: CGRect, _ geometry: GeometryProxy) -> CGRect {
let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -CGFloat(geometry.size.height))
return VNImageRectForNormalizedRect(rect, Int(geometry.size.width), Int(geometry.size.height)).applying(transform)
}
}
// Question code: draws a red rectangle over each detected object.
// NOTE(review): `.offset` shifts each Rectangle from where the ZStack centers
// it — not from the top-left corner — which is why many boxes land in the
// wrong place; the answer switches to `.position` (or drops the ZStack).
struct ContentView: View {
@StateObject var detection = Detection()
var body: some View {
AsyncImage(url: detection.imgURL) { img in
img.resizable().scaledToFit().overlay {
// GeometryReader reports the fitted image's size used for de-normalizing.
GeometryReader { geometry in
ZStack {
ForEach(detection.objects, id: \.uuid) { object in
let rect = detection.deNormalize(object.boundingBox, geometry)
Rectangle()
.stroke(lineWidth: 2)
.foregroundColor(.red)
.frame(width: rect.width, height: rect.height)
.offset(x: rect.origin.x, y: rect.origin.y)
}
}
}
}
} placeholder: {
ProgressView()
}
.onAppear {
// Fire-and-forget: any detection failure cannot reach the UI from here.
Task { await self.detection.detect() }
}
}
}
编辑:进一步测试显示 Vision 返回的位置是正确的,我的deNormalize()
函数返回的位置和大小也是正确的,因此问题一定出在 SwiftUI 上。
问题 1
GeometryReader
使内部的所有内容都缩小到最小尺寸。
将.border(Color.orange)
添加到ZStack
,您将看到类似我下面的内容。
您可以使用.frame(maxWidth: .infinity, maxHeight: .infinity)
使ZStack
伸展以占用所有可用空间。
问题 2
position
与offset
。
offset
是从视图的默认(居中)位置出发,再按指定的量进行偏移
。
position
则是直接指定坐标来放置视图(类似于指定 origin)
。
将此视图的中心定位在其父级坐标空间中的指定坐标处。
问题 3
需要在"按中心定位"与"以左上角 (0, 0) 为原点定位"之间进行换算。
问题 4
ZStack
需要在 X 轴上翻转。
下面是完整的代码
import SwiftUI
import Vision
import CoreML
// Answer code: same pipeline, restructured so every failure is thrown to the
// caller instead of crashing or being silently dropped.
@MainActor
class Detection: ObservableObject {
//Moved file to assets
//let imgURL = URL(string: "https://i.imgur.com/EqsxxTc.jpg")! // Xcode preview generates this: https://i.imgur.com/6IPNQ8b.png
// Asset-catalog name of the bundled sample image.
let imageName: String = "EqsxxTc"
// Latest detections; the class is @MainActor, so updates are main-thread safe.
@Published var objects: [VNRecognizedObjectObservation] = []
// Builds the Vision wrapper from the Xcode-generated model class; throws on failure.
func getModel() throws -> VNCoreMLModel {
//Used model directly instead of loading from URL
let model = try YOLOv3Tiny(configuration: .init()).model
let mlModel = try VNCoreMLModel(for: model)
return mlModel
}
// Loads the bundled image, runs the request, and stores the observations.
// Throws `AppError.unableToLoadImage`, or any model/Vision error.
func detect() async throws {
let model = try getModel()
guard let tiff = NSImage(named: imageName)?.tiffRepresentation else {
// YOLOv3Tiny: https://ml-assets.apple.com/coreml/models/Image/ObjectDetection/YOLOv3Tiny/YOLOv3Tiny.mlmodel
//fatalError("Either YOLOv3Tiny.mlmodel is not in project bundle, or image failed to load.")
throw AppError.unableToLoadImage
}
//Completion handlers are not compatible with async/await you have to convert to a continuation.
// NOTE(review): if `perform` could both throw AND invoke the completion with
// an error, the continuation would resume twice — verify against Vision's
// documented behavior before shipping.
self.objects = try await withCheckedThrowingContinuation { (cont: CheckedContinuation<[VNRecognizedObjectObservation], Error>) in
let request = VNCoreMLRequest(model: model) { (request, error) in
if let error = error{
cont.resume(throwing: error)
}else{
cont.resume(returning: (request.results as? [VNRecognizedObjectObservation]) ?? [])
}
}
do{
try VNImageRequestHandler(data: tiff).perform([request])
}catch{
cont.resume(throwing: error)
}
}
}
// Scales a normalized Vision rect to view coordinates. No Y-flip here — the
// view flips the whole ZStack once with a rotation3DEffect instead.
func deNormalize(_ rect: CGRect, _ geometry: GeometryProxy) -> CGRect {
return VNImageRectForNormalizedRect(rect, Int(geometry.size.width), Int(geometry.size.height))
}
}
// Answer code: places each box with `.position` (center-based coordinates) and
// flips the stack once, instead of doing per-rect offset math.
struct ContentView: View {
@StateObject var detection = Detection()
var body: some View {
Image(detection.imageName)
.resizable()
.scaledToFit()
.overlay {
GeometryReader { geometry in
ZStack {
ForEach(detection.objects, id: \.uuid) { object in
let rect = detection.deNormalize(object.boundingBox, geometry)
Rectangle()
.stroke(lineWidth: 2)
.foregroundColor(.red)
.frame(width: rect.width, height: rect.height)
//Changed to position
//Adjusting for center vs leading origin
.position(x: rect.origin.x + rect.width/2, y: rect.origin.y + rect.height/2)
}
}
//Geometry reader makes the view shrink to its smallest size
.frame(maxWidth: .infinity, maxHeight: .infinity)
//Flip upside down
// (Vision rects keep their bottom-left origin; one flip converts them all.)
.rotation3DEffect(.degrees(180), axis: (x: 1, y: 0, z: 0))
}.border(Color.orange)
}
.task {
do{
try await self.detection.detect()
}catch{
//Always throw errors to the View so you can tell the user somehow. You don't want crashes or to leave the user waiting for something that has failed.
print(error)
}
}
}
}
// Xcode canvas preview for ContentView.
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
// App-level failures surfaced to the UI.
// NOTE(review): no `errorDescription` is provided, so user-visible text falls
// back to the default; `cannotFindFile` is unused in the code shown here.
enum AppError: LocalizedError{
case cannotFindFile
case unableToLoadImage
}
如您所见,我还更改了一些其他内容,代码中有注释。
好吧,经过很长时间的故障排除,我终于设法让它正常工作(虽然仍然不明白问题的原因)......
问题是这部分:
GeometryReader { geometry in
ZStack {
ForEach(detection.objects, id: \.uuid) { object in
let rect = detection.deNormalize(object.boundingBox, geometry)
Rectangle()
.stroke(lineWidth: 2)
.foregroundColor(.red)
.frame(width: rect.width, height: rect.height)
.offset(x: rect.origin.x, y: rect.origin.y)
}
}
}
我假设因为许多Rectangle()
会重叠,所以我需要一个ZStack()
将它们放在一起,结果证明这是错误的,显然在使用.offset()
时它们可以毫无问题地重叠,所以删除ZStack()
彻底解决了问题:
GeometryReader { geometry in
ForEach(detection.objects, id: \.uuid) { object in
let rect = detection.deNormalize(object.boundingBox, geometry)
Rectangle()
.stroke(lineWidth: 2)
.foregroundColor(.red)
.frame(width: rect.width, height: rect.height)
.offset(x: rect.origin.x, y: rect.origin.y)
}
}
我仍然不明白的是,为什么将ZStack()
移到GeometryReader()
之外也能解决问题,以及为什么有些框在正确的位置而有些不在正确的位置!
ZStack {
GeometryReader { geometry in
ForEach(detection.objects, id: \.uuid) { object in
let rect = detection.deNormalize(object.boundingBox, geometry)
Rectangle()
.stroke(lineWidth: 2)
.foregroundColor(.red)
.frame(width: rect.width, height: rect.height)
.offset(x: rect.origin.x, y: rect.origin.y)
}
}
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.