[英]Swift extract regex matches
我想從匹配正則表達式模式的字符串中提取子字符串。
所以我正在尋找這樣的東西:
// Question stub: the desired helper takes a regex pattern and a text and
// returns an array of strings. The body is a placeholder, not real code.
func matchesForRegexInText(regex: String!, text: String!) -> [String] {
???
}
所以這就是我所擁有的:
// The asker's incomplete attempt (old Swift 1.x Foundation API).
// It builds an NSRegularExpression from `regex`, collects the
// NSTextCheckingResult values for `text`, and then stops: the conversion of
// each result's NSRange into a substring is the open question.
func matchesForRegexInText(regex: String!, text: String!) -> [String] {
var regex = NSRegularExpression(pattern: regex,
options: nil, error: nil)
// NOTE(review): the range length comes from countElements(text); the
// surrounding answer text says the NSString length must be used instead.
var results = regex.matchesInString(text,
options: nil, range: NSMakeRange(0, countElements(text)))
as Array<NSTextCheckingResult>
/// ???
return ...
}
問題是, matchesInString
為我提供了一個NSTextCheckingResult
數組,其中NSTextCheckingResult.range
是NSRange
類型。
NSRange
與Range<String.Index>
不兼容,所以它阻止我使用text.substringWithRange(...)
知道如何在沒有太多代碼行的情況下在 swift 中實現這個簡單的事情嗎?
即使matchesInString()
方法將String
作為第一個參數,它在內部也可以使用NSString
,並且必須使用NSString
長度而不是 Swift 字符串長度來給出范圍參數。 否則對於“擴展字形簇”(例如“標志”)將失敗。
從Swift 4 (Xcode 9) 開始,Swift 標准庫提供了在Range<String.Index>
和NSRange
之間轉換的函數。
/// Returns every substring of `text` matched by the pattern `regex`.
///
/// - Parameters:
///   - regex: The regular-expression pattern to compile.
///   - text: The string to search.
/// - Returns: The matched substrings in order of appearance. When the
///   pattern cannot be compiled, the error is printed and an empty array
///   is returned.
func matches(for regex: String, in text: String) -> [String] {
    let expression: NSRegularExpression
    do {
        expression = try NSRegularExpression(pattern: regex)
    } catch {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }

    // The NSRange is built from String indices so that it covers the whole
    // text in the units NSRegularExpression expects.
    let searchRange = NSRange(text.startIndex..<text.endIndex, in: text)
    var found: [String] = []
    for hit in expression.matches(in: text, range: searchRange) {
        let span = Range(hit.range, in: text)!
        found.append(String(text[span]))
    }
    return found
}
例子:
// Sample text: a flag emoji and euro signs in front of the digits.
let sample = "🇩🇪€4€9"
let digits = matches(for: "[0-9]", in: sample)
print(digits)
// ["4", "9"]
注意:強制展開Range($0.range, in: text)!
是安全的,因為NSRange
引用給定字符串text
的子字符串。 但是,如果您想避免它,請使用
// Drops any match whose NSRange cannot be converted to a String range
// instead of force-unwrapping it. `compactMap` (Swift 4.1+) replaces the
// deprecated Optional-returning overload of `flatMap`.
return results.compactMap {
    Range($0.range, in: text).map { String(text[$0]) }
}
來代替。
(Swift 3 及更早版本的舊答案:)
因此,您應該將給定的 Swift 字符串轉換為NSString
,然后提取范圍。 結果將自動轉換為 Swift 字符串數組。
(Swift 1.2 的代碼可以在編輯歷史中找到。)
Swift 2 (Xcode 7.3.1):
/// Returns every substring of `text` matched by the pattern `regex`
/// (Swift 2 Foundation API).
///
/// The text is bridged to NSString so that the search range and the
/// extracted substrings use the same length units.
///
/// - Returns: The matched substrings. When the pattern is invalid, the
///   error is printed and an empty array is returned.
func matchesForRegexInText(regex: String, text: String) -> [String] {
    do {
        let expression = try NSRegularExpression(pattern: regex, options: [])
        let bridged = text as NSString
        let wholeText = NSMakeRange(0, bridged.length)
        var found = [String]()
        for hit in expression.matchesInString(text, options: [], range: wholeText) {
            found.append(bridged.substringWithRange(hit.range))
        }
        return found
    } catch let error as NSError {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }
}
例子:
// Sample text: a flag emoji and euro signs in front of the digits.
let sample = "🇩🇪€4€9"
let digits = matchesForRegexInText("[0-9]", text: sample)
print(digits)
// ["4", "9"]
Swift 3 (Xcode 8):
/// Returns every substring of `text` matched by the pattern `regex`,
/// using an NSString bridge for range arithmetic (Swift 3 style).
///
/// - Returns: The matched substrings. When the pattern is invalid, the
///   error is printed and an empty array is returned.
func matches(for regex: String, in text: String) -> [String] {
    let expression: NSRegularExpression
    do {
        expression = try NSRegularExpression(pattern: regex)
    } catch let error {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }

    // NSString.length is the unit NSRegularExpression ranges are expressed in.
    let bridged = text as NSString
    let everything = NSRange(location: 0, length: bridged.length)
    return expression.matches(in: text, range: everything).map { hit in
        bridged.substring(with: hit.range)
    }
}
例子:
// Sample text: a flag emoji and euro signs in front of the digits.
let sample = "🇩🇪€4€9"
let digits = matches(for: "[0-9]", in: sample)
print(digits)
// ["4", "9"]
我的答案建立在給定答案之上,但通過添加額外的支持使正則表達式匹配更加健壯:
避免do/catch
並使用guard
結構;添加matchingStrings
作為對String
的擴展。
Swift 4.2
//: Playground - noun: a place where people can play
import Foundation
extension String {
    /// Finds all matches of `regex` in the receiver, including capture groups.
    ///
    /// - Parameter regex: The regular-expression pattern.
    /// - Returns: One inner array per match. Element 0 is the whole match and
    ///   the following elements are the capture groups in order. A group that
    ///   did not participate in the match is reported as `""`. An invalid
    ///   pattern yields an empty array.
    func matchingStrings(regex: String) -> [[String]] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else {
            return []
        }
        let bridged = self as NSString
        let everything = NSRange(location: 0, length: bridged.length)

        var table: [[String]] = []
        for hit in expression.matches(in: self, options: [], range: everything) {
            var row: [String] = []
            for group in 0..<hit.numberOfRanges {
                let span = hit.range(at: group)
                // NSNotFound marks a group that did not take part in the match.
                row.append(span.location == NSNotFound ? "" : bridged.substring(with: span))
            }
            table.append(row)
        }
        return table
    }
}
// Two matches, each reported as [whole match, group 1, group 2].
"prefix12 aaa3 prefix45".matchingStrings(regex: "fix([0-9])([0-9])")
// Prints: [["fix12", "1", "2"], ["fix45", "4", "5"]]
// The non-capturing group (?:prefix)? is part of the whole match only.
"prefix12".matchingStrings(regex: "(?:prefix)?([0-9]+)")
// Prints: [["prefix12", "12"]]
// The optional prefix is absent here and the match is still reported.
"12".matchingStrings(regex: "(?:prefix)?([0-9]+)")
// Prints: [["12", "12"]], other answers return an empty array here
// Safely accessing the capture of the first match (if any):
let number = "prefix12suffix".matchingStrings(regex: "fix([0-9]+)su").first?[1]
// Prints: Optional("12")
Swift 3
//: Playground - noun: a place where people can play
import Foundation
extension String {
    /// Finds all matches of `regex` in the receiver, including capture groups
    /// (Swift 3 API, which spells the group accessor `rangeAt(_:)`).
    ///
    /// - Parameter regex: The regular-expression pattern.
    /// - Returns: One inner array per match. Element 0 is the whole match and
    ///   the following elements are the capture groups. A group that did not
    ///   participate is reported as `""`. An invalid pattern yields `[]`.
    func matchingStrings(regex: String) -> [[String]] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else {
            return []
        }
        let bridged = self as NSString
        let everything = NSMakeRange(0, bridged.length)

        // Text of one capture group, or "" when the group did not participate.
        func text(of hit: NSTextCheckingResult, group: Int) -> String {
            let span = hit.rangeAt(group)
            if span.location == NSNotFound {
                return ""
            }
            return bridged.substring(with: span)
        }

        return expression.matches(in: self, options: [], range: everything).map { hit in
            (0..<hit.numberOfRanges).map { text(of: hit, group: $0) }
        }
    }
}
// Two matches, each reported as [whole match, group 1, group 2].
"prefix12 aaa3 prefix45".matchingStrings(regex: "fix([0-9])([0-9])")
// Prints: [["fix12", "1", "2"], ["fix45", "4", "5"]]
// The non-capturing group (?:prefix)? is part of the whole match only.
"prefix12".matchingStrings(regex: "(?:prefix)?([0-9]+)")
// Prints: [["prefix12", "12"]]
// The optional prefix is absent here and the match is still reported.
"12".matchingStrings(regex: "(?:prefix)?([0-9]+)")
// Prints: [["12", "12"]], other answers return an empty array here
// Safely accessing the capture of the first match (if any):
let number = "prefix12suffix".matchingStrings(regex: "fix([0-9]+)su").first?[1]
// Prints: Optional("12")
Swift 2
extension String {
    /// Finds all matches of `regex` in the receiver, including capture groups
    /// (Swift 2 Foundation API).
    ///
    /// - Returns: One inner array per match. Element 0 is the whole match and
    ///   the following elements are the capture groups. A group that did not
    ///   participate is reported as `""`. An invalid pattern yields `[]`.
    func matchingStrings(regex: String) -> [[String]] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else {
            return []
        }
        let bridged = self as NSString
        let everything = NSMakeRange(0, bridged.length)

        var table = [[String]]()
        for hit in expression.matchesInString(self, options: [], range: everything) {
            var row = [String]()
            for group in 0..<hit.numberOfRanges {
                let span = hit.rangeAtIndex(group)
                // NSNotFound marks a group that did not take part in the match.
                if span.location == NSNotFound {
                    row.append("")
                } else {
                    row.append(bridged.substringWithRange(span))
                }
            }
            table.append(row)
        }
        return table
    }
}
extension String {
    /// Returns a two-dimensional array of regex matches.
    ///
    /// - Parameter regex: The regular-expression pattern.
    /// - Returns: One inner array per match. The first element is the whole
    ///   match and the rest are the capture groups, with `""` for a group
    ///   that did not participate. An invalid pattern yields `[]`.
    func match(_ regex: String) -> [[String]] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: []) else {
            return []
        }
        let bridged = self as NSString
        let hits = expression.matches(in: self, options: [], range: NSMakeRange(0, bridged.length))
        return hits.map { hit in
            (0..<hit.numberOfRanges).map { group -> String in
                let span = hit.range(at: group)
                return span.location == NSNotFound ? "" : bridged.substring(with: span)
            }
        }
    }
}
返回一個二維字符串數組:
"prefix12suffix fix1su".match("fix([0-9]+)su")
返回...
[["fix12su", "12"], ["fix1su", "1"]]
// First element of sub-array is the match
// All subsequent elements are the capture groups
如果您想從字符串中提取子字符串,不僅僅是位置,(而是實際的字符串,包括表情符號)。 那么,以下可能是一個更簡單的解決方案。
extension String {
    /// Returns every substring of the receiver matched by `pattern`
    /// (Swift 2 Foundation API).
    ///
    /// Substrings are cut from an NSString bridge of the receiver, so the
    /// ranges reported by NSRegularExpression are used in their own units.
    ///
    /// - Returns: The matched substrings, or an empty array when the pattern
    ///   cannot be compiled.
    func regex (pattern: String) -> [String] {
        guard let expression = try? NSRegularExpression(pattern: pattern, options: NSRegularExpressionOptions(rawValue: 0)) else {
            return [String]()
        }
        let bridged = self as NSString
        let everything = NSRange(location: 0, length: bridged.length)

        var found = [String]()
        expression.enumerateMatchesInString(self, options: NSMatchingOptions(rawValue: 0), range: everything) { hit, _, _ in
            // The enumeration block may be handed a nil result; skip those.
            guard let hit = hit else { return }
            found.append(bridged.substringWithRange(hit.range) as String)
        }
        return found
    }
}
示例用法:
"someText 👿🏅👿⚽️ pig".regex("👿⚽️")
將返回以下內容:
["👿⚽️"]
注意使用 "\w+" 可能會產生意外的 ""
"someText 👿🏅👿⚽️ pig".regex("\\w+")
將返回此字符串數組
["someText", "️", "pig"]
我發現不幸的是,接受的答案的解決方案無法在 Swift 3 for Linux 上編譯。 那么,這是一個修改后的版本:
import Foundation
/// Returns every substring of `text` matched by the pattern `regex`
/// (variant for Swift 3 on Linux, where the class is named
/// `RegularExpression` and NSString is constructed explicitly).
///
/// - Returns: The matched substrings. When the pattern is invalid, the
///   error is printed and an empty array is returned.
func matches(for regex: String, in text: String) -> [String] {
    do {
        let expression = try RegularExpression(pattern: regex, options: [])
        let wrapped = NSString(string: text)
        let everything = NSRange(location: 0, length: wrapped.length)

        var found: [String] = []
        for hit in expression.matches(in: text, options: [], range: everything) {
            found.append(wrapped.substring(with: hit.range))
        }
        return found
    } catch let error {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }
}
主要區別在於:
Linux 上的 Swift 似乎需要在 Foundation 對象上刪除NS
前綴,而沒有 Swift 原生的等效對象。 (參見Swift 進化提案 #86 。)
Linux 上的 Swift 還需要為正則RegularExpression
初始化和matches
方法指定options
參數。
出於某種原因,將String
強制轉換為NSString
在 Linux 上的 Swift 中不起作用,但使用String
初始化一個新的NSString
作為源代碼確實有效。
此版本也適用於 macOS / Xcode 上的 Swift 3,唯一的例外是您必須使用名稱NSRegularExpression
而不是RegularExpression
。
沒有 NSString 的 Swift 4。
extension String {
    /// Returns every substring of the receiver matched by `regex`,
    /// compared case-insensitively, without bridging to NSString.
    ///
    /// The search range is built from the receiver's String indices.
    /// Using `self.count` as the NSRange length is wrong: it counts
    /// Characters, while NSRegularExpression measures UTF-16 code units, so
    /// any text containing emoji, flags or other multi-unit characters would
    /// be searched only partially.
    ///
    /// - Parameter regex: The regular-expression pattern.
    /// - Returns: The matched substrings in order of appearance, or an empty
    ///   array when the pattern cannot be compiled.
    func matches(regex: String) -> [String] {
        guard let expression = try? NSRegularExpression(pattern: regex, options: [.caseInsensitive]) else {
            return []
        }
        let searchRange = NSRange(startIndex..<endIndex, in: self)

        var found: [String] = []
        for match in expression.matches(in: self, options: [], range: searchRange) {
            // Skip a match whose range cannot be converted, rather than
            // crashing on a force-unwrap.
            guard let span = Range(match.range, in: self) else { continue }
            found.append(String(self[span]))
        }
        return found
    }
}
@p4bloch 如果要從一系列捕獲括號中捕獲結果,則需要使用NSTextCheckingResult
的rangeAtIndex(index)
方法,而不是range
。 這是上面的 @MartinR 用於 Swift2 的方法,適用於捕獲括號。 在返回的數組中,第一個結果[0]
是整個捕獲,然后各個捕獲組從[1]
開始。 我注釋掉了map
操作(這樣更容易看到我改變了什么)並用嵌套循環替換它。
/// Returns, for every match of `regex` in `text`, the whole match followed
/// by each of its capture groups, flattened into one array
/// (Swift 2.2 Foundation API).
///
/// A capture group that did not participate in a match (for example an
/// optional group) has a range whose location is NSNotFound. Passing that
/// range to `substringWithRange` raises a range exception, so such groups
/// are reported as `""`, which keeps group positions aligned.
///
/// - Returns: The flattened matches and groups. When the pattern is
///   invalid, the error is printed and an empty array is returned.
func matches(for regex: String!, in text: String!) -> [String] {
    do {
        let regex = try NSRegularExpression(pattern: regex, options: [])
        let nsString = text as NSString
        let results = regex.matchesInString(text, options: [], range: NSMakeRange(0, nsString.length))
        var match = [String]()
        for result in results {
            for i in 0..<result.numberOfRanges {
                let groupRange = result.rangeAtIndex(i)
                if groupRange.location == NSNotFound {
                    match.append("")
                } else {
                    match.append(nsString.substringWithRange(groupRange))
                }
            }
        }
        return match
        //return results.map { nsString.substringWithRange( $0.range )} //rangeAtIndex(0)
    } catch let error as NSError {
        print("invalid regex: \(error.localizedDescription)")
        return []
    }
}
一個示例用例可能是,假設您要拆分一串title year
例如“Finding Dory 2016”,您可以這樣做:
print ( matches(for: "^(.+)\\s(\\d{4})" , in: "Finding Dory 2016"))
// ["Finding Dory 2016", "Finding Dory", "2016"]
上面的大多數解決方案只給出完全匹配,結果忽略了捕獲組,例如:^\d+\s+(\d+)
要按預期獲得捕獲組匹配,您需要類似 (Swift4) 的內容:
public extension String {
    /// Returns the capture groups of the first match of `pattern`.
    ///
    /// Changes from the earlier version:
    /// - The search range uses the NSString (UTF-16) length. `self.count`
    ///   counts Characters, so text containing emoji or flags was searched
    ///   only partially.
    /// - A group that did not participate in the match has location
    ///   NSNotFound. It is reported as `""` instead of crashing in
    ///   `substring(with:)`.
    /// - Members of a `public extension` are already public, so the
    ///   redundant modifier on the method is dropped.
    ///
    /// - Parameter pattern: The regular-expression pattern.
    /// - Returns: The text of capture groups 1...n of the first match. The
    ///   result is empty when the pattern is invalid, when nothing matches,
    ///   or when the pattern has no capture groups.
    func capturedGroups(withRegex pattern: String) -> [String] {
        guard let regex = try? NSRegularExpression(pattern: pattern, options: []) else {
            return []
        }
        let nsString = self as NSString
        let fullRange = NSRange(location: 0, length: nsString.length)

        guard let match = regex.firstMatch(in: self, options: [], range: fullRange),
              match.numberOfRanges > 1 else {
            return []
        }
        // Range 0 is the whole match; the capture groups start at 1.
        return (1..<match.numberOfRanges).map { index in
            let groupRange = match.range(at: index)
            return groupRange.location == NSNotFound ? "" : nsString.substring(with: groupRange)
        }
    }
}
我就是這樣做的,我希望它能帶來一個新的視角,它是如何在 Swift 上工作的。
在下面的這個例子中,我將得到[]
之間的任何字符串
// Swift 1.x script: prints every bracketed token found in `sample`.
var sample = "this is an [hello] amazing [world]"
// Pattern: a literal "[", the shortest possible run of characters, then "]".
var regex = NSRegularExpression(pattern: "\\[.+?\\]"
, options: NSRegularExpressionOptions.CaseInsensitive
, error: nil)
// NOTE(review): the range length comes from countElements(sample), not from
// the NSString length that substringWithRange below works in. Confirm that
// this is intended for text outside plain ASCII.
var matches = regex?.matchesInString(sample, options: nil
, range: NSMakeRange(0, countElements(sample))) as Array<NSTextCheckingResult>
for match in matches {
let r = (sample as NSString).substringWithRange(match.range)//cast to NSString is required to match range format.
println("found= \(r)")
}
這是一個非常簡單的解決方案,它返回一個包含匹配項的字符串數組
Swift 3。
/// Returns every substring of the receiver matched by the given pattern.
///
/// - Parameters:
///   - regularExpressionPattern: The regular-expression pattern.
///   - options: Options used when compiling the pattern. Defaults to none.
/// - Returns: The matched substrings, or an empty array when the pattern
///   cannot be compiled.
internal func stringsMatching(regularExpressionPattern: String, options: NSRegularExpression.Options = []) -> [String] {
    guard let expression = try? NSRegularExpression(pattern: regularExpressionPattern, options: options) else {
        return []
    }
    let bridged = self as NSString
    let everything = NSMakeRange(0, bridged.length)

    var found: [String] = []
    for hit in expression.matches(in: self, options: [], range: everything) {
        found.append(bridged.substring(with: hit.range))
    }
    return found
}
基本電話號碼匹配
// Sample phone numbers in assorted human-readable formats.
let phoneNumbers = [
    "+79990001101",
    "+7 (800) 000-11-02",
    "+34 507 574 147 ",
    "+1-202-555-0118",
]

// Removes every character that is neither a digit nor "+".
let match: (String) -> String = { raw in
    raw.replacingOccurrences(of: #"[^\d+]"#, with: "", options: .regularExpression)
}

print(phoneNumbers.map { match($0) })
// ["+79990001101", "+78000001102", "+34507574147", "+12025550118"]
非常感謝Lars Blumberg ,他回答了用Swift 4捕獲組和完整匹配,這對我有很大幫助。 當他們的正則表達式無效時,我還為那些確實想要 error.localizedDescription 響應的人添加了它:
extension String {
    /// Finds all matches of `regex` in the receiver, including capture
    /// groups, and reports an invalid pattern on the console.
    ///
    /// - Parameter regex: The regular-expression pattern.
    /// - Returns: One inner array per match. Element 0 is the whole match and
    ///   the following elements are the capture groups, with `""` for a group
    ///   that did not participate. When the pattern is invalid, the error's
    ///   localized description is printed and an empty array is returned.
    func matchingStrings(regex: String) -> [[String]] {
        let expression: NSRegularExpression
        do {
            expression = try NSRegularExpression(pattern: regex)
        } catch let error {
            print("invalid regex: \(error.localizedDescription)")
            return []
        }

        let bridged = self as NSString
        let everything = NSRange(location: 0, length: bridged.length)
        return expression.matches(in: self, options: [], range: everything).map { hit in
            (0..<hit.numberOfRanges).map { group -> String in
                let span = hit.range(at: group)
                guard span.location != NSNotFound else { return "" }
                return bridged.substring(with: span)
            }
        }
    }
}
對我來說,將localizedDescription 作為錯誤有助於理解轉義出了什么問題,因為它顯示了最終的正則表達式 swift 嘗試實現哪個。
將@Mike Chirico 更新為Swift 5
extension String{
    /// Returns every substring of the receiver matched by `pattern`.
    ///
    /// The search range and the extracted substrings both use the NSString
    /// (UTF-16) view of the receiver. Using `count` as the NSRange length is
    /// wrong: it counts Characters, so text containing emoji or flags was
    /// searched only partially and trailing matches were lost.
    ///
    /// - Parameter pattern: The regular-expression pattern.
    /// - Returns: The matched substrings in order of appearance, or `nil`
    ///   when the pattern cannot be compiled.
    func regex(pattern: String) -> [String]?{
        guard let expression = try? NSRegularExpression(pattern: pattern, options: []) else {
            return nil
        }
        // Bridge once, outside the enumeration block.
        let nsText = self as NSString
        let fullRange = NSRange(location: 0, length: nsText.length)

        var matches = [String]()
        expression.enumerateMatches(in: self, options: [], range: fullRange) { result, _, _ in
            // The enumeration block may be handed a nil result; skip those.
            guard let result = result else { return }
            matches.append(nsText.substring(with: result.range))
        }
        return matches
    }
}
Regex
, RegexBuilder
👷♀️ Xcode 以前通過Find and Search
選項卡支持 Regex。 許多人發現 Apple 的NSRegularExpression
的 Swift API 冗長且笨拙,因此 Apple 在今年發布了Regex literal
支持和RegexBuilder
。
該 API 已被簡化,以便在 iOS 16 / macOS 13 中整理復雜的基於String
范圍的解析邏輯並提高性能。
/// Parses one line into a `MailmapEntry` using a regex literal.
///
/// The pattern is matched against the start of `line`. Its first capture
/// becomes the entry's name and its second capture the email.
///
/// - Parameter line: The line to parse.
/// - Returns: The entry built from the two captures.
/// - Throws: `MailmapError.badLine` when the start of the line does not
///   match the pattern.
func parseLine(_ line: Substring) throws -> MailmapEntry {
    let linePattern = /\h*([^<#]+?)??\h*<([^>#]+)>\h*(?:#|\Z)/
    if let parsed = line.prefixMatch(of: linePattern) {
        return MailmapEntry(name: parsed.1, email: parsed.2)
    }
    throw MailmapError.badLine
}
目前,我們可以使用prefixMatch
或wholeMatch
進行匹配以找到單個匹配項,但未來 API 可能會針對多個匹配項進行改進。
RegexBuilder 是 Apple 發布的新 API,旨在使 RegEx 代碼更容易在 Swift 中編寫。 如果我們想要更好的可讀性,可以將上面的正則表達式字面量/\h*([^<#]+?)??\h*<([^>#]+)>\h*(?:#|\Z)/
改寫為 RegexBuilder 這種更具聲明性的形式。
請注意,如果我們想平衡可讀性和簡潔性,我們可以在 RegexBuilder 中使用原始字符串,也可以在構建器中交錯 Regex Literals。
import RegexBuilder
// Builder form of a line pattern; the components are matched in the order
// they are listed.
let regex = Regex {
// Leading horizontal whitespace, if any.
ZeroOrMore(.horizontalWhitespace)
// Optional capture: one or more characters other than "<" and "#".
Optionally {
Capture(OneOrMore(.noneOf("<#")))
}
// Reluctant repetition is requested for the optional component above.
.repetitionBehavior(.reluctant)
ZeroOrMore(.horizontalWhitespace)
// Capture between literal angle brackets: one or more characters other
// than ">" and "#".
"<"
Capture(OneOrMore(.noneOf(">#")))
">"
ZeroOrMore(.horizontalWhitespace)
// A regex literal embedded in the builder: either "#" or the \Z anchor.
/#|\Z/
}
RegEx 文字/#|\Z/
等價於:
// Alternation: either a literal "#" or the end-of-subject-before-newline
// anchor.
ChoiceOf {
"#"
Anchor.endOfSubjectBeforeNewline
}
RegexComponent
RegexBuilder
語法在可組合性方面也類似於 SwiftUI,因為我們可以在其他RegexComponent
中重用RegexComponent
:
// A reusable regex component for one line. The declared output is the whole
// match, an optional first capture, and a required second capture.
struct MailmapLine: RegexComponent {
@RegexComponentBuilder
var regex: Regex<(Substring, Substring?, Substring)> {
// Leading horizontal whitespace, if any.
ZeroOrMore(.horizontalWhitespace)
// Optional capture: one or more characters other than "<" and "#".
Optionally {
Capture(OneOrMore(.noneOf("<#")))
}
// Reluctant repetition is requested for the optional component above.
.repetitionBehavior(.reluctant)
ZeroOrMore(.horizontalWhitespace)
// Capture between literal angle brackets: one or more characters other
// than ">" and "#".
"<"
Capture(OneOrMore(.noneOf(">#")))
">"
ZeroOrMore(.horizontalWhitespace)
// Terminator: either a literal "#" or the end-of-subject-before-newline
// anchor.
ChoiceOf {
"#"
Anchor.endOfSubjectBeforeNewline
}
}
}
您可以在字符串上使用matching(regex:)
,例如:
let array = try "Your String To Search".matching(regex: ".")
使用這個簡單的擴展:
public extension String {
    /// Returns every substring of the receiver matched by `regex`.
    ///
    /// - Parameter regex: The regular-expression pattern.
    /// - Returns: The matched substrings in order of appearance.
    /// - Throws: The error raised by `NSRegularExpression` when the pattern
    ///   cannot be compiled.
    func matching(regex: String) throws -> [String] {
        let expression = try NSRegularExpression(pattern: regex)
        let everything = NSRange(startIndex..<endIndex, in: self)

        var found: [String] = []
        for hit in expression.matches(in: self, range: everything) {
            let span = Range(hit.range, in: self)!
            found.append(String(self[span]))
        }
        return found
    }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.