Im using the below code to extract sizes from excel cells, which has 7 different patterns..
The below code is for extracting OZ sizes however, i need to do the same ml and gram (g). Hence, the way i wrote the code will be very lengthy.
Value | Result |
---|---|
Size:0.028 oz x 5 | 0.028 oz x 5 |
Size:6x0.04 oz + 30 oz | 6 x0.04 oz |
Size:8 x 0.03 oz | 8 x 0.03 oz |
Size:2 x 0.07 oz | 2 x 0.07 oz |
Size:5 x 0.028 oz | 5 x 0.028 oz |
Size:0.028 ozx5 | 0.028 ozx5 |
Size:0.028 oz | 0.028 oz |
Size:30.00 oz | 30.00 oz |
Size:2 * 0.07 oz | 2 * 0.07 oz |
Size:0.028 oz * 5 | 0.028 oz * 5 |
Size:2*0.07 oz | 2 *0.07 oz |
Size:0.028 oz*5 | 0.028 oz*5 |
Size:3.00 oz | 3.00 oz |
Size:3.00 oz | 3.00 oz |
Color:01UNIVERSA#||#Size:1.00 oz | 1.00 oz |
Lancôme Effacil Eye Makeup Remover, 4.2 Fl. Oz. | 0. oz |
Confidence In A Cleanser, 1 fl. oz, Travel Size | 1 fl. oz |
Function getozv1(str As String) Dim n As Long, unit As String, nums() As Variant Static rgx As Object, cmat As Object 'with rgx as static, it only has to be created once; beneficial when filling a long column with this UDF If rgx Is Nothing Then Set rgx = CreateObject("VBScript.RegExp") End If getsize = vbNullString cnt = 0 With rgx.Global = True.MultiLine = False.Pattern = "[0-9]{1,2}\sx\s[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" 'for Size:2 x 0.5 oz If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If If Not.test(str) Then.Pattern = "[0-9]{1,2}x[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" ''for no space Size:2x0.5 oz If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)\sx\s[0-9]{1,2}" 'for Size:0.028 oz x 5/ 0.8g 'pattern2 without space [0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)x[0-9]{1,2} If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)x[0-9]{1,2}" 'for without Size:0.028 oz x 5/ 0.8g If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "[0-9]{1,2}\s\*\s[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" 'Size:2 * 0.07 oz If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "[0-9]{1,2}\*[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)" 'no space Size:2*0.07 oz If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)\s\*\s[0-9]{1,2}" 'Size:0.028 oz * 5 If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "[0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)\*[0-9]{1,2}" 'Size:0.028 oz*5 If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If If Not.test(str) Then.Pattern = "([0-9.\-*?/]{1,5}\s*(fl oz|Fl oz|fl. oz|fl.oz|oz|Oz|oZ|OZ\s+)){1,3}" 'base pattern If.test(str) Then Set cmat =.Execute(str) 'resize the nums array to accept the matches ReDim nums(cmat.Count - 1) 'get measurement unit unit = Replace(cmat.Item(0), Val(cmat.Item(0)), vbNullString, 1, vbTextCompare) 'populate the nums array with the matches For n = LBound(nums) To UBound(nums) nums(n) = Val(cmat.Item(n)) cnt = cnt + 1 Next n If cnt > 1 Then getozv1 = "Set" Else 'convert the nums array to a subtotal with unit suffix getozv1 = LCase(Application.Sum(nums) & " " & unit) End If End If End If End With End Function
How to do we do this in a efficient way? can we combine these 7 patterns without impacting result/accuracy?
Or is there better way to do this?
Few examples for gram(g) ml
Data | Expected result | Note |
---|---|---|
MamaDerma Stretch Mark Repair Cream - 100 ml | 100 ml | |
MamaDerma Stretch Mark Prevention Oil - 60 ml | 60 ml | |
Supermood Egoboost Moisture Kick Serum, 30ml | 30ml | |
Size:15 mL x3 | 15 ml x3 | |
Size:Mini Size - 7 g | 7 g | |
Type: 4 g Ampoule 40 g Rubber™ Mask with Moisturizing Hyaluronic Acid | Set | multiple sizes |
Size:Standard Size- 5.0 g-golden pink | 5.0 g | |
Size:Mini Size - 7 g | 7 g | |
Size:Standard Size - 21 g | 21 g | |
Size:Mini Size Translucent - 5.4 g | 5.4 g | |
05浅色/陶瓷白 7g | 7 g | |
Size: 28ml*4 | 28ml*4 | |
Size:20g*10 | 20g*10 | |
Size: (2ml+3ml)×4ml | Set | multiple sizes |
Size4*18ml | 4*18ml | |
Size: 27g*5ea | 27g*5 | |
Size 4.6ml × 4 | 4.6ml × 4 | |
Size 4 x 4.6ml | 4 x 4.6ml | |
Size 4.6ml×4 | 4.6ml×4 | |
Size 4x4.6ml | 4x4.6ml | |
Size: 28ml * 4 | 28ml * 4 | |
Size:20g * 10 | 20g * 10 | |
27mlx3 片 | 27mlx3 | |
Size: 12 x 1.5g | 12 x 1.5g | |
Size: 12x1.5g | 12x1.5g |
It is not too difficult to develop a single regex that works for all of your examples.
I suggest:
Option Explicit
'set reference to Microsoft VBcript Regular Expressions 5.5
Function getAmt(S As String) As String
Dim RE As RegExp, MC As MatchCollection
Const sPat As String = "(?:\d+\s*[x*]\s*)?\d+(?:\.\d+)?\s*(?=(g|ml|(?:fl)?\.?\s*oz))\1(?:\s*[×x*]\s*\d+)?"
Set RE = New RegExp
With RE
.Global = True
.IgnoreCase = True
.Pattern = sPat
Set MC = .Execute(S)
Select Case MC.Count
Case 0
getAmt = "no amount"
Case 1
getAmt = MC(0)
Case Is > 1
getAmt = "Set"
End Select
End With
End Function
It works on all of your examples (except for the obvious typos)
Here is a detailed explanation of the regex:
(?:\d+\s*[x*]\s*)?\d+(?:\.\d+)?\s*(?=(g|ml|(?:fl)?\.?\s*oz))\1(?:\s*[×x*]\s*\d+)?
Options: Case insensitive
(?:\d+\s*[x*]\s*)?
?
\d+
\s*
[x*]
\s*
\d+
(?:\.\d+)?
\s*
(?=(g|ml|(?:fl)?\.?\s*oz))
(g|ml|(?:fl)?\.?\s*oz)
\1
(?:\s*[×x*]\s*\d+)?
?
\s*
[×x*]
\s*
\d+
Created with RegexBuddy
I wonder why you want to consolidate seven regular expressions into one. It may or may not be more "efficient" (how are you planning to test that?), it will certainly be much more difficult to maintain.
The first thing I suggest you do is set up a regression test environment so you can make sure that any changes you make in the future do not break the existing tests. In VBA, that would look something like this.
Option Explicit
Sub TestParse()
' Two dimensional array containing the test values and the expected results
Dim testdata(1 To 25, 1 To 2) As String
Dim i As Integer
Dim result As String
testdata(1, 1) = "MamaDerma Stretch Mark Repair Cream - 100 ml"
testdata(1, 2) = "100 ml"
testdata(2, 1) = "MamaDerma Stretch Mark Repair Cream - 60 ml"
testdata(2, 2) = "60 ml"
'add more values & expected results
testdata(25, 1) = "Size: 12x1.5g"
testdata(25, 2) = "12x1.5g"
' Test all values
For i = LBound(testdata, 1) To UBound(testdata, 1)
result = Parse(testdata(i, 1))
Debug.Print testdata(i), result, IIf(testdata(i, 2) = result, "PASS", "FAIL")
Next i
End Sub
' Function to parse values
Function Parse(ByVal toParse As String) As String
' Code to parse values goes here
End Function
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.