如何用正则表达式匹配数据

Question

I have an array list like this:我有一个数组列表，如下所示：

data = ['- TEST BEGA','R8=11K(10,15A)B','R9=1K(0,3A)B','R10_R84=13MEG(7,14K)R','R85_R84<100K(970,1000K)R',
'R85_R86=10K(9,11K)R']

I want to split the array list like this:我想把这个数组列表拆分成如下形式：

SCN: TEST BEGA

STEP R8
CHILD R8
Operator =
MEASURE_CHILD 11K(10,15A)B

STEP R9
CHILD R9
Operator =
MEASURE_CHILD 1K(0,3A)B

STEP R10_R84
CHILD R10_R84
Operator =
MEASURE_CHILD 13MEG(7,14K)R

STEP R85
CHILD R84
Operator <
MEASURE_CHILD 100K(970,1000K)R
CHILD R86
Operator =
MEASURE_CHILD 10K(9,11K)R

I use this code to do it, but I don't know what's wrong:我使用这段代码来实现，但我不知道哪里出了问题：

def createTreeStandardBloc(self):
    """Print the built-in sample list as an SCN / STEP / CHILD report.

    The first element is printed as the ``SCN:`` header with surrounding
    ``-`` and space characters stripped.  Every following non-blank element
    is parsed as ``STEP[_CHILD]<operator>MEASURE`` where the operator is one
    of ``<``, ``>`` or ``=``.  A ``STEP`` line is printed only when the step
    prefix differs from that of the previous element; when an element has no
    ``_CHILD`` part the step name doubles as the child name.

    ``self`` is not used.  An element that does not match the expected shape
    raises ``AttributeError`` because ``match`` returns ``None`` for it.
    """
    data = ['- TEST BEGA','R8=11K(10,15A)B','R9=1K(0,3A)B','R10_R84=13MEG(7,14K)R','R85_R84<100K(970,1000K)R','R85_R85=10K(9,11K)R']

    # Raw string: \s and \w must reach the regex engine untouched instead of
    # being treated as (invalid) string escape sequences.
    # The underscore sits outside the capture group so the child name is
    # reported as "R84" rather than "_R84".
    entry = re.compile(r"^\s*([^_]+)(?:_(\w+))?([<>=])(.*)\s*$")

    print("SCN:", data[0].strip("- "))

    last_s = None
    for line in data[1:]:
        if not line.strip():
            # Blank entries carry no measurement; skip them.
            continue
        s, c, op, mc = entry.match(line).groups()
        if s != last_s:
            # A new step prefix opens a new STEP section.
            print("STEP", s)
        print("CHILD", c or s)
        print("Operator", op)
        print("MEASURE_CHILD", mc)
        last_s = s

The problem is that the entry R10_R84 is split into R10 for the step and R84 for the child. I want an entry to be split only when its prefix is repeated, as with R85.问题是数据 R10_R84 被拆分成了步骤 R10 和子节点 R84。我希望只有当前缀重复出现时（例如 R85）才进行拆分。

Answer 1

I'm sure someone else will come up with a better solution but here goes.我相信其他人会想出更好的解决方案，但这里是。

from collections import defaultdict

def get_operator(string):
    """Return the operator character contained in *string*.

    The candidates are tried in the fixed order '=', '>', '<' and the first
    candidate that occurs anywhere in the string is returned, regardless of
    its position.  None is returned when no candidate is present.
    """
    return next((symbol for symbol in '=><' if symbol in string), None)

def createTreeStandardBloc(data):
    """Format a measurement list as SCN / STEP / CHILD report lines.

    ``data[0]`` is the header; it becomes ``'SCN: ...'`` with surrounding
    ``-`` and space characters stripped.  Each later element has the form
    ``STEP[_CHILD]<operator>MEASURE`` and is split at the operator reported
    by ``get_operator``.  Elements are grouped by their step prefix (the
    text before the first underscore), in order of first appearance and
    regardless of whether they are adjacent in ``data``.

    A prefix that occurs only once and has a child different from the step
    is not split: its step and child are both reported as the re-joined
    ``STEP_CHILD`` name.

    Every element after the header is expected to contain an operator; the
    function does not handle elements without one.

    Returns:
        A list of strings: the SCN line, then for every step a ``' '``
        separator, a ``STEP`` line and a ``CHILD`` / ``Operator`` /
        ``MEASURE_CHILD`` triple per child.
    """
    child_tag = 'CHILD '

    # Maps step prefix -> flat list of preformatted child lines; a missing
    # key starts out as an empty list.
    parsed = defaultdict(list)

    for line in data[1:]:
        oper = get_operator(line)

        # maxsplit=1: only the first occurrence separates the name from the
        # measurement, so a measurement that itself contains the operator
        # character is kept whole instead of being truncated.
        split_line = line.split(oper, 1)

        # Split the name at the first underscore only, so a child that
        # contains further underscores is preserved in full.
        step, separator, child = split_line[0].partition('_')
        if not separator:
            # No child part: the step name doubles as the child name.
            child = step

        # Entries sharing a step prefix accumulate under the same key.
        parsed[step].append(child_tag + child)
        parsed[step].append('Operator ' + oper)
        parsed[step].append('MEASURE_CHILD ' + split_line[1])

    formatted = ['SCN: ' + data[0].strip('- ')]

    for key, items in parsed.items():
        formatted.append(' ')

        # Name of the first child, i.e. the text after the 'CHILD ' tag.
        child_prefix = items[0][len(child_tag):]

        # Exactly three lines means a single child; if it differs from the
        # step, the prefix was never repeated, so re-join step and child.
        if len(items) == 3 and key != child_prefix:
            key = key + '_' + child_prefix
            items[0] = child_tag + key

        formatted.append('STEP ' + key)
        formatted.extend(items)

    return formatted


# Sample input: a header element followed by NAME<operator>MEASURE entries.
data = [
    '- TEST BEGA',
    'R8=11K(10,15A)B',
    'R9=1K(0,3A)B',
    'R10_R84=13MEG(7,14K)R',
    'R85_R84<100K(970,1000K)R',
    'R85_R86=10K(9,11K)R',
]

# Build the formatted report lines, then write each one to stdout.
new_data = createTreeStandardBloc(data)

for line in new_data:
    print(line)

Answer 2

I modified your solution slightly to split s and c on _ .我稍微修改了您的解决方案以在_上拆分s和c 。

Here is the solution that I came up with:这是我想出的解决方案：

  def createTreeStandardBloc(data=None):
      """Print a measurement list as an SCN / STEP / CHILD report.

      Args:
          data: Optional list of strings.  The first element is the header;
              each later non-blank element has the form
              ``STEP[_CHILD]<operator>MEASURE`` with the operator being one
              of ``=``, ``<`` or ``>``.  When omitted, the built-in sample
              list is used, so calling the function without arguments
              behaves as before.

      A ``STEP`` line is printed only when the step prefix differs from the
      one of the previous element.  An element without a ``_CHILD`` part
      reports the step name as its child.  An element without an operator
      raises ``AttributeError`` because ``match`` returns ``None`` for it.
      """
      if data is None:
          data = ['- TEST BEGA','R8=11K(10,15A)B','R9=1K(0,3A)B','R10_R84=13MEG(7,14K)R','R85_R84<100K(970,1000K)R','R85_R85=10K(9,11K)R', 'R85_R86=10K(9,11K)R']

      # Non-greedy first group: split at the FIRST operator so that an
      # operator character inside the measurement stays in the measurement.
      entry = re.compile("(.*?)([=<>])(.*)")

      last_s = None
      for i, line in enumerate(data):
          if i == 0:
              print("SCN:", line.strip("- "))
          elif line.strip():
              s_c, op, mc = entry.match(line).groups()
              # Split at the first underscore only, keeping any further
              # underscores as part of the child name.
              s, separator, c = s_c.partition('_')
              if not separator:
                  c = s
              if s != last_s:
                  print("STEP", s)
              print("CHILD", c)
              print("Operator", op)
              print("MEASURE_CHILD", mc)
              last_s = s

This printed:这会打印出：

SCN: TEST BEGA
STEP R8
CHILD R8
Operator =
MEASURE_CHILD 11K(10,15A)B
STEP R9
CHILD R9
Operator =
MEASURE_CHILD 1K(0,3A)B
STEP R10
CHILD R84
Operator =
MEASURE_CHILD 13MEG(7,14K)R
STEP R85
CHILD R84
Operator <
MEASURE_CHILD 100K(970,1000K)R
CHILD R85
Operator =
MEASURE_CHILD 10K(9,11K)R
CHILD R86
Operator =
MEASURE_CHILD 10K(9,11K)R

I don't know if this is exactly what you wanted but this at least shows you how you can split s and c without having to use regex.我不知道这是否正是您想要的，但这至少向您展示了如何在不使用正则表达式的情况下拆分s和c 。

Answer 3

I have changed your code to give the right answer.我已更改您的代码以给出正确答案。

import re

# Prints an SCN header followed by STEP / CHILD / Operator / MEASURE_CHILD
# lines for the sample list below.
#
# The loop works one entry behind: the previously parsed entry is kept in
# last_s / last_c / last_op / last_mc, and what gets printed for it depends
# on whether the current entry has the same step prefix.  `repeat` counts
# how many consecutive entries have shared the current prefix.
data = ['- TEST BEGA','R8=11K(10,15A)B','R9=1K(0,3A)B','R10_R84=13MEG(7,14K)R'
        ,'R85_R84<100K(970,1000K)R','R85_R85=10K(9,11K)R', 'R92_R86=10K(9,12K)R']
last_s = last_c = last_op = last_mc = None
repeat = 0
for i, line in enumerate(data):
    if i == 0:
        # Header element: strip surrounding '-' and space characters.
        print("SCN:", line.strip("- "))
    elif i == 1:
        # First entry: only remember it; nothing is printed until the next
        # entry (or the end of the list) is reached.
        # Groups: step prefix, optional "_child" (underscore included),
        # operator, remainder.
        # NOTE(review): the pattern is a non-raw string containing \s and \w;
        # a raw string would avoid invalid-escape warnings.
        last_s, last_c, last_op, last_mc = re.match("^\s*([^_]+)(_\w+)?([<>=])(.*)\s*$", line).groups()
        #last_c = str(last_c)[1:] if last_c != None else last_c           
    elif line.strip():
        s, c, op, mc = re.match("^\s*([^_]+)(_\w+)?([<>=])(.*)\s*$", line).groups()
        #print(s, c, op, mc)
        #print(last_s, last_c, last_op, last_mc)
        if s != last_s:
            # The step prefix changed relative to the remembered entry.
            if repeat > 0:
                 # The remembered entry belonged to a repeated-prefix group.
                 # NOTE(review): CHILD comes from the remembered entry but
                 # Operator / MEASURE_CHILD use op / mc of the CURRENT line,
                 # not last_op / last_mc - confirm this is intended.
                 print("CHILD", last_c or last_s)
                 print("Operator",op)
                 print("MEASURE_CHILD", mc)
            else:
                # The remembered entry stands alone: STEP and CHILD are both
                # printed as prefix + "_child" (last_c still carries its
                # underscore here), or just the prefix when there is no child.
                print("")
                print("STEP", ("" + last_s + last_c if last_c != None else last_s))
                print("CHILD", ("" + last_s + last_c if last_c != None else last_s))
                print("Operator",last_op)
                print("MEASURE_CHILD", last_mc)
            # Remember the current entry and start a fresh group.
            last_s = s
            last_c = c
            last_op = op
            last_mc = mc
            repeat = 0
        else:
            # Same step prefix as the remembered entry.
            if repeat == 0:
                # First repetition: open the STEP section with the bare prefix.
                print("")
                print("STEP", last_s )
            # The conditional expression binds loosest, i.e. this reads as
            #   str(last_c)[1:] if last_c != None else (last_c or last_s)
            # and [1:] drops the first character of the remembered child.
            # NOTE(review): when repeat > 0, last_c was already shortened by
            # the assignment below, so a second character is dropped here;
            # Operator / MEASURE_CHILD again use the CURRENT line's values.
            print("CHILD", str(last_c)[1:] if last_c != None else last_c or last_s)
            print("Operator",op)
            print("MEASURE_CHILD", mc)
            last_s = s
            # Store the current child without its first character.
            last_c = str(c)[1:] if c != None else c
            last_op = op
            last_mc = mc
            repeat += 1


# Flush the entry that is still remembered after the loop ends.
if repeat == 0:
    print("")
    print("STEP", ("" + last_s + last_c if last_c != None else last_s))
    print("CHILD", ("" + last_s + last_c if last_c != None else last_s))
    print("Operator",last_op)
    print("MEASURE_CHILD", last_mc)
else:
    # NOTE(review): last_c was already shortened inside the loop, so [1:]
    # drops another character here; op / mc are the loop variables left over
    # from the final iteration - confirm against expected output.
    print("CHILD", str(last_c)[1:] if last_c != None else last_c or last_s)
    print("Operator",op)
    print("MEASURE_CHILD", mc)

Output:输出：

SCN: TEST BEGA

STEP R8
CHILD R8
Operator =
MEASURE_CHILD 11K(10,15A)B

STEP R9
CHILD R9
Operator =
MEASURE_CHILD 1K(0,3A)B

STEP R10_R84
CHILD R10_R84
Operator =
MEASURE_CHILD 13MEG(7,14K)R

STEP R85
CHILD R84
Operator =
MEASURE_CHILD 10K(9,11K)R
CHILD R85
Operator =
MEASURE_CHILD 10K(9,12K)R

STEP R92_R86
CHILD R92_R86
Operator =
MEASURE_CHILD 10K(9,12K)R

如何用正则表达式匹配数据

问题描述

3 个解决方案

解决方案1
1 2020-03-13 10:51:42

解决方案2
1 2020-03-13 11:07:07

解决方案3
1 已采纳 2020-03-13 11:37:54

如何用正则表达式匹配数据

问题描述

3 个解决方案

解决方案1 1 2020-03-13 10:51:42

解决方案2 1 2020-03-13 11:07:07

解决方案3 1 已采纳 2020-03-13 11:37:54

解决方案1
1 2020-03-13 10:51:42

解决方案2
1 2020-03-13 11:07:07

解决方案3
1 已采纳 2020-03-13 11:37:54