繁体   English   中英

用Python正则表达式解析文本文件,提取列表中的项目并统计项目个数

[英]Python regex to parse text file, get the items in list and count the list

我有一个包含一些数据的文本文件。

我特别想统计v_dims中的项目个数。

我的文本文件中的v_dims模式如下所示:

v_dims={
"Sales",
"Product Family",
"Sales Organization",
"Region",
"Sales Area",
"Sales office",
"Sales Division",
"Sales Person",
"Sales Channel",
"Sales Order Type",
"Sales Number",
"Sales Person",
"Sales Quantity",
"Sales Amount"
}

因此,我正在考虑获取v_dims中的所有元素并将其转储到Python列表中。 然后计算len(mylist)以获取项目数。

面临的挑战是从我的文本文件中获取v_dims的所有元素,并将它们放在空列表中。

我对文本文件中v_dims中的项目特别感兴趣。 该文本文件包含我在原始帖子中显示的v_dims模式形式的数据。 一些数据具有v_dims的嵌套模式。 谢谢。

下面是我尝试过但没有成功的代码。 非常感谢任何帮助。 TIA。

import re

# Path of the file to scan.
# NOTE(review): this is a normal (non-raw) string literal, so "\U" is read as
# the start of a \UXXXXXXXX unicode escape and Python 3 rejects the line with
# a SyntaxError; a raw string (r"...") or forward slashes would avoid that.
fname = "C:\Users\XXXX\Test.mrk"
with open(fname, "r") as fo: 
    # Read the whole file into one string so the pattern can be applied to it.
    content_as_string = fo.read()
    # Attempt to capture the v_dims items. This finds nothing in the sample
    # data: the pattern wants a double quote immediately after "{" and
    # immediately before "}", but in the file they are separated by newlines,
    # and "." does not match a newline unless re.DOTALL is given.
    match = re.findall(r'v_dims={\"(.+?)\"}',content_as_string)

尽管我的文本文件很大,但这是我的文本文件结构的一小段

version "1";
// Computer generated object language file
object 'MRKR' "Main" {
    Data_Type=2,
    HeaderBlock={
    Version_String="6.3 (25)"
},
    Printer_Info={
    Orientation=0,
    Page_Width=8.50000000,
    Page_Height=11.00000000,
    Page_Header="",
    Page_Footer="",
    Margin_type=0,
    Top_Margin=0.50000000,
    Left_Margin=0.50000000,
    Bottom_Margin=0.50000000,
    Right_Margin=0.50000000
},
    Marker_Options={
    Close_All="TRUE",
    Hide_Console="FALSE",
    Console_Left="FALSE",
    Console_Width=217,
    Main_Style="Maximized",
    MDI_Rect={
    0,
    0,
    892,
    1063
    }
},
    Dives={
    {
    Dive="A",
    Windows={
    {
    View_Index=0,
    Window_Info={
    Window_Rect={
    0,
    -288,
    400,
    1008
    },
    Window_Style="Maximized Front",
    Window_Name="Theater [Previous Qtr Diveplan-Dive A]"
},
    Dependent_bool="FALSE",
    Colset={
    Dive_Type="Normal",
    Dimension_Name="Theater",
    Action_List={
    Actions={
    {
    Action_Type="Select",
    select_type=5
},
    {
    Action_Type="Select",
    select_type=0,
    Key_Names={
    "Theater"
    },
    Key_Indexes={
    {
    "AMERICAS"
    }
    }
},
    {
    Action_Type="Focus",
    Focus_Rows="True"
},
    {
    Action_Type="Dimensions",
    v_dims={
    "Theater",
    "Product Family",
    "Division",
    "Region",
    "Install at Country Name",
    "Connect Home Type",
    "Connect In Type",
    "SymmConnect Enabled",
    "Connect Home Refusal Reason",
    "Sales Order Channel Type",
    "Maintained By Group",
    "PS Flag",
    "Avalanche Flag",
    "Product Item Family"
    },
    Xtab_Bool="False",
    Xtab_Flip="False"
},
    {
    Action_Type="Select",
    select_type=5
},
    {
    Action_Type="Select",
    select_type=0,
    Key_Names={
    "Theater",
    "Product Family",
    "Division",
    "Region",
    "Install at Country Name",
    "Connect Home Type",
    "Connect In Type",
    "SymmConnect Enabled",
    "Connect Home Refusal Reason",
    "Sales Order Channel Type",
    "Maintained By Group",
    "PS Flag",
    "Avalanche Flag"
    },
    Key_Indexes={
    {
    "AMERICAS",
    "ATMOS",
    "Latin America CS Division",
    "37000 CS Region",
    "Mexico",
    "",
    "",
    "",
    "",
    "DIRECT",
    "EMC",
    "N",
    "0"
    }
    }
}
    }
},
    Num_Palette_cols=0,
    Num_Palette_rows=0
},
    Format={
    Window_Type="Tabular",
    Tabular={
    Num_row_labels=8
}
}
}
    }
}
    },
    Widget_Set={
    Widget_Layout="Vertical",
    Go_Button=1,
    Picklist_Width=0,
    Sort_Subset_Dimensions="TRUE",
    Order={

    }
},
    Views={
    {
    Data_Type=1,
    dbname="Previous Qtr Diveplan",
    diveline_dbname="Current Qtr Diveplan",
    logical_name="Current Qtr Diveplan",
    cols={
    {
    name="Total TSS installs",
    column_type="Calc[Total TSS installs]",
    output_type="Number",
    format_string="."
},
    {
    name="TSS Valid Connectivity Records",
    column_type="Calc[TSS Valid Connectivity Records]",
    output_type="Number",
    format_string="."
},
    {
    name="% TSS Connectivity Record",
    column_type="Calc[% TSS Connectivity Record]",
    output_type="Number"
},
    {
    name="TSS Not Applicable",
    column_type="Calc[TSS Not Applicable]",
    output_type="Number",
    format_string="."
},
    {
    name="TSS Customer Refusals",
    column_type="Calc[TSS Customer Refusals]",
    output_type="Number",
    format_string="."
},
    {
    name="% TSS Refusals",
    column_type="Calc[% TSS Refusals]",
    output_type="Number"
},
    {
    name="TSS Eligible for Physical Connectivity",
    column_type="Calc[TSS Eligible for Physical Connectivity]",
    output_type="Number",
    format_string="."
},
    {
    name="TSS Boxes with Physical Connectivty",
    column_type="Calc[TSS Boxes with Physical Connectivty]",
    output_type="Number",
    format_string="."
},
    {
    name="% TSS Physical Connectivity",
    column_type="Calc[% TSS Physical Connectivity]",
    output_type="Number"
}
    },
    dim_cols={
    {
    name="Model",
    column_type="Dimension[Model]",
    output_type="None"
},
    {
    name="Model",
    column_type="Dimension[Model]",
    output_type="None"
},
    {
    name="Connect In Type",
    column_type="Dimension[Connect In Type]",
    output_type="None"
},
    {
    name="Connect Home Type",
    column_type="Dimension[Connect Home Type]",
    output_type="None"
},
    {
    name="SymmConnect Enabled",
    column_type="Dimension[SymmConnect Enabled]",
    output_type="None"
},
    {
    name="Theater",
    column_type="Dimension[Theater]",
    output_type="None"
},
    {
    name="Division",
    column_type="Dimension[Division]",
    output_type="None"
},
    {
    name="Region",
    column_type="Dimension[Region]",
    output_type="None"
},
    {
    name="Sales Order Number",
    column_type="Dimension[Sales Order Number]",
    output_type="None"
},
    {
    name="Product Item Family",
    column_type="Dimension[Product Item Family]",
    output_type="None"
},
    {
    name="Item Serial Number",
    column_type="Dimension[Item Serial Number]",
    output_type="None"
},
    {
    name="Sales Order Deal Number",
    column_type="Dimension[Sales Order Deal Number]",
    output_type="None"
},
    {
    name="Item Install Date",
    column_type="Dimension[Item Install Date]",
    output_type="None"
},
    {
    name="SYR Last Dial Home Date",
    column_type="Dimension[SYR Last Dial Home Date]",
    output_type="None"
},
    {
    name="Maintained By Group",
    column_type="Dimension[Maintained By Group]",
    output_type="None"
},
    {
    name="PS Flag",
    column_type="Dimension[PS Flag]",
    output_type="None"
},
    {
    name="Connect Home Refusal Reason",
    column_type="Dimension[Connect Home Refusal Reason]",
    output_type="None",
    col_width=177
},
    {
    name="Cust Name",
    column_type="Dimension[Cust Name]",
    output_type="None"
},
    {
    name="Sales Order Channel Type",
    column_type="Dimension[Sales Order Channel Type]",
    output_type="None"
},
    {
    name="Sales Order Type",
    column_type="Dimension[Sales Order Type]",
    output_type="None"
},
    {
    name="Part Model Key",
    column_type="Dimension[Part Model Key]",
    output_type="None"
},
    {
    name="Ship Date",
    column_type="Dimension[Ship Date]",
    output_type="None"
},
    {
    name="Model Number",
    column_type="Dimension[Model Number]",
    output_type="None"
},
    {
    name="Item Description",
    column_type="Dimension[Item Description]",
    output_type="None"
},
    {
    name="Customer Classification",
    column_type="Dimension[Customer Classification]",
    output_type="None"
},
    {
    name="CS Customer Name",
    column_type="Dimension[CS Customer Name]",
    output_type="None"
},
    {
    name="Install At Customer Number",
    column_type="Dimension[Install At Customer Number]",
    output_type="None"
},
    {
    name="Install at Country Name",
    column_type="Dimension[Install at Country Name]",
    output_type="None"
},
    {
    name="TLA Serial Number",
    column_type="Dimension[TLA Serial Number]",
    output_type="None"
},
    {
    name="Product Version",
    column_type="Dimension[Product Version]",
    output_type="None"
},
    {
    name="Avalanche Flag",
    column_type="Dimension[Avalanche Flag]",
    output_type="None"
},
    {
    name="Product Family",
    column_type="Dimension[Product Family]",
    output_type="None"
},
    {
    name="Project Number",
    column_type="Dimension[Project Number]",
    output_type="None"
},
    {
    name="PROJECT_STATUS",
    column_type="Dimension[PROJECT_STATUS]",
    output_type="None"
}
    },
    Available_Columns={
    "Total TSS installs",
    "TSS Valid Connectivity Records",
    "% TSS Connectivity Record",
    "TSS Not Applicable",
    "TSS Customer Refusals",
    "% TSS Refusals",
    "TSS Eligible for Physical Connectivity",
    "TSS Boxes with Physical Connectivty",
    "% TSS Physical Connectivity",
    "Total Installs",
    "All Boxes with Valid Connectivty Record",
    "% All Connectivity Record",
    "Overall Refusals",
    "Overall Refusals %",
    "All Eligible for Physical Connectivty",
    "Boxes with Physical Connectivity",
    "% All with Physical Conectivity"
    },
    Remaining_columns={
    {
    name="Total Installs",
    column_type="Calc[Total Installs]",
    output_type="Number",
    format_string="."
},
    {
    name="All Boxes with Valid Connectivty Record",
    column_type="Calc[All Boxes with Valid Connectivty Record]",
    output_type="Number",
    format_string="."
},
    {
    name="% All Connectivity Record",
    column_type="Calc[% All Connectivity Record]",
    output_type="Number"
},
    {
    name="Overall Refusals",
    column_type="Calc[Overall Refusals]",
    output_type="Number",
    format_string="."
},
    {
    name="Overall Refusals %",
    column_type="Calc[Overall Refusals %]",
    output_type="Number"
},
    {
    name="All Eligible for Physical Connectivty",
    column_type="Calc[All Eligible for Physical Connectivty]",
    output_type="Number"
},
    {
    name="Boxes with Physical Connectivity",
    column_type="Calc[Boxes with Physical Connectivity]",
    output_type="Number"
},
    {
    name="% All with Physical Conectivity",
    column_type="Calc[% All with Physical Conectivity]",
    output_type="Number"
}
    },
    calcs={
    {
    name="Total TSS installs",
    definition="Total[Total TSS installs]",
    ts_flag="Not TS Calc"
},
    {
    name="TSS Valid Connectivity Records",
    definition="Total[PS Boxes w/ valid connectivity record (1=yes)]",
    ts_flag="Not TS Calc"
},
    {
    name="% TSS Connectivity Record",
    definition="Total[PS Boxes w/ valid connectivity record (1=yes)] /Total[Total TSS installs]",
    ts_flag="Not TS Calc"
},
    {
    name="TSS Not Applicable",
    definition="Total[Bozes w/ valid connectivity record (1=yes)]-Total[Boxes Eligible (1=yes)]-Total[TSS Refusals]",
    ts_flag="Not TS Calc"
},
    {
    name="TSS Customer Refusals",
    definition="Total[TSS Refusals]",
    ts_flag="Not TS Calc"
},
    {
    name="% TSS Refusals",
    definition="Total[TSS Refusals]/Total[PS Boxes w/ valid connectivity record (1=yes)]",
    ts_flag="Not TS Calc"
},
    {
    name="TSS Eligible for Physical Connectivity",
    definition="Total[TSS Eligible]-Total[Exception]",
    ts_flag="Not TS Calc"
},
    {
    name="TSS Boxes with Physical Connectivty",
    definition="Total[PS Physical Connectivity] - Total[PS Physical Connectivity, SymmConnect Enabled=\"Capable not enabled\"]",
    ts_flag="Not TS Calc"
},
    {
    name="% TSS Physical Connectivity",
    definition="Total[Boxes w/ phys conn]/Total[Boxes Eligible (1=yes)]",
    ts_flag="Not TS Calc"
},
    {
    name="Total Installs",
    definition="Total[Total Installs]",
    ts_flag="Not TS Calc"
},
    {
    name="All Boxes with Valid Connectivty Record",
    definition="Total[Bozes w/ valid connectivity record (1=yes)]",
    ts_flag="Not TS Calc"
},
    {
    name="% All Connectivity Record",
    definition="Total[Bozes w/ valid connectivity record (1=yes)]/Total[Total Installs]",
    ts_flag="Not TS Calc"
},
    {
    name="Overall Refusals",
    definition="Total[Overall Refusals]",
    ts_flag="Not TS Calc"
},
    {
    name="Overall Refusals %",
    definition="Total[Overall Refusals]/Total[Bozes w/ valid connectivity record (1=yes)]",
    ts_flag="Not TS Calc"
},
    {
    name="All Eligible for Physical Connectivty",
    definition="Total[Boxes Eligible (1=yes)]-Total[Exception]",
    ts_flag="Not TS Calc"
},
    {
    name="Boxes with Physical Connectivity",
    definition="Total[Boxes w/ phys conn]-Total[Boxes w/ phys conn,SymmConnect Enabled=\"Capable not enabled\"]",
    ts_flag="Not TS Calc"
},
    {
    name="% All with Physical Conectivity",
    definition="Total[Boxes w/ phys conn]/Total[Boxes Eligible (1=yes)]",
    ts_flag="Not TS Calc"
}
    },
    merge_type="consolidate",
    merge_dbs={
    {
    dbname="connectivityallproducts.mdl",
    diveline_dbname="/DI_PSREPORTING/connectivityallproducts.mdl"
}
    },
    skip_constant_columns="FALSE",
    categories={
    {
    name="Geography",
    dimensions={
    "Theater",
    "Division",
    "Region",
    "Install at Country Name"
    }
},
    {
    name="Mappings and Flags",
    dimensions={
    "Connect Home Type",
    "Connect In Type",
    "SymmConnect Enabled",
    "Connect Home Refusal Reason",
    "Sales Order Channel Type",
    "Maintained By Group",
    "Customer Installable",
    "PS Flag",
    "Top Level Flag",
    "Avalanche Flag"
    }
},
    {
    name="Product Information",
    dimensions={
    "Product Family",
    "Product Item Family",
    "Product Version",
    "Item Description"
    }
},
    {
    name="Sales Order Info",
    dimensions={
    "Sales Order Deal Number",
    "Sales Order Number",
    "Sales Order Type"
    }
},
    {
    name="Dates",
    dimensions={
    "Item Install Date",
    "Ship Date",
    "SYR Last Dial Home Date"
    }
},
    {
    name="Details",
    dimensions={
    "Item Serial Number",
    "TLA Serial Number",
    "Part Model Key",
    "Model Number"
    }
},
    {
    name="Customer Infor",
    dimensions={
    "CS Customer Name",
    "Install At Customer Number",
    "Customer Classification",
    "Cust Name"
    }
},
    {
    name="Other Dimensions",
    dimensions={
    "Model"
    }
}
    },
    Maintain_Category_Order="FALSE",
    popup_info="false"
}
    }
};

这回答了您如何使用正则表达式匹配的问题。 但是,正如Mark所指出的,可能有一种更聪明的方法。

>>> import re
>>> f = open("/tmp/a")
>>> lines = f.readlines()
>>> f.close()
>>> items = []
>>> for line in lines:
...     m = re.search("\"(.+)\",$", line)
...     if m:
...             items.append(m.group(1))
>>> items
['Sales', 'Product Family', 'Sales Organization', 'Region', 'Sales Area', 'Sales office', 'Sales Division', 'Sales Person', 'Sales Channel', 'Sales Order Type', 'Sales Number', 'Sales Person', 'Sales Quantity']
  • 在这里,我们从文本文件中找到v_dims元素。
    • 去掉数据中多余的空白字符。
    • 将数据转换为列表。
    • 计算列表的长度。

import re

# One double-quoted item. A backslash escapes the following character, so an
# embedded \" does not end the item.
ITEM_RX = re.compile(r'"((?:[^"\\]|\\.)*)"')

# One `v_dims={ ... }` list. The body is a run of quoted items and separator
# characters; braces are only allowed inside quoted items, so the match ends
# at the list's own closing brace whether or not a comma follows it.
V_DIMS_RX = re.compile(r'\bv_dims\s*=\s*\{((?:[^"{}]|"(?:[^"\\]|\\.)*")*)\}')


def extract_v_dims(text):
    """Return the items of every v_dims list found in *text*.

    The result is a list with one entry per ``v_dims={...}`` block, in file
    order; each entry is the list of that block's item strings. Items are
    returned exactly as written between the quotes: inner spaces are kept and
    backslash escapes are not decoded. An empty list is returned when *text*
    contains no v_dims block.

    The file content is only pattern-matched, never evaluated, so a crafted
    file cannot execute code.
    """
    return [ITEM_RX.findall(body) for body in V_DIMS_RX.findall(text)]


if __name__ == "__main__":
    # `fname` is the path of the file to scan; this snippet does not define
    # it, so it must be set before this point.
    with open(fname, "r") as fp:
        data = fp.read()

    all_v_dims = extract_v_dims(data)
    # Items of the first v_dims block (empty when the file has none).
    v_dims_list = all_v_dims[0] if all_v_dims else []

如果你想要长度

len(v_dims_list)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM