繁体   English   中英

逐行解析 output 时的奇怪行为

[英]Strange behavior when parsing output line by line

我正在运行以下代码:

use strict;
 use warnings;
 use Data::Dumper;
 use File::HomeDir;
 use File::Temp ();
 use File::Spec;
 
 # Read colorsummarizer output from a fixed path, parse every line with one
 # named-capture regex, build a per-cluster record and dump it.
 # NOTE(review): the return value of open is not checked here.
 open my $output, '<', '/tmp/cs.txt';
 
 my @color_clusters;
 my $image_number = 0;
 my $image_name = undef;
 my $last_image_name = '';
 my $line = undef;
 # <$output> is evaluated in list context, so every line is read before the
 # first iteration; $line is localized to the loop for its duration.
 for $line (<$output>) {
     chomp($line);
     print "***${line}***\n";
     # image (file) name -> ^\S+
     # cluster number -> cluster \d,
     # HEX -> hex #([0-9A-Z])6,
     # Cluster Color -> cmyk \d+ \d+ \d+ \d+ []
     # Color Category -> (empty at the moment)
     # Pixels -> f 0.\d+
     # R, G, B -> rgb \d+ \d+ \d+
     # H, S, V -> hsv \d+ \d+ \d+
     # NOTE(review): the success of this match is never tested; everything
     # below reads %+ unconditionally.
     $line =~ m/
         ^(?<IMAGE_NAME>.+) # image file name
         \ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
         \ f\ (?<PIXELS>[-]?[0-9]+[,.]?[0-9]*) # percent of pixels belonging to this cluster
         \ rgb\ (?<RED>\d+)\ (?<GREEN>\d+)\ (?<BLUE>\d+)
         \ hex\ \#(?<HEX>[0-9A-F]+) # Hexadecimal notation used in HTML
         \ hsv\ (?<HUE>\d+)\ (?<SATURATION>\d+)\ (?<VALUE>\d+)
         \ .+\ (?<CLUSTER_COLOR>\w+)\[
     /x;
     # Count images: bump the counter whenever the file name differs from the
     # one seen on the previous line.
     $image_name = $+{IMAGE_NAME};
     if ($last_image_name ne $image_name) {
         $last_image_name = $image_name;
        $image_number++;
     }
     my $cluster_number = int($+{CLUST_NUM}) + 1; # convert to 1 based
     # $pixels is formatted here but never used afterwards; the record below
     # recomputes the percentage straight from $+{PIXELS}.
     my $pixels = $+{PIXELS};
     if ($pixels) {
         $pixels = ''. int((0 + $pixels) * 100). '%'
     }
     my $cluster_color = $+{CLUSTER_COLOR};
     # NOTE(review): when this m/_/ succeeds it becomes the "last successful
     # match", so %+ no longer holds the named groups captured above. Every
     # $+{...} read after this point is then undef (see the dumped records for
     # colours such as 'baby powder'). A failed m/_/ leaves %+ untouched.
     if ($cluster_color =~ m/_/) {
         $cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
     }
     my %color_cluster = (
         image_num => $image_number,
         image_name => $image_name,
         cluster_number => $cluster_number,
         hex_code => $+{HEX},
         cluster_color => $cluster_color,
         color_category => '', # currently empty, will be calculated from HSV values
         pixels => ''. int($+{PIXELS} * 100). '%', # percent of pixels within this cluster
         r => $+{RED}, g => $+{GREEN}, b => $+{BLUE},
         h => $+{HUE}, s => $+{SATURATION}, v => $+{VALUE}
     );
     # NOTE(review): pushing the hash itself flattens it into a list of keys
     # and values; no hash reference is stored.
     push @color_clusters, %color_cluster;
     print Dumper \%color_cluster;
     # The result of this match is not used.
     $line =~ m/^.+$/;
 } # end of for loop

在看起来像这样的输入上:

IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98 -2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99 -1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,255,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,249](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ceramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white
IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3 113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eighth_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey
IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 6 n 60 f 0.0545454545454545 rgb 249 250 243 hex #F9FAF3 hsv 67 3 98 lab 98 -2 3 lch 98 4 114 xyz 0.9 0.95 0.98 cmyk 0 0 3 2 bianca[1402][252,251,243](0.9):eighth_pearl_lusta[3414][249,248,240](1.4):quarter_bianca[6922][249,248,240](1.4):spring_wood[7933][248,246,241](1.9):ceramic[2174][252,255,249](2.0):hint_of_grey[4499][252,255,249](2.0):sea_fog[7554][252,255,249](2.0):wan_white[8990][252,255,249](2.0):orchid_white[6246][255,253,243](2.1):quarter_pearl_lusta[6978][255,253,244](2.1) 10 bianca:ceramic:eighth:fog:hint:lusta:of:orchid:pearl:quarter:sea:spring:wan:wood:grey:white
IMG_0069_result.JPG cluster 7 n 58 f 0.0527272727272727 rgb 250 251 246 hex #FAFBF6 hsv 69 2 98 lab 98 -1 2 lch 98 2 116 xyz 0.9 0.96 1.01 cmyk 0 0 2 2 snow_drift[7811][247,250,247](1.6):baby_powder[1248][254,254,250](1.6):bianca[1402][252,251,243](1.8):quarter_alabaster[6916][247,246,242](1.9):ceramic[2174][252,255,249](1.9):hint_of_grey[4499][252,255,249](1.9):sea_fog[7554][252,255,249](1.9):wan_white[8990][252,255,249](1.9):spring_wood[7933][248,246,241](2.0):eighth_pearl_lusta[3414][249,248,240](2.2) 10 alabaster:baby:bianca:ceramic:drift:eighth:fog:hint:lusta:of:pearl:powder:quarter:sea:snow:spring:wan:wood:grey:white

输入是 colorsummarizer 的输出,这是一个用 Perl 编写的、用于汇总图像颜色的程序 ( http://mkweb.bcgsc.ca/color-summarizer/ )。由于我使用的是 Perl,我本可以直接调用它的库,而不是从 Perl 运行命令行,但我决定运行命令行,因为这样更容易,或者至少应该更容易......

运行上面的代码时,尽管所有行的结构看起来都非常相似,但有些行被正确解析,而另一些则没有。

这是我得到的输出的一部分(STDOUT 和 STDERR 交错):

Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
***IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98 
-2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][
249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_whit
e[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8
):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246
,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue
:white***
$VAR1 = {
          'pixels' => '6%',
          'b' => '240',
          's' => '3',
          'image_name' => 'IMG_0069_result.JPG',
          'image_num' => 1,
          'h' => '67',
          'cluster_number' => 1,
          'color_category' => '',
          'r' => '248',
          'v' => '98',
          'g' => '249',
          'cluster_color' => 'bianca',
          'hex_code' => 'F8F9F0'
        };
***IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99 
-1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,25
5,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,2
49](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](
2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ce
ramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white***
$VAR1 = {
          'cluster_number' => 2,
          'h' => undef,
          'image_num' => 1,
          'image_name' => 'IMG_0069_result.JPG',
          'b' => undef,
          'pixels' => '0%',
          's' => undef,
          'g' => undef,
          'r' => undef,
          'color_category' => '',
          'v' => undef,
          'hex_code' => undef,
          'cluster_color' => 'baby powder'
        };
***IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3 
113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eigh
th_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,
246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey***
$VAR1 = {
          'image_name' => 'IMG_0069_result.JPG',
          'image_num' => 1,
          'h' => '65',
          'cluster_number' => 3,
          'pixels' => '6%',
          'b' => '244',
          's' => '3',
          'hex_code' => 'FAFAF4',
          'cluster_color' => 'bianca',
          'g' => '250',
          'color_category' => '',
          'r' => '250',
          'v' => '98'
        };
***IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
          'cluster_number' => 4,
          'h' => undef,
          'image_name' => 'IMG_0069_result.JPG',
          'image_num' => 1,
          'b' => undef,
          'pixels' => '0%',
          's' => undef,
          'g' => undef,
          'r' => undef,
          'color_category' => '',
          'v' => undef,
          'hex_code' => undef,
          'cluster_color' => 'twilight blue'
        };
***IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
          'b' => '240',
          'pixels' => '5%',
          's' => '4',
          'h' => '65',
          'cluster_number' => 5,
          'image_num' => 1,
          'image_name' => 'IMG_0069_result.JPG',
          'r' => '248',
          'color_category' => '',
          'v' => '97',
          'g' => '249',
          'hex_code' => 'F8F9F0',
          'cluster_color' => 'bianca'
        };
***IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
          's' => '4',
          'pixels' => '5%',
          'b' => '240',
          'image_num' => 1,
          'image_name' => 'IMG_0069_result.JPG',
          'h' => '63',
          'cluster_number' => 6,
          'v' => '98',
          'color_category' => '',
          'r' => '249',
          'g' => '249',
          'hex_code' => 'F9F9F0',
          'cluster_color' => 'bianca'
        };
...

我找到了问题的原因。 问题出在以下几行:

     if ($cluster_color =~ m/_/) { # a successful match here resets %+ and the other capture variables
         $cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
     }

在 $cluster_color 中含有下划线的输出行上,这几行代码(准确地说,是其中成功匹配的 m/_/)改变了捕获缓冲区,因此重置了其他捕获组。 解决方案是将每个捕获组分配给自己的变量,并在所有捕获组都分配给各自的变量之后,再执行上面的 3 行。

所以,直接把捕获组赋值给 hash 是个坏主意 :-) 我还改进了我的正则表达式,使其更精确,不过这并不是问题的原因;真正的原因只是 ... =~ tr/_/\ /; 所在的那几行改变了捕获缓冲区。

下面是我可以正常工作的代码:

 use strict;
 use warnings;
 use Data::Dumper;
 use File::HomeDir;
 use File::Temp ();
 use File::Spec;
 
 # Parse colorsummarizer output (one cluster per line) from a fixed input
 # file. Each parsable line is echoed, turned into a record (hash) and dumped;
 # a reference to the record is collected in @color_clusters.
 my $input_path = '/tmp/cs8.txt';
 open my $output, '<', $input_path
     or die "Cannot open $input_path: $!";
 
 my @color_clusters;          # one hash reference per parsed cluster line
 my $image_number = 0;        # 1-based index of the current image
 my $last_image_name = '';    # image name seen on the previous parsed line
 
 # Fields extracted from a line:
 #   IMAGE_NAME            everything before " cluster "
 #   CLUST_NUM             0-based cluster index
 #   PIXELS                fraction of pixels in the cluster ("f 0.06...")
 #   RED/GREEN/BLUE        "rgb r g b"
 #   HEX                   six hex digits after "hex #"
 #   HUE/SATURATION/VALUE  "hsv h s v"
 #   CLUSTER_COLOR         last space-preceded word that is followed by '['
 my $cluster_line_re = qr/
     ^(?<IMAGE_NAME>.+) # image file name
     \ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
     \ f\ (?<PIXELS>[0-9]+\.?[0-9]*) # percent of pixels belonging to this cluster
     \ rgb\ (?<RED>[0-9]{1,3})\ (?<GREEN>[0-9]{1,3})\ (?<BLUE>[0-9]{1,3})
     \ hex\ \#(?<HEX>[0-9A-F]{6}) # Hexadecimal notation used in HTML
     \ hsv\ (?<HUE>[0-9]{1,3})\ (?<SATURATION>[0-9]{1,3})\ (?<VALUE>[0-9]{1,3})
     \ .+\ (?<CLUSTER_COLOR>\w+)\[
 /x;
 
 # Read line by line instead of slurping the whole file into a list.
 while (my $line = <$output>) {
     chomp($line);
     print "***${line}***\n";
 
     # Never read %+ after a failed match: report the line and move on.
     if ($line !~ $cluster_line_re) {
         warn "Skipping unparsable line $.: $line\n";
         next;
     }
 
     # Copy the named captures immediately. Any later successful pattern
     # match would replace the contents of %+.
     my %field = %+;
 
     # A change of file name means a new image has started.
     my $image_name = $field{IMAGE_NAME};
     if ($last_image_name ne $image_name) {
         $last_image_name = $image_name;
         $image_number++;
     }
 
     # tr/// is not a pattern match, so it leaves the capture variables alone
     # and needs no m/_/ guard in front of it.
     (my $cluster_color = $field{CLUSTER_COLOR}) =~ tr/_/ /;
 
     my %color_cluster = (
         image_num => $image_number,
         image_name => $image_name,
         cluster_number => $field{CLUST_NUM} + 1, # convert to 1 based
         hex_code => $field{HEX},
         cluster_color => $cluster_color,
         color_category => '', # currently empty, will be calculated from HSV values
         pixels => int($field{PIXELS} * 100) . '%', # percent of pixels within this cluster
         r => $field{RED}, g => $field{GREEN}, b => $field{BLUE},
         h => $field{HUE}, s => $field{SATURATION}, v => $field{VALUE},
     );
 
     # Store a reference so each cluster stays one record; pushing the hash
     # itself would flatten it into an unordered key/value list.
     push @color_clusters, \%color_cluster;
     print Dumper \%color_cluster;
 } # end of while loop
 close $output;

非常感谢@Yunnosch 的评论,他们给了我关于正确方向的提示。

干杯,阿萨夫

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM