[英]Strange behavior when parsing output line by line
我正在运行以下代码:
# Reproduction script from the question: reads /tmp/cs.txt, matches every line
# against a named-capture pattern, builds one hash per line from the captures
# and dumps it with Data::Dumper.
use strict;
use warnings;
use Data::Dumper;
use File::HomeDir;
use File::Temp ();
use File::Spec;
# NOTE(review): the result of open is not checked; if the file cannot be
# opened the loop below simply reads nothing.
open my $output, '<', '/tmp/cs.txt';
my @color_clusters;
my $image_number = 0;
my $image_name = undef;
my $last_image_name = '';
my $line = undef;
# <$output> is evaluated in list context here, so all lines are read before
# the first iteration starts.
for $line (<$output>) {
chomp($line);
print "***${line}***\n";
# image (file) name -> ^\S+
# cluster number -> cluster \d,
# HEX -> hex #([0-9A-Z])6,
# Cluster Color -> cmyk \d+ \d+ \d+ \d+ []
# Color Category -> (empty at the moment)
# Pixels -> f 0.\d+
# R, G, B -> rgb \d+ \d+ \d+
# H, S, V -> hsv \d+ \d+ \d+
# The match result is not tested; the code below reads %+ unconditionally.
$line =~ m/
^(?<IMAGE_NAME>.+) # image file name
\ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
\ f\ (?<PIXELS>[-]?[0-9]+[,.]?[0-9]*) # percent of pixels belonging to this cluster
\ rgb\ (?<RED>\d+)\ (?<GREEN>\d+)\ (?<BLUE>\d+)
\ hex\ \#(?<HEX>[0-9A-F]+) # Hexadecimal notation used in HTML
\ hsv\ (?<HUE>\d+)\ (?<SATURATION>\d+)\ (?<VALUE>\d+)
\ .+\ (?<CLUSTER_COLOR>\w+)\[
/x;
# Count a new image every time the captured file name changes.
$image_name = $+{IMAGE_NAME};
if ($last_image_name ne $image_name) {
$last_image_name = $image_name;
$image_number++;
}
my $cluster_number = int($+{CLUST_NUM}) + 1; # convert to 1 based
my $pixels = $+{PIXELS};
if ($pixels) {
$pixels = ''. int((0 + $pixels) * 100). '%'
}
my $cluster_color = $+{CLUSTER_COLOR};
# NOTE(review): this is where the captures get lost. %+ always refers to the
# last *successful* match, so when m/_/ succeeds (colour name contains '_')
# the named captures of the big pattern above are no longer available and the
# $+{...} reads in the hash below yield undef. When m/_/ fails, %+ is left
# untouched, which is why names without '_' are parsed correctly.
# tr/// is not a pattern match and does not affect %+.
if ($cluster_color =~ m/_/) {
$cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
}
# The local $pixels computed above is not used here; 'pixels' is recomputed
# from $+{PIXELS}.
my %color_cluster = (
image_num => $image_number,
image_name => $image_name,
cluster_number => $cluster_number,
hex_code => $+{HEX},
cluster_color => $cluster_color,
color_category => '', # currently empty, will be calculated from HSV values
pixels => ''. int($+{PIXELS} * 100). '%', # percent of pixels within this cluster
r => $+{RED}, g => $+{GREEN}, b => $+{BLUE},
h => $+{HUE}, s => $+{SATURATION}, v => $+{VALUE}
);
# NOTE(review): pushing %color_cluster flattens it into a key/value list;
# @color_clusters receives loose scalars, not one record per line.
push @color_clusters, %color_cluster;
print Dumper \%color_cluster;
# The result of this match is not used anywhere.
$line =~ m/^.+$/;
} # end of for loop
在看起来像这样的输入上:
IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98 -2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99 -1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,255,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,249](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ceramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white
IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3 113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eighth_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey
IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white
IMG_0069_result.JPG cluster 6 n 60 f 0.0545454545454545 rgb 249 250 243 hex #F9FAF3 hsv 67 3 98 lab 98 -2 3 lch 98 4 114 xyz 0.9 0.95 0.98 cmyk 0 0 3 2 bianca[1402][252,251,243](0.9):eighth_pearl_lusta[3414][249,248,240](1.4):quarter_bianca[6922][249,248,240](1.4):spring_wood[7933][248,246,241](1.9):ceramic[2174][252,255,249](2.0):hint_of_grey[4499][252,255,249](2.0):sea_fog[7554][252,255,249](2.0):wan_white[8990][252,255,249](2.0):orchid_white[6246][255,253,243](2.1):quarter_pearl_lusta[6978][255,253,244](2.1) 10 bianca:ceramic:eighth:fog:hint:lusta:of:orchid:pearl:quarter:sea:spring:wan:wood:grey:white
IMG_0069_result.JPG cluster 7 n 58 f 0.0527272727272727 rgb 250 251 246 hex #FAFBF6 hsv 69 2 98 lab 98 -1 2 lch 98 2 116 xyz 0.9 0.96 1.01 cmyk 0 0 2 2 snow_drift[7811][247,250,247](1.6):baby_powder[1248][254,254,250](1.6):bianca[1402][252,251,243](1.8):quarter_alabaster[6916][247,246,242](1.9):ceramic[2174][252,255,249](1.9):hint_of_grey[4499][252,255,249](1.9):sea_fog[7554][252,255,249](1.9):wan_white[8990][252,255,249](1.9):spring_wood[7933][248,246,241](2.0):eighth_pearl_lusta[3414][249,248,240](2.2) 10 alabaster:baby:bianca:ceramic:drift:eighth:fog:hint:lusta:of:pearl:powder:quarter:sea:snow:spring:wan:wood:grey:white
输入是 colorsummarizer 的输出,这是一个用 Perl 编写的、用于汇总图像颜色的程序( http://mkweb.bcgsc.ca/color-summarizer/ )。由于我使用的是 Perl,我可以直接调用库而不是从 Perl 运行命令行,但我决定运行命令行,因为它更容易或至少应该更容易......
运行上面的代码时,尽管所有行的结构看起来都非常相似,但有些行被正确解析,而另一些则没有。
这是我得到的输出的一部分(STDOUT 和 STDERR 交错):
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
Use of uninitialized value $+{"PIXELS"} in multiplication (*) at /tmp/1.pl line 59, <$output> line 8.
***IMG_0069_result.JPG cluster 0 n 69 f 0.0627272727272727 rgb 248 249 240 hex #F8F9F0 hsv 67 3 98 lab 98
-2 4 lch 98 4 114 xyz 0.88 0.94 0.96 cmyk 0 0 3 2 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][
249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_whit
e[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8
):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246
,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue
:white***
$VAR1 = {
'pixels' => '6%',
'b' => '240',
's' => '3',
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'h' => '67',
'cluster_number' => 1,
'color_category' => '',
'r' => '248',
'v' => '98',
'g' => '249',
'cluster_color' => 'bianca',
'hex_code' => 'F8F9F0'
};
***IMG_0069_result.JPG cluster 1 n 67 f 0.0609090909090909 rgb 251 252 247 hex #FBFCF7 hsv 66 2 99 lab 99
-1 2 lch 99 3 114 xyz 0.92 0.97 1.02 cmyk 0 0 2 1 baby_powder[1248][254,254,250](1.3):ceramic[2174][252,25
5,249](1.6):hint_of_grey[4499][252,255,249](1.6):sea_fog[7554][252,255,249](1.6):wan_white[8990][252,255,2
49](1.6):snow_drift[7811][247,250,247](1.7):bianca[1402][252,251,243](1.9):black_white[1483][255,254,246](
2.1):romance[7283][255,254,253](2.1):quarter_alabaster[6916][247,246,242](2.2) 10 alabaster:baby:bianca:ce
ramic:drift:fog:hint:of:powder:quarter:romance:sea:snow:wan:black:grey:white***
$VAR1 = {
'cluster_number' => 2,
'h' => undef,
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'b' => undef,
'pixels' => '0%',
's' => undef,
'g' => undef,
'r' => undef,
'color_category' => '',
'v' => undef,
'hex_code' => undef,
'cluster_color' => 'baby powder'
};
***IMG_0069_result.JPG cluster 2 n 66 f 0.06 rgb 250 250 244 hex #FAFAF4 hsv 65 3 98 lab 98 -1 3 lch 98 3
113 xyz 0.9 0.95 0.99 cmyk 0 0 3 2 bianca[1402][252,251,243](1.1):spring_wood[7933][248,246,241](1.5):eigh
th_pearl_lusta[3414][249,248,240](1.6):quarter_bianca[6922][249,248,240](1.6):quarter_alabaster[6916][247,
246,242](1.8):bridal_heath[1713][255,250,244](2.0):baby_powder[1248][254,254,250](2.1):snow_drift[7811][247,250,247](2.1):ceramic[2174][252,255,249](2.1):hint_of_grey[4499][252,255,249](2.1) 10 alabaster:baby:bianca:bridal:ceramic:drift:eighth:heath:hint:lusta:of:pearl:powder:quarter:snow:spring:wood:grey***
$VAR1 = {
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'h' => '65',
'cluster_number' => 3,
'pixels' => '6%',
'b' => '244',
's' => '3',
'hex_code' => 'FAFAF4',
'cluster_color' => 'bianca',
'g' => '250',
'color_category' => '',
'r' => '250',
'v' => '98'
};
***IMG_0069_result.JPG cluster 3 n 65 f 0.0590909090909091 rgb 245 247 236 hex #F5F7EC hsv 66 4 97 lab 97 -2 5 lch 97 6 114 xyz 0.86 0.92 0.92 cmyk 0 0 4 3 twilight_blue[8616][244,246,236](1.0):filmpro_white[3624][249,246,237](1.6):half_bianca[4292][246,243,233](1.8):half_orchid_white[4363][247,244,234](1.8):eighth_pearl_lusta[3414][249,248,240](1.9):quarter_bianca[6922][249,248,240](1.9):glistening_white[3874][244,244,236](2.1):quarter_rice_cake[6986][246,244,237](2.1):ecru_white[3358][245,243,229](2.2):joanna[4771][245,243,229](2.2) 10 bianca:cake:ecru:eighth:filmpro:glistening:half:joanna:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
'cluster_number' => 4,
'h' => undef,
'image_name' => 'IMG_0069_result.JPG',
'image_num' => 1,
'b' => undef,
'pixels' => '0%',
's' => undef,
'g' => undef,
'r' => undef,
'color_category' => '',
'v' => undef,
'hex_code' => undef,
'cluster_color' => 'twilight blue'
};
***IMG_0069_result.JPG cluster 4 n 61 f 0.0554545454545455 rgb 248 249 240 hex #F8F9F0 hsv 65 4 97 lab 98 -2 4 lch 98 5 113 xyz 0.88 0.94 0.96 cmyk 0 0 4 3 bianca[1402][252,251,243](1.0):eighth_pearl_lusta[3414][249,248,240](1.1):quarter_bianca[6922][249,248,240](1.1):filmpro_white[3624][249,246,237](1.4):orchid_white[6246][255,253,243](1.8):quarter_pearl_lusta[6978][255,253,244](1.8):twilight_blue[8616][244,246,236](1.8):glistening_white[3874][244,244,236](1.9):quarter_rice_cake[6986][246,244,237](1.9):half_bianca[4292][246,243,233](2.0) 10 bianca:cake:eighth:filmpro:glistening:half:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
'b' => '240',
'pixels' => '5%',
's' => '4',
'h' => '65',
'cluster_number' => 5,
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'r' => '248',
'color_category' => '',
'v' => '97',
'g' => '249',
'hex_code' => 'F8F9F0',
'cluster_color' => 'bianca'
};
***IMG_0069_result.JPG cluster 5 n 60 f 0.0545454545454545 rgb 249 249 240 hex #F9F9F0 hsv 63 4 98 lab 98 -2 4 lch 98 5 111 xyz 0.89 0.94 0.96 cmyk 0 0 4 2 bianca[1402][252,251,243](0.7):eighth_pearl_lusta[3414][249,248,240](0.9):quarter_bianca[6922][249,248,240](0.9):filmpro_white[3624][249,246,237](1.1):orchid_white[6246][255,253,243](1.6):quarter_pearl_lusta[6978][255,253,244](1.6):floral_white[3694][255,250,240](1.7):glistening_white[3874][244,244,236](1.8):quarter_rice_cake[6986][246,244,237](1.8):twilight_blue[8616][244,246,236](1.9) 10 bianca:cake:eighth:filmpro:floral:glistening:lusta:orchid:pearl:quarter:rice:twilight:blue:white***
$VAR1 = {
's' => '4',
'pixels' => '5%',
'b' => '240',
'image_num' => 1,
'image_name' => 'IMG_0069_result.JPG',
'h' => '63',
'cluster_number' => 6,
'v' => '98',
'color_category' => '',
'r' => '249',
'g' => '249',
'hex_code' => 'F9F9F0',
'cluster_color' => 'bianca'
};
...
我找到了问题的原因。 问题出在以下几行:
if ($cluster_color =~ m/_/) {
$cluster_color =~ tr/_/\ /; # replace '_' with space (' ')
}
在 $cluster_color 中有下划线的输出行上,条件里的 $cluster_color =~ m/_/ 匹配成功;Perl 的捕获变量(包括 %+)始终指向最近一次成功的匹配,因此这次匹配重置了之前的所有捕获组。tr/_/\ / 本身不是正则匹配,不会影响捕获变量;没有下划线的行上 m/_/ 匹配失败,捕获组保持不变,所以这些行能被正确解析。解决方案是将每个捕获组分配给自己的变量,并在将所有捕获组分配给各自的变量后再添加上面的 3 行。
所以,在构造 hash 时直接读取 %+ 是个坏主意 :-) 我还改进了我的正则表达式,使其更精确,不过这并不是问题的原因。
下面是我能正常工作的代码:
use strict;
use warnings;
use Data::Dumper;
use File::HomeDir;
use File::Temp ();
use File::Spec;

# Parse colorsummarizer output, one cluster record per line.
#
# Every input line is echoed between '***' markers, matched against the
# pattern below and turned into a hash that is dumped with Data::Dumper and
# stored (as a hash reference) in @color_clusters. Lines that do not match
# are reported on STDERR and skipped.
my $input_path = '/tmp/cs8.txt';
open my $output, '<', $input_path
    or die "Cannot open $input_path: $!";

my @color_clusters;
my $image_number    = 0;     # 1-based counter of distinct consecutive image names
my $last_image_name = '';

# Read one line at a time instead of loading the whole file up front.
while (my $line = <$output>) {
    chomp($line);
    print "***${line}***\n";

    # Fields extracted from each line:
    #   image (file) name -> everything before " cluster"
    #   cluster number    -> cluster N
    #   pixels            -> f 0.NNN
    #   R, G, B           -> rgb N N N
    #   HEX               -> hex #RRGGBB
    #   H, S, V           -> hsv N N N
    #   cluster color     -> first "name[" token preceded by a space
    #   color category    -> (empty at the moment)
    my $matched = $line =~ m/
        ^(?<IMAGE_NAME>.+) # image file name
        \ cluster\ (?<CLUST_NUM>\d+)\ n\ [0-9]+ # cluster number
        \ f\ (?<PIXELS>[0-9]+\.?[0-9]*) # percent of pixels belonging to this cluster
        \ rgb\ (?<RED>[0-9]{1,3})\ (?<GREEN>[0-9]{1,3})\ (?<BLUE>[0-9]{1,3})
        \ hex\ \#(?<HEX>[0-9A-F]{6}) # Hexadecimal notation used in HTML
        \ hsv\ (?<HUE>[0-9]{1,3})\ (?<SATURATION>[0-9]{1,3})\ (?<VALUE>[0-9]{1,3})
        \ .+\ (?<CLUSTER_COLOR>\w+)\[
    /x;

    # Without this check a non-matching line would silently reuse the
    # captures of an earlier line (or undef values).
    if (!$matched) {
        warn "Skipping unparsable line $.\n";
        next;
    }

    # Copy the named captures right away: %+ always reflects the last
    # successful match, so any later pattern match would replace them.
    my %field = %+;

    my $image_name = $field{IMAGE_NAME};
    if ($last_image_name ne $image_name) {
        $last_image_name = $image_name;
        $image_number++;
    }

    my $cluster_number = 1 + $field{CLUST_NUM};    # convert to 1 based
    my $pixels = int($field{PIXELS} * 100) . '%';  # fraction -> whole percent

    # tr/// is a plain character mapping; no regex guard is needed, and
    # leaving the guard out avoids an extra pattern match altogether.
    (my $cluster_color = $field{CLUSTER_COLOR}) =~ tr/_/ /;

    my %color_cluster = (
        image_num      => $image_number,
        image_name     => $image_name,
        cluster_number => $cluster_number,
        hex_code       => $field{HEX},
        cluster_color  => $cluster_color,
        color_category => '',       # currently empty, will be calculated from HSV values
        pixels         => $pixels,  # percent of pixels within this cluster
        r => $field{RED}, g => $field{GREEN},      b => $field{BLUE},
        h => $field{HUE}, s => $field{SATURATION}, v => $field{VALUE},
    );

    # Store a reference so each line stays one record; pushing the hash
    # itself would flatten it into loose keys and values.
    push @color_clusters, \%color_cluster;
    print Dumper \%color_cluster;
}

close $output
    or die "Cannot close $input_path: $!";
非常感谢@Yunnosch 的评论,他们给了我关于正确方向的提示。
干杯,阿萨夫
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.