I have the following file (shown here semicolon-delimited; the real file is tab-delimited):
abc;173959;172730
def;4186657;4187943
ghi;4703911;4702577
jkl;2243551;2242259
and I want to combine each line with every other line exactly once, so that my output would be:
abc;173959;172730;def;4186657;4187943
abc;173959;172730;ghi;4703911;4702577
abc;173959;172730;jkl;2243551;2242259
def;4186657;4187943;ghi;4703911;4702577
def;4186657;4187943;jkl;2243551;2242259
ghi;4703911;4702577;jkl;2243551;2242259
The order is not important.
I came up with the following awk-solution:
awk '{ a[$0] } END { for (i in a){ for (j in a){if (i != j) print (i "\t" j) } } }' file
But this prints me the combinations in both directions, so for example
abc;173959;172730;def;4186657;4187943
def;4186657;4187943;abc;173959;172730
Because I am pretty unfamiliar with python or perl, I kindly ask for a solution using awk/bash etc.
In awk:
$ awk '{ a[$0] } # remember every input line as an array key (identical lines collapse into one key)
END {
# Print each unordered pair of stored lines once, joined by ";".
# The for-in traversal order is not defined, so the output order varies.
for(i in a) {
delete a[i] # remove i before the inner loop so later iterations cannot pair with it again
for(j in a)
if(i!=j) # never pair a line with itself
print i ";" j
# delete a[i] # earlier placement (after the inner loop), possibly wrong
}
}' file
def;4186657;4187943;ghi;4703911;4702577
def;4186657;4187943;abc;173959;172730
def;4186657;4187943;jkl;2243551;2242259
ghi;4703911;4702577;abc;173959;172730
ghi;4703911;4702577;jkl;2243551;2242259
abc;173959;172730;jkl;2243551;2242259
Unfortunately the order is random.
Another way, which preserves the input order and doesn't modify the array a
while iterating over it (see comments), is:
$ awk '{ line[NR] = $0 } # keep the lines in input order, keyed by line number
END {
    for (left = 1; left <= NR; left++) {
        # start one past left so each unordered pair is printed exactly once
        for (right = left + 1; right <= NR; right++) {
            print line[left] ";" line[right]
        }
    }
}' file
abc;173959;172730;def;4186657;4187943
abc;173959;172730;ghi;4703911;4702577
abc;173959;172730;jkl;2243551;2242259
def;4186657;4187943;ghi;4703911;4702577
def;4186657;4187943;jkl;2243551;2242259
ghi;4703911;4702577;jkl;2243551;2242259
This awk command should work as well:
awk -F ';' '
# First pass over the file (NR == FNR): store each line in input order.
NR == FNR {
    stored[++count] = $0
    next
}
# Second pass: pair the current line with every stored line that follows it.
{
    for (n = FNR + 1; n <= count; n++)
        print $0 FS stored[n]
}' file{,}
abc;173959;172730;def;4186657;4187943
abc;173959;172730;ghi;4703911;4702577
abc;173959;172730;jkl;2243551;2242259
def;4186657;4187943;ghi;4703911;4702577
def;4186657;4187943;jkl;2243551;2242259
ghi;4703911;4702577;jkl;2243551;2242259
Could you please try the following? It prints the pairs in the same order as the lines appear in Input_file, and it reads Input_file only once.
# Store each line under its line number, then print every pair (j < q) once, in input order.
awk '{a[NR]=$0} END{for(j=1;j<NR;j++) for(q=j+1;q<=NR;q++) print a[j]";"a[q]}' Input_file
OR
awk '
# Store every input line under its line number so the input order is kept.
{
    a[NR] = $0
}
# After the whole file has been read, print each unordered pair exactly once.
END {
    # NR still holds the number of lines read; the last line has no
    # successor, so the outer loop stops one short of it.
    for (j = 1; j < NR; j++) {
        # Start at j+1 so a line is never paired with itself or an earlier line.
        for (q = j + 1; q <= NR; q++) {
            print a[j] ";" a[q]
        }
    }
}
' Input_file
Output will be as follows.
abc;173959;172730;def;4186657;4187943
abc;173959;172730;ghi;4703911;4702577
abc;173959;172730;jkl;2243551;2242259
def;4186657;4187943;ghi;4703911;4702577
def;4186657;4187943;jkl;2243551;2242259
ghi;4703911;4702577;jkl;2243551;2242259
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.