I have a log_file with all my backups and a column with value yes means it won't be deleted by the retention policy (Preserved). there could be 1 or more rows having that preserved column = yes for a specific vmname.
My input is:
= FULL == 20210105 == 2100 == ASR-FULL-20210105-2100 == YES
= FULL == 20210202 == 2100 == ASR-FULL-20210202-2100 == YES
= FULL == 20210302 == 2100 == ASR-FULL-20210302-2100 == YES
= FULL == 20210406 == 2100 == ASR-FULL-20210406-2100 == YES
= FULL == 20210105 == 2146 == DNS10_7-FULL-20210105-2146 == YES
= FULL == 20210202 == 2153 == DNS10_7-FULL-20210202-2153 == YES
= FULL == 20210302 == 2148 == DNS10_7-FULL-20210302-2148 == YES
= FULL == 20210406 == 2122 == DNS10_7-FULL-20210406-2122 == YES
= FULL == 20210105 == 2105 == execnet.0-FULL-20210105-2105 == YES
= FULL == 20210202 == 2106 == execnet.0-FULL-20210202-2106 == YES
= FULL == 20210302 == 2106 == execnet.0-FULL-20210302-2106 == YES
= FULL == 20210406 == 2105 == execnet.0-FULL-20210406-2105 == YES
= FULL == 20210106 == 0200 == Prtgadmin.0-FULL-20210106-0200 == YES
= FULL == 20210105 == 2216 == sandbox.0-FULL-20210105-2216 == YES
= FULL == 20210202 == 2227 == sandbox.0-FULL-20210202-2227 == YES
= FULL == 20210406 == 2152 == sandbox.0-FULL-20210406-2152 == YES
= FULL == 20210105 == 2236 == wwwp.0-FULL-20210105-2236 == YES
= FULL == 20210202 == 2249 == wwwp.0-FULL-20210202-2249 == YES
= FULL == 20210105 == 2259 == wwws.0-FULL-20210105-2259 == YES
= FULL == 20210202 == 2314 == wwws.0-FULL-20210202-2314 == YES
= FULL == 20210105 == 2259 == webhost.0-FULL-20210105-2259 == YES
My desired output is to print the n-1 oldest matches (top n-1)
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
I can so far have the below result by running the below awk commands but It shows the most recent matches instead. I'd also like to have one awk command ideally. The year filter is not that important.
# cat bkp_list.log| grep -E '*2021.*YES'| awk -F[==-] 'cnt[$8]++{if (cnt[$8]>1) print prev=$0;next}' |awk -F[==] '{print $8}'
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
ASR-FULL-20210406-2100
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
DNS10_7-FULL-20210406-2122
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
execnet.0-FULL-20210406-2105
sandbox.0-FULL-20210202-2227
sandbox.0-FULL-20210406-2152
wwwp.0-FULL-20210202-2249
wwws.0-FULL-20210202-2314
Thank you
To print all but last match of a substring of $8
you may use this awk
:
awk '
$NF != "YES" {next}
{
s = $8
sub(/-FULL-.*/, "", s)
}
s == ps {
print pval
}
{
ps = s
pval = $8
}' file
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
Or one liner:
awk '$NF != "YES"{next} {s=$8; sub(/-FULL-.*/, "", s)} s == ps {print pval} {ps = s; pval=$8}' file
If you want to filter on the column with the YES, you can do it with the contitional expressions before blocks
$ cat file
= FULL == 20210105 == 2100 == ASR-FULL-20210105-2100 == NO
= FULL == 20210202 == 2100 == ASR-FULL-20210202-2100 == YES
= FULL == 20210302 == 2100 == ASR-FULL-20210302-2100 == YES
= FULL == 20210406 == 2100 == ASR-FULL-20210406-2100 == YES
= FULL == 20210105 == 2146 == DNS10_7-FULL-20210105-2146 == YES
= FULL == 20210202 == 2153 == DNS10_7-FULL-20210202-2153 == YES
= FULL == 20210302 == 2148 == DNS10_7-FULL-20210302-2148 == YES
= FULL == 20210406 == 2122 == DNS10_7-FULL-20210406-2122 == YES
= FULL == 20210105 == 2105 == execnet.0-FULL-20210105-2105 == YES
= FULL == 20210202 == 2106 == execnet.0-FULL-20210202-2106 == YES
= FULL == 20210302 == 2106 == execnet.0-FULL-20210302-2106 == YES
= FULL == 20210406 == 2105 == execnet.0-FULL-20210406-2105 == YES
= FULL == 20210106 == 0200 == Prtgadmin.0-FULL-20210106-0200 == YES
= FULL == 20210105 == 2216 == sandbox.0-FULL-20210105-2216 == YES
= FULL == 20210202 == 2227 == sandbox.0-FULL-20210202-2227 == YES
= FULL == 20210406 == 2152 == sandbox.0-FULL-20210406-2152 == YES
= FULL == 20210105 == 2236 == wwwp.0-FULL-20210105-2236 == YES
= FULL == 20210202 == 2249 == wwwp.0-FULL-20210202-2249 == YES
= FULL == 20210105 == 2259 == wwws.0-FULL-20210105-2259 == YES
= FULL == 20210202 == 2314 == wwws.0-FULL-20210202-2314 == YES
= FULL == 20210105 == 2259 == webhost.0-FULL-20210105-2259 == YES
$ awk ' $NF == "YES" { print $(NF-2) }' file
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
ASR-FULL-20210406-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
DNS10_7-FULL-20210406-2122
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
execnet.0-FULL-20210406-2105
Prtgadmin.0-FULL-20210106-0200
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
sandbox.0-FULL-20210406-2152
wwwp.0-FULL-20210105-2236
wwwp.0-FULL-20210202-2249
wwws.0-FULL-20210105-2259
wwws.0-FULL-20210202-2314
webhost.0-FULL-20210105-2259
$ awk ' $NF == "NO" { print $(NF-2) }' file
ASR-FULL-20210105-2100
$
** note I changed the first line YES to NO to check the correct behaviour
Anyway, if u need to do any other special filtering, like checking the year, please specify
With GNU awk for gensub():
$ tac file | awk '$NF=="YES" && seen[gensub(/-.*/,"",1,$8)]++{print $8}' | tac
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
or with any awk:
$ tac file | awk '$NF!="YES"{next} {k=$8; sub(/-.*/,"",k)} seen[k]++{print $8}' | tac
ASR-FULL-20210105-2100
ASR-FULL-20210202-2100
ASR-FULL-20210302-2100
DNS10_7-FULL-20210105-2146
DNS10_7-FULL-20210202-2153
DNS10_7-FULL-20210302-2148
execnet.0-FULL-20210105-2105
execnet.0-FULL-20210202-2106
execnet.0-FULL-20210302-2106
sandbox.0-FULL-20210105-2216
sandbox.0-FULL-20210202-2227
wwwp.0-FULL-20210105-2236
wwws.0-FULL-20210105-2259
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.