简体   繁体   中英

Optimization of find and Replace Script

I developed a script that parses text and, when specific conditions are met, replaces text and generates a new file.

I would like to know whether I can optimize it, or whether there are any alternative suggestions.

# Build $RECON_PATH/final.txt from every *inputparams.txt file in the current
# directory. Three intermediate files are written first and then joined
# column-wise with paste:
#   tx_id.txt       - record columns 1-5 and 8, numeric codes replaced by labels
#   package         - third comma-separated value of each record
#   msisdn_payment  - payment type and MSISDN taken from the fourth "[...]" group
# Requires RECON_PATH to name an existing, writable directory.
#
# The input glob is written as ./*inputparams.txt so that awk never mistakes a
# file name containing "=" (or starting with "-") for an assignment or option.

# Step 1: translate the code columns. Rule order matters: the $3 rules must see
# the numeric $2, and the $2 rules must run before the catch-all CREATION rule.
awk -F'|' '
  BEGIN { OFS = "," }

  # Column 3: label chosen by the (column 2, column 3) pair.
  $2 == 1 && $3 == 1  { $3 = "Subscription Creation without Previous" }
  $2 == 1 && $3 == 5  { $3 = "Offer Upgrade" }
  $2 == 1 && $3 == 6  { $3 = "Offer Downgrade" }
  $2 == 1 && $3 == 7  { $3 = "Campaign Extend" }
  $2 == 1 && $3 == 8  { $3 = "Campaign Change" }
  $2 == 1 && $3 == 27 { $3 = "Subscription Update" }
  $2 == 2 && $3 == 2  { $3 = "Charging Renewal" }
  $2 == 2 && $3 == 3  { $3 = "Subscription Reactivation" }
  $2 == 2 && $3 == 4  { $3 = "Subscription Reactivation with Recharge Monitoring" }
  $2 == 2 && $3 == 8  { $3 = "Campaign Change" }
  $2 == 2 && $3 == 30 { $3 = "Limited Service" }
  $2 == 3 && $3 == 11 { $3 = "Cancellation" }
  $2 == 3 && $3 == 17 { $3 = "Subscriber Account Reactivation" }
  $2 == 4 && $3 == 11 { $3 = "Cancellation" }
  $2 == 5 && $3 == 11 { $3 = "Cancellation" }
  $2 == 5 && $3 == 12 { $3 = "Expiration" }
  $2 == 5 && $3 == 13 { $3 = "Inactivation due to Charging" }
  $2 == 5 && $3 == 14 { $3 = "Inactivation due to Ceased Account" }
  $2 == 5 && $3 == 15 { $3 = "Inactivation due to Payment Method Change" }
  $2 == 5 && $3 == 16 { $3 = "Inactivation due to Ownership Change" }
  $2 == 5 && $3 == 18 { $3 = "Inactivation due to Offer Upgrade" }
  $2 == 5 && $3 == 19 { $3 = "Inactivation due to Offer Downgrade" }
  $2 == 6 && $3 == 9  { $3 = "Campaign Schedule" }
  $2 == 6 && $3 == 10 { $3 = "Offer Schedule" }

  # Column 2: label chosen by the (column 1, column 2) pair.
  $1 == 5 && $2 == 2  { $2 = "RENEWAL" }
  $1 == 4 && $2 == 2  { $2 = "SUBS. CREATE RENEWAL AOC" }
  $1 == 6 && $2 == 3  { $2 = "REFUND" }
  $1 == 4 && $2 == 5  { $2 = "INACTIVATION" }
  $1 == 5 && $2 == 4  { $2 = "PENALTY" }

  # Column 1: label chosen by its own value.
  $1 == 1             { $1 = "RESERVE" }
  $1 == 2             { $1 = "COMMIT" }
  $1 == 3             { $1 = "ROLLBACK" }

  # Column 2 again: the specific rule first, then the default for any
  # column 2 that is still the number 1.
  $1 == 5 && $2 == 1  { $2 = "SUBS. CREATE DIRECT DEBIT" }
  $2 == 1             { $2 = "CREATION" }

  { print $1, $2, $3, $4, $5, $8 }
' ./*inputparams.txt > "$RECON_PATH/tx_id.txt"

# Step 2: take the text between the fourth "[" and the next "]" (for example
# "971508592346,2"), label the payment type, and print "payment,msisdn".
awk '
  BEGIN { OFS = "," }
  {
    split($0, open_parts, "[")
    split(open_parts[4], close_parts, "]")
    split(close_parts[1], pair, ",")

    payment = pair[2]
    if (payment == 1)      payment = "POSTPAID"
    else if (payment == 2) payment = "PREPAID"

    print payment, pair[1]
  }
' ./*inputparams.txt > "$RECON_PATH/msisdn_payment"

# Step 3: the package name is the third comma-separated value of the record.
awk -F',' '{ print $3 }' ./*inputparams.txt > "$RECON_PATH/package"

# Step 4: glue the three files together line by line.
paste -d',' \
  "$RECON_PATH/tx_id.txt" \
  "$RECON_PATH/package" \
  "$RECON_PATH/msisdn_payment" \
  > "$RECON_PATH/final.txt"

The following is a sample record:

5|2|3|rfe-29883066|9840311190936312183|2.0|49.0|20131119093631|[[],4900671,SOCIAL_DATA,null,SOCIAL DATA,20130710000000,,,[971508592346,2],null,7012183,20130926190549,[[{LIMITED_PERIOD_END_DATE=20131110093613}{INITIAL_CHARGED_AMOUNT=49.0}{INITIAL_CHARGE_OPTION=1}{ENE_EVENT_ID=24645862}{FULFILL_ON_RESERVE=0}],false,false,null,4900672,SOCIAL DATA,20130710000000,2,20131119093631,0,2,[111111111111,2],USSD,2592000000,[{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000002935}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000002935}{PACKAGE_ID_AUTO_RN=2000002881}{PROVISIONED_PACK_ID=2000002935}{PROVISIONED_PACK_TYPE=PACKAGE_ID_ONE_TIME_WITH_THROTTLING}],DATAN_SOCIAL_DATA,4900667,DATAN,DATAN,[]}{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000002922}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000002922}{PACKAGE_ID_AUTO_RN=2000002880}{PROVISIONED_PACK_ID=2000002922}{PROVISIONED_PACK_TYPE=PACKAGE_ID_ONE_TIME_WITH_THROTTLING}],DATAN_DATA_SOCIAL,4900669,DATAN,DATAN,[]}{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000003031}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000003031}{PACKAGE_ID_AUTO_RN=2000003030}],DATAN_SOCIAL_THROTT,5400425,DATAN,DATAN,[]}{[{RATE_PLAN_ID=629120}],MKTWSSOCIALDATA,4900665,CMN,CMN,[]}],2,null,6912967,20130926190549]]

Regards

Here's what I was thinking. Put the script into a file and make it executable so it can be run like:

script.awk *inputparams.txt

The script:

#!/usr/bin/awk -f

# Set the separators and fill the lookup table `arr`.
# Every key has the form "<f1>==<v1> <f2>==<v2>"; a leading "0==0" marks an
# entry that depends on one field only.
BEGIN {
    FS  = "|"
    OFS = ","

    # Labels for $3, selected by the values of $2 and $3.
    defineLabel( 2, 1, 3, 1,  "Subscription Creation without Previous" )
    defineLabel( 2, 1, 3, 5,  "Offer Upgrade" )
    defineLabel( 2, 1, 3, 6,  "Offer Downgrade" )
    defineLabel( 2, 1, 3, 7,  "Campaign Extend" )
    defineLabel( 2, 1, 3, 8,  "Campaign Change" )
    defineLabel( 2, 1, 3, 27, "Subscription Update" )
    defineLabel( 2, 2, 3, 2,  "Charging Renewal" )
    defineLabel( 2, 2, 3, 3,  "Subscription Reactivation" )
    defineLabel( 2, 2, 3, 4,  "Subscription Reactivation with Recharge Monitoring" )
    defineLabel( 2, 2, 3, 8,  "Campaign Change" )
    defineLabel( 2, 2, 3, 30, "Limited Service" )
    defineLabel( 2, 3, 3, 11, "Cancellation" )
    defineLabel( 2, 3, 3, 17, "Subscriber Account Reactivation" )
    defineLabel( 2, 4, 3, 11, "Cancellation" )
    defineLabel( 2, 5, 3, 11, "Cancellation" )
    defineLabel( 2, 5, 3, 12, "Expiration" )
    defineLabel( 2, 5, 3, 13, "Inactivation due to Charging" )
    defineLabel( 2, 5, 3, 14, "Inactivation due to Ceased Account" )
    defineLabel( 2, 5, 3, 15, "Inactivation due to Payment Method Change" )
    defineLabel( 2, 5, 3, 16, "Inactivation due to Ownership Change" )
    defineLabel( 2, 5, 3, 18, "Inactivation due to Offer Upgrade" )
    defineLabel( 2, 5, 3, 19, "Inactivation due to Offer Downgrade" )
    defineLabel( 2, 6, 3, 9,  "Campaign Schedule" )
    defineLabel( 2, 6, 3, 10, "Offer Schedule" )

    # Labels for $2, selected by the values of $1 and $2.
    defineLabel( 1, 5, 2, 2, "RENEWAL" )
    defineLabel( 1, 4, 2, 2, "SUBS. CREATE RENEWAL AOC" )
    defineLabel( 1, 6, 2, 3, "REFUND" )
    defineLabel( 1, 4, 2, 5, "INACTIVATION" )
    defineLabel( 1, 5, 2, 4, "PENALTY" )
    defineLabel( 1, 5, 2, 1, "SUBS. CREATE DIRECT DEBIT" )
    # Single-field entry for $2: it needs special attention because the
    # other $2 entries are keyed on $1 as well.
    defineLabel( 0, 0, 2, 1, "CREATION" )

    # Labels for $1, selected by its own value.
    defineLabel( 0, 0, 1, 1, "RESERVE" )
    defineLabel( 0, 0, 1, 2, "COMMIT" )
    defineLabel( 0, 0, 1, 3, "ROLLBACK" )

    # Labels for comma-subfield 10 of $9; "9==10" is a key-name hack that
    # stands for "field 9, subfield 10".
    defineLabel( 9, 10, 2, 1, "POSTPAID" )
    defineLabel( 9, 10, 2, 2, "PREPAID" )
}

# Store `label` in arr under the key "<f1>==<v1> <f2>==<v2>".
function defineLabel( f1, v1, f2, v2, label ) {
    arr[ f1 "==" v1 " " f2 "==" v2 ] = label
}

# For every input record, print one OFS-joined output line: the translated
# $1, $2 and $3, then $4, $5 and $8 as read, then the string returned by
# field9Strings(). The helpers are called in the same left-to-right order as
# the columns appear.
{
    outState  = getString( 0, 1 )
    outAction = getStringWithDefault( 1, 2 )
    outReason = getString( 2, 3 )
    outExtra  = field9Strings()
    print outState, outAction, outReason, $4, $5, $8, outExtra
}

# Build one half of a lookup key: "<field>==<value>".
#   field - field number (or 0 / 9 for the special key prefixes)
#   value - the value found in that field
# Returns the key text with every space removed from `value`, so a stray
# blank in the input cannot break the space-separated two-part keys.
function makeShortKey( field, value ) {
    # gsub( regexp, replacement, target ): the target must be the variable
    # being cleaned. `value` is a by-value parameter, so the caller's data
    # is left untouched.
    gsub( / /, "", value )
    return field "==" value
}

# Build a two-part lookup key: the short key for ( f1, v1 ), one space, then
# the short key for ( f2, v2 ).
function makeLongKey( f1, v1, f2, v2 ) {
    return sprintf( "%s %s", makeShortKey( f1, v1 ), makeShortKey( f2, v2 ) )
}

# Look up the label for field `b`, keyed on fields `a` and `b` together.
# When arr has no non-empty entry for that pair, fall back to the
# single-field lookup getString( 0, b ), which itself returns the raw $b
# when nothing matches.
#   a, b - field numbers; a == 0 means "no first field"
#   key  - local scratch variable (extra parameter; callers do not pass it)
function getStringWithDefault( a, b,    key ) {
    key = makeLongKey( a, a == 0 ? 0 : $a, b, $b )

    # Decide from the table itself rather than from a variable that another
    # function happened to leave behind; `in` also avoids creating empty
    # entries in arr.
    if ( ( key in arr ) && arr[ key ] != "" )
        return arr[ key ]

    return getString( 0, b )
}

# Translate field `field2` through arr. The key is built from field1 and its
# value plus field2 and its value; field1 == 0 is the special case that
# contributes the fixed value 0, giving the standardized "0==0 ..." keys.
# Returns the table entry, or the raw $field2 when the entry is empty.
# NOTE: `s` is not declared local, so it stays visible to callers after the
# return; keep it that way unless every caller has been checked.
function getString( field1, field2 ) {
    if ( field1 == 0 )
        s = arr[ makeLongKey( field1, 0, field2, $field2 ) ]
    else
        s = arr[ makeLongKey( field1, $field1, field2, $field2 ) ]

    if ( s != "" )
        return s
    return $field2
}

# Split $9 on commas and return three values joined by OFS: subfield 3, the
# label for subfield 10 (or subfield 10 itself when arr has no label), and
# subfield 9. The first "[" is removed from subfield 9 and the first "]" from
# subfield 10 before they are used.
function field9Strings() {
    split( $9, carr, "," )
    sub( /\[/, "", carr[ 9 ] )
    sub( /]/, "", carr[ 10 ] )

    s9 = arr[ "9==10 " makeShortKey( 2, carr[ 10 ] ) ]
    if ( s9 == "" )
        return carr[ 3 ] OFS carr[ 10 ] OFS carr[ 9 ]
    return carr[ 3 ] OFS s9 OFS carr[ 9 ]
}

I've chosen to use the arr array because I think it makes the strings more maintainable. I've tried to choose keys that would make it possible to read those strings in from a strings file in a later revision.

The keys for field 9 parsing are "hard-coded" right now.

For arr[ "0==0 2==1" ] I ended up adding a special getStringWithDefault() function. If "CREATION" depended on a value from column 1, the key could be updated and that function could be removed.

You could try

awk -F"|" -f parse.awk *inputparams.txt

where parse.awk is:

# Output columns are joined with commas.
BEGIN {
    OFS=","
}

# For every record: translate the code columns via getFields(), pull the
# payment type and MSISDN out of the fourth "[...]" group, take the package
# name from the third comma-separated value, and print one output line.
{
    getFields()

    # a[4] is the text after the fourth "["; b[1] is that text up to the
    # next "]", e.g. "971508592346,2"; c[1] is the MSISDN, c[2] the type.
    split($0,a,"[")
    split(a[4],b,"]")
    split(b[1],c,",")

    # msi is assigned on every record. Without the final else, an unknown
    # payment type would reuse (and keep appending to) the previous record.
    if (c[2]==1) msi="POSTPAID"
    else if (c[2]==2) msi="PREPAID"
    else msi=c[2]

    split($0,d,",")
    pack=d[3]

    print f1,f2,f3,f4,f5,f8,pack,msi,c[1]
}

# Copy $1-$5 and $8 of the current record into the globals f1, f2, f3, f4,
# f5 and f8, replacing the numeric codes in columns 1-3 with their labels.
# Every test reads the untouched $1, $2 and $3, so the order of the three
# sections does not matter; values with no matching rule are copied as-is.
function getFields() {
    f1=$1; f2=$2; f3=$3; f4=$4; f5=$5; f8=$8

    # Column 3: label chosen by the ($2, $3) pair.
    if ($2==1) {
        if      ($3==1)  f3="Subscription Creation without Previous"
        else if ($3==5)  f3="Offer Upgrade"
        else if ($3==6)  f3="Offer Downgrade"
        else if ($3==7)  f3="Campaign Extend"
        else if ($3==8)  f3="Campaign Change"
        else if ($3==27) f3="Subscription Update"
    } else if ($2==2) {
        if      ($3==2)  f3="Charging Renewal"
        else if ($3==3)  f3="Subscription Reactivation"
        else if ($3==4)  f3="Subscription Reactivation with Recharge Monitoring"
        else if ($3==8)  f3="Campaign Change"
        else if ($3==30) f3="Limited Service"
    } else if ($2==3) {
        if      ($3==11) f3="Cancellation"
        else if ($3==17) f3="Subscriber Account Reactivation"
    } else if ($2==4) {
        if      ($3==11) f3="Cancellation"
    } else if ($2==5) {
        if      ($3==11) f3="Cancellation"
        else if ($3==12) f3="Expiration"
        else if ($3==13) f3="Inactivation due to Charging"
        else if ($3==14) f3="Inactivation due to Ceased Account"
        else if ($3==15) f3="Inactivation due to Payment Method Change"
        else if ($3==16) f3="Inactivation due to Ownership Change"
        else if ($3==18) f3="Inactivation due to Offer Upgrade"
        else if ($3==19) f3="Inactivation due to Offer Downgrade"
    } else if ($2==6) {
        if      ($3==9)  f3="Campaign Schedule"
        else if ($3==10) f3="Offer Schedule"
    }

    # Column 2: label chosen by the ($1, $2) pair. "CREATION" is only the
    # default for $2==1; it must not replace the more specific
    # "SUBS. CREATE DIRECT DEBIT", so it sits last in the else-if chain.
    if      ($1==5 && $2==2) f2="RENEWAL"
    else if ($1==4 && $2==2) f2="SUBS. CREATE RENEWAL AOC"
    else if ($1==6 && $2==3) f2="REFUND"
    else if ($1==4 && $2==5) f2="INACTIVATION"
    else if ($1==5 && $2==4) f2="PENALTY"
    else if ($1==5 && $2==1) f2="SUBS. CREATE DIRECT DEBIT"
    else if ($2==1)          f2="CREATION"

    # Column 1: label chosen by its own value.
    if      ($1==1) f1="RESERVE"
    else if ($1==2) f1="COMMIT"
    else if ($1==3) f1="ROLLBACK"
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM