简体   繁体   中英

Parsing using Regular expression

I have a string like

DELIVERY 'AR_ACTDETAIL_UPD' 'AR_DETAIL_UPD'
MODULE TABLE
FILTER 'AR_ACT'
DEFINE UPDN 'UPDATE'
DEFINE REFH 'UPDATE'
DEFINE CIT '5000'
DEFINE TDB 'TARGET'
DEFINE TABLE 'AR_ACTIVITY_DETAIL'
ELEMENTS '"POSTING_PERIOD_SID","POSTING_DATE_SID","ACCTG_DOC_CLSS_SID","CUSTOMER_BLLT_SID","AR_ASSET_ACCNT_SID","CO_CNSLDTN_SID","AR_BSNSS_UNT_SID","LOCAL_CURRENCY_AMT","LCL_CUR_DSCNT_AAMT","LCL_CUR_DSCNT_TAMMT","TAXABLE_AMT","NO_TAXABLE_AMT","NO_FOREIGN_TAXABLE_AMT","TAX_CODE","BEFOREINVOICE_SID","ACCNT_BANK_PAYMENT_SID","GAIN_LOSS_LOC_AMT","LOCAL_OPEN_AMOUNT","FOREIGN_OPEN_AMOUNT"'
ELEDEFINE +3 IFNULL '0'
ELEDEFINE +4 IFNULL '0'
ELEDEFINE +5 IFNULL '0'
ELEDEFINE +6 IFNULL '0'

DELIVERY 'AR_DOC_EXINS' 'AR_DOC_EINS'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_INS'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'APPEND'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EXTNSN_COLS'
ELEMENTS '"AR_ACTVSID","DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTT_NO","INVOICE_MULT","PAYMENT_MULT","ADJUSTMENT_MULT","BAD_DEBT_MULT","SOURCE_INFO","CHG_INVC_PY_STTS_IND","CHG_GROUP_CURRENCY_AMT","CHG_GRP_CUR_DSCNT_AAMT","CHG_GRP_CUR_DSCNT_TAMT","CREATED_DT","CHANGED_DT","ACCOUNT_ID","FI_ACCOUNT_ID"'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_SHORT_CHAR}'

DELIVERY 'AR_DOC_EXS_UPD' 'AR_DOC__UPD'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_UPD'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'UPDATE'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EOLS'
ELEMENTS '"DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTNSN_NO","PAYMENT_ID","PAYMETED_DT","CHANGED_DT","AR_ACTVTY_DOC_SID_TMP","ACCOUNT_ID","FI_ACCOUNT_ID","O_GROUP_CURRENCY_AMT","O_GRP_CUR_DSCNT_TAMT","O_GRP_CUR_DSCNT_AAMT"'
ELEDEFINE +1 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_NUMBER}'

ALLOW DUPLICATES

The number of deliveries is unknown, but once I find ALLOW, REJECT, MERGE or AGGREGATE, that means there are no more deliveries. My idea was to extract the source code of each DELIVERY separately (that is, the substring between two DELIVERY keywords, or between a DELIVERY keyword and one of the words ALLOW, REJECT, MERGE, AGGREGATE), so I tried this pattern:

// NOTE(review): the square brackets form a character class, not a group of alternatives,
// so [\nDELIVERY|\nALLOW|...] matches ONE character from that set rather than a keyword.
// Because that class is optional ('?') and '.*?' is lazy, each match is only "DELIVERY",
// one whitespace character, and at most one further character from the class.
Pattern p = Pattern.compile("DELIVERY\\s.*?[\\nDELIVERY|\\nALLOW|\\nREJECT|\\nMERGE|\\nAGGREGATE]?",Pattern.DOTALL);

but it doesn't work for me.

Description

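This regex will capture each of the three deliveries in your sample text. It must be compiled with the case-insensitive, multiline and dot-matches-all options, as in the code below.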

^delivery(?:'[^']*'|.)*?^(?=delivery|aggregate|reject|allow|merge)

在此处输入图片说明

Example

DELIVERY 'AR_ACTDETAIL_UPD' 'AR_DETAIL_UPD'
MODULE TABLE
FILTER 'AR_ACT'
DEFINE UPDN 'UPDATE'
DEFINE REFH 'UPDATE'
DEFINE CIT '5000'
DEFINE TDB 'TARGET'
DEFINE TABLE 'AR_ACTIVITY_DETAIL'
ELEMENTS '"POSTING_PERIOD_SID","POSTING_DATE_SID","ACCTG_DOC_CLSS_SID","CUSTOMER_BLLT_SID","AR_ASSET_ACCNT_SID","CO_CNSLDTN_SID","AR_BSNSS_UNT_SID","LOCAL_CURRENCY_AMT","LCL_CUR_DSCNT_AAMT","LCL_CUR_DSCNT_TAMMT","TAXABLE_AMT","NO_TAXABLE_AMT","NO_FOREIGN_TAXABLE_AMT","TAX_CODE","BEFOREINVOICE_SID","ACCNT_BANK_PAYMENT_SID","GAIN_LOSS_LOC_AMT","LOCAL_OPEN_AMOUNT","FOREIGN_OPEN_AMOUNT"'
ELEDEFINE +3 IFNULL '0'
ELEDEFINE +4 IFNULL '0'
ELEDEFINE +5 IFNULL '0'
ELEDEFINE +6 IFNULL '0'

DELIVERY 'AR_DOC_EXINS' 'AR_DOC_EINS'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_INS'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'APPEND'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EXTNSN_COLS'
ELEMENTS '"AR_ACTVSID","DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTT_NO","INVOICE_MULT","PAYMENT_MULT","ADJUSTMENT_MULT","BAD_DEBT_MULT","SOURCE_INFO","CHG_INVC_PY_STTS_IND","CHG_GROUP_CURRENCY_AMT","CHG_GRP_CUR_DSCNT_AAMT","CHG_GRP_CUR_DSCNT_TAMT","CREATED_DT","CHANGED_DT","ACCOUNT_ID","FI_ACCOUNT_ID"'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_SHORT_CHAR}'

DELIVERY 'AR_DOC_EXS_UPD' 'AR_DOC__UPD'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_UPD'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'UPDATE'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EOLS'
ELEMENTS '"DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTNSN_NO","PAYMENT_ID","PAYMETED_DT","CHANGED_DT","AR_ACTVTY_DOC_SID_TMP","ACCOUNT_ID","FI_ACCOUNT_ID","O_GROUP_CURRENCY_AMT","O_GRP_CUR_DSCNT_TAMT","O_GRP_CUR_DSCNT_AAMT"'
ELEDEFINE +1 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_NUMBER}'

ALLOW DUPLICATES

Code

import java.util.regex.Pattern;
import java.util.regex.Matcher;
/**
 * Demo harness: runs the delivery-extraction regex against a source string and
 * prints every match together with each of its groups (group 0 is the whole match).
 */
class Module1{
  public static void main(String[] asd){
    final String sourcestring = "source string to match with pattern";
    // CASE_INSENSITIVE: the pattern is written in lower case.
    // MULTILINE: '^' matches at the start of every line.
    // DOTALL: '.' also matches line terminators, so one match can span several lines.
    final int flags = Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL;
    final Matcher m = Pattern
        .compile("^delivery(?:'[^']*'|.)*?^(?=delivery|aggregate|reject|allow|merge)", flags)
        .matcher(sourcestring);

    for (int mIdx = 0; m.find(); mIdx++) {
      final int lastGroup = m.groupCount();
      int groupIdx = 0;
      while (groupIdx <= lastGroup) {
        System.out.println( "[" + mIdx + "][" + groupIdx + "] = " + m.group(groupIdx));
        groupIdx++;
      }
    }
  }
}

Matches

[0][0] = DELIVERY 'AR_ACTDETAIL_UPD' 'AR_DETAIL_UPD'
MODULE TABLE
FILTER 'AR_ACT'
DEFINE UPDN 'UPDATE'
DEFINE REFH 'UPDATE'
DEFINE CIT '5000'
DEFINE TDB 'TARGET'
DEFINE TABLE 'AR_ACTIVITY_DETAIL'
ELEMENTS '"POSTING_PERIOD_SID","POSTING_DATE_SID","ACCTG_DOC_CLSS_SID","CUSTOMER_BLLT_SID","AR_ASSET_ACCNT_SID","CO_CNSLDTN_SID","AR_BSNSS_UNT_SID","LOCAL_CURRENCY_AMT","LCL_CUR_DSCNT_AAMT","LCL_CUR_DSCNT_TAMMT","TAXABLE_AMT","NO_TAXABLE_AMT","NO_FOREIGN_TAXABLE_AMT","TAX_CODE","BEFOREINVOICE_SID","ACCNT_BANK_PAYMENT_SID","GAIN_LOSS_LOC_AMT","LOCAL_OPEN_AMOUNT","FOREIGN_OPEN_AMOUNT"'
ELEDEFINE +3 IFNULL '0'
ELEDEFINE +4 IFNULL '0'
ELEDEFINE +5 IFNULL '0'
ELEDEFINE +6 IFNULL '0'


[1][0] = DELIVERY 'AR_DOC_EXINS' 'AR_DOC_EINS'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_INS'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'APPEND'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EXTNSN_COLS'
ELEMENTS '"AR_ACTVSID","DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTT_NO","INVOICE_MULT","PAYMENT_MULT","ADJUSTMENT_MULT","BAD_DEBT_MULT","SOURCE_INFO","CHG_INVC_PY_STTS_IND","CHG_GROUP_CURRENCY_AMT","CHG_GRP_CUR_DSCNT_AAMT","CHG_GRP_CUR_DSCNT_TAMT","CREATED_DT","CHANGED_DT","ACCOUNT_ID","FI_ACCOUNT_ID"'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_SHORT_CHAR}'


[2][0] = DELIVERY 'AR_DOC_EXS_UPD' 'AR_DOC__UPD'
MODULE TABLE
FILTER 'AR_DOC_EXTNSN_COLS_UPD'
DEFINE UPDATE_DETECTION 'UPDATE'
DEFINE REFRESH 'UPDATE'
DEFINE COMMIT '5000'
DEFINE TABLEDB 'TARGET'
DEFINE TABLE 'AR_DOC_EOLS'
ELEMENTS '"DOCUMENT_CO_CD","TRANSACTION_TYP_CD","TRANSACTION_NO","DOCUMENT_SUFFIX_NO","DOC_SFFX_EXTNSN_NO","PAYMENT_ID","PAYMETED_DT","CHANGED_DT","AR_ACTVTY_DOC_SID_TMP","ACCOUNT_ID","FI_ACCOUNT_ID","O_GROUP_CURRENCY_AMT","O_GRP_CUR_DSCNT_TAMT","O_GRP_CUR_DSCNT_AAMT"'
ELEDEFINE +1 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +2 IFNULL '{$U_DEFAULT_SHORT_CHAR}'
ELEDEFINE +3 IFNULL '{$U_DEFAULT_NUMBER}'

The following code should work for you:

// Extract every DELIVERY section from the input and print it.
// (?s) lets '.' cross line breaks; (?m) makes '^' match at the start of each line.
// Both the opening DELIVERY and the terminating keyword must be the first word on a line
// (optional leading spaces/tabs allowed), so a keyword that merely appears inside a
// statement, e.g. DEFINE REFRESH 'MERGE', no longer cuts a delivery short.
// Group 1 holds the delivery text, starting at the DELIVERY keyword.
Pattern p = Pattern.compile("(?sm)^[ \\t]*(DELIVERY\\b.+?)(?=^[ \\t]*(?:DELIVERY|ALLOW|REJECT|MERGE|AGGREGATE)\\b)");
Matcher m = p.matcher(str); // str is your input string
boolean found = false;
while (m.find()) {
    System.out.printf("Deliver: %s%n", m.group(1));
    found = true;
}

// Report explicitly when the input contained no terminated DELIVERY section.
if (!found) {
    System.out.println("Didn't Match");
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM