Files
abap-cheat-sheets/src/zcl_demo_abap_regex.clas.abap
danrega 270edb9962 Update
2024-12-20 16:12:45 +01:00

1119 lines
45 KiB
ABAP

"! <p class="shorttext"><strong>Regular Expressions in ABAP</strong><br/>ABAP cheat sheet example class</p>
"!
"! <p>The example class demonstrates regular expressions in ABAP.<br/>
"! Choose F9 in ADT to run the class.</p>
"!
"! <h2>Information</h2>
"! <p>Find information on getting started with the example class and the disclaimer in
"! the ABAP Doc comment of class {@link zcl_demo_abap_aux}.</p>
CLASS zcl_demo_abap_regex DEFINITION
  PUBLIC
  FINAL
  CREATE PUBLIC.

  PUBLIC SECTION.
    "if_oo_adt_classrun provides the main method that runs the example.
    "if_abap_matcher_callout allows an instance of this class to be
    "registered as callout handler; main passes such an instance to the
    "set_callout method of the matcher.
    INTERFACES: if_oo_adt_classrun,
                if_abap_matcher_callout.

  PROTECTED SECTION.

  PRIVATE SECTION.
    "Static attributes used by the callout demonstration.
    "callout_tab is written to the output by main after the matcher has run.
    "NOTE(review): callout is presumably maintained by the callout method
    "implementation, which is not visible in this excerpt - confirm.
    CLASS-DATA: callout     TYPE i,
                callout_tab TYPE string_table.
ENDCLASS.
CLASS zcl_demo_abap_regex IMPLEMENTATION.
METHOD if_oo_adt_classrun~main.
out->write( |ABAP cheat sheet example: Regular Expressions in ABAP\n\n| ).
out->write( `1) Characters and Character Types` ).
DATA string_chars_types TYPE string.
"Specific character
string_chars_types = replace( val = `abcdef` pcre = `a` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any character except a line break
string_chars_types = replace( val = |ab 1#?C\n D23| pcre = `.` with = `_` occ = 0 ).
out->write( string_chars_types ).
"Any digit
string_chars_types = replace( val = `a1-b2 3-4c9` pcre = `\d` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any non-digit
string_chars_types = replace( val = `a1-b2 3-4c9` pcre = `\D` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any whitespace character such as a blank, tab and new line
string_chars_types = replace( val = |ab cd\tef\ngh| pcre = `\s` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any character that is not a whitespace character
string_chars_types = replace( val = |ab cd\tef\ngh| pcre = `\S` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any word character (letter, digit or the underscore)
string_chars_types = replace( val = `(ab 12_c)` pcre = `\w` with = `#` occ = 0 ).
out->write( string_chars_types ).
"Any character that is not a word character
string_chars_types = replace( val = `(ab 12_c)` pcre = `\W` with = `#` occ = 0 ).
out->write( string_chars_types ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `2) Escaped Characters and Special Variants` ) ).
DATA string_esc_chars TYPE string.
"Special characters
string_esc_chars = replace( val = `a[b]c\d/e^f.g` pcre = `\[|\]|\\|\/|\^|\.` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Line feeds
string_esc_chars = replace( val = |a\nb\nc| pcre = `\n` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Line feed negation
string_esc_chars = replace( val = |a\nb\nc| pcre = `\N` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Tabs
string_esc_chars = replace( val = |a\tb\tc| pcre = `\t` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Carriage return
string_esc_chars = replace( val = |d\re\rf| pcre = `\r` with = `#` occ = 0 ).
out->write( string_esc_chars ).
"Characters with hex code
"The example string includes a non-breaking space.
string_esc_chars = replace(
val = |#{ cl_abap_conv_codepage=>create_in( codepage = `UTF-16BE` )->convert( source = CONV xstring( `00A0` ) ) }#|
pcre = `\x{00A0}` with = `x` occ = 0 ).
out->write( string_esc_chars ).
"Characters with Unicode code point
"As above, the example string includes a non-breaking space.
"The PCRE syntax uses the control verb (*UTF) to enable UTF mode.
string_esc_chars = replace(
val = |#{ cl_abap_conv_codepage=>create_in( codepage = `UTF-16BE` )->convert( source = CONV xstring( `00A0` ) ) }#|
pcre = `(*UTF)\N{U+00A0}` with = `y` occ = 0 ).
out->write( string_esc_chars ).
"Characters with a specified Unicode character property
string_esc_chars = replace( val = `Hello ABAP` pcre = `\p{Ll}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
string_esc_chars = replace( val = `Hello ABAP` pcre = `\p{Lu}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
string_esc_chars = replace( val = `Hello ABAP` pcre = `\P{Ll}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
string_esc_chars = replace( val = `Hello ABAP` pcre = `\P{Lu}+` with = `#` occ = 0 ).
out->write( string_esc_chars ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `3) Quantifiers, Repetitions and Alternatives` ) ).
DATA string_quan_rep_alt TYPE string.
"Zero or more repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab*` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"One or more repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab+` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Between x and y repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab{2,3}` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Exactly n repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab{3}` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"n or more repetitions
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab{2,}` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Optional matches
string_quan_rep_alt = replace( val = `abc abbc abbbc a ac` pcre = `ab?` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Alternative matches
string_quan_rep_alt = replace( val = `azbycxdwevfu` pcre = `a|b|c` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Capturing non-greedily (zero or more repetitions)
string_quan_rep_alt = replace( val = `abcd abccccd ab` pcre = `bc*?` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
string_quan_rep_alt = replace( val = `abcxdefghxi` pcre = `\A.*?x` with = `_` ).
out->write( string_quan_rep_alt ).
"Example to compare with greedy capturing
string_quan_rep_alt = replace( val = `abcxdefghxi` pcre = `\A.*x` with = `_` ).
out->write( string_quan_rep_alt ).
"Capturing non-greedily (one or more repetitions)
string_quan_rep_alt = replace( val = `abcd abccccd ab` pcre = `bc+?` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
"Example to compare with greedy capturing
string_quan_rep_alt = replace( val = `abcd abccccd ab` pcre = `bc+` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
string_quan_rep_alt = replace( val = `<span>Hallo</span>` pcre = `<.+?>` with = `#` occ = 0 ).
out->write( string_quan_rep_alt ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `4) Character Sets and Ranges` ) ).
DATA string_char_sets_ranges TYPE string.
"Defining a character set
string_char_sets_ranges = replace( val = `bit bat but bet` pcre = `b[iu]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
"Defining a character range
string_char_sets_ranges = replace( val = `aa1 ab2 ba3 cac4 da56 a7` pcre = `a[a-c0-5]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
"Matching any single character not present in the list
string_char_sets_ranges = replace( val = `ABap` pcre = `[^Ap]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
"Matching any single character not within the range
string_char_sets_ranges = replace( val = `ABCDabcd123456` pcre = `[^A-Ca-c1-4]` with = `#` occ = 0 ).
out->write( string_char_sets_ranges ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `5) Anchors and Positions` ) ).
DATA string_anchors_pos TYPE string.
"Start of subject, syntax \A
string_anchors_pos = replace( val = `abc def` pcre = `\A` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Start of subject, syntax ^; find more information below regarding multi-line mode
string_anchors_pos = replace( val = |abc\ndef\nghi| pcre = `(?m)^` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"The following example uses ^ without enabling the multi-line mode
string_anchors_pos = replace( val = |abc\ndef\nghi| pcre = `^` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"End of subject, syntax \Z
string_anchors_pos = replace( val = `abc def` pcre = `\Z` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"End of subject, syntax $
"The example uses multi-line mode
string_anchors_pos = replace( val = |abc\ndef\nghi| pcre = `(?m)$` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Start or end of word
string_anchors_pos = replace( val = `abcd a12d ed` pcre = `\ba.` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
string_anchors_pos = replace( val = `abcd a12d ed` pcre = `\Dd\b` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
string_anchors_pos = replace( val = `abcd a12d ed` pcre = `\b.d\b` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Not at the start or end of words
string_anchors_pos = replace( val = `see an elefant` pcre = `\Be\B` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
"Resetting the starting point of a match
string_anchors_pos = replace( val = `abcd` pcre = `a.\Kc` with = `#` occ = 0 ).
out->write( string_anchors_pos ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `6) Capturing Groups, Replacements and Backreferences` ) ).
DATA string_capt_group TYPE string.
string_capt_group = replace( val = `bit bat but bet` pcre = `b(a|u)t` with = `#` occ = 0 ).
out->write( string_capt_group ).
"------- Replacements with capturing groups -------
string_capt_group = replace( val = `APAB` pcre = `(..)(..)` with = `$2$1` ).
out->write( string_capt_group ).
"$0 representing the content of the whole match
string_capt_group = replace( val = `abcd` pcre = `(..)(..)` with = `#$0#` ).
out->write( string_capt_group ).
"Alternative replacement syntax with the curly brackets
string_capt_group = replace( val = `APAB` pcre = `(..)(..)` with = `${2}${1}` ).
out->write( string_capt_group ).
"------- Named capturing groups -------
"PCRE syntax (?<name>...), replacement syntax $name
string_capt_group = replace( val = xco_cp=>sy->date( )->as( xco_cp_time=>format->abap )->value
pcre = `(?<yr>\d{4})(?<mo>\d{2})(?<d>\d{2})`
with = `Day: $d, month: $mo, year: $yr` ).
out->write( string_capt_group ).
"Alternative PCRE syntax (?'name'...), replacement syntax ${name}
string_capt_group = replace( val = xco_cp=>sy->date( )->as( xco_cp_time=>format->abap )->value
pcre = `(?'yr'\d{4})(?'mo'\d{2})(?'d'\d{2})`
with = `${d}.${mo}.${yr}` ).
out->write( string_capt_group ).
"Creating a group but not capturing it
string_capt_group = replace( val = `abcd` pcre = `(?:..)(..)` with = `#$1#` ).
out->write( string_capt_group ).
"The following example raises an exception. $2 cannot be referred to.
TRY.
string_capt_group = replace( val = `abcd` pcre = `(?:..)(..)` with = `#$1#$2#` ).
CATCH cx_sy_invalid_regex_format INTO DATA(error).
out->write( error->get_text( ) ).
ENDTRY.
string_capt_group = replace( val = `abcd` pcre = `(?:..)(..)` with = `#$0#` ).
out->write( string_capt_group ).
"------- Back reference -------
"In the example, the capturing group 1 holds `ab`. The second capturing group captures
"all word characters until `ab` is found, including `ab`. The reference to the first
"capturing group is made using \1.
string_capt_group = replace( val = `abcdefabghij` pcre = `(a.)(\w*)\1` with = `#` ).
out->write( string_capt_group ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `7) Lookarounds` ) ).
DATA string_look_arounds TYPE string.
"Positive lookahead
string_look_arounds = replace( val = `abc ade` pcre = `a(?=b)` with = `#` occ = 0 ).
out->write( string_look_arounds ).
"Negative lookahead
string_look_arounds = replace( val = `abc ade` pcre = `a(?!b)` with = `#` occ = 0 ).
out->write( string_look_arounds ).
"Positive lookbehind
string_look_arounds = replace( val = `ab c abcd` pcre = `(?<=\s)c` with = `#` occ = 0 ).
out->write( string_look_arounds ).
"Negative lookbehind
string_look_arounds = replace( val = `ab c abcd` pcre = `(?<!\s)c` with = `#` occ = 0 ).
out->write( string_look_arounds ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `8) Case Conversions in Replacement Patterns` ) ).
DATA string_case_conv TYPE string.
"\u syntax
string_case_conv = replace( val = `abcdefg` pcre = `c(.*)` with = `c\u$1` ).
out->write( string_case_conv ).
"\U syntax
string_case_conv = replace( val = `abcdefg` pcre = `c(.*)` with = `c\U$1` ).
out->write( string_case_conv ).
"\l syntax
string_case_conv = replace( val = `HIJKLMNO` pcre = `K(.*)` with = `K\l$1` ).
out->write( string_case_conv ).
"\L syntax
string_case_conv = replace( val = `HIJKLMNO` pcre = `K(.*)` with = `K\L$1` ).
out->write( string_case_conv ).
"\E syntax
string_case_conv = replace( val = `abcdefg` pcre = `c(..)(..)` with = `c\U$1\E$2` ).
out->write( string_case_conv ).
"The following example is a comparison to similar syntax without specifying \E
string_case_conv = replace( val = `abcdefg` pcre = `c(..)(..)` with = `c\U$1$2` ).
out->write( string_case_conv ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `9) Setting Options and Control Verbs` ) ).
DATA string_opt_set TYPE string.
"----------- Examples with `.` and single line mode -----------
"`.` matches line feeds when the single line option is set
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"Result (11 replacements including the new line feeds):
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?s).` with = `#` occ = 0 ).
out->write( string_opt_set ).
string_opt_set = replace( val = |\n\n| pcre = `(?s).` with = `#` occ = 0 ).
out->write( string_opt_set ).
"----------- Examples for the start of subjects -----------
"Multi-line mode not enabled
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `^.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"Multi-line mode enabled
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m)^.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"\A for comparison
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m)\A.` with = `#` occ = 0 ).
out->write( string_opt_set ).
"----------- Examples for the end of subjects -----------
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `.$` with = `#` occ = 0 ).
out->write( string_opt_set ).
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m).$` with = `#` occ = 0 ).
out->write( string_opt_set ).
"\Z syntax (the result is the same as with `.$`)
string_opt_set = replace( val = |abc\ndef\nghi| pcre = `(?m).\Z` with = `#` occ = 0 ).
out->write( string_opt_set ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `10) Extended Mode` ) ).
DATA some_string TYPE string.
"Extended mode is enabled by default
"No replacement in the example.
some_string = replace( val = `abc def` pcre = `abc def` with = `#` ).
out->write( some_string ).
"The following example works because there is no whitespace in the source string.
"The whitespace in the regular expression is ignored.
some_string = replace( val = `abcdef` pcre = `abc def` with = `#` ).
out->write( some_string ).
"To match whitespace characters, you can use the pattern \s
some_string = replace( val = `abc def` pcre = `abc\sdef` with = `#` ).
out->write( some_string ).
"Escaping a whitespace using \
some_string = replace( val = `abc def` pcre = `abc\ def` with = `#` ).
out->write( some_string ).
"The following example also performs the replacement as the unescaped whitespaces
"are ignored.
some_string = replace( val = `abc def` pcre = `abc\ def` with = `#` ).
out->write( some_string ).
"Disabling the extended mode so that whitespaces are not ignored
some_string = replace( val = `abc def` pcre = `(?-x)abc def` with = `#` ).
out->write( some_string ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `11) Control verbs` ) ).
DATA ctrl_verb_string TYPE string.
"Using (*UTF); the source string contains a non-breakable space that is to be replaced
ctrl_verb_string = replace(
val = |#{ cl_abap_conv_codepage=>create_in( codepage = `UTF-16BE` )->convert( source = CONV xstring( `00A0` ) ) }#|
pcre = `(*UTF)\N{U+00A0}` with = `x` occ = 0 ).
out->write( ctrl_verb_string ).
"Line breaks
"The results are demonstrated using string templates.
"In the examples, the multi-line mode is enabled in addition.
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*CR)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*LF)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*CRLF)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
ctrl_verb_string = replace( val = |abc\ndef\rghi\r\njkl|
pcre = `(*ANYCRLF)(?m)^` with = `_` occ = 0 ).
out->write( ctrl_verb_string ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `12) Callouts` ) ).
"The following example shows how to use callouts to call an ABAP method from a PCRE
"regular expression. It creates an object-oriented representation of a PCRE regex using
"the CL_ABAP_REGEX class, applying different PCRE syntaxes for callout specifications.
"The class implements the IF_ABAP_MATCHER_CALLOUT interface, and the callout method uses
"a demo class instance as the callout handler. If the regex matches, the method is called
"for each callout position. The callout method populates a string table with accessible
"details. This demonstrates that regex processing can be influenced, and in the example,
"processing stops when a condition is met. As a result, found is false because the regex
"is not fully processed. For more details, refer to the class documentation.
DATA(text_to_search) = `abcdefghijklmnopq`.
DATA(regex) = cl_abap_regex=>create_pcre( pattern = `(...)(?C1)(..)(?C2)(....)(?C3)(.)(?C"D")(....)(?C"E")(...)(?C"F")` ).
DATA(matcher) = regex->create_matcher( text = text_to_search ).
DATA(handler) = NEW zcl_demo_abap_regex( ).
matcher->set_callout( handler ).
DATA(found) = matcher->match( ).
IF found = abap_false.
out->write( |Pattern not found.\n\n| ).
ENDIF.
out->write( callout_tab ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `13) Conditional Patterns` ) ).
DATA string_cond_pattern TYPE string.
"------- Conditions in the search pattern -------
"Example pattern:
"- The first capturing group matches a digit.
"- The '?' character specifies that the capturing group participates
" in the match optionally.
"- A condition is specified, introduced by '(?...' and referring to
" the first capturing group (the digit) indicated by 1.
"- If the condition is met, i.e. a digit is found, then the yes pattern
" is applied ('.pdf'), which participates in the match.
"- If the condition is not met, i.e. a digit is not found, then the no
" pattern is applied ('.md'), which participates in the match.
"- As a result, '2.pdf' and the two '.md' occurrences (also the one
" without the digit) are replaced.
string_cond_pattern = replace( val = `1.txt,a.txt,2.pdf,b.pdf,3.gif,c.gif,4.md,d.md,5.jpg,e.jpg`
pcre = `(\d)?(?(1)\.pdf|\.md)` with = `#` occ = 0 ).
out->write( string_cond_pattern ).
"------- Conditions in the replacement pattern -------
"Search pattern:
"- The '?' character specifies that the capturing group participates in the match optionally.
"Replacement pattern:
"- '$' followed by the specification of the capturing group that is referred to in curly brackets
"- The capturing group number is followed by a ':' and '+...' that denotes the value if true,
" the value following the other colon denotes the value if false
string_cond_pattern = replace( val = `correct` pcre = `(in)?correct` with = `This is ${1:+not:very} good.` ).
out->write( string_cond_pattern ).
string_cond_pattern = replace( val = `incorrect` pcre = `(in)?correct` with = `This is ${1:+not:very} good.` ).
out->write( string_cond_pattern ).
"Syntax {n:-default}
"Shortcut for the PCRE replacement pattern by specifying
"a default if the capture group does not participate in the match
"'ab' participates in the match, so the default replacement pattern is not
"applied
string_cond_pattern = replace( val = `abcd` pcre = `(ab)?cd` with = `${1:-yz}cd` ).
out->write( string_cond_pattern ).
"'ab' does not participate in the match, so the default replacement pattern is
"applied
string_cond_pattern = replace( val = `cd` pcre = `(ab)?cd` with = `${1:-yz}cd` ).
out->write( string_cond_pattern ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `14) ABAP Statements Using Regular Expressions` ) ).
"-----------------------------------------------------------------------------
"------------------------------- FIND ----------------------------------------
"-----------------------------------------------------------------------------
DATA(str) = `Cathy's black cat on the mat played with Matt.`.
"----- ALL OCCURRENCES addition: Finding all occurrences -----
"The MATCH COUNT addition stores the number of occurrences in a data object.
"Determining the number of letters in a string
FIND ALL OCCURRENCES OF PCRE `[A-Za-z]` IN str MATCH COUNT DATA(a).
out->write( a ).
"----- RESULTS addition: Finding all occurrences -----
"The example also uses the ALL OCCURRENCES addition. The findings are
"stored in a table of type match_result_tab.
FIND ALL OCCURRENCES OF PCRE `\s` IN str RESULTS DATA(b).
out->write( b ).
"----- SUBMATCHES addition: Storing capturing group content in variables -----
"Pattern: anything before and after ' on '
FIND PCRE `(.*)\son\s(.*)` IN str IGNORING CASE SUBMATCHES DATA(c) DATA(d).
out->write( c ).
out->write( d ).
"Using the RESULTS addition to get offset and length information of
"all capturing groups, among others
FIND PCRE `(.*)\son\s(.*)` IN str IGNORING CASE RESULTS DATA(e).
out->write( e ).
"----- MATCH OFFSET/LENGTH additions -----
"The following examples find the last occurrence of a comma and the following content.
"\K denotes that the comma is excluded from the result. Using the offset and length
"values, the part of the string is extracted. The examples underscore that you can
"achieve the same thing differently with different PCRE syntax patterns (which also
"applies to ABAP demonstrated with the use of the substring function and specifying
"the syntax string+off(len) to extract the substring).
str = `abc,def,ghi,jkl,mno,pqr,stu,vwx,yz`.
"Negative lookahead
FIND PCRE `,(?!.*,)\K.*` IN str MATCH OFFSET DATA(off_a) MATCH LENGTH DATA(len_a).
DATA(content_a) = substring( val = str off = off_a len = len_a ).
out->write( off_a ).
out->write( len_a ).
out->write( content_a ).
"Positive lookahead
FIND PCRE `,(?=[^,]*$)\K.*` IN str MATCH OFFSET DATA(off_b) MATCH LENGTH DATA(len_b).
DATA(content_b) = str+off_b(len_b).
out->write( off_b ).
out->write( len_b ).
out->write( content_b ).
ASSERT content_a = `yz`.
ASSERT content_b = content_a.
"----- IN TABLE addition: Searching in internal tables -----
"The internal table must have a character-like line type.
"Searching in an internal table and retrieving line, offset, length information
"The example only searches for the first occurrence. See another example below
"that uses the ALL OCCURRENCES and RESULTS additions.
DATA(itab) = VALUE string_table( ( `Cathy's black cat on the mat played with the friend of Matt.` ) ).
"Pattern: 't' at the beginning of a word followed by another character
FIND FIRST OCCURRENCE OF PCRE `\bt.` IN TABLE itab
IGNORING CASE MATCH LINE DATA(f) MATCH OFFSET DATA(g) MATCH LENGTH DATA(h).
out->write( f ).
out->write( g ).
out->write( h ).
"----- Searching in internal tables -----
"The RESULTS addition stores findings in an internal table of type match_result_tab when
"ALL OCCURRENCES is used.
"Submatches (i.e. length and offset values of the submatches) are stored in internal
"tables themselves. Therefore, the example uses nested loops and the substring function
"to retrieve the strings.
"The objective of the following example is to extract the content of the segments that
"are positioned within /.../ in a URL. The segments are stored in an internal table.
DATA(url) = `https://help.sap.com/docs/abap-cloud/abap-concepts/controlled-sap-luw/`.
DATA url_parts TYPE string_table.
FIND ALL OCCURRENCES OF PCRE `(?<=\/)([^\/]+)(?=\/)` IN url RESULTS DATA(res).
out->write( res ).
"Details on the regular expression:
"- Positive lookbehind (?<=\/) that determines that the content is preceded by `/`
"- Positive lookahead (?=\/) that determines that the content is followed by `/`
"- ([^\/]+) in between determines that any sequence of characters that are not `/` are matched
"- The match is put in parentheses to store the submatch
LOOP AT res INTO DATA(finding).
LOOP AT finding-submatches INTO DATA(sub).
DATA(url_part) = substring( val = url off = sub-offset len = sub-length ).
APPEND url_part TO url_parts.
ENDLOOP.
ENDLOOP.
out->write( url_parts ).
"The following statement uses nested iteration expressions with FOR instead of nested
"LOOP statements.
DATA(url_parts_for_loop) = VALUE string_table( FOR wa1 IN res
FOR wa2 IN wa1-submatches
( substring( val = url off = wa2-offset len = wa2-length ) ) ).
ASSERT url_parts = url_parts_for_loop.
"-----------------------------------------------------------------------------
"------------------------------- REPLACE -------------------------------------
"-----------------------------------------------------------------------------
DATA(str_replace) = `ab apppc app`.
DATA(str_replace_copy) = str_replace.
"Changing the source field directly with a REPLACE statement; same as above
REPLACE PCRE `(.*?)PP(.*)` IN str_replace WITH `$2#$1` IGNORING CASE.
out->write( str_replace ).
str_replace = str_replace_copy.
"ALL OCCURRENCES addition
REPLACE ALL OCCURRENCES OF PCRE `\s` IN str_replace WITH `#`.
out->write( str_replace ).
str_replace = str_replace_copy.
REPLACE ALL OCCURRENCES OF PCRE `p.` IN str_replace WITH `#`
REPLACEMENT COUNT DATA(repl_cnt) "3
RESULTS DATA(repl_res).
out->write( repl_cnt ).
out->write( repl_res ).
DATA(str_table_original) = VALUE string_table( ( `a1bc2` ) ( `d3ef` ) ( `4ghi` ) ( `jkl` ) ).
DATA(str_table) = str_table_original.
"Replacing all occurrences in a table
"RESULTS addition: Storing information in an internal table of type repl_result_tab
REPLACE ALL OCCURRENCES OF PCRE `\d`
IN TABLE str_table
WITH `#`
RESULTS DATA(res_table).
out->write( res_table ).
str_table = str_table_original.
"Replacing the first occurrence in a table
"RESULTS addition: Storing information in a structure of type repl_result
REPLACE FIRST OCCURRENCE OF PCRE `\d`
IN TABLE str_table
WITH `#`
RESULTS DATA(res_structure).
out->write( res_structure ).
str_table = str_table_original.
"Restricting the search range in an internal table
REPLACE ALL OCCURRENCES OF PCRE `\d`
IN TABLE str_table
FROM 1 TO 2
WITH `#`.
out->write( str_table ).
str_table = str_table_original.
"Offsets can be optionally specified (also only the offset of start or end line possible)
REPLACE ALL OCCURRENCES OF PCRE `\d`
IN TABLE str_table
FROM 1 OFFSET 3 TO 2 OFFSET 2
WITH `#`.
out->write( str_table ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `15) Built-In Functions in ABAP Using Regular Expressions` ) ).
DATA(text) = `Pieces of cakes.`.
"---------------- find ----------------
"The find function searches for the substring specified and returns the offset
DATA(find) = find( val = text pcre = `\.` ).
out->write( find ).
"---------------- find_end ----------------
"find_end returns the sum of the offset of the occurrence plus the length of the match
DATA(find_end) = find_end( val = text pcre = `\s` ).
out->write( find_end ).
"---------------- count ----------------
DATA(count_a) = count( val = text pcre = `\s` ).
out->write( count_a ).
DATA(count_b) = count( val = text pcre = `.` ).
out->write( count_b ).
"---------------- match ----------------
DATA(match_a) = match( val = `The email address is jon.doe@email.com.`
pcre = `\w+(\.\w+)*@(\w+\.)+(\w{2,4})` ).
out->write( match_a ).
"Find blank (without including it in the result indicated by \K) and
"the following 2 characters, second occurrence
DATA(match_b) = match( val = `The email address is jon.doe@email.com.`
pcre = `\s\K..`
occ = 2 ).
out->write( match_b ).
"---------------- replace ----------------
DATA(replace_a) = replace( val = text pcre = `\s` with = `#` ).
out->write( replace_a ).
DATA(replace_b) = replace( val = text pcre = `\s` occ = 2 with = `#` ).
out->write( replace_b ).
"---------------- substring_* ----------------
text = `Lorem ipsum dolor sit amet`.
"Extracting a substring ...
"... after a matching regular expression
DATA(substring_after) = substring_after( val = text pcre = `\s` occ = 2 ).
out->write( substring_after ).
"... before a matching regular expression
DATA(substring_before) = substring_before( val = text pcre = `\s` occ = 2 ).
out->write( substring_before ).
"... from a matching regular expression on including the match
DATA(substring_from) = substring_from( val = text pcre = `\s` occ = 2 ).
out->write( substring_from ).
"... up to a matching regular expression including the match
DATA(substring_to) = substring_to( val = text pcre = `\s` occ = 2 ).
out->write( substring_to ).
"---------------- Predicate functions: contains, matches ----------------
"The built-in function 'contains' returns a truth value. In the following
"examples, the truth value is demonstrated using the xsdbool function.
DATA(contains) = xsdbool( contains( val = `abc def` pcre = `\s` ) ).
out->write( contains ).
"The built-in function 'matches' compares a search range of the textual argument
"with a regular expression.
DATA(matches) = xsdbool( matches( val = `jon.doe@email.com`
pcre = `\w+(\.\w+)*@(\w+\.)+(\w{2,4})` ) ).
out->write( matches ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `16) Built-In Functions in ABAP SQL and CDS Using Regular Expressions` ) ).
"Populating demo database tables
zcl_demo_abap_aux=>fill_dbtabs( ).
SELECT SINGLE
carrid, "LH
carrname, "Lufthansa
url, "http://www.lufthansa.com
"Checks if expression contains a PCRE expression;
"case-sensitive by default (case_sensitive parameter can be specified)
"Note on the result: 1 = found, 0 = not found
"1
like_regexpr( pcre = '\..', "Period that is followed by any character
value = url ) AS like_regex,
"Searches a PCRE pattern, returns offset of match;
"many optional parameters: occurrence, case_sensitive, start, group
"21
locate_regexpr( pcre = '\..', "Period followed by any character
value = url,
occurrence = 2 ) "2nd occurrence in the string
AS locate_regexpr,
"Searches a PCRE pattern, returns offset of match + 1;
"many optional parameters: occurrence, case_sensitive, start, group
"2
locate_regexpr_after( pcre = '.', "Any character
value = url,
occurrence = 1 ) AS locate_regexpr_after,
"Counts all occurrences of found PCRE patterns
"2
occurrences_regexpr( pcre = '\..', "Period that is followed by any character
value = url ) AS occ_regex,
"Replaces a found PCRE expression;
"more parameters possible: occurrence, case_sensitive, start
"http://www#ufthansa#om
replace_regexpr( pcre = '\..', "Period that is followed by any character
value = url,
with = '#' ) AS replace_regex,
"Searches for a PCRE expression and returns the matched substring
"More parameters possible: occurrence, case_sensitive, start, group
".lu
substring_regexpr( pcre = '\...', "Period that is followed by any two characters
value = url ) AS substring_regexpr
FROM zdemo_abap_carr
WHERE carrid = 'LH'
INTO @DATA(builtin_func_regex).
out->write( builtin_func_regex ).
**********************************************************************
out->write( zcl_demo_abap_aux=>heading( `17) System Classes for Regular Expressions: CL_ABAP_REGEX and CL_ABAP_MATCHER` ) ).
"Note: The example results are not output except one.
"Example string
DATA(test_string) = `a1 # B2 ? cd . E3`.
"----------- Creating an instance of a regular expression -----------
"Creating an instance of a regular expression with PCRE syntax
"using cl_abap_regex
"Example pattern: Any non-digit followed by a digit
DATA(regular_expr) = cl_abap_regex=>create_pcre( pattern = `\D\d`
ignore_case = abap_true ).
"----------- Creating matchers -----------
"Two ways are possible (both ways return references of type
"ref to cl_abap_matcher):
"- create_matcher method of the cl_abap_regex class
"- create_pcre method of the cl_abap_matcher class
"Note that several importing parameters are available to enable
"further settings of the regular expression, e.g. ignoring the
"case, using the extended mode, etc. The examples pass a string
"to the 'text' parameter. You can also specify internal tables
"with the 'table' parameter and more.
"Creating a matcher using the create_matcher method of the cl_abap_regex class
DATA(matcher_1) = regular_expr->create_matcher( text = test_string ).
"Creating a matcher in one go using method chaining
DATA(matcher_2) = cl_abap_regex=>create_pcre( pattern = `\D\d`
ignore_case = abap_true
)->create_matcher( text = test_string ).
"Creating a matcher using the create_pcre method of the cl_abap_matcher class
DATA(matcher_3) = cl_abap_matcher=>create_pcre( pattern = `\D\d`
text = test_string
ignore_case = abap_true ).
"----------- Exploring searching and replacing -----------
"--- Finding all occurrences using the find_all method ---
"In the example, result has the type match_result_tab containing the findings.
DATA(result_fa1) = matcher_1->find_all( ).
DATA(result_fa2) = matcher_2->find_all( ).
ASSERT result_fa2 = result_fa1.
"Getting the result in one go using method chaining with cl_abap_matcher
DATA(result_fa3) = cl_abap_matcher=>create_pcre( pattern = `\D\d`
text = test_string
ignore_case = abap_true
)->find_all( ).
ASSERT result_fa3 = result_fa1.
"--- Example with submatches ---
test_string = `XabcdXXefgXXhXXijklmnXX`.
DATA(result_fa4) = cl_abap_matcher=>create_pcre( pattern = `X(.*?)X`
text = test_string
ignore_case = abap_true
)->find_all( ).
"--- Replacing all occurrences using the 'replace_all' method ---
DATA(matcher_repl_1) = cl_abap_regex=>create_pcre( pattern = `X(.*?)X`
)->create_matcher( text = test_string ).
DATA(repl_count_1) = matcher_repl_1->replace_all( newtext = `#$1#` ).
DATA(repl_result_1) = matcher_repl_1->text.
"Using cl_abap_matcher
DATA(matcher_repl_2) = cl_abap_matcher=>create_pcre( pattern = `X(.*?)X`
text = test_string ).
DATA(repl_count_2) = matcher_repl_2->replace_all( newtext = `#$1#` ).
DATA(repl_result_2) = matcher_repl_2->text.
"---- Sequential processing of the regular expression ---
"---- using the find_next method ------------------------
"The example explores various other methods, and writes
"information to a string table.
test_string = `a1bc2def3ghij45klm67opqr8stuvwx90yz`.
DATA(matcher_fn) = cl_abap_matcher=>create_pcre( pattern = `\d(\D.)`
text = test_string ).
DATA test_stringtab TYPE string_table.
WHILE matcher_fn->find_next( ) = abap_true.
APPEND |---- Finding { sy-index } -----| TO test_stringtab.
"Type match_result
DATA(match_result) = matcher_fn->get_match( ).
DATA(offset) = matcher_fn->get_offset( ).
DATA(length) = matcher_fn->get_length( ).
DATA(matched_content) = test_string+offset(length).
APPEND |Match offset: { offset }| TO test_stringtab.
APPEND |Match length: { length }| TO test_stringtab.
APPEND |Match content: { matched_content }| TO test_stringtab.
"Submatches component of the match result (type match_result)
DATA(subgroup) = matcher_fn->get_match( )-submatches.
LOOP AT subgroup INTO DATA(wa).
DATA(sub_tabix) = sy-tabix.
DATA(submatch_line) = wa.
DATA(submatch_offset) = wa-offset.
DATA(submatch_length) = wa-length.
DATA(submatch) = matcher_fn->get_submatch( sub_tabix ).
APPEND |Submatch { sub_tabix } offset: { submatch_offset }| TO test_stringtab.
APPEND |Submatch { sub_tabix } length: { submatch_length }| TO test_stringtab.
APPEND |Submatch { sub_tabix } content: { submatch }| TO test_stringtab.
ENDLOOP.
ENDWHILE.
out->write( test_stringtab ).
"---- Using an object of type cl_abap_regex in ABAP ---
"---- statements with the REGEX addition --------------
DATA(result_find_all_1) = cl_abap_matcher=>create_pcre( pattern = `\d(\D.)`
text = test_string
)->find_all( ).
DATA(result_find_all_2) = cl_abap_regex=>create_pcre( pattern = `\d(\D.)`
)->create_matcher( text = test_string
)->find_all( ).
DATA(reg_expr) = cl_abap_regex=>create_pcre( pattern = `\d(\D.)` ).
FIND ALL OCCURRENCES OF REGEX reg_expr IN test_string RESULTS DATA(result_find_all_3).
ASSERT result_find_all_3 = result_find_all_1.
ASSERT result_find_all_3 = result_find_all_2.
"Note that the REGEX addition is obsolete when it is used with a POSIX syntax pattern.
"A syntax warning is displayed for the following example.
"FIND ALL OCCURRENCES OF REGEX `\d(\D.)` IN test_string RESULTS DATA(result_8).
"The syntax warning can be suppressed using a pragma
FIND ALL OCCURRENCES OF REGEX `\d(\D.)` IN test_string RESULTS DATA(result_find_all_4) ##REGEX_POSIX.
"Using PCRE instead
FIND ALL OCCURRENCES OF PCRE `\d(\D.)` IN test_string RESULTS DATA(result_find_all_5).
ASSERT result_find_all_5 = result_find_all_3.
"---------------- Exploring more parameters of the create_pcre method ----------------
"See the class documentation for more parameters and information.
"--- enable_multiline parameter ---
test_string = |abc\ndef\nghi\njkl|.
DATA(matcher_no_ml) = cl_abap_matcher=>create_pcre( pattern = `^`
text = test_string ).
DATA(repl_count_no_ml) = matcher_no_ml->replace_all( newtext = `#` ).
DATA(repl_result_no_ml) = matcher_no_ml->text.
DATA(matcher_w_ml) = cl_abap_matcher=>create_pcre( pattern = `^`
text = test_string
enable_multiline = abap_true ).
DATA(repl_count_w_ml) = matcher_w_ml->replace_all( newtext = `#` ).
DATA(repl_result_w_ml) = matcher_w_ml->text.
"--- table/ignore_case parameters ---
DATA(test_string_table) = VALUE string_table( ( `abZdez` ) ( `zZfghZ` ) ( `ijkZZz` ) ( `zzzzZ` ) ).
DATA(matcher_tab) = cl_abap_matcher=>create_pcre( pattern = `z+`
table = test_string_table
ignore_case = abap_true ).
DATA(repl_count_tab) = matcher_tab->replace_all( newtext = `#` ).
DATA(repl_result_tab) = matcher_tab->table.
"--- extended parameter ---
test_string = `abc def`.
DATA(matcher_w_extended) = cl_abap_matcher=>create_pcre( pattern = `abc def`
text = test_string ).
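"No replacement in the following example as the extended mode is
"enabled by default. In extended mode, whitespace in the pattern is
"ignored, so the blank in the pattern does not match the blank in the text.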
DATA(repl_count_w_extended) = matcher_w_extended->replace_all( newtext = `#` ).
DATA(repl_result_w_extended) = matcher_w_extended->text.
"Disabling the extended mode so that whitespaces are not ignored
DATA(matcher_not_extended) = cl_abap_matcher=>create_pcre( pattern = `abc def`
text = test_string
extended = abap_false ).
DATA(repl_count_not_extended) = matcher_not_extended->replace_all( newtext = `#` ).
DATA(repl_result_not_extended) = matcher_not_extended->text.
ENDMETHOD.
METHOD if_abap_matcher_callout~callout.
  "Implementation of the callout method of the if_abap_matcher_callout interface.
  "
  "Every call increments the class-level counter 'callout'.
  "- If the callout string is `F`, the 'abort' result is returned and
  "  nothing is logged.
  "- Otherwise, the 'pass' result is returned and the values of the
  "  method's parameters are added as text lines to the class-level
  "  string table 'callout_tab'.

  "The time stamp is taken before anything else is done
  DATA(called_at) = utclong_current( ).
  callout = callout + 1.

  "Guard clause: abort without writing any log lines
  IF callout_string = `F`.
    callout_result = if_abap_matcher_callout=>c_callout_result-abort.
    RETURN.
  ENDIF.

  callout_result = if_abap_matcher_callout=>c_callout_result-pass.

  "Adding the header (framed by initial lines) and one line per parameter
  "to the existing table content
  callout_tab = VALUE #( BASE callout_tab
                         ( )
                         ( |----------------- Callout { callout } -----------------| )
                         ( )
                         ( |callout_num: { callout_num }| )
                         ( |callout_string: { callout_string }| )
                         ( |regex: { regex }| )
                         ( |subject: { subject }| )
                         ( |current_subject_pos: { current_subject_pos }| )
                         ( |current_pattern_pos: { current_pattern_pos }| )
                         ( |capture_last: { capture_last }| )
                         ( |capture_last_len: { capture_last_len }| )
                         ( |capture_last_off: { capture_last_off }| )
                         ( |start_match_off: { start_match_off }| ) ).

  "The offset/length access may lie outside of 'subject'. In that case,
  "the line is intentionally left out of the log.
  TRY.
      INSERT |Content of submatch: { subject+capture_last_off(capture_last_len) }| INTO TABLE callout_tab.
    CATCH cx_sy_range_out_of_bounds.
      "Deliberately ignored (best-effort log line)
  ENDTRY.

  INSERT |This callout was called at { called_at }| INTO TABLE callout_tab.
ENDMETHOD.
ENDCLASS.