Oracle REGEXP_SUBSTR
Some examples of use of REGEXP_SUBSTR to analyze data or for data validation/loading.

Example 1: E-mail Address Analysis
-- Split email address into name, domain and gTLD

WITH q_addr AS
 (SELECT 'email_address@domain.com'  the_address FROM DUAL
 UNION
  SELECT 'some.emaddress@domain.org' the_address FROM DUAL
 )
SELECT 
 q_addr.the_address email_address,
 REGEXP_SUBSTR(q_addr.the_address,'[^@]+') email_name,
 REGEXP_SUBSTR(q_addr.the_address,'[^@]+$') email_domain,
 REGEXP_SUBSTR(q_addr.the_address,'[^.]+$') gtdl
FROM  q_addr;
Email Address Splitting
 
Example 2: Street Address Splitting
-- Split address string into street number, street name, 
-- city, state code and zip (postal code). The data
-- should be in same format as 
-- "number address, city, state, postal code"

WITH q_addr AS
 (SELECT '1600 Pennsylvania Ave., Washington, DC, 20500'
   st_addr  FROM DUAL
 UNION
  SELECT '500 Oracle Parkway, Redwood Shores, CA, 94065'
   st_addr  FROM DUAL
 )
SELECT  -- q_addr.st_addr,
 REGEXP_SUBSTR(q_addr.st_addr, '[[:digit:]]+')  addr_No,
 --REGEXP_SUBSTR(q_addr.st_addr, '[^[:digit:]]+',2,1)
 -- addr_all,
 REGEXP_SUBSTR(q_addr.st_addr, '[^,]+', 1, 1) address_line,
 TRIM(REGEXP_SUBSTR(q_addr.st_addr, '[^,]+', 1, 2)) city_,
 TRIM(REGEXP_SUBSTR(q_addr.st_addr, '[^,]+', 1, 3)) state_,
 REGEXP_SUBSTR(q_addr.st_addr, '[^ ]+$')   AS zip_
FROM  q_addr;
Address String Splitting
 
Example 3: Phone Data Analysis (USA format)
-- Split two names and phone number into full name, first
-- name, second_name, full_phone, area code and local 
-- extension

WITH q_str AS
 (SELECT 'Some Name1 (111)-222-3333' the_str FROM DUAL
 UNION
  SELECT 'Some Name2 (222)-333-4444' the_str FROM DUAL)
SELECT  -- the_str the_string,
 REGEXP_SUBSTR(the_str, '[^(]+')          full_name,
 REGEXP_SUBSTR(the_str, '[^ ]+')          first_,
 REGEXP_SUBSTR(the_str, '[^\ ]+',1,2)     second_,
 REGEXP_SUBSTR(the_str,
   '\((.*)\)+-[[:digit:]]+-[[:digit:]]+')  full_phone,
 REGEXP_SUBSTR(the_str,
   '([[:digit:]])+[[:digit:]]+[[:digit:]]') area_code,
 REGEXP_SUBSTR(the_str,
   '[[:digit:]]+-[[:digit:]]+')  phone_No
FROM  q_str;
Name Phone String Splitting
 
Example 4: Name and Code Analysis
-- Split Name and code string into code and name
-- vendor_cd_11/_12 show two ways of getting the
-- same result string

WITH q_str AS
 (SELECT 'ABC Vendor (2134T)' the_str FROM DUAL
 UNION
  SELECT 'DEF Vendor (5678)' the_str FROM DUAL
 )
SELECT the_str the_string,
  REGEXP_SUBSTR(the_str, '([[:digit:]]+[[:alnum:]])')
   vendor_cd,
  REGEXP_SUBSTR(the_str, '[^(]+')       vendor_name,
  REGEXP_SUBSTR(the_str, '\((.*)\)')    vendor_cd_11,
  REGEXP_SUBSTR(the_str, '\([^)]+[\)]') vendor_cd_12
FROM  q_str;
Name and Code String Splitting
 
Example 5: Data Pattern Analysis

-- Data pattern can be analyzed using Regexp_Substr to
-- capture data with certain patterns (NH) or non-NH.

WITH q_str AS
(SELECT 1 idx, '10001 (CA)' state_info FROM DUAL UNION
 SELECT 2 idx, '10002 (NC)' state_info FROM DUAL UNION
 SELECT 3 idx, '10003 (NH)' state_info FROM DUAL UNION
 SELECT 4 idx, '10004 (NY)' state_info FROM DUAL UNION
 SELECT 5 idx, '10005 (NH)' state_info FROM DUAL UNION
 SELECT 6 idx, '10006 (CT)' state_info FROM DUAL
),
q_data AS
(SELECT 
 idx,
 state_info,
 REGEXP_SUBSTR(state_info,'\(N[^H)]+[\)]') non_NH_data,
 REGEXP_SUBSTR(state_info,'\(N[H)]+[\)]') nh_data
FROM q_str)
SELECT
  idx,
 DECODE(non_nh_data,NULL,state_info,NULL) nh_data,
 DECODE(nh_data,NULL,state_info,NULL) non_nh_data
FROM  q_data
WHERE nh_data IS NOT NULL OR
 non_nh_data IS NOT NULL;

Data Pattern Analysis

Oracle registered trademark of Oracle Corporation.

Last Revised on: March 01st, 2012

  55674