Labels

Monday, March 2, 2015

Regular Expression Matching

Implement regular expression matching with support for '.' and '*'.
'.' Matches any single character.
'*' Matches zero or more of the preceding element.

The matching should cover the entire input string (not partial).

The function prototype should be:
bool isMatch(const char *s, const char *p)

Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "a*") → true
isMatch("aa", ".*") → true
isMatch("ab", ".*") → true
isMatch("aab", "c*a*b") → true 
 
 
Naive Way:  Cam up with DP idea at first glance. Should not be a complicated logic.

opt[i][j] // represents whether S[0....i] matches P[0...j] or not
// base case
opt[0][0] = true;
opt[0][j] = true; if consecutive '.*' or 'x*' are met.
// iteration
opt[i][j] =
if(s[i]==p[j] || p[j] == '.') opt[i-1][j-1]
if(p[j] == '*')
    if(s[i] == p[j-1] || p[j-1] == '.')
        opt[i][j-2]  // don't take s[i] to match p[j-1],p[j]
        || opt[i-1][j] // take s[i] to match p[j-1],p[j]
    else
        opt[i][j-2] // cannot take s[i] to match p[j-1],p[j]

This algorithm is O(nm) run time and space. Can be reduced to O(m) since current column is affected only by previous column.

 public class Solution {  
   public boolean isMatch(String s, String p) {  
     // DP  
     boolean opt[][] = new boolean[s.length()+1][p.length()+1];  
     // base case  
     opt[0][0] = true;  
     boolean valid = false;  
     for(int j = 2;j <= p.length();j+=2){  
       if(p.charAt(j-1)=='*'){ valid = true; opt[0][j] = true;}  
       else{ valid = false;}  
       if(!valid) break;  
     }  
     // iteration  
     for(int i = 1;i <= s.length();i++){  
       for(int j = 1;j <= p.length();j++){  
         opt[i][j] = false;  
         if(s.charAt(i-1)==p.charAt(j-1) || p.charAt(j-1)=='.') opt[i][j] = opt[i-1][j-1];  
         else if(p.charAt(j-1)=='*'){  
           if(s.charAt(i-1)==p.charAt(j-2) || p.charAt(j-2)=='.')  
             opt[i][j] = opt[i-1][j] || opt[i][j-2];  
           else  
             opt[i][j] = opt[i][j-2];  
         }  
       }  
     }  
     return opt[s.length()][p.length()];  
   }  
 }  

Below is a O(n) space version. Could use only one array, but two arrays is more clear.

 public class Solution {  
   public boolean isMatch(String s, String p) {  
     // DP  
     boolean opt[] = new boolean[p.length()+1];  
     boolean pre[] = new boolean[p.length()+1];  
     // base case  
     pre[0] = true;  
     boolean valid = false;  
     for(int j = 2;j <= p.length();j+=2){  
       if(p.charAt(j-1)=='*'){ valid = true; pre[j] = true;}  
       else{ valid = false;}  
       if(!valid) break;  
     }  
     // iteration  
     for(int i = 1;i <= s.length();i++){  
       for(int j = 1;j <= p.length();j++){  
         opt[j] = false;  
         if(s.charAt(i-1)==p.charAt(j-1) || p.charAt(j-1)=='.') opt[j] = pre[j-1];  
         else if(p.charAt(j-1)=='*'){  
           if(s.charAt(i-1)==p.charAt(j-2) || p.charAt(j-2)=='.')  
             opt[j] = pre[j] || opt[j-2];  
           else  
             opt[j] = opt[j-2];  
         }  
       }  
       for(int j = 0;j <= p.length();j++)  
         pre[j] = opt[j];  
     }  
     return pre[p.length()];  
   }  
 }  

There is also a DFS version in Discuss. But the time complexity as far as I am concerned is O(2^n). However the OJ seems just listing as many complicated test cases as possible without large size test cases.

A DFS with path memorizing is as follows.

 public class Solution {  
   public boolean isMatch(String s, String p) {  
     Map<List<Integer>, Boolean> map = new HashMap<List<Integer>, Boolean>();  
     for(int i = p.length();i>=0;i-=2){  
       if(i==p.length() || i < p.length()-1 && p.charAt(i+1)=='*'){  
         List<Integer> list = new ArrayList<Integer>();  
         list.add(s.length());  
         list.add(i);  
         map.put(list, true);  
       }else  
         break;  
     }  
     return isMatch(s.toCharArray(), 0, p.toCharArray(), 0, map);  
   }  
   private boolean isMatch(char[] s, int i, char[] p, int j, Map<List<Integer>, Boolean> map){  
     // check memory  
     List<Integer> list = new ArrayList<Integer>();  
     list.add(i);  
     list.add(j);  
     if(map.containsKey(list)) return map.get(list);  
     // ending case  
     if(i < 0 || j < 0 || j >= p.length) return false;  
     // recursion  
     if(p[j] == '*'){  
       int u = i-2;  
       while(u < s.length && (u==i-2 || s[u] == p[j-1] || p[j-1] == '.'))  
         if(isMatch(s, ++u, p, j+1, map)){   
           map.put(list, true);  
           return true;  
         }  
     }  
     else if(i < s.length && (s[i] == p[j] || p[j] == '.')) return isMatch(s, i+1, p, j+1, map);  
     else if(j+1 < p.length && p[j+1] == '*') return isMatch(s, i, p, j+2, map);  
     map.put(list, false);  
     return false;  
   }  
 }  

No comments:

Post a Comment