The problem is:
Implement strStr().
Returns a pointer to the first occurrence of needle in haystack, or null if needle is not part of haystack.
A naive method: (1) find the the first letter of needle
in haystack
, (2)comparing other consecutive characters of needle and haystack, (3) if all letters in needle
can be found in haystack
consecutively, then return the position in haystack
.
class Solution {
public:
char *strStr(char *haystack, char *needle) {
if(haystack==NULL || needle==NULL)
return NULL;
if(needle[0]=='\0')
return haystack;
// Find the first character of needle in haystack
int hay_len=strlen(haystack);
int need_len=strlen(needle);
int pos=0;
bool found=false;
while(hay_len-pos>=need_len && haystack[pos]!='\0') {
if(haystack[pos] == needle[0]){
int th=pos+1;
int tn=1;
while(haystack[th] == needle[tn]) {
if(needle[tn]=='\0')
break;
th++;
tn++;
}
if(tn == need_len) {
found=true;
break;
}
}
pos++;
}
if(found)
return haystack+pos;
else
return NULL;
}
};
A better solution is to use the famous KMP algorithm(http://www.geeksforgeeks.org/searching-for-patterns-set-2-kmp-algorithm/).
class Solution {
public:
char *strStr(char *haystack, char *needle) {
if(haystack==NULL || needle==NULL)
return NULL;
if(needle[0]=='\0')
return haystack;
int n=strlen(haystack);
int m=strlen(needle);
vector<int> pi(m); // the longest proper prefix which is also suffix
computePrefix(needle,pi);
//printPrefix(pi); // TEST ONLY
int q=0; // index for needle
int i=0; // index for haystack
while(i<n) {
//cout<<"needle["<<q<<"]="<<needle[q]<<", haystack["<<i<<"]="<<haystack[i]<<endl; // TEST ONLY
if(needle[q]==haystack[i]) {
q++;
i++;
}
if(q==m) {
//cout<<"Pattern occurs with shift "<<i-m<<endl; // TEST ONLY
return haystack+i-m;
} else if(needle[q]!=haystack[i]) {
if(q!=0)
q=pi[q-1];
else
i++;
}
}
return NULL;
}
// Calculate the longest proper prefix which is also suffix.
// Examples:
// For the pattern “AABAACAABAA”, lps[] is [0, 1, 0, 1, 2, 0, 1, 2, 3, 4, 5]
// For the pattern “ABCDE”, lps[] is [0, 0, 0, 0, 0]
// For the pattern “AAAAA”, lps[] is [0, 1, 2, 3, 4]
// For the pattern “AAABAAA”, lps[] is [0, 1, 2, 0, 1, 2, 3]
// For the pattern “AAACAAAAAC”, lps[] is [0, 1, 2, 0, 1, 2, 3, 3, 3, 4]
void computePrefix(char* P, vector<int>& pi) {
int m=strlen(P);
pi[0]=0; // lps[0] is always 0
int k=0; // length of the previous prefix suffix
int i=1;
while(i<m) {
if(P[k]==P[i]) {
k++;
pi[i]=k;
i++;
} else {
if(k!=0) {
k=pi[k-1];
} else {
pi[i]=0;
i++;
}
}
}
}
void printPrefix(vector<int>& pi) {
cout<<"pi={ ";
for(auto& x:pi)
cout<<x<<" ";
cout<<"}"<<endl;
}
};