Previously published algorithms for finding the longest common subsequence of two sequences of length n have had a best-case running time of O(n^2). Hunt and Szymanski presented an algorithm for LCS problem in O((r + n) log n) time, where r is the total number of ordered pairs of positions at which the two sequences match. Thus in the worst case the algorithm has a running time of O(n^2 log n). However, for those applications where most positions of one sequence match relatively few positions in the other sequence, a running time of O(n log n) can be expected.
int HuntSzymanski_Alg1(unsigned char *stringA, unsigned char *stringB, int m, int n)
{
int i, j, k, LCS, high, low,mid, alphabet_size = 256;
int *matchlist[alphabet_size], *L;
for (i = 0; i < alphabet_size; i++){
matchlist[i] = (int*)calloc((n + 2), sizeof(int));
}
L = (int*)calloc((n + 1), sizeof(int));
/*-----make the matchlist-----*/
for (i = 1; i <= m; i++){
if (matchlist[stringA[i]][0] == 0){
matchlist[stringA[i]][0] = 1;
for (k = 1,j = n; j>0; j--){
if (stringA[i] == stringB[j]){
matchlist[stringA[i]][k] = j;
k++;
}
matchlist[stringA[i]][k] = -1;
}
}
}
/*-----finding the LCS-----*/
for (LCS = 0, i = 1; i <= m; i++){
for (j = 1; matchlist[stringA[i]][j] != -1; j++){
/*-----if the number bigger then the biggest number in the L, LCS + 1-----*/
if (matchlist[stringA[i]][j] > L[LCS]){
LCS++;
L[LCS] = matchlist[stringA[i]][j];
}
/*-----else, do the binary search to find the place to insert the number-----*/
else{
high = LCS;
low = 0;
k = 0;
while (1){
mid = low + ((high - low) / 2);
if (L[mid] == matchlist[stringA[i]][j]){
k = 1;
break;
}
if (high - low <= 1){
mid = high;
break;
}
if (L[mid] > matchlist[stringA[i]][j]){
high = mid;
}
else if (L[mid] < matchlist[stringA[i]][j]){
low = mid;
}
}
if (k == 0){
L[mid] = matchlist[stringA[i]][j];
}
}
}
}
for (i = 0; i < 256; i++){
free(matchlist[i]);
}
free(matchlist);
free(L);
return LCS;
}
int HuntSzymanski_Alg(char *stringA, char *stringB, int m, int n)
{
return HuntSzymanski_Alg1((unsigned char*)stringA, (unsigned char*)stringB, m, n);
}