Smiley face

Our Algorithm_BMLCS(Diagonal Extension and Domination, 2017)


Main features
Abstract of the paper [Chan 2017]

Given a pair of merging sequences A, B and a target sequence T , the merged longest common subsequence (MLCS) problem is to find out the longest common subsequence (LCS) between sequences E(A, B) and T , where E(A, B) is obtained from merging two subsequences of A and B. In this paper, we first propose an algorithm for solving the MLCS problem in O(n|Σ|+(r−L+1)Lm) time and space, where r and L denote the lengths of T and MLCS, respectively, and m and n denote the minimum and maximum lengths of A and B, respectively. With the time complexity, it is clear that our algorithm is very efficient when T and E(A, B) are very similar. With slight modification, our algorithm can also solve another merged LCS problem variant, the block-merged LCS (BMLCS) problem, in O(n|Σ| + (r − L + 1)Lδ) time and space, where δ denotes the maximum number of blocks of A and B. Experimental results show that our algorithms are faster than other previously published MLCS and BMLCS algorithms for sequences with high similarities.

C++ source code
class Btriple {
public:
    int x;
    int y;
    int z;
    Btriple(int posA, int posB, int posT) { x = posA; y = posB; z = posT; }
};
int BMLCS_nakatzu(vector< int> T, vector< int> A, vector< int>B, int Tlen, int Alen, int Blen, int size, vector< int> EOBA, vector< int> EOBB) {
    vector< vector< int>> nextA(size + 1, vector< int>(Alen + 1, 0));
    vector< vector< int>> nextB(size + 1, vector< int>(Blen + 1, 0));
    vector< int> PosBlockA(Alen + 1, 0);
    vector< int> PosBlockB(Blen + 1, 0);
    vector< int> FEOBA(Alen + 1, 0);
    vector< int> FEOBB(Blen + 1, 0);
    int MaxLen = 0;
    vector< vector< Btriple>> QL(Tlen + 2, vector< Btriple>());
    vector< vector< Btriple>> tempQL(Tlen + 2, vector< Btriple>());
    int qlsize = 0;
    int locationA = 0;
    int locationB = 0;
    //construct array of nextA
    for (int i = 1; i <= size; ++i) {
        locationA = Alen + 1;
        for (int j = Alen; j > 0; --j) {
            if (j == Alen)
                nextA[i][j] = locationA;
            if (A[j] == i) {
                locationA = j;
                nextA[i][j - 1] = locationA;
            }
            else {
                nextA[i][j - 1] = locationA;
            }

        }
    }

    //construct array of nextB
    for (int i = 1; i <= size; ++i) {
        locationB = Blen + 1;
        for (int j = Blen; j > 0; --j) {
            if (j == Blen)
                nextB[i][j] = locationB;
            if (B[j] == i) {
                locationB = j;
                nextB[i][j - 1] = locationB;
            }
            else {
                nextB[i][j - 1] = locationB;
            }

        }
    }

    //construct PosA and PosB
    for (int i = 0, count = 0; i < Alen; ++i) {
        if (i <= EOBA[count]) {
            PosBlockA[i] = EOBA[count];
        }
        else {
            ++count;
            PosBlockA[i] = EOBA[count];
        }
    }
    PosBlockA[Alen] = EOBA[EOBA.size() - 1];
    for (int i = 0, count = 0; i < Blen; ++i) {
        if (i <= EOBB[count]) {
            PosBlockB[i] = EOBB[count];
        }
        else {
            ++count;
            PosBlockB[i] = EOBB[count];
        }
    }
    PosBlockB[Blen] = EOBB[EOBB.size() - 1];

    //construct FEOBA and FEOBB
    int sizeEOBA = EOBA.size(), sizeEOBB = EOBB.size();
    for (int i = 0; i < sizeEOBA; ++i) {
        FEOBA[EOBA[i]] = 1;
    }
    FEOBA[0] = 1;
    FEOBA[Alen] = 1;
    for (int i = 0; i < sizeEOBB; ++i) {
        FEOBB[EOBB[i]] = 1;
    }
    FEOBB[0] = 1;
    FEOBB[Blen] = 1;


    //main algorithm
    QL[0].push_back(Btriple(0, 0, 0));

    int PosA = 0;
    int PosB = 0;
    int PosT = 0;
    int LocalLen = 0;
    bool f = false;
    int TurnLen = 0;
    int tmpti = 0;
    for (int i = 1; i <= Tlen; ++i) {
        if (i > Tlen - MaxLen)
            break;
        //PosT = i;
        LocalLen = 0;
        tmpti = Tlen - i;
        for (int j = 0; j <= tmpti; ++j) {
            f = false;
            TurnLen = LocalLen;
            PosT = i + j;
            for (auto mytriple = QL[TurnLen].begin(); mytriple != QL[TurnLen].end(); ++mytriple) {
                //if (PosT > mytriple->z){
                PosA = nextA[T[PosT]][mytriple->x];
                PosB = nextB[T[PosT]][mytriple->y];
                if ((PosA <= Alen || PosB <= Blen) && f == false) {
                    ++LocalLen;
                    f = true;
                }
                //A[i] and B[j] are end of block.
                if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 1) {
                    if (PosA <= Alen) {
                        QL[LocalLen].push_back(Btriple(PosA, mytriple->y, PosT));
                    }
                    if (PosB <= Blen) {
                        QL[LocalLen].push_back(Btriple(mytriple->x, PosB, PosT));
                    }
                }
                //A[i] is EOB, but B[j] is not.
                else if (FEOBA[mytriple->x] == 1 && FEOBB[mytriple->y] == 0) {
                    if (PosB <= Blen) {
                        QL[LocalLen].push_back(Btriple(mytriple->x, PosB, PosT));
                    }
                    if (PosB > PosBlockB[mytriple->y] && PosA <= Alen) {
                        QL[LocalLen].push_back(Btriple(PosA, PosBlockB[mytriple->y], PosT));
                    }
                }
                //B[j] is EOB, but A[i] is not.
                else if (FEOBA[mytriple->x] == 0 && FEOBB[mytriple->y] == 1) {
                    if (PosA <= Alen) {
                        QL[LocalLen].push_back(Btriple(PosA, mytriple->y, PosT));
                    }
                    if (PosA > PosBlockA[mytriple->x] && PosB <= Blen) {
                        QL[LocalLen].push_back(Btriple(PosBlockA[mytriple->x], PosB, PosT));
                    }
                }
                //}
            }
            if (LocalLen > MaxLen)
            {
                MaxLen = LocalLen;
            }

            //2-D minima algorithm
            qlsize = QL[LocalLen].size();
            if (qlsize > 1 && f == true) {
                for (int i = 0; i < qlsize; ++i) {
                    Btriple temptri = QL[LocalLen][i];
                    if (!tempQL[temptri.x].empty() && temptri.y < tempQL[temptri.x][0].y) {
                        tempQL[temptri.x].erase(tempQL[temptri.x].begin());
                        tempQL[temptri.x].push_back(temptri);
                    }
                    else if (tempQL[temptri.x].empty() == true) {
                        tempQL[temptri.x].push_back(temptri);
                    }
                }
                QL[LocalLen].clear();
                int prevMaxY = Blen + 1;
                for (int i = 0; i <= Alen; ++i) {
                    if (!tempQL[i].empty()) {
                        if (tempQL[i][0].y < prevMaxY) {
                            prevMaxY = tempQL[i][0].y;
                            QL[LocalLen].push_back(tempQL[i][0]);
                        }
                        tempQL[i].clear();
                    }
                }
            }
            ++PosT;
            if (PosT > Tlen)
                break;
            if (f == false && QL[TurnLen + 1].empty() == true) {
                break;
            }
            if (f == false && QL[TurnLen + 1].empty() == false) {
                ++LocalLen;
            }

        }
    }


    return MaxLen;
}



The files
  All_BMLCS.cpp

Reference
Kuo-Tsung Tseng., De-Sheng Chan., Chang-Biau Yang., Shou-Fu Lo., 2017, Efficient Merged Longest Common Subsequence Algorithms for Similar Sequences. (Unpublished)