Smiley face

Huang et al. Algorithm_BMLCS(S-table, 2008)


Main features
Abstract of the paper [Huang 2008]

The longest common subsequence and sequence alignment problems have been studied extensively and they can be regarded as the relationship measurement between sequences. However, most of them treat sequences evenly or consider only two sequences. Recently, with the rise of whole-genome duplication research, the doubly conserved synteny relationship among three sequences should be considered. It is a brand new model to find a merging way for understanding the interleaving relationship of sequences. Here, we define the merged LCS problem for measuring the interleaving relationship among three sequences. An O (n3 ) algorithm is first proposed for solving the problem, where n is the sequence length. We further discuss the variant version of this problem with the block information. For the blocked merged LCS problem, we propose an algorithm with time complexity O (n2 m), where m is the number of blocks. An improved O (n2 + nm2 ) algorithm is further proposed for the same blocked problem.

C++ source code

int BMLCS_stable(vector< int> T, vector< int> A, vector< int> B, int Tlen, int Alen, int Blen, vector< int> EOBA, vector< int> EOBB, int size) {
    constructAllStable_int(T, A, B, Tlen, Alen, Blen, EOBA, EOBB, size);
    
    int Asize = (int)EOBA.size(), Bsize = (int)EOBB.size();
    vector< vector< vector< int>>> vb(EOBA.size() + 1, vector< vector< int>>(EOBB.size() + 1, vector< int>(Tlen + 1)));
    vector< int> minv(Tlen + 1);
    int range = 0, tmpvalue = 0;
    //constrcut vb[i][0]
    for (int tmpx = 1; tmpx < AllStable[0][0].size(); ++tmpx) {
        vb[1][0][tmpx] = AllStable[0][0][tmpx];
    }
    for (int i = 2; i <= Asize; ++i) {
        for (int s = 1; s <= Tlen; ++s) {
            minv[s] = Tlen + 1;
        }
        for (int p = 1; p < vb[i - 1][0].size(); ++p) {
            if (vb[i - 1][0][p] == 0)
                break;
            if (p == 1) {
                if (vb[i - 1][0][p] >= Tlen) {
                    break;
                }
                range = (int)(AllStable[i - 1][vb[i - 1][0][p]].size());
                for (int k = 0; k < range; ++k) {
                    minv[k + p] = AllStable[i - 1][vb[i - 1][0][p]][k];
                }
            }
            else {
                if (vb[i - 1][0][p] >= Tlen) {
                    break;
                }
                range = (int)(AllStable[i - 1][vb[i - 1][0][p]].size());
                for (int k = 0; k < range; ++k) {
                    tmpvalue = AllStable[i - 1][vb[i - 1][0][p]][k];
                    if (tmpvalue < minv[k + p])
                        minv[k + p] = tmpvalue;
                }
            }
        }
        for (int tmpi = 1; tmpi <= Tlen; ++tmpi) {
            if (minv[tmpi] == Tlen + 1)
                break;
            vb[i][0][tmpi] = minv[tmpi];
        }
    }

    //constrcut vb[0][j]
    for (int tmpx = 1; tmpx < AllStable[Asize][0].size(); ++tmpx) {
        vb[0][1][tmpx] = AllStable[Asize][0][tmpx];
    }
    for (int j = 2; j <= Bsize; ++j) {
        for (int s = 1; s <= Tlen; ++s) {
            minv[s] = Tlen + 1;
        }
        for (int p = 1; p < vb[0][j - 1].size(); ++p) {
            if (vb[0][j - 1][p] == 0)
                break;
            if (p == 1) {
                if (vb[0][j - 1][p] >= Tlen) {
                    break;
                }
                range = (int)(AllStable[j - 1 + Asize][vb[0][j - 1][p]].size());
                for (int k = 0; k < range; ++k) {
                    minv[k + p] = AllStable[j - 1 + Asize][vb[0][j - 1][p]][k];
                }
            }
            else {
                if (vb[0][j - 1][p] >= Tlen) {
                    break;
                }
                range = (int)(AllStable[j - 1 + Asize][vb[0][j - 1][p]].size());
                for (int k = 0; k < range; ++k) {
                    tmpvalue = AllStable[j - 1 + Asize][vb[0][j - 1][p]][k];
                    if (tmpvalue < minv[k + p])
                        minv[k + p] = tmpvalue;
                }
            }
        }
        for (int tmpi = 1; tmpi <= Tlen; ++tmpi) {
            if (minv[tmpi] == Tlen + 1)
                break;
            vb[0][j][tmpi] = minv[tmpi];
        }
    }

    //construct vb[i][j]
    vector< int> mini(Tlen + 1);
    vector< int> minj(Tlen + 1);
    //vector< int> maxv(Tlen + 1);
    for (int i = 1; i <= Asize; ++i) {
        for (int j = 1; j <= Bsize; ++j) {
            for (int s = 1; s <= Tlen; ++s) {
                mini[s] = Tlen + 1;
                minj[s] = Tlen + 1;
                //maxv[s] = 0;
            }

            for (int p = 1; p < vb[i][j - 1].size(); ++p) {
                if (vb[i][j - 1][p] == 0) {
                    break;
                }
                if (p == 1) {
                    if (vb[i][j - 1][p] >= Tlen) {
                        break;
                    }
                    range = AllStable[j - 1 + Asize][vb[i][j - 1][p]].size();
                    for (int k = 0; k < range; ++k) {
                        mini[k + p] = AllStable[j - 1 + Asize][vb[i][j - 1][p]][k];
                    }
                }
                else {
                    if (vb[i][j - 1][p] >= Tlen) {
                        break;
                    }
                    range = AllStable[j - 1 + Asize][vb[i][j - 1][p]].size();
                    for (int k = 0; k < range; ++k) {
                        tmpvalue = AllStable[j - 1 + Asize][vb[i][j - 1][p]][k];
                        if (tmpvalue < mini[k + p])
                            mini[k + p] = tmpvalue;
                    }
                }
            }

            for (int p = 1; p < vb[i - 1][j].size(); ++p) {
                if (vb[i - 1][j][p] == 0) {
                    break;
                }
                if (p == 1) {
                    if (vb[i - 1][j][p] >= Tlen) {
                        break;
                    }
                    range = AllStable[i - 1][vb[i - 1][j][p]].size();
                    for (int k = 0; k < range; ++k) {
                        minj[k + p] = AllStable[i - 1][vb[i - 1][j][p]][k];
                    }
                }
                else {
                    if (vb[i - 1][j][p] >= Tlen) {
                        break;
                    }
                    range = AllStable[i - 1][vb[i - 1][j][p]].size();
                    for (int k = 0; k < range; ++k) {
                        tmpvalue = AllStable[i - 1][vb[i - 1][j][p]][k];
                        if (tmpvalue < minj[k + p])
                            minj[k + p] = tmpvalue;
                    }
                }
            }

            for (int tmpk = 1; tmpk <= Tlen; ++tmpk) {
                if (mini[tmpk] != Tlen + 1 && minj[tmpk] != Tlen + 1) {
                    vb[i][j][tmpk] = max(mini[tmpk], minj[tmpk]);
                }
                else if (mini[tmpk] != Tlen + 1 && minj[tmpk] == Tlen + 1) {
                    vb[i][j][tmpk] = mini[tmpk];
                }
                else if (mini[tmpk] == Tlen + 1 && minj[tmpk] != Tlen + 1) {
                    vb[i][j][tmpk] = minj[tmpk];
                }

            }
        }
    }
    int answer = 0;
    for (int i = 1; i <= Tlen; ++i) {
        if (vb[Asize][Bsize][i] != 0)
            ++answer;
    }
    return answer;
}

void constructNextMatch_int(vector< int> S, int Slen, int symbolNumber)
{
    //nextMatchs = new vector< int>[symbolNumber];

    vector < int> *tempMatchs;  // store the occurring position for each symbol
    int *tempMatchsIndex;
    tempMatchsIndex = new int[symbolNumber];
    tempMatchs = new vector< int>[symbolNumber];


    for (int i = 0; i< symbolNumber; i++)
    {
        tempMatchsIndex[i] = 0;
        tempMatchs[i].clear();
        nextMatchs[i].clear();
    }

    for (int i = 0; i< Slen; i++)
    {
        tempMatchs[S[i] - 1].push_back(i);
    }
    int *lenTempMatchs = new int[symbolNumber];
    for (int i = 0; i < symbolNumber; i++)
    {
        lenTempMatchs[i] = (int)tempMatchs[i].size();
    }

    for (int i = 0; i < Slen; i++)
    {
        for (int j = 0; j < symbolNumber; j++)
        {
            if (tempMatchsIndex[j] < lenTempMatchs[j])
            {
                nextMatchs[j].push_back(tempMatchs[j][tempMatchsIndex[j]]);
                if ((tempMatchs[j][tempMatchsIndex[j]]) == i)
                {
                    tempMatchsIndex[j]++;
                }
            }
        }
    }

    /*
    // test part for printing nextMatchs[]
    for (int i=0;i < symbolNumber;i++)
    {
    for (int j=0;j < (int)nextMatchs[i].size();j++)
    {
    cout << nextMatchs[i][j] << " ";
    }
    cout << endl;
    }
    cout <<  "-" << endl;
    // test part for printing nextMatchs[]
    */
    for (int i = 0; i < symbolNumber; i++)
    {
        tempMatchs[i].clear();
    }
    delete[] tempMatchs;
    delete[] tempMatchsIndex;
}


void constructAllStable_int(vector< int> T, vector< int> A, vector< int> B, int Tlen, int Alen, int Blen, vector< int> EOBA, vector< int> EOBB, int symbolNumber) {
    nextMatchs = new vector< int>[symbolNumber];

    int *lenNextMatchs = new int[symbolNumber];
    vector < int> *partitionPs;
    int lenT = Tlen;
    partitionPs = new vector< int>[lenT];
    vector< int> smallestJs;
    vector< int> strT(T.begin() + 1, T.begin() + 1 + Tlen);
    //string strT(T.begin() + 1, T.begin() + 1 + Tlen);

    //theSTables = new vector< int>[lenT];
    int totalBlockNum = EOBA.size() + EOBB.size();
    AllStable = new vector< int>*[totalBlockNum];
    for (int i = 0; i < totalBlockNum; ++i) {
        AllStable[i] = new vector< int>[Tlen];
    }

    int thisBlockStringLen = Alen / (int)(EOBA.size());
    int ranga = 0, rangb = 0;
    for (int round = 0; round < totalBlockNum; ++round) {
        if (round < EOBA.size()) {
            if (round == 0) {
                ranga = 0; rangb = EOBA[round];
            }
            else {
                ranga = EOBA[round - 1]; rangb = EOBA[round];
            }
            //string strA(A.begin() + 1 + ranga, A.begin() + 1 + rangb);
            vector< int> strA(A.begin() + 1 + ranga, A.begin() + 1 + rangb);
            constructNextMatch_int(strA, thisBlockStringLen, symbolNumber);
        }
        else {
            if (round == EOBA.size()) {
                ranga = 0; rangb = EOBB[round - EOBA.size()];
            }
            else {
                ranga = EOBB[round - 1 - EOBA.size()]; rangb = EOBB[round - EOBA.size()];
            }
            //string strB(B.begin() + 1 + ranga, B.begin() + 1 + rangb);
            vector< int> strB(B.begin() + 1 + ranga, B.begin() + 1 + rangb);
            constructNextMatch_int(strB, thisBlockStringLen, symbolNumber);
        }
        // construct and initialize
        for (int i = 0; i < lenT; i++)
        {
            partitionPs[i].clear();
        }
        for (int i = 0; i < symbolNumber; i++)
        {
            lenNextMatchs[i] = (int)(nextMatchs[i].size());
        }

        for (int k = lenT - 1; k >= 0; k--)
        {
            if (k == 1)
                int aas = 1;
            // Create a new partition point of strT[k]
            int level = 0;

            if (0 < nextMatchs[strT[k] - 1].size())  // matched at strT[k]
            {
                partitionPs[k].push_back(nextMatchs[strT[k] - 1][0]);

                if (level + 1 > (int)smallestJs.size())
                {
                    smallestJs.push_back(k);
                }
                else
                {
                    smallestJs[level] = k;
                }
            }
            // update the partition point datastructure
            for (int j = k + 1; j < lenT; j++)
            {
                //if ( partitionPs[j-1].empty() )  // ? if ( level < paritionPs[j-1].size() )
                //if ( level >= (int) partitionPs[j-1].size() )
                //{  // if ( partitionPs[j-1][level] == NULL )  // no new partitionPoint                  
                //   continue;
                //}
                if (0 == partitionPs[k].size())
                {
                    break;
                }


                if (level < (int)partitionPs[j].size())
                {
                    /*
                    if ( partitionPs[j-1][level] == partitionPs[j][level] )
                    {
                    level++;
                    }
                    */
                    //               }

                    //               if ( level < (int) partitionPs[j].size() ) 
                    //               {
                    if (partitionPs[j - 1][level] < partitionPs[j][level])
                    {
                        partitionPs[j].insert(partitionPs[j].begin() + level, partitionPs[j - 1][level]);

                        if (partitionPs[j].size() > smallestJs.size())
                        {
                            smallestJs.push_back(j);
                        }
                        else
                        {
                            if (j < smallestJs[partitionPs[j].size() - 1])
                            {
                                smallestJs[partitionPs[j].size() - 1] = j;
                            }
                        }

                    }
                    else // if ( partitionPs[j-1][level] > partitionPs[j][level] )
                    {
                        int minPartitionPoint = thisBlockStringLen;

                        if (level + 1 < (int)partitionPs[j - 1].size())
                        {
                            minPartitionPoint = partitionPs[j - 1][level + 1];
                        }

                        if (partitionPs[j - 1][level] + 1 < (int)nextMatchs[strT[j] - 1].size())
                        {
                            if (minPartitionPoint > nextMatchs[strT[j] - 1][partitionPs[j - 1][level] + 1])
                            {
                                minPartitionPoint = nextMatchs[strT[j] - 1][partitionPs[j - 1][level] + 1];
                            }
                        }


                        if (minPartitionPoint < thisBlockStringLen)
                        {
                            if ((level + 1) < (int)partitionPs[j].size())
                            {
                                //partitionPs[j][level+1] = minPartitionPoint;
                                partitionPs[j].insert(partitionPs[j].begin() + level + 1, minPartitionPoint);
                            }
                            else
                            {
                                partitionPs[j].push_back(minPartitionPoint);
                            }
                            level++;

                            if (partitionPs[j].size() > smallestJs.size())
                            {
                                smallestJs.push_back(j);
                            }
                            else
                            {
                                if (j < smallestJs[partitionPs[j].size() - 1])
                                {
                                    smallestJs[partitionPs[j].size() - 1] = j;
                                }
                            }
                        }
                        else
                        {
                            j = lenT;
                        }
                    }
                }
                else //if ( level == (int) partitionPs[j].size() )
                {
                    int minPartitionPoint = thisBlockStringLen;
                    if (level < (int)partitionPs[j - 1].size())
                    {
                        minPartitionPoint = partitionPs[j - 1][level];
                    }
                    if (level > 0)
                    {
                        if (partitionPs[j - 1][level - 1] + 1 < (int)nextMatchs[strT[j] - 1].size())
                        {
                            if (minPartitionPoint > nextMatchs[strT[j] - 1][partitionPs[j - 1][level - 1] + 1])
                            {
                                minPartitionPoint = nextMatchs[strT[j] - 1][partitionPs[j - 1][level - 1] + 1];
                            }
                        }
                    }

                    if (minPartitionPoint < thisBlockStringLen)
                    {
                        partitionPs[j].push_back(minPartitionPoint);
                        if (partitionPs[j].size() > smallestJs.size())
                        {
                            smallestJs.push_back(j);
                        }
                        else
                        {
                            if (j < smallestJs[partitionPs[j].size() - 1])
                            {
                                smallestJs[partitionPs[j].size() - 1] = j;
                            }
                        }

                    }
                    else
                    {
                        j = lenT;
                    }
                }
            }

            AllStable[round][k].push_back(k);
            for (int p = 0; p < (int)smallestJs.size(); p++)
            {
                AllStable[round][k].push_back(smallestJs[p] + 1);
            }

            // test part for printing smallestJs[], partitionPs[]
            /*
            cout << "k = " << k << ", " << inALen << endl;

            cout << "smallestJs[]" << endl;
            for (int p=0;p< (int) smallestJs.size();p++)
            {
            cout << smallestJs[p] << " ";
            }
            cout << endl;

            cout << "partitionPs[][]: " << endl;
            for (int j=k; j< inALen; j++)
            {
            if ( 0 == partitionPs[j].size() )
            {
            cout << "-";
            }
            for (int p=0;p<(int) partitionPs[j].size();p++)
            {
            cout << partitionPs[j][p] << " ";
            }
            cout << endl;
            }
            cout << endl;
            */
            // End of test part for printing smallestJs[], partitionPs[]
        }
        //theSTables[inALen-1].pop_back();

        // clear and deconstruct
        for (int i = 0; i < Tlen; i++)
        {
            partitionPs[i].clear();
        }
        smallestJs.clear();
    }
    delete[] partitionPs;
    delete[] lenNextMatchs;

}

The files
  All_BMLCS.cpp

Reference
Huang, K.-S., Yang, C.-B., Tseng, K.-T., Ann, H.-Y., Peng, Y.-H., 2008. Efficient algorithms for finding interleaving relationship between sequences. Information Processing Letters 105, 188–193.