In the MLCS problem, We define the similarity of three input sequences T、 A and B, denoted as Simlarity(T,A,B) which shown in below.
Using the formula of Similarity, we present a random method to generate specific similarity string data.
The pseudorandom datasets are generated with various lengths = {1000, 2000, 5000, 10000}, similarities = {10% ~ 100%}, alphabet sizes = {4, 64, 1000}, and ratio γ = |A| / |T| = m / r = {0.1, 0.2, 0.3, 0.4, 0.5}, where |A| = m, |B| = n, |T | = r, m + n = r.
Suppose that a dataset with similarity p is desired to be generated. Our method for randomly generating experimental data is presented as follows.
Randomly generate T . Then, randomly put each character of T into either A or B in order.
Compute the MLCS (BMLCS) of A, B and T , and calculate their similarity λ.
If λ ∈ [p − ε, p + ε], output A, B and T , and stop. Otherwise, randomly mutate some characters of T , and go to Step 2. Here, we set ε = 1%.
int Generate(int TSIZE, int ASIZE, int BSIZE, int size,int TS)
{
int index=1,MLCSlength,j,k;
int ChangNum=0, Count=0, temp;
string str,data;
ChangNum = TSIZE - (TSIZE*TS/100);
while(1)
{
vector T (TSIZE+1);
vector A (ASIZE+1);
vector B (BSIZE+1);
for(int i = 1; i <= TSIZE; i++)
T[i]= rand()% size + 1;
j = 1; k = 1;
for(int i = 1; i <= TSIZE; i++)
{
if (rand() % 2 == 0 && j < ASIZE || k > BSIZE)
{
A[j] = T[i];
j++;
}
else
{
B[k] = T[i];
k++;
}
}
//Reduce the similarity, mutate some characters of T
for (int i = 1; i <= ChangNum; ++i) {
T[rand() % TSIZE + 1]=rand()% size+1;
}
MLCSlength=MLCS_Bitpar(T, A, B, TSIZE, ASIZE, BSIZE,size,64);
cout << index<< ":" << MLCSlength << " SIZE:"<< TSIZE<< " "<< ASIZE << " "<< BSIZE << endl;
if(MLCSlength >= TS*TSIZE/100 - TSIZE*0.01 && MLCSlength <= TS*TSIZE/100 + TSIZE*0.01) // ε = 1%
{
allwrite_to_file(T, A, B,TSIZE, ASIZE, BSIZE,size,TS,index);
index++;
if(MLCSlength != TSIZE)
ChangNum-=(5*TSIZE/1000);
else
ChangNum+=(10*TSIZE/1000);
}
else if( MLCSlength > TS*TSIZE/100+TSIZE*0.01)
{
ChangNum+=(15*TSIZE/1000);
}
else
{
ChangNum-=(15*TSIZE/1000);
}
if(index == 101)
break;
}
return 0;
}