When reading Evolutionary Computation for Modeling and Optimization[1], I found following problem in section 1.2.3:
A string evolver is an evolutionary algorithm that tries to match a reference string from a population of random strings. The underlying character set of the string evolver is the alphabet from which the strings are drawn.
The solution given in the context is:
- Start with a reference string and a population of random strings.
- The fitness of a string is the number of positions in which it has the same character as the reference string.
- To evolve the population, split it into small random groups called tournaments.
- Copy the most fit string(break ties by picking at random among the most fit strings) over the least fit string in each tournament.
- Then change one randomly chosen character in each copy(mutation).
- Repeat until an exact match with the reference string is obtained.
When trying this solution, I noticed that it won't converge even after a long time. Therefore, I modified the crossover and mutation strategy.A subset of population(strings) will be selected based on fitness in a tournament, and the bottom-ranked
50% of each tournament will be deleted. New population is formed by crossing the remaining high 20 individuals in a tournament, while the high-ranked parents are kept unchanged[2]. Finally, a random selected string in a tournament will be mutated.
My codes are:
#include <stdio.h> #include <stdlib.h> #include <string.h> #define POP 20 #define TNMT 4 // Total number of tournaments #define MAXSTR 80 char strPop[POP][MAXSTR]; // population of strings char* REF="Hello,world!"; // Reference string char* mostFit[TNMT]; // the most fit string of each tournament char* leastFit[TNMT]; // the least fit string of each tournament char GenRandChar() { return (char)(32+rand()%96); } char * GenRandStr(char* str, unsigned int len) { int i; for(i=0;i<len-1;++i) { str[i]=GenRandChar(); } str[i]=''; return str; } // Copy the most fit string(break ties by picking at // random among the most fit strings) over the least // fit string in each tournament. unsigned int CrossoverM2L(char* strMostFit, char* strLeastFit, unsigned int len) { int i,p; char tmp; // Generate random position. Make sure the string // won't be broken at the first or last character. p=rand()%(len-2)+1; for(i=0;i<p;++i) { tmp=strMostFit[i]; strMostFit[i]=strLeastFit[i]; strLeastFit[i]=tmp; } return p; } // Change one randomly chosen character in each copy. unsigned int Mutate(char* str,unsigned int len) { int p; p=rand()%len; str[p]=GenRandChar(); return p; } // Count the number of identical chars between ref and str. unsigned int GetFitness(char* str,unsigned int len) { int i; int cnt=0; for(i=0;i<len;++i) { if(REF[i]==str[i]) cnt++; } return cnt; } // Find out the most and least fit strings in a tournament. // tourn -- the number of tourn, starting from 0. void GetFitOfTnmt(char* mostFit[],char* leastFit[],unsigned int tourn) { int i,fit; char* str; int most_fit,least_fit; // Make sure the most and least fit strings are different mostFit[tourn]=strPop[tourn*POP/TNMT]; most_fit=GetFitness(mostFit[tourn],strlen(REF)); leastFit[tourn]=strPop[tourn*POP/TNMT+1]; least_fit=GetFitness(leastFit[tourn],strlen(REF)); for(i=1;i<POP/TNMT;++i) { str=strPop[tourn*POP/TNMT+i]; fit=GetFitness(str,strlen(REF)); if(most_fitfit) { least_fit=fit; leastFit[tourn]=str; } } } // Get the most fit string of the population char* GetMostFit(char* mostFit[]) { int i,fit; char* strFit=mostFit[0]; int most_fit=GetFitness(strFit,strlen(REF)); for(i=1;i<TNMT;++i) { fit=GetFitness(mostFit[i],strlen(REF)); if(most_fit<fit) { most_fit=fit; strFit=mostFit[i]; } } return strFit; } // Sort the strings in a tournament by fitness. void SortTournByFit(unsigned int tourn) { int tmpInt; unsigned int fit[POP/TNMT]; int i,j; int len=strlen(REF); char tmpStr[MAXSTR]; // Get the fitness of each string in this tournament for(i=0;i<POP/TNMT;i++) { fit[i]=GetFitness(strPop[tourn*POP/TNMT+i],len); } // Sort strings by fitness for(i=0;i<POP/TNMT;i++) { for(j=i+1;j<POP/TNMT;j++) { if(fit[i]<fit[j]) { tmpInt=fit[i]; fit[i]=fit[j]; fit[j]=tmpInt; strcpy(tmpStr,strPop[tourn*POP/TNMT+i]); strcpy(strPop[tourn*POP/TNMT+i],strPop[tourn*POP/TNMT+j]); strcpy(strPop[tourn*POP/TNMT+j],tmpStr); } } // end of for j } // end of for i } // Crossover the top half strings. // Remove the second half strings after sorting, keep the top half, // and generate children by crossing over top half strings. void CrossoverByFit(unsigned int tourn) { int i,j,p,len; char* strTmp; SortTournByFit(tourn); // Generate random position. Make sure the string // won't be broken at the first or last character. len=strlen(REF); p=rand()%(len-2)+1; for(i=0;i<(int)POP/TNMT/2;i+=2) { for(j=0;j p;--j) { strPop[(tourn+1)*POP/TNMT-1-i][j]=strPop[tourn*POP/TNMT+i+1][j]; strPop[(tourn+1)*POP/TNMT-2-i][j]=strPop[tourn*POP/TNMT+i][j]; } } } // Print population by tournaments. void DumpPop() { int i,j; char* curStr; for(i=0;i<TNMT;i++) { printf("Tournament %d:n",i); for(j=0;j<POP/TNMT;j++) { curStr=strPop[i*POP/TNMT+j]; printf("t%stFitness: %dn",curStr,GetFitness(curStr,strlen(curStr))); } printf("n"); } } // // MAIN START HERE // int main() { int LEN=strlen(REF); int i,j,p; long cnt=0; char * strFit=NULL; // Initionlization for(i=0;i<POP;++i) { GenRandStr(strPop[i],LEN); } printf("---------------- Original Population ---------------n"); DumpPop(); for(i=0;i<TNMT;++i) GetFitOfTnmt(mostFit,leastFit,i); strFit=GetMostFit(mostFit); while(GetFitness(strFit,LEN) != LEN) //while(GetFitness(strFit,LEN)<5) { printf("n---------------- Generation %d ----------------n",cnt); DumpPop(); printf("%stReferencen",REF); printf("%stGen: %dtFitness: %dn",strFit,cnt++,GetFitness(strFit,LEN)); for(i=0;i<TNMT;++i) { // Crossover //CrossoverM2L(mostFit[i],leastFit[i],LEN); CrossoverByFit(i); // Mutation p=rand()%(POP/TNMT); Mutate(strPop[i*POP/TNMT+p],LEN); // Re-calculate the fitness GetFitOfTnmt(mostFit,leastFit,i); } strFit=GetMostFit(mostFit); } strFit=GetMostFit(mostFit); printf("n---------------- Generation %d ----------------n",cnt); DumpPop(); printf("%stReferencen",REF); printf("%stGen: %dtFitness: %dn",strFit,cnt++,GetFitness(strFit,LEN)); return 0; }
And the output is:
---------------- Generation 0 ---------------- Tournament 0: F~AKK[b;tV] Fitness: 0 \agA_q>vr Fitness: 0 OGG&Gy*(|yj Fitness: 0 6s[rEA8|45/ Fitness: 0 W.e[:bYXlCz Fitness: 0 Tournament 1: 6n?y9o??VN# Fitness: 0 8mmZ~f:Dfn< Fitness: 0 i-nV!P`{oQ Fitness: 0 8%p%)3-j;( Fitness: 0 92S2}&jU"f Fitness: 0 Tournament 2: 38IrA~lrHq| Fitness: 0 RF51Vo7V9]v Fitness: 0 Z(tUX<w|!{m Fitness: 1 qNoPy'a;OS] Fitness: 0 1`]em-+SM~~ Fitness: 0 Tournament 3: o^f(T&zQC3Q Fitness: 0 ($g[j@ Asi Fitness: 0 @gb#E`!QGUb Fitness: 0 %H/>N)Kt_Pm Fitness: 0 i>Z+F0@6/'L Fitness: 0 Hello,world! Reference Z(tUX<w|!{m Gen: 0 Fitness: 1 ---------------- Generation 1 ---------------- Tournament 0: F~AKK[b;tVx Fitness: 0 \agA_q>vr Fitness: 0 OGG&Gy*(|yj Fitness: 0 \agA_q>5] Fitness: 0 F~AKK[b;tCr Fitness: 0 Tournament 1: 6n?y9o??VN# Fitness: 0 kmmZ~f:Dfn< Fitness: 0 i-nV!P`{oQ Fitness: 0 8mm%9o??VN# Fitness: 0 6n?2~f:Dfn< Fitness: 0 Tournament 2: Z(tUX<w|!{m Fitness: 1 RF51Vo7V9]v Fitness: 0 38IrA~lrHq| Fitness: 0 RFoUX<w|r{m Fitness: 2 Z(]1Vo7V9]v Fitness: 0 Tournament 3: o^f(T&zQC3Q Fitness: 0 ($g[j@ Asi Fitness: 0 @gb#E`!QGUb Fitness: 0 ($/(T&zQC3Q Fitness: 0 @^Z[j@ Asi Fitness: 0 Hello,world! Reference RFoUX<w|r{m Gen: 1 Fitness: 2 ---------------- Generation 2 ---------------- Tournament 0: F~AKK[b;tVx Fitness: 0 \agA_q>vr Fitness: 0 OGG&Gy*(|yj Fitness: 0 \agA_q>v] Fitness: 0 F~AeK[b;tVr Fitness: 0 Tournament 1: Pn?y9o??VN# Fitness: 0 kmmZ~f:Dfn< Fitness: 0 i-nV!P`{oQ Fitness: 0 kmmZ~f??VN# Fitness: 0 6n?y9o:Dfn< Fitness: 0 Tournament 2: RFoDX<w|r{m Fitness: 2 Z(tUX<w|!{m Fitness: 1 38IrA~lrHq| Fitness: 0 Z(t1X<w|r{m Fitness: 2 RFo1X<w|!{m Fitness: 1 Tournament 3: o^f(T&zQC3Q| Fitness: 0 ($g[j@ Asi Fitness: 0 @gb#E`!QGUb Fitness: 0 ($f(T&zQC3Q Fitness: 0 o^g[j@ Asi Fitness: 0 Hello,world! Reference RFoDX<w|r{m Gen: 2 Fitness: 2 ---------------- Generation 3 ---------------- Tournament 0: F~AKK[b;tVx Fitness: 0 \agA_q>vr Fitness: 0 OGG&Gy*(|yj Fitness: 0 \KK[b;tVx Fitness: 0 F~Aag~_q>vr Fitness: 0 Tournament 1: Pn?y9o??VN# Fitness: 0 kmmZ~f:Dfn<@ Fitness: 0 i-nV!P`{oQ Fitness: 0 km?y9o??VN# Fitness: 0 PnmZ~f:Dfn< Fitness: 0 Tournament 2: RFoDX<w|r{m Fitness: 2 Z(t1X<w|r{m Fitness: 2 Z(tUX~w|!{m Fitness: 1 Z(oDX<w|r{m Fitness: 2 RFI1X<w|r{m Fitness: 2 Tournament 3: o^f(T&zQC3Q| Fitness: 0 ($g[j@ Asi Fitness: 0 @gb#E`!QGUb Fitness: 0 ($g[W@ QC3Q| Fitness: 0 o^f(T&zAsi Fitness: 0 Hello,world! Reference RFoDX<w|r{m Gen: 3 Fitness: 2 ---------------- Generation 4 ---------------- Tournament 0: F~AKK[b;tVx Fitness: 0 \JgA_q>vr Fitness: 0 OGG&Gy*(|yj Fitness: 0 \KK[b;tVx Fitness: 0 F~AagA_q>vr Fitness: 0 Tournament 1: Pn?y9o??VN# Fitness: 0 kmmZ~f:Dfn<@ Fitness: 0 i-nV!P`{oQ Fitness: 0 kmmZ~o??VN# Fitness: 0 Pnsy9f:Dfn<@ Fitness: 0 Tournament 2: RFoDX<w|r{m Fitness: 2 Z(t1X<w|r{m Fitness: 2 Z(oDXFw|r{m Fitness: 2 Z(t1X<w|r{m Fitness: 2 RFoDX<w|r{m Fitness: 2 Tournament 3: o^f(T'zQC3Q| Fitness: 0 ($g[j@ Asi Fitness: 0 @gb#E`!QGUb Fitness: 0 ($g[j@ AsQ| Fitness: 0 o^f(T&zQC3i Fitness: 0 Hello,world! Reference RFoDX<w|r{m Gen: 4 Fitness: 2 ...... ...... ---------------- Generation 2342 ---------------- Tournament 0: Hpllk,world! Fitness: 10 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Tournament 1: Hello,worlF! Fitness: 11 Hello,worlf! Fitness: 11 Hello,worlf! Fitness: 11 Hello,worlf! Fitness: 11 Hello,worlf! Fitness: 11 Tournament 2: Hello,worldh Fitness: 11 Hello,worldJ Fitness: 11 Hello,worldh Fitness: 11 Hello,worldh Fitness: 11 Hello,worVdJ Fitness: 10 Tournament 3: He:lo,w4rld! Fitness: 10 He:lo,w0rld! Fitness: 10 He:lo,w4rld! Fitness: 10 He:lo,w0rld! Fitness: 10 He:2o,w4rld! Fitness: 9 Hello,world! Reference Hpllo,world! Gen: 2342 Fitness: 11 ---------------- Generation 2343 ---------------- Tournament 0: Hp|lo,world! Fitness: 10 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Tournament 1: Hello,worlF! Fitness: 11 Hello,wo&lf! Fitness: 10 Hello,worlf! Fitness: 11 Hello,worlF! Fitness: 11 Hello,worlf! Fitness: 11 Tournament 2: Hello,worldh Fitness: 11 Hello,worldJ Fitness: 11 Hello,worldh Fitness: 11 Hello,wor&dh Fitness: 10 Hello,worldJ Fitness: 11 Tournament 3: He:lo,w4rld! Fitness: 10 He:lo,w0rld! Fitness: 10 He:lo,w4rld! Fitness: 10 He:lo,w0rld! Fitness: 10 He:lozw4rld! Fitness: 9 Hello,world! Reference Hpllo,world! Gen: 2343 Fitness: 11 ---------------- Generation 2344 ---------------- Tournament 0: Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Hllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Hpllo,world! Fitness: 11 Tournament 1: Hello,worlF! Fitness: 11 Hello,worlf! Fitness: 11 Hello,world! Fitness: 12 Hello,worlF! Fitness: 11 Hello,wo&lf! Fitness: 10 Tournament 2: Hello,world Fitness: 11 Hello,worldJ Fitness: 11 Hello,worldh Fitness: 11 Hello,worldh Fitness: 11 Hello,worldJ Fitness: 11 Tournament 3: He:lo,w44ld! Fitness: 9 He:lo,w0rld! Fitness: 10 He:lo,w4rld! Fitness: 10 He:lo,w0rld! Fitness: 10 He:lo,w4rld! Fitness: 10 Hello,world! Reference Hello,world! Gen: 2344 Fitness: 12
-----------------------------------------------------------------
References:
[1]: Daniel Ashlock, Evolutionary Computation for Modeling and Optimization, Springer, 2005.
[2]: FORREST, S., WEIMER, W., NGUYEN, T., AND GOUES, C. L., A Genetic Programming Approach to Automated Software Repair, In GECCO (July 2009).