/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-11-01
 * 
 * @copyright	Copyright Biotech Vana, S.L. 2006-2010
 */

package com.biotechvana.javabiotoolkit;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.HashMap;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.NullProgressMonitor;
import org.eclipse.core.runtime.OperationCanceledException;

import com.biotechvana.javabiotoolkit.exceptions.AnnotationInvalidRangeException;
import com.biotechvana.javabiotoolkit.exceptions.GeneticCodeIncorrectFrameException;
import com.biotechvana.javabiotoolkit.exceptions.GeneticCodeIncorrectSyntaxException;
import com.biotechvana.javabiotoolkit.exceptions.IllegalCodonException;
import com.biotechvana.javabiotoolkit.exceptions.InvalidSequenceCharacterException;
import com.biotechvana.javabiotoolkit.exceptions.InvalidSequenceClassException;

/**
 * The class <code>GeneticCode</code> keeps a translation table or mapping between tri-nucleotide RNA/DNA sequences 
 * (codons) and amino acids. Note that more than one triplet codon may code for the same amino acid.
 * <p>
 * At the moment this class does not support context-dependent encoding (i.e. the same codon translates to different 
 * amino acids depending on its surrounding bases).
 *   
 * @version	1.3, 2011-03-23
 * 
 * @author	<a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso Muñoz-Pomer Fuentes</a>,
 * 			<a href="http://www.biotechvana.com">Biotechvana</a>.
 * 
 * @see	RNASequence
 * @see DNASequence
 * @see	AminoAcid
 */
public class GeneticCode
implements Cloneable, Serializable
{
	// Version identifier used by Java serialization (the class implements Serializable).
	private static final long serialVersionUID = -5206751247887450086L;	// Autogenerated by Eclipse
	// Shared formatter obtained from NumberFormat.getInstance().
	private static final NumberFormat numberFormat = NumberFormat.getInstance();
	// Mutable holder for the human-readable description of this code.
	private StringBuilder descriptionSB;
	// Translation table: codon -> amino acid. addCodon() stores a codon together with its transcribed or
	// reverse-transcribed counterpart.
	private Map<BioSequence, AminoAcid> codonMap;
	// Codons registered as translation initiation sites.
	private Set<BioSequence> startCodons;
	// Codons registered as translation termination sites.
	private Set<BioSequence> stopCodons;
	
	// Number of bases per codon; used as the regex quantifier for rule parsing and as the translation step.
	private static int codonLength = 3;
	
	/**
	 * Build a standard genetic code. The description is set to "Standard genetic code".
	 * <p>
	 * This code is detailed in the table below, including stop and start codons. (Source: Wikipedia)
	 * <table>
	 * <tr>
	 * <td rowspan="2" colspan="2"></td>
	 * <th colspan="4">2nd base</th>
	 * </tr>
	 * 
	 * <tr>
	 * <th>U</th>
	 * <th>C</th>
	 * <th>A</th>
	 * <th>G</th>
	 * </tr>
	 * 
	 * <tr>
	 * <th rowspan="10">1st<br />base</th>
	 * <th rowspan="3">U</th>
	 * <td style="background:#ffe75f;">UUU (Phe/F) Phenylalanine<br />UUC (Phe/F) Phenylalanine</td>
	 * <td style="background:#b3dec0;">UCU (Ser/S) Serine<br />UCC (Ser/S) Serine</td>
	 * <td style="background:#b3dec0;">UAU (Tyr/Y) Tyrosine<br />UAC (Tyr/Y) Tyrosine</td>
	 * <td style="background:#b3dec0;">UGU (Cys/C) Cysteine<br />UGC (Cys/C) Cysteine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <td style="background:#ffe75f;">UUA (Leu/L) Leucine</td>
	 * <td style="background:#b3dec0;">UCA (Ser/S) Serine</td>
	 * <td style="background:#B0B0B0;">UAA Ochre (<i>Stop</i>)</td>
	 * <td style="background:#B0B0B0;">UGA Opal (<i>Stop</i>)</td>
	 * </tr>
	 * 
	 * <tr>
	 * <td style="background:#ffe75f;">UUG (Leu/L) Leucine</td>
	 * <td style="background:#b3dec0;">UCG (Ser/S) Serine</td>
	 * <td style="background:#B0B0B0;">UAG Amber (<i>Stop</i>)</td>
	 * <td style="background:#ffe75f;">UGG (Trp/W) Tryptophan</td>
	 * </tr>
	 * 
	 * <tr>
	 * <th rowspan="2">C</th>
	 * <td style="background:#ffe75f;">CUU (Leu/L) Leucine<br />CUC (Leu/L) Leucine</td>
	 * <td style="background:#ffe75f;">CCU (Pro/P) Proline<br />CCC (Pro/P) Proline</td>
	 * <td style="background:#bbbfe0;">CAU (His/H) Histidine<br />CAC (His/H) Histidine</td>
	 * <td style="background:#bbbfe0;">CGU (Arg/R) Arginine<br />CGC (Arg/R) Arginine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <td style="background:#ffe75f;">CUA (Leu/L) Leucine<br />CUG (Leu/L) Leucine</td>
	 * <td style="background:#ffe75f;">CCA (Pro/P) Proline<br />CCG (Pro/P) Proline</td>
	 * <td style="background:#b3dec0;">CAA (Gln/Q) Glutamine<br />CAG (Gln/Q) Glutamine</td>
	 * <td style="background:#bbbfe0;">CGA (Arg/R) Arginine<br />CGG (Arg/R) Arginine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <th rowspan="3">A</th>
	 * <td style="background:#ffe75f;">AUU (Ile/I) Isoleucine<br />AUC (Ile/I) Isoleucine</td>
	 * <td style="background:#b3dec0;">ACU (Thr/T) Threonine<br />ACC (Thr/T) Threonine</td>
	 * <td style="background:#b3dec0;">AAU (Asn/N) Asparagine<br />AAC (Asn/N) Asparagine</td>
	 * <td style="background:#b3dec0;">AGU (Ser/S) Serine<br />AGC (Ser/S) Serine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <td style="background:#ffe75f;">AUA (Ile/I) Isoleucine</td>
	 * <td style="background:#b3dec0;">ACA (Thr/T) Threonine</td>
	 * <td style="background:#bbbfe0;">AAA (Lys/K) Lysine</td>
	 * <td style="background:#bbbfe0;">AGA (Arg/R) Arginine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <td style="background:#ffe75f;">AUG<sup>[A]</sup> (Met/M) Methionine</td>
	 * <td style="background:#b3dec0;">ACG (Thr/T) Threonine</td>
	 * <td style="background:#bbbfe0;">AAG (Lys/K) Lysine</td>
	 * <td style="background:#bbbfe0;">AGG (Arg/R) Arginine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <th rowspan="2">G</th>
	 * <td style="background:#ffe75f;">GUU (Val/V) Valine<br />GUC (Val/V) Valine</td>
	 * <td style="background:#ffe75f;">GCU (Ala/A) Alanine<br />GCC (Ala/A) Alanine</td>
	 * <td style="background:#f8b7d3;">GAU (Asp/D) Aspartic acid<br />GAC (Asp/D) Aspartic acid</td>
	 * <td style="background:#b3dec0;">GGU (Gly/G) Glycine<br />GGC (Gly/G) Glycine</td>
	 * </tr>
	 * 
	 * <tr>
	 * <td style="background:#ffe75f;">GUA (Val/V) Valine<br />GUG (Val/V) Valine</td>
	 * <td style="background:#ffe75f;">GCA (Ala/A) Alanine<br />GCG (Ala/A) Alanine</td>
	 * <td style="background:#f8b7d3;">GAA (Glu/E) Glutamic acid<br />GAG (Glu/E) Glutamic acid</td>
	 * <td style="background:#b3dec0;">GGA (Gly/G) Glycine<br />GGG (Gly/G) Glycine</td>
	 * </tr>
	 * </table>
	 * <p>
	 * Legend:
	 * <table>
	 * <tr>
	 * <td style="background:#ffe75f;">nonpolar</td>
	 * <td style="background:#b3dec0;">polar</td>
	 * <td style="background:#bbbfe0;">basic</td>
	 * <td style="background:#f8b7d3;">acidic</td>
	 * <td style="background:#B0B0B0;">(stop codon)</td>
	 * </tr>
	 * </table>
	 * <b><sup>[A]</sup></b> The codon AUG both codes for methionine and serves 
	 * as an initiation site: the first AUG in an mRNA's coding region is where 
	 * translation into protein begins.
	 * 
	 * @since 0.1
	 */
	public static GeneticCode standardGeneticCode()
	{
		// Base order used to enumerate the 64 codons: first base varies slowest, third base fastest.
		final char[] bases = { 'U', 'C', 'A', 'G' };
		// Amino acids in the same enumeration order (UUU, UUC, UUA, UUG, UCU, ... GGG).
		final AminoAcid[] translations =
		{
			// U..
			AminoAcid.F, AminoAcid.F, AminoAcid.L, AminoAcid.L,
			AminoAcid.S, AminoAcid.S, AminoAcid.S, AminoAcid.S,
			AminoAcid.Y, AminoAcid.Y, AminoAcid.$, AminoAcid.$,
			AminoAcid.C, AminoAcid.C, AminoAcid.$, AminoAcid.W,
			// C..
			AminoAcid.L, AminoAcid.L, AminoAcid.L, AminoAcid.L,
			AminoAcid.P, AminoAcid.P, AminoAcid.P, AminoAcid.P,
			AminoAcid.H, AminoAcid.H, AminoAcid.Q, AminoAcid.Q,
			AminoAcid.R, AminoAcid.R, AminoAcid.R, AminoAcid.R,
			// A..
			AminoAcid.I, AminoAcid.I, AminoAcid.I, AminoAcid.M,
			AminoAcid.T, AminoAcid.T, AminoAcid.T, AminoAcid.T,
			AminoAcid.N, AminoAcid.N, AminoAcid.K, AminoAcid.K,
			AminoAcid.S, AminoAcid.S, AminoAcid.R, AminoAcid.R,
			// G..
			AminoAcid.V, AminoAcid.V, AminoAcid.V, AminoAcid.V,
			AminoAcid.A, AminoAcid.A, AminoAcid.A, AminoAcid.A,
			AminoAcid.D, AminoAcid.D, AminoAcid.E, AminoAcid.E,
			AminoAcid.G, AminoAcid.G, AminoAcid.G, AminoAcid.G
		};

		GeneticCode code = new GeneticCode();
		code.descriptionSB.append("Standard genetic code");

		int slot = 0;
		for (int first = 0 ; first < bases.length ; first++)
		{
			for (int second = 0 ; second < bases.length ; second++)
			{
				for (int third = 0 ; third < bases.length ; third++)
				{
					String triplet = "" + bases[first] + bases[second] + bases[third];
					code.addCodon(new RNASequence(triplet), translations[slot]);
					slot++;
				}
			}
		}

		// Start codon first, then the three stop codons
		code.addStartCodon(new RNASequence("AUG"));
		final String[] terminators = { "UAA", "UAG", "UGA" };
		for (int t = 0 ; t < terminators.length ; t++)
		{
			code.addStopCodon(new RNASequence(terminators[t]));
		}

		return code;
	}
		
	/**
	 * Creates a genetic code with an empty description, an empty translation table and no start or stop codons.
	 * 
	 * @since	1.1
	 */
	public GeneticCode()
	{
		this.codonMap = new HashMap<BioSequence, AminoAcid>(128);
		this.startCodons = new HashSet<BioSequence>(2);
		this.stopCodons = new HashSet<BioSequence>(6);
		this.descriptionSB = new StringBuilder();
	}
	
	/**
	 * Constructor for custom genetic codes. A set of coding rules is passed in the argument 
	 * <code>codeString</code>, plus a description and a list of start and stop codons (also as <code>String</code>s), 
	 * respectively.
	 * <p>
	 * Start and stop codons are registered first, through {@link #addStartCodon(BioSequence)} and 
	 * {@link #addStopCodon(BioSequence)}. Every rule line is then trimmed, upper-cased independently of the default 
	 * locale, and registered through {@link #addCodon(BioSequence, AminoAcid)}.
	 * 
	 * @param description	descriptive <code>String</code>.
	 * @param codeString	a multiline <code>String</code> with the coding rules, one per line. The rules are 
	 * 						described by these "informal" regular expressions:<br />
	 * 						[Rna Codon | Dna Codon]\s*->\s*[Amino Acid]
	 * 						<p>
	 * 						NOTE: Additionally, comments starting with "#" and blank lines are allowed.
	 * @param startStringCodons	list of codons that mark initiation sites; each one is parsed as an 
	 * 							<code>RNASequence</code>.
	 * @param stopStringCodons	list of codons that mark termination sites; each one is parsed as an 
	 * 							<code>RNASequence</code>.
	 * 
	 * @throws	IOException	if reading <code>codeString</code> line by line fails.
	 * @throws	GeneticCodeIncorrectSyntaxException	if a line is neither blank, a comment, nor a well-formed rule.
	 * @throws	InvalidSequenceCharacterException if there is an error creating the codon sequences. 
	 * @throws	IllegalCodonException	presumably raised by codon validation for malformed codons (TODO confirm 
	 * 									against <code>validateCodon</code>).
	 * 
	 * @see	AminoAcid
	 * @see RNASequence
	 * @see DNASequence
	 * 
	 * @since	0.9
	 */
	public GeneticCode
	(String description, String codeString, List<String> startStringCodons, List<String> stopStringCodons)
	throws GeneticCodeIncorrectSyntaxException, IOException
	{
		this();
		this.descriptionSB = new StringBuilder(description);
		
		// Building blocks for rules regexes
		String rnaCodonRegex = "([" + RNABase.getRegexes() + "]{" + codonLength + "})";
		String dnaCodonRegex = "([" + DNABase.getRegexes() + "]{" + codonLength + "})";
		String aminoRegex = "([" + AminoAcid.getRegexes() + "]{1})";
		String separatorRegex = "(\\s*\\-\\>\\s*)";
		
		// Regexes:
		// Ex. rnaRuleRegex: "UUG -> K", "UUG->K", "UUG->    K"
		// Ex. dnaRuleRegex: "ATT -> O", "ATT   -> O", "ATT-> O"
		String rnaRuleRegex = "^\\s*" +	rnaCodonRegex + separatorRegex + aminoRegex + "\\s*$";
		String dnaRuleRegex = "^\\s*" +	dnaCodonRegex + separatorRegex + aminoRegex + "\\s*$";
		// Compiled Patterns for the Matchers
		Pattern rnaRulePattern = Pattern.compile(rnaRuleRegex);
		Pattern dnaRulePattern = Pattern.compile(dnaRuleRegex);
		
		// Parse start and stop codons. addStartCodon/addStopCodon validate the codon and store it together with 
		// its reverse-transcribed counterpart, so no extra bookkeeping is needed here.
		for (String startStringCodon : startStringCodons)
		{
			addStartCodon(new RNASequence(startStringCodon));
		}

		for (String stopStringCodon : stopStringCodons)
		{
			addStopCodon(new RNASequence(stopStringCodon));
		}

		// Read line by line
		BufferedReader bReader = new BufferedReader(new StringReader(codeString));
		try
		{
			String line;
			while ((line = bReader.readLine()) != null)
			{
				// Upper-case with a fixed locale: with the default locale a Turkish environment maps "i" to a 
				// dotted capital I, which would make an otherwise valid rule fail to match.
				line = line.trim().toUpperCase(java.util.Locale.ENGLISH);
				
				if (line.length() == 0 || line.startsWith("#"))
				{
					// Blank line or comment: do nothing, read next line
					continue;
				}

				Matcher rnaMatcher = rnaRulePattern.matcher(line);
				if (rnaMatcher.matches())
				{
					// RNA rule: addCodon validates the codon and also stores its DNA counterpart
					addCodon(new RNASequence(rnaMatcher.group(1)), AminoAcid.valueOf(rnaMatcher.group(3)));
					continue;
				}

				Matcher dnaMatcher = dnaRulePattern.matcher(line);
				if (dnaMatcher.matches())
				{
					// DNA rule: addCodon validates the codon and also stores its RNA counterpart
					addCodon(new DNASequence(dnaMatcher.group(1)), AminoAcid.valueOf(dnaMatcher.group(3)));
					continue;
				}

				// Wrong formatted line, throw exception
				throw new GeneticCodeIncorrectSyntaxException(
					line +": incorrect syntax in this line when creating a new instance of GeneticCode.");
			}
		}
		finally
		{
			bReader.close();
		}
	}
	
	/**
	 * Parameterized constructor for creating custom genetic codes. A set of coding rules is passed as the argument 
	 * <code>codeString</code>, plus a description and one or more start and stop amino acids (also as 
	 * <code>String</code>s), respectively.
	 * <p>
	 * NOTE: If this constructor is used, <strong>all</strong> codons which encode for a start or stop amino acid will 
	 * be considered as start or stop codons, respectively. The registration happens after every rule has been read, 
	 * so a codon redefined by a later rule is classified by its final amino acid.
	 * 
	 * @param description	descriptive <code>String</code>.
	 * @param codeString	a multiline <code>String</code> with the coding rules, one per line. The rules are 
	 * 						described by these "informal" regular expressions:<br />
	 * 						[Rna Codon | Dna Codon]\s*->\s*[Amino Acid]
	 * 						<p>
	 * 						NOTE: Additionally, comments starting with "#" and blank lines are allowed.
	 * @param startAminos	a <code>String</code> with the one letter abbreviation of the start amino acids (no spaces).
	 * @param stopAminos	a <code>String</code> with the one letter abbreviation of the stop amino acids (no spaces).
	 * 
	 * @throws	IOException	if reading <code>codeString</code> line by line fails.
	 * @throws	GeneticCodeIncorrectSyntaxException	if a symbol in <code>startAminos</code> or 
	 * 			<code>stopAminos</code> is not recognised by <code>AminoAcid.valueOf(char)</code>, or if a line of 
	 * 			<code>codeString</code> is neither blank, a comment, nor a well-formed rule.
	 * @throws	InvalidSequenceCharacterException if there is an error creating the codons sequences.
	 * @throws	IllegalCodonException	presumably raised by codon validation for malformed codons (TODO confirm 
	 * 									against <code>validateCodon</code>).
	 * 
	 * @see	AminoAcid
	 * @see RNASequence
	 * @see DNASequence
	 * 
	 * @since	0.9
	 */
	public GeneticCode
	(String description, String codeString, String startAminos, String stopAminos)
	throws IOException, GeneticCodeIncorrectSyntaxException 
	{
		this();
		this.descriptionSB = new StringBuilder(description);
		
		// Building blocks for rules regexes
		String rnaCodonRegex = "([" + RNABase.getRegexes() + "]{" + codonLength + "})";
		String dnaCodonRegex = "([" + DNABase.getRegexes() + "]{" + codonLength + "})";
		String aminoRegex = "([" + AminoAcid.getRegexes() + "]{1})";
		String separatorRegex = "(\\s*\\-\\>\\s*)";
		
		// Regexes:
		// Ex. rnaRuleRegex: "UUG -> K", "UUG->K", "UUG->    K"
		// Ex. dnaRuleRegex: "ATT -> O", "ATT   -> O", "ATT-> O"
		String rnaRuleRegex = "^" + rnaCodonRegex + separatorRegex + aminoRegex + "$";
		String dnaRuleRegex = "^" + dnaCodonRegex + separatorRegex + aminoRegex + "$";
		// Compiled Patterns for the Matchers
		Pattern rnaRulePattern = Pattern.compile(rnaRuleRegex);
		Pattern dnaRulePattern = Pattern.compile(dnaRuleRegex);

		Set<AminoAcid> startAminoAcids = new HashSet<AminoAcid>();
		Set<AminoAcid> stopAminoAcids = new HashSet<AminoAcid>();
		
		// Parse start amino acids
		for (int i = 0 ; i < startAminos.length() ; i++)
		{
			char symbol = startAminos.charAt(i);
			AminoAcid amino = AminoAcid.valueOf(symbol);
			if (amino == null)
			{
				throw new GeneticCodeIncorrectSyntaxException(
					symbol +": invalid start amino acid symbol when creating a new instance of " +
					"GeneticCode.");
			}
			startAminoAcids.add(amino);
		}
		
		// Parse stop amino acids. The offending symbol is taken from stopAminos itself: indexing startAminos here 
		// reported the wrong character and could run past the end of the shorter string.
		for (int i = 0 ; i < stopAminos.length() ; i++)
		{
			char symbol = stopAminos.charAt(i);
			AminoAcid amino = AminoAcid.valueOf(symbol);
			if (amino == null)
			{
				throw new GeneticCodeIncorrectSyntaxException(
					symbol +": invalid stop amino acid symbol when creating a new instance of " +
					"GeneticCode.");
			}
			stopAminoAcids.add(amino);
		}

		// Read line by line
		BufferedReader bReader = new BufferedReader(new StringReader(codeString));
		try
		{
			String line;
			while ((line = bReader.readLine()) != null)
			{
				// Upper-case with a fixed locale: with the default locale a Turkish environment maps "i" to a 
				// dotted capital I, which would make an otherwise valid rule fail to match.
				line = line.trim().toUpperCase(java.util.Locale.ENGLISH);
				
				if (line.length() == 0 || line.startsWith("#"))
				{
					// Blank line or comment: do nothing, read next line
					continue;
				}

				Matcher rnaMatcher = rnaRulePattern.matcher(line);
				if (rnaMatcher.matches())
				{
					// RNA rule: addCodon validates the codon and also stores its DNA counterpart
					addCodon(new RNASequence(rnaMatcher.group(1)), AminoAcid.valueOf(rnaMatcher.group(3)));
					continue;
				}

				Matcher dnaMatcher = dnaRulePattern.matcher(line);
				if (dnaMatcher.matches())
				{
					// DNA rule: addCodon validates the codon and also stores its RNA counterpart
					addCodon(new DNASequence(dnaMatcher.group(1)), AminoAcid.valueOf(dnaMatcher.group(3)));
					continue;
				}

				// Wrong formatted line, throw exception
				throw new GeneticCodeIncorrectSyntaxException(
					line +": incorrect syntax in this line when creating a new instance of GeneticCode.");
			}
		}
		finally
		{
			bReader.close();
		}

		// Register every codon whose final translation is a start/stop amino acid. The table already holds both 
		// the RNA and the DNA spelling of each codon, so adding the keys directly covers both.
		for (Map.Entry<BioSequence, AminoAcid> rule : codonMap.entrySet())
		{
			if (startAminoAcids.contains(rule.getValue()))
			{
				startCodons.add(rule.getKey());
			}
			if (stopAminoAcids.contains(rule.getValue()))
			{
				stopCodons.add(rule.getKey());
			}
		}
	}
	
	/**
	 * Parameterized constructor for creating custom genetic codes. A set of coding rules is passed in the argument 
	 * <code>codeString</code>, plus a description. This is equivalent to calling the four-argument constructor with 
	 * <code>"M"</code> (methionine) as the start amino acids and <code>"$"</code> (stop) as the stop amino acids. 
	 * 
	 * @param	description	descriptive <code>String</code>.
	 * @param	codeString	a multiline <code>String</code> with the coding rules, one per line. The rules are 
	 * 						described by these "informal" regular expressions:<br />
	 * 						[Rna Codon | Dna Codon]\s*->\s*[Amino Acid]
	 * 						<p>
	 * 						NOTE: Additionally, comments starting with "#" and blank lines are allowed.
	 * 
	 * @throws	IOException	propagated from the delegated constructor.
	 * @throws	GeneticCodeIncorrectSyntaxException	propagated from the delegated constructor. 
	 * @throws	InvalidSequenceCharacterException if there is an error creating the codon sequences.
	 * @throws	IllegalCodonException	presumably raised for malformed codons (TODO confirm).
	 * 
	 * @see	AminoAcid
	 * @see RNASequence
	 * @see DNASequence
	 * 
	 * @since	0.9
	 */
	public GeneticCode(String description, String codeString)
	throws IOException, GeneticCodeIncorrectSyntaxException 
	{
		// Delegate with the default start ("M") and stop ("$") amino acid symbols
		this(description, codeString, "M", "$");
	}
	
	/**
	 * Creates an independent copy of this genetic code: the description is copied into a new 
	 * <code>StringBuilder</code> and every codon of the translation table, the start codons and the stop codons is 
	 * cloned. Amino acid values are shared, not copied.
	 * <p>
	 * The copy is built with <code>new GeneticCode()</code> rather than <code>super.clone()</code>, so the result 
	 * is always a plain <code>GeneticCode</code>.
	 * 
	 * @return	a copy of the receiver.
	 * 
	 * @throws	CloneNotSupportedException	if cloning one of the codons fails.
	 * 
	 * @see java.lang.Object#clone()
	 */
	@Override
	public GeneticCode clone()
	throws CloneNotSupportedException
	{
		GeneticCode gc = new GeneticCode();

		gc.descriptionSB = new StringBuilder(this.descriptionSB);
		// Each collection is copied exactly once; entrySet avoids a second lookup per codon
		for (Map.Entry<BioSequence, AminoAcid> rule : this.codonMap.entrySet())
		{
			gc.codonMap.put((BioSequence)(rule.getKey().clone()), rule.getValue());
		}
		for (BioSequence codon : this.startCodons)
		{
			gc.startCodons.add((BioSequence)(codon.clone()));
		}
		for (BioSequence codon : this.stopCodons)
		{
			gc.stopCodons.add((BioSequence)(codon.clone()));
		}

		return gc;
	}
	
	/**
	 * Gives the current description text of this genetic code.
	 * 
	 * @return	the description of the receiver as a <code>String</code>.
	 * 
	 * @since	0.9
	 */
	public String getDescription()
	{
		final String text = this.descriptionSB.toString();
		return text;
	}

	/**
	 * Checks if a genetic code has the same translation table, start codons and termination codons as the 
	 * canonical genetic code. 
	 * <p>
	 * The start and stop codon sets are compared once; the translation of every unambiguous expansion of the RNA 
	 * codon <code>NNN</code> is then compared between both codes.
	 * 
	 * @return	<code>true</code> if this code is equivalent to the genetic code returned by
	 * 			<code>standardGeneticCode()</code> and <code>false</code> otherwise.
	 * 
	 * @since	0.6
	 */
	@SuppressWarnings("deprecation")
	public boolean isStandard()
	{
		GeneticCode standardCode = standardGeneticCode();

		// TODO Could be solved by:
		// return this.equals(standardCode);
		
		// The start/stop sets do not depend on the codon being inspected, so compare them a single time up front 
		// instead of on every iteration of the loop below.
		if (!this.startCodons.containsAll(standardCode.startCodons) ||
			!standardCode.startCodons.containsAll(this.startCodons) ||
			!this.stopCodons.containsAll(standardCode.stopCodons) ||
			!standardCode.stopCodons.containsAll(this.stopCodons))
		{
			return false;
		}

		for (BioSequence codon : new RNASequence("NNN").expandToUnambiguous())
		{
			if (standardCode.translateCodon(codon) != this.translateCodon(codon))
			{
				return false;
			}
		}
		
		return true;
	}

	/**
	 * Associates the specified codon with an amino acid. If the codon is already mapped, its amino acid is 
	 * overwritten. For an <code>RNASequence</code> the sequence returned by <code>reverseTranscribe</code> is mapped 
	 * to the same amino acid; for a <code>DNASequence</code> the sequence returned by <code>transcribe</code> is. 
	 * This method itself does not modify the start or stop codon sets.
	 * 
	 * @param	codon	codon to map; it is checked with <code>validateCodon</code> first.
	 * @param	a		encoding amino acid.
	 * 
	 * @return	previous amino acid associated with <code>codon</code>, or <code>null</code> if there was no mapping 
	 * 			for the codon.
	 * 
	 * @throws	IllegalArgumentException	declared for invalid codons (presumably raised by 
	 * 										<code>validateCodon</code>; TODO confirm).
	 * 
	 * @since	0.1
	 */
	public AminoAcid addCodon(BioSequence codon, AminoAcid a)
	throws IllegalArgumentException
	{
		validateCodon(codon);

		final boolean isRna = codon instanceof RNASequence;
		if (isRna || codon instanceof DNASequence)
		{
			// Register the other nucleic-acid spelling of the codon under the same amino acid
			BioSequence counterpart;
			if (isRna)
			{
				counterpart = ((RNASequence)codon).reverseTranscribe(new NullProgressMonitor());
			}
			else
			{
				counterpart = ((DNASequence)codon).transcribe(new NullProgressMonitor());
			}
			codonMap.put(counterpart, a);
		}

		AminoAcid previous = codonMap.put(codon, a);
		return previous;
	}
	
	/**
	 * Removes a codon from the set of start codons. The transcribed (for DNA) or reverse-transcribed (for RNA) 
	 * counterpart of the codon is removed as well.
	 * 
	 * @param 	codon	codon to remove.
	 * 
	 * @return	<code>true</code> if <code>codon</code> itself was removed and <code>false</code> if it wasn't a 
	 * 			start codon.
	 * 
	 * @since	0.8
	 */
	public boolean removeStartCodon(BioSequence codon)
	{
		// No validation: a sequence of any other class simply is not in the set
		if (codon instanceof RNASequence)
		{
			RNASequence rnaCodon = (RNASequence)codon;
			startCodons.remove(rnaCodon.reverseTranscribe(new NullProgressMonitor()));
		}
		else if (codon instanceof DNASequence)
		{
			DNASequence dnaCodon = (DNASequence)codon;
			startCodons.remove(dnaCodon.transcribe(new NullProgressMonitor()));
		}

		boolean wasStartCodon = startCodons.remove(codon);
		return wasStartCodon;
	}
	
	/**
	 * Removes a codon from the set of stop codons. The transcribed (for DNA) or reverse-transcribed (for RNA) 
	 * counterpart of the codon is removed as well.
	 * 
	 * @param 	codon	codon to remove.
	 * 
	 * @return	<code>true</code> if <code>codon</code> itself was removed and <code>false</code> if it wasn't a 
	 * 			stop codon.
	 * 
	 * @since	0.8
	 */
	public boolean removeStopCodon(BioSequence codon)
	{
		// No validation: a sequence of any other class simply is not in the set
		final boolean isRna = codon instanceof RNASequence;
		if (isRna || codon instanceof DNASequence)
		{
			BioSequence counterpart;
			if (isRna)
			{
				counterpart = ((RNASequence)codon).reverseTranscribe(new NullProgressMonitor());
			}
			else
			{
				counterpart = ((DNASequence)codon).transcribe(new NullProgressMonitor());
			}
			stopCodons.remove(counterpart);
		}
		return stopCodons.remove(codon);
	}

	
	/**
	 * Adds the specified codon to the set of stop codons, together with its transcribed (for DNA) or 
	 * reverse-transcribed (for RNA) counterpart.
	 * 
	 * @param	codon	codon to register; it is checked with <code>validateCodon</code> first.
	 * 
	 * @return	<code>true</code> if <code>codon</code> was not already listed as a stop codon, 
	 * 			<code>false</code> otherwise.
	 * 
	 * @throws	IllegalArgumentException	declared for invalid codons (presumably raised by 
	 * 										<code>validateCodon</code>; TODO confirm).
	 * 
	 * @since	0.1
	 */
	public boolean addStopCodon(BioSequence codon)
	throws IllegalArgumentException
	{
		validateCodon(codon);

		if (codon instanceof RNASequence)
		{
			RNASequence rnaCodon = (RNASequence)codon;
			stopCodons.add(rnaCodon.reverseTranscribe(new NullProgressMonitor()));
		}
		else if (codon instanceof DNASequence)
		{
			DNASequence dnaCodon = (DNASequence)codon;
			stopCodons.add(dnaCodon.transcribe(new NullProgressMonitor()));
		}

		boolean newlyAdded = stopCodons.add(codon);
		return newlyAdded;
	}
	
	/**
	 * Adds the specified codon to the set of start codons, together with its transcribed (for DNA) or 
	 * reverse-transcribed (for RNA) counterpart.
	 * 
	 * @param	codon	codon to register; it is checked with <code>validateCodon</code> first.
	 * 
	 * @return	<code>true</code> if <code>codon</code> was not already listed as a start codon, 
	 * 			<code>false</code> otherwise.
	 * 
	 * @since	0.1
	 */
	public boolean addStartCodon(BioSequence codon)
	{
		validateCodon(codon);

		final boolean isRna = codon instanceof RNASequence;
		if (isRna || codon instanceof DNASequence)
		{
			BioSequence counterpart;
			if (isRna)
			{
				counterpart = ((RNASequence)codon).reverseTranscribe(new NullProgressMonitor());
			}
			else
			{
				counterpart = ((DNASequence)codon).transcribe(new NullProgressMonitor());
			}
			startCodons.add(counterpart);
		}
		return startCodons.add(codon);
	}
	
	/**
	 * Replaces the description of this code with the given text. The existing <code>StringBuilder</code> is 
	 * emptied and reused.
	 *  
	 * @param	description	new description for the receiver.
	 * 
	 * @since	0.1
	 */
	public void setDescription(String description)
	{
		// Truncate in place, then write the new text
		this.descriptionSB.setLength(0);
		this.descriptionSB.append(description);
	}
		
	/**
	 * Gives access to the initiation codons. The returned set is the one held by this object, not a copy.
	 * 
	 * @return	start codons.
	 * 
	 * @since	0.6
	 */
	public Set<BioSequence> getStartCodons()
	{
		return this.startCodons;
	}
	
	/**
	 * Gives access to the termination codons. The returned set is the one held by this object, not a copy.
	 * 
	 * @return	stop codons.
	 * 
	 * @since	0.6
	 */
	public Set<BioSequence> getStopCodons()
	{
		return this.stopCodons;
	}
	
	/**
	 * Collects the amino acids that the start codons are mapped to in the translation table. A start codon without 
	 * a mapping contributes <code>null</code> to the result.
	 * 
	 * @return	a new set with the start amino acids.
	 * 
	 * @since	0.6
	 */
	public Set<AminoAcid> startAminoAcids()
	{
		Set<AminoAcid> aminos = new HashSet<AminoAcid>();
		Iterator<BioSequence> cursor = startCodons.iterator();
		while (cursor.hasNext())
		{
			aminos.add(codonMap.get(cursor.next()));
		}
		return aminos;
	}
	
	/**
	 * Collects the amino acids that the stop codons are mapped to in the translation table. A stop codon without 
	 * a mapping contributes <code>null</code> to the result.
	 * 
	 * @return	a new set with the stop amino acids.
	 * 
	 * @since	0.6
	 */
	public Set<AminoAcid> stopAminoAcids()
	{
		Set<AminoAcid> aminos = new HashSet<AminoAcid>();
		Iterator<BioSequence> cursor = stopCodons.iterator();
		while (cursor.hasNext())
		{
			aminos.add(codonMap.get(cursor.next()));
		}
		return aminos;
	}
	
	/**
	 * Translates a codon to its coding amino acid.
	 * <p>
	 * The codon is expanded into its unambiguous forms and each form is looked up in the translation table; forms 
	 * without a mapping are ignored. The outcome depends on the distinct amino acids found:
	 * <ul>
	 * <li>exactly one: that amino acid;</li>
	 * <li>exactly two: <code>B</code> for {N, D}, <code>Z</code> for {E, Q}, <code>J</code> for {I, L};</li>
	 * <li>anything else: <code>X</code>.</li>
	 * </ul>
	 *  
	 * @param	codon	sequence to translate.
	 * 
	 * @return	<code>null</code> if the length of <code>codon</code> differs from the codon length, 
	 * 			<code>AminoAcid.GAP</code> if it contains an RNA or DNA gap, otherwise the amino acid as described 
	 * 			above.
	 * 
	 * @since	0.2
	 */
	@SuppressWarnings("deprecation")
	public AminoAcid translateCodon(BioSequence codon)
	{
		// Guard: wrong length is not a codon at all
		if (codon.getLength() != codonLength)
		{
			return null;
		}
		
		// Guard: a gap anywhere translates to a gap
		final boolean hasGap = codon.sequence.contains(RNABase.GAP) || codon.sequence.contains(DNABase.GAP);
		if (hasGap)
		{
			return AminoAcid.GAP;
		}

		// Gather the distinct translations of every unambiguous form of the codon
		List<BioSequence> concreteCodons = codon.expandToUnambiguous();
		Set<AminoAcid> candidates = new HashSet<AminoAcid>();
		for (BioSequence concrete : concreteCodons)
		{
			AminoAcid mapped = codonMap.get(concrete);
			if (mapped != null)
			{
				candidates.add(mapped);
			}
		}
		
		// TODO Ugly fix: this logic should be moved to the AminoAcid class or maybe not? Is it ugly? I dunno...
		switch (candidates.size())
		{
			case 1:
				return candidates.iterator().next();
			case 2:
				if (candidates.contains(AminoAcid.N) && candidates.contains(AminoAcid.D))
				{
					return AminoAcid.B;
				}
				if (candidates.contains(AminoAcid.E) && candidates.contains(AminoAcid.Q))
				{
					return AminoAcid.Z;
				}
				if (candidates.contains(AminoAcid.I) && candidates.contains(AminoAcid.L))
				{
					return AminoAcid.J;
				}
				break;
			default:
				break;
		}

		// No single or recognised two-way translation
		return AminoAcid.X;
	}
	
	/**
	 * Lists the codons of the translation table whose translation, as computed by <code>translateCodon</code>, is 
	 * the specified amino acid.
	 * 
	 * @param	translatedAmino	amino acid.
	 * 
	 * @return	codons of this code which are translated to <code>translatedAmino</code>.
	 * 
	 * @since	0.7
	 */
	public List<BioSequence> codonsForAminoAcid(AminoAcid translatedAmino)
	{
		List<BioSequence> matches = new ArrayList<BioSequence>();
		Iterator<BioSequence> cursor = codonMap.keySet().iterator();
		while (cursor.hasNext())
		{
			BioSequence candidate = cursor.next();
			if (translateCodon(candidate) != translatedAmino)
			{
				continue;
			}
			matches.add(candidate);
		}
		return matches;
	}
	
	/**
	 * Translates a DNA or RNA sequence to a <code>ProteinSequence</code>, reading consecutive codons from the first 
	 * position. Trailing bases that do not fill a whole codon are not translated.
	 * <p>
	 * The resulting peptide direction is <code>N_C</code> when the input direction is <code>C5_C3</code> and 
	 * <code>C_N</code> otherwise; the description of the input is reused.
	 * 
	 * @param	bs	a DNA or RNA sequence to translate.
	 * 
	 * @return	the peptide sequence translated by this genetic code.
	 * 
	 * @throws	IllegalArgumentException	if <code>bs</code> is neither a <code>DNASequence</code> nor an 
	 * 										<code>RNASequence</code>.
	 * 
	 * @since	0.8
	 */
	@Deprecated
	public ProteinSequence translateSequence(BioSequence bs)
	throws IllegalArgumentException
	{
		final boolean isNucleotide = bs instanceof DNASequence || bs instanceof RNASequence;
		if (!isNucleotide)
		{
			throw new IllegalArgumentException(
				bs.getClass().getName() + " is not a valid BioSequence class for translation.");
		}
		
		// Walk the sequence one codon at a time
		List<BioResidue> residues = new ArrayList<BioResidue>();
		int start = 0;
		while (start <= bs.getLength() - codonLength)
		{
			BioSequence triplet = bs.getSubsequence(start, start + codonLength);
			residues.add(translateCodon(triplet));
			start += codonLength;
		}

		PeptideSequenceDirectionality direction;
		if (bs.direction == NucleotideSequenceDirectionality.C5_C3)
		{
			direction = PeptideSequenceDirectionality.N_C;
		}
		else
		{
			direction = PeptideSequenceDirectionality.C_N;
		}
		return new ProteinSequence(bs.getDescription(), direction, residues);
	}
	
	/**
	 * Translates one reading frame of a nucleotide sequence to a <code>ProteinSequence</code>.
	 * <p>
	 * Positive frames are read from index <code>readingFrame - 1</code> onwards. For negative frames the sequence is 
	 * reversed in place with <code>bs.reverse(...)</code>, read from index <code>-readingFrame - 1</code> onwards and 
	 * reversed back; the list of translated residues is reversed too. Trailing bases which do not complete a codon 
	 * are ignored, as are codons for which <code>translateCodon</code> returns <code>null</code>.
	 * 
	 * @param	bs				a DNA or RNA sequence to translate. While a negative frame is being translated it is 
	 * 							temporarily reversed; it is reversed back before this method returns or throws.
	 * @param	readingFrame	the frame to translate: <code>1..codonLength</code> or 
	 * 							<code>-codonLength..-1</code>.
	 * 
	 * @return	the peptide sequence translated by this genetic code.
	 *
	 * @throws	IllegalArgumentException	if <code>readingFrame</code> is 0 or out of range, or if <code>bs</code> 
	 * 										is neither a <code>DNASequence</code> nor a <code>RNASequence</code>.
	 * 
	 * @since	1.2
	 */
	@Deprecated
	public ProteinSequence translateSequence(BioSequence bs, int readingFrame)
	throws IllegalArgumentException
	{
		// The method assumes a genetic code of triplets with 3 reading frames 
		if (readingFrame < -codonLength || readingFrame > codonLength || readingFrame == 0)
		{
			throw new IllegalArgumentException(Integer.toString(readingFrame));
		}
		
		if (!(bs instanceof DNASequence) && !(bs instanceof RNASequence))
		{
			throw new IllegalArgumentException(
				bs.getClass().getName() + " is not a valid BioSequence class for translation.");
		}
		
		ArrayList<BioResidue> translatedAminos = new ArrayList<BioResidue>(bs.getLength() / codonLength + 1);
		AminoAcid newAA = null;
		
		// Downstream reading frames
		if (readingFrame > 0)
		{
			for (int i = readingFrame - 1 ; i < bs.getLength() - (codonLength - 1) ; i = i + codonLength)
			{
				newAA = translateCodon(bs.getSubsequence(i, i + codonLength));
				if (newAA != null)
				{
					translatedAminos.add(newAA);
				}
			}
		}
		// Upstream reading frames (readingFrame < 0, since 0 was rejected above)
		else
		{
			// Reversing each codon takes *A LOT MORE* time than reversing, translating and reversing back
			bs.reverse(new NullProgressMonitor());
			try
			{
				for (int i = - readingFrame - 1 ; i < bs.getLength() - (codonLength - 1) ; i = i + codonLength)
				{
					newAA = translateCodon(bs.getSubsequence(i, i + codonLength));
					if (newAA != null)
					{
						translatedAminos.add(newAA);
					}
				}
			}
			finally
			{
				// Hand the caller's sequence back in its original orientation even if translation fails midway
				bs.reverse(new NullProgressMonitor());
			}
			Collections.reverse(translatedAminos);
		}
		
		// Evaluated once the sequence is back in its original state
		PeptideSequenceDirectionality direction =
			bs.direction == NucleotideSequenceDirectionality.C5_C3 ?
				PeptideSequenceDirectionality.N_C : PeptideSequenceDirectionality.C_N;
		
		return new ProteinSequence(bs.getDescription(), direction, translatedAminos);
	}
	
	/**
	 * Translates a DNA sequence starting at a given index, in either direction, reporting progress and honouring 
	 * cancellation.
	 * <p>
	 * When <code>forward</code> is <code>true</code>, codons are read upwards from <code>fromIndex</code> to the end 
	 * of the sequence. Otherwise the complementary bases are read downwards from <code>fromIndex</code> to index 0 and 
	 * the list of translated residues is reversed before the result is built. Bases which do not complete a codon 
	 * are ignored, as are codons for which <code>translateCodon</code> returns <code>null</code>.
	 * 
	 * @param	ds				DNA sequence to translate.
	 * @param	fromIndex		index of the first base to read; must lie inside <code>ds</code>.
	 * @param	forward			<code>true</code> to read upwards, <code>false</code> to read the complementary 
	 * 							bases downwards.
	 * @param	progressMonitor	receives a sub-task label, <code>codonLength</code> units of work per codon, and is 
	 * 							polled for cancellation after every codon.
	 * 
	 * @return	the translated peptide sequence, carrying the description of <code>ds</code>.
	 * 
	 * @throws	IndexOutOfBoundsException	if <code>fromIndex</code> is negative or not smaller than the sequence 
	 * 										size.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	@Deprecated
	public ProteinSequence translateSequence
	(DNASequence ds, int fromIndex, boolean forward, IProgressMonitor progressMonitor)
	throws IllegalArgumentException, IndexOutOfBoundsException, OperationCanceledException
	{
		final int sequenceLength = ds.sequence.size();
		if (fromIndex < 0 || fromIndex >= sequenceLength)
		{
			throw new IndexOutOfBoundsException(
				fromIndex + " index out of bounds when translating " + ds.getDescription() + ".");
		}

		// The frame number is only used to label the progress sub-task
		int frame = forward
			? fromIndex % codonLength + 1
			: (fromIndex - sequenceLength + 1) % codonLength - 1;
		progressMonitor.subTask("Translating frame " + numberFormat.format(frame) + ".");

		// Scratch codon which is overwritten in place for every window, instead of building a subsequence each time
		char[] placeholder = new char[codonLength];
		Arrays.fill(placeholder, '-');
		DNASequence window = new DNASequence(new String(placeholder));

		ArrayList<BioResidue> residues = new ArrayList<BioResidue>(sequenceLength / codonLength + 1);
		if (forward)
		{
			// Downstream reading frames
			int position = fromIndex;
			while (position < sequenceLength - (codonLength - 1))
			{
				for (int offset = 0 ; offset < codonLength ; offset++)
				{
					window.sequence.set(offset, ds.sequence.get(position + offset));
				}

				AminoAcid amino = translateCodon(window);
				if (amino != null)
				{
					residues.add(amino);
				}

				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
				position += codonLength;
				progressMonitor.worked(codonLength);
			}
		}
		else
		{
			// Upstream reading frames: walk downwards taking the complement of every base
			int position = fromIndex;
			while (position >= codonLength - 1)
			{
				for (int offset = 0 ; offset < codonLength ; offset++)
				{
					DNABase base = (DNABase)ds.sequence.get(position - offset);
					window.sequence.set(offset, base.getComplementary());
				}

				AminoAcid amino = translateCodon(window);
				if (amino != null)
				{
					residues.add(amino);
				}

				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
				position -= codonLength;
				progressMonitor.worked(codonLength);
			}
			Collections.reverse(residues);
		}

		PeptideSequenceDirectionality direction;
		if (ds.direction == NucleotideSequenceDirectionality.C5_C3)
		{
			direction = PeptideSequenceDirectionality.N_C;
		}
		else
		{
			direction = PeptideSequenceDirectionality.C_N;
		}
		return new ProteinSequence(ds.getDescription(), direction, residues);
	}
	
	/**
	 * Translates a region of a DNA sequence codon by codon, reporting progress and honouring cancellation.
	 * <p>
	 * The relative order of the two indices selects how the region is read:
	 * <ul>
	 * <li><code>fromIndex &lt;= toIndex</code>: bases are read upwards from <code>fromIndex</code> (inclusive) to 
	 * <code>toIndex</code> (exclusive).</li>
	 * <li><code>fromIndex &gt; toIndex</code>: the complementary bases are read downwards from 
	 * <code>fromIndex</code> (inclusive) to <code>toIndex</code> (exclusive), and the list of translated residues is 
	 * reversed before the result is built.</li>
	 * </ul>
	 * Bases at the end of the region which do not complete a codon are ignored, as are codons for which 
	 * <code>translateCodon</code> returns <code>null</code>.
	 * 
	 * @param	ds				DNA sequence to translate.
	 * @param	fromIndex		index of the first base to read.
	 * @param	toIndex			index just past the last base to read (may be the sequence size when reading 
	 * 							upwards, or -1 when reading downwards).
	 * @param	progressMonitor	receives a sub-task label, <code>codonLength</code> units of work per codon, and is 
	 * 							polled for cancellation after every codon.
	 * 
	 * @return	the translated peptide sequence carrying the description of <code>ds</code>, or a 
	 * 			<code>ProteinSequence</code> built from the empty string if <code>ds</code> is shorter than one 
	 * 			codon (in which case the indices are not checked).
	 * 
	 * @throws	IndexOutOfBoundsException	if the region does not lie inside <code>ds</code>.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	public ProteinSequence translateSequence
	(DNASequence ds, int fromIndex, int toIndex, IProgressMonitor progressMonitor)
	throws IllegalArgumentException, IndexOutOfBoundsException, OperationCanceledException
	{
		// Less than one codon: nothing to translate (codonLength instead of a literal, like the rest of the method)
		if (ds.getLength() < codonLength)
		{
			return new ProteinSequence("");
		}
		
		final boolean forward = fromIndex <= toIndex;
		final boolean outOfBounds = forward
			? (fromIndex < 0 || toIndex > ds.sequence.size())
			: (fromIndex > ds.sequence.size() - 1 || toIndex < -1);
		if (outOfBounds)
		{
			throw new IndexOutOfBoundsException(
				fromIndex + ".." + toIndex + ": range out of bounds when translating " + ds.getDescription() + ".");
		}

		// The frame number is only used to label the progress sub-task
		int frame = 0;
		if (forward)
		{
			frame = fromIndex % codonLength + 1;
		}
		else
		{
			frame = (fromIndex - ds.sequence.size() + 1) % codonLength - 1;
		}
		progressMonitor.subTask("Translating frame " + numberFormat.format(frame) + ".");

		// Scratch codon which is overwritten in place for every window, instead of building a subsequence each time
		StringBuilder sequenceSB = new StringBuilder();
		for (int i = 0 ; i < codonLength ; i++)
		{
			sequenceSB.append('-');
		}
		DNASequence codon = new DNASequence(sequenceSB.toString());

		ArrayList<BioResidue> translatedAminos = new ArrayList<BioResidue>(ds.sequence.size() / codonLength + 1);
		AminoAcid newAA = null;
		// Downstream reading frames
		if (forward)
		{
			for (int i = fromIndex ; i <= toIndex - codonLength; i += codonLength, progressMonitor.worked(codonLength))
			{
				for (int j = 0 ; j < codonLength ; j++)
				{
					codon.sequence.set(j, ds.sequence.get(i + j));
				}

				newAA = translateCodon(codon);
				if (newAA != null)
				{
					translatedAminos.add(newAA);
				}

				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
		}
		// Upstream reading frames
		else
		{
			for (int i = fromIndex ; i >= toIndex + codonLength ; i -= codonLength, progressMonitor.worked(codonLength))
			{
				// Read downwards, taking the complement of every base
				for (int j = 0 ; j < codonLength ; j++)
				{
					codon.sequence.set(j, ((DNABase)ds.sequence.get(i - j)).getComplementary());
				}
				
				newAA = translateCodon(codon);
				if (newAA != null)
				{
					translatedAminos.add(newAA);
				}
				
				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
			Collections.reverse(translatedAminos);
		}
		
		PeptideSequenceDirectionality direction =
			ds.direction == NucleotideSequenceDirectionality.C5_C3 ?
				PeptideSequenceDirectionality.N_C : PeptideSequenceDirectionality.C_N;

		return new ProteinSequence(ds.getDescription(), direction, translatedAminos);
	}
	
	/**
	 * Searches a nucleotide sequence for the next place where a protein motif is encoded, starting at 
	 * <code>fromIndex</code> and moving towards the end of the sequence.
	 * <p>
	 * Every index from <code>fromIndex</code> on is tried as the first base of a run of codons as long as the motif, 
	 * so all reading frames are covered. Each codon is translated with <code>translateCodon</code> and accepted if 
	 * the result is among the residues the corresponding motif residue expands to (<code>expandToAll()</code> when 
	 * <code>matchAmbiguous</code> is set, <code>expandToUnambiguous()</code> otherwise).
	 * <p>
	 * If that scan finds nothing and <code>reverse</code> is set, a second scan tries the indices from 
	 * <code>fromIndex</code> down to <code>codonLength - 1</code>.
	 * 
	 * @param	complementAnnotate	passed through to the created <code>Annotation</code>.
	 * @param	motif				protein motif to look for.
	 * @param	bs					DNA or RNA sequence to search.
	 * @param	fromIndex			index at which the search starts; must lie inside <code>bs</code>.
	 * @param	matchAmbiguous		selects how motif residues are expanded (see above).
	 * @param	reverse				whether to run the second, downward scan when the first one finds nothing.
	 * @param	progressMonitor		receives one unit of work per index tried and is polled for cancellation.
	 * 
	 * @return	a list holding the single annotation of the first hit, or <code>null</code> if there is no hit.
	 * 
	 * @throws	IllegalArgumentException	if <code>bs</code> is neither a <code>DNASequence</code> nor a 
	 * 										<code>RNASequence</code>.
	 * @throws	IndexOutOfBoundsException	if <code>fromIndex</code> is negative or not smaller than the sequence 
	 * 										size.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	public List<Annotation> findNextEncodedMotif
	(boolean complementAnnotate, ProteinSequence motif, BioSequence bs, int fromIndex, 
	 boolean matchAmbiguous, boolean reverse, IProgressMonitor progressMonitor)
	throws IllegalArgumentException, IndexOutOfBoundsException, OperationCanceledException
	{
		if (!(bs instanceof DNASequence) && !(bs instanceof RNASequence))
		{
			throw new IllegalArgumentException(
				"Find error:\\n" + bs.getDescription() + " is not a nucleotide sequence.");
		}

		if (fromIndex < 0 || fromIndex > bs.sequence.size() - 1)
		{
			throw new IndexOutOfBoundsException(
				"Find error:\\n" + fromIndex + " index out of bounds (" + bs.sequence.size() + ").");
		}

		// Initialise and prepare all the motif variations
		List<List<BioResidue>> explodedMotif = new ArrayList<List<BioResidue>>(motif.sequence.size());
		for (BioResidue b : motif.sequence)
		{
			List<BioResidue> explodedResidue = new ArrayList<BioResidue>();
			if (matchAmbiguous)
			{
				explodedResidue.addAll(Arrays.asList(b.expandToAll()));
			}
			else
			{
				explodedResidue.addAll(Arrays.asList(b.expandToUnambiguous()));
			}
			explodedMotif.add(explodedResidue);
		}
		
		// Number of bases a full match of the motif occupies
		final int motifSpan = motif.sequence.size() * codonLength;
		
		progressMonitor.subTask("Translating and matching protein products.");
		StringBuilder sequenceSB = new StringBuilder();
		for (int i = 0 ; i < codonLength ; i++)
		{
			sequenceSB.append('-');
		}

		// Create a dummy codon of the same type as bs; it is overwritten in place for every window
		BioSequence codon;
		if (bs instanceof DNASequence)
		{
			codon = new DNASequence(sequenceSB.toString());
		}
		else // if (bs instanceof RNASequence)
		{
			codon = new RNASequence(sequenceSB.toString());
		}

		AminoAcid newAA = null;
		// Downstream
		for (int i = fromIndex ; i <= bs.sequence.size() - motifSpan ; i++, progressMonitor.worked(1))
		{
			// Instead of creating a subList, fill codon with calls to .set()
			for (int j = 0 ; j < codonLength ; j++)
			{
				codon.sequence.set(j, bs.sequence.get(i + j));
			}

			// If the first codon matches...
			newAA = translateCodon(codon);
			if (explodedMotif.get(0).contains(newAA))
			{
				// .. continue with search *in this* frame (in triplets)
				boolean match = true;
				for (int j = 1; j < explodedMotif.size() ; j++)
				{
					for (int k = 0 ; k < codonLength ; k++)
					{
						codon.sequence.set(k, bs.sequence.get(i + j * codonLength + k));
					}

					newAA = translateCodon(codon);
					if (explodedMotif.get(j).contains(newAA))
					{
						continue;
					}
					match = false;
					break;
				}
				if (match)
				{
					List<Annotation> annotationInList = new ArrayList<Annotation>();
					annotationInList.add(
							new Annotation(
									motif.descriptionSB.toString(), i, i + motif.getLength() * codonLength,
									complementAnnotate, true));
					return annotationInList;
				}
			}

			if (progressMonitor.isCanceled())
			{
				throw new OperationCanceledException();
			}
		}
		// Upstream reading frames
		// NOTE(review): this scan reads the same bases as the downstream one (i, i + 1, ..., no complement) but 
		// annotates a hit as i .. i - span; confirm whether a read of the complementary strand was intended.
		if (reverse)
		{
			for (int i = fromIndex ; i >= 0 + (codonLength - 1) ; i--, progressMonitor.worked(1))
			{
				// A window running past the end of the sequence cannot hold the motif. Reading it used to fail with 
				// an IndexOutOfBoundsException (always so when fromIndex is one of the last bases), so skip it.
				if (i + motifSpan <= bs.sequence.size())
				{
					for (int j = 0 ; j < codonLength ; j++)
					{
						codon.sequence.set(j, bs.sequence.get(i + j));
					}
	
					newAA = translateCodon(codon);
					if (explodedMotif.get(0).contains(newAA))
					{
						// Proceed with triplets search
						boolean match = true;
						for (int j = 1; j < explodedMotif.size() ; j++)
						{
							for (int k = 0 ; k < codonLength ; k++)
							{
								codon.sequence.set(k, bs.sequence.get(i + j * codonLength + k));
							}
	
							newAA = translateCodon(codon);
							if (explodedMotif.get(j).contains(newAA))
							{
								continue;
							}
							match = false;
							break;
						}
						if (match)
						{
							List<Annotation> annotationInList = new ArrayList<Annotation>();
							annotationInList.add(
									new Annotation(
											motif.descriptionSB.toString(), i, i - motif.getLength() * codonLength,
											complementAnnotate, true));
							return annotationInList;
						}
					}
				}

				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
		}
			
		return null;
	}
	
	/**
	 * Searches a nucleotide sequence for the previous place where a protein motif is encoded, starting at 
	 * <code>fromIndex</code> and moving towards the beginning of the sequence.
	 * <p>
	 * Every index from <code>fromIndex</code> down is tried as the last base of a run of codons as long as the 
	 * motif, so all reading frames are covered; the codons are checked from the last motif residue to the first. Each 
	 * codon is translated with <code>translateCodon</code> and accepted if the result is among the residues the 
	 * corresponding motif residue expands to (<code>expandToAll()</code> when <code>matchAmbiguous</code> is set, 
	 * <code>expandToUnambiguous()</code> otherwise).
	 * <p>
	 * If that scan finds nothing and <code>reverse</code> is set, a second scan tries the indices from 
	 * <code>fromIndex</code> down to <code>codonLength - 1</code> as the first base of a run of codons.
	 * 
	 * @param	complementAnnotate	passed through to the created <code>Annotation</code>.
	 * @param	motif				protein motif to look for.
	 * @param	bs					DNA or RNA sequence to search.
	 * @param	fromIndex			index at which the search starts; must lie inside <code>bs</code>.
	 * @param	matchAmbiguous		selects how motif residues are expanded (see above).
	 * @param	reverse				whether to run the second scan when the first one finds nothing.
	 * @param	progressMonitor		receives one unit of work per index tried and is polled for cancellation.
	 * 
	 * @return	a list holding the single annotation of the first hit, or <code>null</code> if there is no hit.
	 * 
	 * @throws	IllegalArgumentException	if <code>bs</code> is neither a <code>DNASequence</code> nor a 
	 * 										<code>RNASequence</code>.
	 * @throws	IndexOutOfBoundsException	if <code>fromIndex</code> is negative or not smaller than the sequence 
	 * 										size.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	public List<Annotation> findPreviousEncodedMotif
	(boolean complementAnnotate, ProteinSequence motif, BioSequence bs, int fromIndex, boolean matchAmbiguous, boolean reverse,   
	 IProgressMonitor progressMonitor)
	throws IllegalArgumentException, IndexOutOfBoundsException, OperationCanceledException
	{
		if (!(bs instanceof DNASequence) && !(bs instanceof RNASequence))
		{
			throw new IllegalArgumentException(
				"Find error:\\n" + bs.getDescription() + " is not a nucleotide sequence.");
		}

		if (fromIndex < 0 || fromIndex > bs.sequence.size() - 1)
		{
			throw new IndexOutOfBoundsException(
				"Find error:\\n" + fromIndex + " index out of bounds (" + bs.sequence.size() + ").");
		}

		// Initialise and prepare all the motif variations
		List<List<BioResidue>> explodedMotif = new ArrayList<List<BioResidue>>(motif.sequence.size());
		for (BioResidue b : motif.sequence)
		{
			List<BioResidue> explodedResidue = new ArrayList<BioResidue>();
			if (matchAmbiguous)
			{
				explodedResidue.addAll(Arrays.asList(b.expandToAll()));
			}
			else
			{
				explodedResidue.addAll(Arrays.asList(b.expandToUnambiguous()));
			}
			explodedMotif.add(explodedResidue);
		}
		
		// Number of bases a full match of the motif occupies
		final int motifSpan = motif.sequence.size() * codonLength;
		
		progressMonitor.subTask("Translating and matching protein products.");
		
		StringBuilder sequenceSB = new StringBuilder();
		for (int i = 0 ; i < codonLength ; i++)
		{
			sequenceSB.append('-');
		}

		// Create a dummy codon of the same type as bs; it is overwritten in place for every window
		BioSequence codon;
		if (bs instanceof DNASequence)
		{
			codon = new DNASequence(sequenceSB.toString());
		}
		else // if (bs instanceof RNASequence)
		{
			codon = new RNASequence(sequenceSB.toString());
		}

		AminoAcid newAA = null;
		// Same strand, walking towards the beginning: i is the last base of the candidate match
		for (int i = fromIndex ; i - motifSpan + 1 >= 0 ; i--, progressMonitor.worked(1))
		{
			// Instead of creating a subList, fill codon with calls to .set()
			for (int j = 0 ; j < codonLength ; j++)
			{
				codon.sequence.set(j, bs.sequence.get(i - codonLength + 1 + j));
			}

			// If the last codon matches...
			newAA = translateCodon(codon);
			if (explodedMotif.get(explodedMotif.size() - 1).contains(newAA))
			{
				// .. continue with search *in this* frame (in triplets), from the end of the motif to its start
				boolean match = true;
				for (int j = 1; j < explodedMotif.size() ; j++)
				{
					for (int k = 0 ; k < codonLength ; k++)
					{
						codon.sequence.set(k, bs.sequence.get(i - (j + 1) * codonLength + 1 + k));
					}

					newAA = translateCodon(codon);
					if (explodedMotif.get(explodedMotif.size() - 1 - j).contains(newAA))
					{
						continue;
					}
					match = false;
					break;
				}
				if (match)
				{
					List<Annotation> annotationInList = new ArrayList<Annotation>();
					annotationInList.add(
						new Annotation(
							motif.descriptionSB.toString(), i - motif.getLength() * codonLength + 1, i + 1, 
							complementAnnotate, true));
					return annotationInList;
				}
			}
			
			if (progressMonitor.isCanceled())
			{
				throw new OperationCanceledException();
			}
		}
		// Upstream reading frames
		// NOTE(review): this scan reads bases i, i + 1, ... of the same strand (no complement) but annotates a hit 
		// as i .. i - span; confirm whether a read of the complementary strand was intended.
		if (reverse)
		{
			for (int i = fromIndex ; i >= 0 + (codonLength - 1) ; i--, progressMonitor.worked(1))
			{
				// A window running past the end of the sequence cannot hold the motif. Reading it used to fail with 
				// an IndexOutOfBoundsException (always so when fromIndex is one of the last bases), so skip it.
				if (i + motifSpan <= bs.sequence.size())
				{
					for (int j = 0 ; j < codonLength ; j++)
					{
						codon.sequence.set(j, bs.sequence.get(i + j));
					}
	
					newAA = translateCodon(codon);
					if (explodedMotif.get(0).contains(newAA))
					{
						// Proceed with triplets search
						boolean match = true;
						for (int j = 1; j < explodedMotif.size() ; j++)
						{
							for (int k = 0 ; k < codonLength ; k++)
							{
								codon.sequence.set(k, bs.sequence.get(i + j * codonLength + k));
							}
	
							newAA = translateCodon(codon);
							if (explodedMotif.get(j).contains(newAA))
							{
								continue;
							}
							match = false;
							break;
						}
						if (match)
						{
							List<Annotation> annotationInList = new ArrayList<Annotation>();
							annotationInList.add(
								new Annotation(
									motif.descriptionSB.toString(), i, i - motif.getLength() * codonLength, 
									complementAnnotate, true));
							return annotationInList;
						}
					}
				}
				
				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
		}
		
		return null;
	}
		
	/**
	 * Returns the codon length used by this class.
	 * 
	 * @return	the value of <code>codonLength</code>, i.e. the number of residues the translation and search 
	 * 			methods of this class read per codon.
	 *
	 * @since	1.3
	 */
	public int getCodonLength()
	{
		return codonLength;
	}

	/**
	 * Searches a nucleotide sequence for every place where a protein motif is encoded, starting at 
	 * <code>fromIndex</code> and moving towards the end of the sequence.
	 * <p>
	 * Every index from <code>fromIndex</code> on is tried as the first base of a run of codons as long as the motif, 
	 * so all reading frames are covered. Each codon is translated with <code>translateCodon</code> and accepted if 
	 * the result is among the residues the corresponding motif residue expands to (<code>expandToAll()</code> when 
	 * <code>matchAmbiguous</code> is set, <code>expandToUnambiguous()</code> otherwise).
	 * <p>
	 * If <code>reverse</code> is set, a second scan then tries the indices from <code>fromIndex</code> down to 
	 * <code>codonLength - 1</code> and appends its hits to the same list.
	 * 
	 * @param	complementAnnotate	passed through to the created <code>Annotation</code>s.
	 * @param	motif				protein motif to look for.
	 * @param	bs					DNA or RNA sequence to search.
	 * @param	fromIndex			index at which the search starts; must lie inside <code>bs</code>.
	 * @param	matchAmbiguous		selects how motif residues are expanded (see above).
	 * @param	reverse				whether to run the second, downward scan.
	 * @param	progressMonitor		receives one unit of work per index tried and is polled for cancellation.
	 * 
	 * @return	one single-annotation list per hit, in the order the hits were found; empty if there is no hit.
	 * 
	 * @throws	IllegalArgumentException	if <code>bs</code> is neither a <code>DNASequence</code> nor a 
	 * 										<code>RNASequence</code>.
	 * @throws	IndexOutOfBoundsException	if <code>fromIndex</code> is negative or not smaller than the sequence 
	 * 										size.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	public List<List<Annotation>> findEncodedMotifsList
	(boolean complementAnnotate, ProteinSequence motif, BioSequence bs, int fromIndex, 
	 boolean matchAmbiguous, boolean reverse, IProgressMonitor progressMonitor)
	throws IllegalArgumentException, IndexOutOfBoundsException, OperationCanceledException
	{
		if (!(bs instanceof DNASequence) && !(bs instanceof RNASequence))
		{
			throw new IllegalArgumentException(
				"Find error:\\n" + bs.getDescription() + " is not a nucleotide sequence.");
		}

		if (fromIndex < 0 || fromIndex > bs.sequence.size() - 1)
		{
			throw new IndexOutOfBoundsException(
				"Find error:\\n" + fromIndex + " index out of bounds (" + bs.sequence.size() + ").");
		}

		// Initialise and prepare all the motif variations
		List<List<BioResidue>> explodedMotif = new ArrayList<List<BioResidue>>(motif.sequence.size());
		for (BioResidue b : motif.sequence)
		{
			List<BioResidue> explodedResidue = new ArrayList<BioResidue>();
			if (matchAmbiguous)
			{
				explodedResidue.addAll(Arrays.asList(b.expandToAll()));
			}
			else
			{
				explodedResidue.addAll(Arrays.asList(b.expandToUnambiguous()));
			}
			explodedMotif.add(explodedResidue);
		}
		
		// Number of bases a full match of the motif occupies
		final int motifSpan = motif.sequence.size() * codonLength;
		
		List<List<Annotation>> hits = new ArrayList<List<Annotation>>();
		progressMonitor.subTask("Translating and matching protein products.");
		StringBuilder sequenceSB = new StringBuilder();
		for (int i = 0 ; i < codonLength ; i++)
		{
			sequenceSB.append('-');
		}

		// Create a dummy codon of the same type as bs; it is overwritten in place for every window
		BioSequence codon;
		if (bs instanceof DNASequence)
		{
			codon = new DNASequence(sequenceSB.toString());
		}
		else // if (bs instanceof RNASequence)
		{
			codon = new RNASequence(sequenceSB.toString());
		}

		AminoAcid newAA = null;
		// Downstream
		for (int i = fromIndex ; i <= bs.sequence.size() - motifSpan ; i++, progressMonitor.worked(1))
		{
			// Instead of creating a subList, fill codon with calls to .set()
			for (int j = 0 ; j < codonLength ; j++)
			{
				codon.sequence.set(j, bs.sequence.get(i + j));
			}

			// If the first codon matches...
			newAA = translateCodon(codon);
			if (explodedMotif.get(0).contains(newAA))
			{
				// .. continue with search *in this* frame (in triplets)
				boolean match = true;
				for (int j = 1; j < explodedMotif.size() ; j++)
				{
					for (int k = 0 ; k < codonLength ; k++)
					{
						codon.sequence.set(k, bs.sequence.get(i + j * codonLength + k));
					}

					newAA = translateCodon(codon);
					if (explodedMotif.get(j).contains(newAA))
					{
						continue;
					}
					match = false;
					break;
				}
				if (match)
				{
					List<Annotation> annotationInList = new ArrayList<Annotation>();
					annotationInList.add(
							new Annotation(
									motif.descriptionSB.toString(), i, i + motif.getLength() * codonLength,
									complementAnnotate, true));
					hits.add(annotationInList);
				}
			}

			if (progressMonitor.isCanceled())
			{
				throw new OperationCanceledException();
			}
		}
		// Upstream reading frames
		// NOTE(review): this scan reads the same bases as the downstream one (i, i + 1, ..., no complement) but 
		// annotates a hit as i .. i - span; confirm whether a read of the complementary strand was intended.
		if (reverse)
		{
			for (int i = fromIndex ; i >= 0 + (codonLength - 1) ; i--, progressMonitor.worked(1))
			{
				// A window running past the end of the sequence cannot hold the motif. Reading it used to fail with 
				// an IndexOutOfBoundsException (always so when fromIndex is one of the last bases), so skip it.
				if (i + motifSpan <= bs.sequence.size())
				{
					for (int j = 0 ; j < codonLength ; j++)
					{
						codon.sequence.set(j, bs.sequence.get(i + j));
					}
	
					newAA = translateCodon(codon);
					if (explodedMotif.get(0).contains(newAA))
					{
						// Proceed with triplets search
						boolean match = true;
						for (int j = 1; j < explodedMotif.size() ; j++)
						{
							for (int k = 0 ; k < codonLength ; k++)
							{
								codon.sequence.set(k, bs.sequence.get(i + j * codonLength + k));
							}
	
							newAA = translateCodon(codon);
							if (explodedMotif.get(j).contains(newAA))
							{
								continue;
							}
							match = false;
							break;
						}
						if (match)
						{
							List<Annotation> annotationInList = new ArrayList<Annotation>();
							annotationInList.add(
									new Annotation(
											motif.descriptionSB.toString(), i, i - motif.getLength() * codonLength,
											complementAnnotate, true));
							hits.add(annotationInList);
						}
					}
				}

				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
		}
			
		return hits;
	}
	
	/**
	 * Finds the open reading frames of one frame of a nucleotide sequence without progress reporting.
	 * <p>
	 * Delegates to the overload taking an <code>IProgressMonitor</code>, passing a fresh 
	 * <code>NullProgressMonitor</code>. (The call used to pass the same four arguments again, which made the method 
	 * call itself until the stack overflowed.)
	 * 
	 * @param	complementAnnotate	passed through to the created <code>Annotation</code>s.
	 * @param	bs					DNA or RNA sequence to search.
	 * @param	frame				reading frame, <code>1..codonLength</code>.
	 * @param	minLength			minimum length for an ORF to be reported.
	 * 
	 * @return	one single-annotation list per ORF found.
	 * 
	 * @throws	IllegalArgumentException	if <code>frame</code> is out of range or <code>bs</code> is not a 
	 * 										nucleotide sequence.
	 */
	public List<List<Annotation>> 
	findORFs(boolean complementAnnotate, BioSequence bs, int frame, int minLength)
	throws IllegalArgumentException
	{
		return this.findORFs(complementAnnotate, bs, frame, minLength, new NullProgressMonitor());
	}
	
	/**
	 * Finds the open reading frames of one frame of a nucleotide sequence.
	 * <p>
	 * The frame is read codon by codon from index <code>frame - 1</code>, recording where start and stop codons 
	 * occur. Each stop codon is then paired with the earliest pending start codon lying before it; start codons 
	 * between that start and the stop are skipped. A start codon with no stop codon after it yields no ORF.
	 * 
	 * @param	complementAnnotate	passed through to the created <code>Annotation</code>s.
	 * @param	bs					DNA or RNA sequence to search.
	 * @param	frame				reading frame, <code>1..codonLength</code>.
	 * @param	minLength			minimum number of bases, counted from the first base of the start codon to the 
	 * 								last base of the stop codon, for an ORF to be reported.
	 * @param	progressMonitor		polled for cancellation while start and stop codons are being paired.
	 * 
	 * @return	one single-annotation list per ORF; each annotation is created with the index of the first base of 
	 * 			the start codon and the index of the first base of the stop codon (so the stop codon itself is left 
	 * 			out).
	 * 
	 * @throws	IllegalArgumentException	if <code>frame</code> is out of range or <code>bs</code> is neither a 
	 * 										<code>DNASequence</code> nor a <code>RNASequence</code>.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	public List<List<Annotation>> 
	findORFs(boolean complementAnnotate, BioSequence bs, int frame, int minLength, IProgressMonitor progressMonitor)
	throws IllegalArgumentException, OperationCanceledException
	{
		// Check arguments
		if (frame < 1 || frame > codonLength)
		{
			throw new IllegalArgumentException(Integer.toString(frame));
		}
		if (!(bs instanceof DNASequence) && !(bs instanceof RNASequence))
		{
			throw new IllegalArgumentException(bs + " is not a nucleotide sequence");
		}
		
		// Set the frame-dependent start position
		int start = frame - 1;
		
		// Find the start and stop codons. Start codons are stored by their first base, stop codons by their last.
		List<Integer> startCodonsPositions = new ArrayList<Integer>();
		List<Integer> stopCodonsPositions = new ArrayList<Integer>();
		for (int i = start ; i <= bs.getLength() - codonLength ; i = i + codonLength)
		{
			BioSequence codon = bs.getSubsequence(i, i + codonLength);
			if (startCodons.contains(codon))
			{
				startCodonsPositions.add(i);
			}
			else if (stopCodons.contains(codon))
			{
				stopCodonsPositions.add(i + codonLength - 1);
			}
		}
				
		List<List<Annotation>> orfAnnotations = new ArrayList<List<Annotation>>();
		Iterator<Integer> startCodonsIterator =	startCodonsPositions.iterator();
		Iterator<Integer> stopCodonsIterator = stopCodonsPositions.iterator();
		if (startCodonsIterator.hasNext())
		{
			int currentStart = startCodonsIterator.next();
			while (stopCodonsIterator.hasNext())
			{
				int currentStop = stopCodonsIterator.next();
				// If the start codon is before the stop codon add this ORF
				if (currentStart < currentStop)
				{
					if (currentStop - currentStart + 1 >= minLength)
					{
						// The stop codon is not included in the ORF: step back from its last base to its first
						List<Annotation> orfAnnotation = new ArrayList<Annotation>();
						orfAnnotation.add(
							new Annotation(
								"ORF", currentStart, currentStop - (codonLength - 1), complementAnnotate, true));
						orfAnnotations.add(orfAnnotation);
					}
					
					// Skip all start positions until the currentStop
					boolean laterStartFound = false;
					while (startCodonsIterator.hasNext())
					{
						currentStart = startCodonsIterator.next();
						if (currentStart > currentStop)
						{
							laterStartFound = true;
							break;
						}
					}
					// No start codon is left beyond this stop codon, so there are no more ORFs. Carrying on with a 
					// start lying before this stop would pair it with a later stop and report an ORF which spans 
					// a stop codon.
					if (!laterStartFound)
					{
						break;
					}
				}
				// else go see if the next stop codon is after currentStart
				
				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
		}
		return orfAnnotations;
	}
	
	/**
	 * Finds open reading frames from <code>fromIndex</code> on without progress reporting: delegates to the 
	 * overload taking an <code>IProgressMonitor</code>, passing a fresh <code>NullProgressMonitor</code>.
	 * 
	 * @param	bs			DNA or RNA sequence to search.
	 * @param	fromIndex	index at which the search starts.
	 * @param	minLength	minimum length for an ORF to be reported.
	 * 
	 * @return	whatever the delegate returns: one single-annotation list per ORF found.
	 */
	public List<List<Annotation>> findORFs2
	(BioSequence bs, int fromIndex, int minLength)
	throws IllegalArgumentException, IndexOutOfBoundsException
	{
		IProgressMonitor silentMonitor = new NullProgressMonitor();
		return this.findORFs2(bs, fromIndex, minLength, silentMonitor);
	}
	
	/**
	 * Finds open reading frames in all forward frames of a nucleotide sequence, from <code>fromIndex</code> on.
	 * <p>
	 * Every index is tested for a start codon. From a start codon the same frame is followed codon by codon up to 
	 * the first stop codon; the stretch from the start codon (inclusive) to the stop codon (exclusive) is reported 
	 * if it is long enough. Start codons lying inside a stretch which has already been followed are skipped. If no 
	 * stop codon follows a start codon, the stretch from the start codon to the end of the sequence is reported 
	 * under the same length condition, and the remainder of that frame is skipped.
	 * 
	 * @param	bs					DNA or RNA sequence to search.
	 * @param	fromIndex			index at which the search starts; must lie inside <code>bs</code>.
	 * @param	minLength			minimum number of bases of a reported ORF; stretches shorter than 6 bases are 
	 * 								never reported.
	 * @param	progressMonitor		receives one unit of work per index tested and is polled for cancellation.
	 * 
	 * @return	one single-annotation list per ORF found, in order of increasing start index.
	 * 
	 * @throws	IllegalArgumentException	if <code>bs</code> is neither a <code>DNASequence</code> nor a 
	 * 										<code>RNASequence</code>.
	 * @throws	IndexOutOfBoundsException	if <code>fromIndex</code> is negative or not smaller than the sequence 
	 * 										size.
	 * @throws	OperationCanceledException	if the monitor reports cancellation.
	 */
	public List<List<Annotation>> findORFs2
	(BioSequence bs, int fromIndex, int minLength, IProgressMonitor progressMonitor)
	throws IllegalArgumentException, IndexOutOfBoundsException, OperationCanceledException
	{
		if (!(bs instanceof DNASequence) && !(bs instanceof RNASequence))
		{
			throw new IllegalArgumentException(
				"Find error:\\n" + bs.getDescription() + " is not a nucleotide sequence.");
		}

		if (fromIndex < 0 || fromIndex > bs.sequence.size() - 1)
		{
			throw new IndexOutOfBoundsException(
				"Find error:\\n" + fromIndex + " index out of bounds (" + bs.sequence.size() + ").");
		}
		
		progressMonitor.subTask("Translating and matching protein products.");
		StringBuilder sequenceSB = new StringBuilder();
		for (int i = 0 ; i < codonLength ; i++)
		{
			sequenceSB.append('-');
		}

		// Per frame, the first index which is still worth testing (everything before it was already followed)
		int[] startPositions = new int[codonLength];
		
		// Create a dummy codon of the same type as bs; it is overwritten in place for every window
		BioSequence codon;
		if (bs instanceof DNASequence)
		{
			codon = new DNASequence(sequenceSB.toString());
		}
		else // if (bs instanceof RNASequence)
		{
			codon = new RNASequence(sequenceSB.toString());
		}

		List<List<Annotation>> orfAnnotations = new ArrayList<List<Annotation>>();
		// Find a start codon
		for (int i = fromIndex ; i <= bs.getLength() - codonLength ; i++, progressMonitor.worked(1))
		{
			// Polled on every index, so that long stretches without start codons can be cancelled too
			if (progressMonitor.isCanceled())
			{
				throw new OperationCanceledException();
			}
			
			if (i < startPositions[i % codonLength])
			{
				continue;
			}
			
			for (int j = 0 ; j < codonLength ; j++)
			{
				codon.sequence.set(j, bs.sequence.get(i + j));
			}
			if (!startCodons.contains(codon))
			{
				continue;
			}
			
			// Find the next stop codon in the same frame
			boolean hasStopCodon = false;
			for (int j = i + codonLength ; j <= bs.getLength() - codonLength ; j += codonLength)
			{
				for (int k = 0 ; k < codonLength ; k++)
				{
					codon.sequence.set(k, bs.sequence.get(j + k));
				}
				
				if (stopCodons.contains(codon))
				{
					hasStopCodon = true;
					// The literal 6 is presumably two codons (the start codon plus one more) - TODO confirm
					if ((j - i) >= minLength && (j - i) >= 6)
					{
						List<Annotation> orfAnnotation = new ArrayList<Annotation>();
						// To include stop codon, end of annotation would be j + codonLength
						orfAnnotation.add(new Annotation("ORF", i, j, false, true));
						orfAnnotations.add(orfAnnotation);
					}
					startPositions[i % codonLength] = j + codonLength;
					break;
				}	
			}
			
			// Reaches end of sequence but no stop codon has been found. This case used to be unreachable, because 
			// the length flag it tested was only ever set when a stop codon had been found.
			if (!hasStopCodon)
			{
				int openLength = bs.getLength() - i;
				if (openLength >= minLength && openLength >= 6)
				{
					List<Annotation> orfAnnotation = new ArrayList<Annotation>();
					orfAnnotation.add(new Annotation("ORF", i, bs.getLength(), false, true));
					orfAnnotations.add(orfAnnotation);
				}
				// No stop codon is left in this frame: later start codons would only yield shorter, nested 
				// stretches, so skip the rest of the frame as is done after a stop codon
				startPositions[i % codonLength] = bs.getLength();
			}
		}
				
		return orfAnnotations;
	}
	
	/*
	 * Checks that the given sequence can be used as a codon: it must be a DNASequence or a RNASequence and its 
	 * length must be exactly codonLength. 
	 * 
	 * @param	codon	sequence to check.
	 * 
	 * @throws	IllegalArgumentException	if <code>codon</code> fails either condition.
	 */
	private static void validateCodon(BioSequence codon)
	throws IllegalArgumentException
	{
		boolean isNucleotideSequence = codon instanceof DNASequence || codon instanceof RNASequence;
		// The length is only queried for nucleotide sequences
		if (isNucleotideSequence && codon.getLength() == codonLength)
		{
			return;
		}
		throw new IllegalArgumentException(codon.toString() + " is not a valid codon.");
	}
	
	/*
	 * Renders the genetic code as text: a "# <description>" header followed by one "<codon> -> <translation>" line 
	 * per entry of the translation table. Start and stop codons get a "# Start codon" / "# Stop codon" comment line 
	 * before their entry and a "  start" / "  stop" suffix on it. 
	 * 
	 * (non-Javadoc)
	 * @see java.lang.Object#toString()
	 */
	@Override
	public String toString()
	{
		StringBuilder text = new StringBuilder();
		text.append("# " + descriptionSB.toString() + "\n");
		
		for (BioSequence codon : codonMap.keySet())
		{
			// A codon listed both as start and as stop is rendered as a start codon
			boolean isStart = startCodons.contains(codon);
			boolean isStop = !isStart && stopCodons.contains(codon);
			
			String commentLine = "";
			String lineEnding = "\n";
			if (isStart)
			{
				commentLine = "# Start codon\n";
				lineEnding = "  start\n";
			}
			else if (isStop)
			{
				commentLine = "# Stop codon\n";
				lineEnding = "  stop\n";
			}
			
			text.append(commentLine);
			text.append(codon + " -> " + codonMap.get(codon));
			text.append(lineEnding);
		}
		
		return text.toString();
	}
	
	/**
	 * Tells whether every codon of this genetic code is translated identically by another genetic code.
	 * <p>
	 * Only the codons of <em>this</em> code are looked up, and the mapped values are compared by identity; codons 
	 * which only <code>gc</code> maps are not examined, and start/stop codons are not taken into account.
	 * 
	 * @param	gc	genetic code to compare with.
	 * 
	 * @return	<code>true</code> if <code>gc</code> maps every codon of this code to the same value as this code 
	 * 			does, <code>false</code> otherwise.
	 */
	public boolean translateEquals(GeneticCode gc)
	{
		boolean sameTranslation = true;
		Iterator<? extends BioSequence> codons = codonMap.keySet().iterator();
		// Stop at the first codon the two codes disagree on
		while (sameTranslation && codons.hasNext())
		{
			BioSequence codon = codons.next();
			sameTranslation = codonMap.get(codon) == gc.codonMap.get(codon);
		}
		return sameTranslation;
	}
	
	/**
	 * Two genetic codes are equal if they translate the same codons identically and have equal sets of start and 
	 * stop codons.
	 * <p>
	 * The translation tables are compared in both directions: <code>translateEquals</code> only looks up the codons 
	 * of the object it is called on, so a single call would accept a code which maps additional codons and make 
	 * <code>a.equals(b)</code> disagree with <code>b.equals(a)</code>.
	 * <p>
	 * NOTE(review): no matching <code>hashCode()</code> is visible in this part of the class; confirm that one 
	 * consistent with this method exists.
	 * 
	 * @param	obj	object to compare with.
	 * 
	 * @return	<code>true</code> if <code>obj</code> is a <code>GeneticCode</code> equal to this one.
	 */
	@Override
	public boolean equals(Object obj)
	{
		if (!(obj instanceof GeneticCode))
		{
			return false;
		}
		GeneticCode other = (GeneticCode)obj;
		return this.translateEquals(other)
			&& other.translateEquals(this)
			&& startCodons.equals(other.startCodons)
			&& stopCodons.equals(other.stopCodons);
	}
}
