/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2011-07-20
 * 
 * @copyright	Copyright Biotech Vana, S.L. 2006-2011
 */

package com.biotechvana.javabiotoolkit;

import java.util.ArrayList;
import java.util.Random;

/**
 * Factory class that generates random biological sequences.
 * 
 * @version	0.2, 2011-09-13
 * 
 * @author	<a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso Muñoz-Pomer Fuentes</a>,
 * 			<a href="http://www.biotechvana.com">Biotechvana</a>.
 *
 * <style type="text/css">
 * 		table.t0 {
 * 			border:0px solid black;
 * 			border-collapse: collapse;
 * 		}
 * 		table.t0 td {
 * 			text-align: center;
 * 			padding: 4px;
 * 		}
 * 		tr.d0 td {
 * 			background-color: #FFFFFF; color: black;
 * 		}
 * 		tr.d1 td {
 * 			background-color: #DDDDDD; color: black;
 * 		}
 * </style>
 */
public class BioSequenceFactory
{
	private Random randomizer;
	
	/**
	 * Constructor that initalizes the pseudorandom number generator. This constructor sets the seed of the random 
	 * number generator to a value very likely to be distinct from any other invocation of this constructor.  
	 *
	 * @since	0.1, 2011-07-20
	 */
	public BioSequenceFactory()
	{
		randomizer = new Random();
	}
	
	/**
	 * Constructor that initalizes the pseudo-random number generatorusing a single <code>long</code> seed. The seed is 
	 * the initial value of the internal state of the pseudorandom number generator.   
	 *
	 * @since	0.1, 2011-07-20
	 */
	public BioSequenceFactory(long seed)
	{
		randomizer = new Random(seed);
	}
	
	/**
	 * Returns a new DNA sequence formed by randomly chosen nucleotide bases. A probability of ambiguous bases and gaps 
	 * can be specified. This means that, on average, randomly generated sequences have a proportion (i.e. frequency) 
	 * of ambiguous bases and gaps equal to the values <code>ambiguousFreq</code> and <code>gapFreq</code>, 
	 * respectively. If the sum of those two values is greater than <code>1</code> the behaviour of this method is 
	 * undefined.
	 * <p>
	 * The new sequences have as description "Random sequence &lt;<code>length</code>&gt;" and 5'-3' directionality.
	 * 
	 * @param	length			length of the sequence in bases.
	 * @param	ambiguousFreq	proportion of ambiguous bases.
	 * @param	gapFreq			proportion of gaps.
	 * 
	 * @return	DNA sequence with random bases.
	 * 
	 * @since	0.2, 2011-09-13
	 * 
	 * @see		DNABase
	 * @see		DNASequence
	 */
	public DNASequence generateRandomDnaSequence(int length, double ambiguousFreq, double gapFreq)
	{
		double normUnambiguousFreq = 1.0 - ambiguousFreq - gapFreq;
		double normAmbiguousFreq = normUnambiguousFreq + ambiguousFreq;
		double normGapFreq = normUnambiguousFreq + ambiguousFreq + gapFreq;
		
		ArrayList<BioResidue> randomSequence = new ArrayList<BioResidue>(length);
		
		// Random generation of bases, with a proportion of ambiguous ones and gaps
		for (int i = 0 ; i < length ; i++)
		{
			double p = randomizer.nextDouble();
			if (p < normUnambiguousFreq)
			{
				switch(randomizer.nextInt(4))
				{
					case 0: randomSequence.add(DNABase.A); break;
					case 1: randomSequence.add(DNABase.C); break;
					case 2: randomSequence.add(DNABase.G); break;
					case 3: randomSequence.add(DNABase.T); break;
				}
			}
			else if (p < normAmbiguousFreq)
			{
				switch (randomizer.nextInt(11))
				{
					case 0:  randomSequence.add(DNABase.R); break;
					case 1:  randomSequence.add(DNABase.Y); break;
					case 2:  randomSequence.add(DNABase.K); break;
					case 3:  randomSequence.add(DNABase.M); break;
					case 4:  randomSequence.add(DNABase.S); break;
					case 5:  randomSequence.add(DNABase.W); break;
					case 6:  randomSequence.add(DNABase.B); break;
					case 7:  randomSequence.add(DNABase.V); break;
					case 8:  randomSequence.add(DNABase.D); break;
					case 9:  randomSequence.add(DNABase.H); break;
					case 10: randomSequence.add(DNABase.N); break;
				}
			}
			else if (p < normGapFreq)
			{
				randomSequence.add(DNABase.GAP);
			}
		}
		
		return 
			new DNASequence("Random " + randomSequence.size(), NucleotideSequenceDirectionality.C5_C3, randomSequence);
	}

	/**
	 * Returns a new RNA sequence formed by randomly chosen nucleotide bases. A probability of ambiguous bases and gaps 
	 * can be specified. This means that, on average, randomly generated sequences have a proportion (i.e. frequency) 
	 * of ambiguous bases and gaps equal to the values <code>ambiguousFreq</code> and <code>gapFreq</code>, 
	 * respectively. If the sum of those two values is greater than <code>1</code> the behaviour of this method is 
	 * undefined.
	 * <p>
	 * The new sequences have as description "Random sequence &lt;<code>length</code>&gt;" and 5'-3' directionality.
	 * 
	 * @param	length			length of the sequence in bases.
	 * @param	ambiguousFreq	proportion of ambiguous bases.
	 * @param	gapFreq			proportion of gaps.
	 * 
	 * @return	RNA sequence with random bases.
	 * 
	 * @since	0.2, 2011-09-13
	 * 
	 * @see		RNABase
	 * @see		RNASequence
	 */
	public RNASequence generateRandomRnaSequence(int length, double ambiguousFreq, double gapFreq)
	{
		double normUnambiguousFreq = 1.0 - ambiguousFreq - gapFreq;
		double normAmbiguousFreq = normUnambiguousFreq + ambiguousFreq;
		double normGapFreq = normUnambiguousFreq + ambiguousFreq + gapFreq;
		
		ArrayList<BioResidue> randomSequence = new ArrayList<BioResidue>(length);
		
		// Random generation of bases, with a proportion of ambiguous ones and gaps
		for (int i = 0 ; i < length ; i++)
		{
			double p = randomizer.nextDouble();
			if (p < normUnambiguousFreq)
			{
				switch(randomizer.nextInt(4))
				{
					case 0: randomSequence.add(RNABase.A); break;
					case 1: randomSequence.add(RNABase.C); break;
					case 2: randomSequence.add(RNABase.G); break;
					case 3: randomSequence.add(RNABase.U); break;
				}
			}
			else if (p < normAmbiguousFreq)
			{
				switch (randomizer.nextInt(11))
				{
					case 0:  randomSequence.add(RNABase.R); break;
					case 1:  randomSequence.add(RNABase.Y); break;
					case 2:  randomSequence.add(RNABase.K); break;
					case 3:  randomSequence.add(RNABase.M); break;
					case 4:  randomSequence.add(RNABase.S); break;
					case 5:  randomSequence.add(RNABase.W); break;
					case 6:  randomSequence.add(RNABase.B); break;
					case 7:  randomSequence.add(RNABase.V); break;
					case 8:  randomSequence.add(RNABase.D); break;
					case 9:  randomSequence.add(RNABase.H); break;
					case 10: randomSequence.add(RNABase.N); break;
				}
			}
			else if (p < normGapFreq)
			{
				randomSequence.add(RNABase.GAP);
			}
		}
		return 
			new RNASequence("Random " + randomSequence.size(), NucleotideSequenceDirectionality.C5_C3, randomSequence);
	}

	/**
	 * Returns a new protein sequence formed by randomly chosen amino acids. A probability of ambiguous amino acids, 
	 * gaps and termination codon encoded symbols can be specified. This means that, on average, randomly generated 
	 * sequences have a proportion (i.e. frequency) of ambiguous amino acids, gaps and stops equal to the values 
	 * <code>ambiguousFreq</code>, <code>gapFreq</code> and <code>stopFreq</code>, respectively. If the sum of those 
	 * three values is greater than <code>1</code> the behaviour of this method is undefined.
	 * <p>
	 * The new sequences have as description "Random sequence &lt;<code>length</code>&gt;" and N-C directionality.
	 * 
	 * @param	length			length of the sequence in amino acids.
	 * @param	ambiguousFreq	proportion of ambiguous amino acids.
	 * @param	gapFreq			proportion of gaps.
	 * @param	stopFreq		proportion of stop codons resulting positions.
	 * 
	 * @return	protein sequence with random amino acids.
	 * 
	 * @since	0.2, 2011-09-13
	 * 
	 * @see		AminoAcid
	 * @see		ProteinSequence
	 */
	public ProteinSequence generateRandomProteinSequence
	(int length, double ambiguousFreq, double gapFreq, double stopFreq)
	{
		double normUnambiguousFreq = 1.0 - ambiguousFreq - gapFreq - stopFreq;
		double normAmbiguousFreq = normUnambiguousFreq + ambiguousFreq;
		double normGapFreq = normUnambiguousFreq + ambiguousFreq + gapFreq;
		double normStopFreq = normUnambiguousFreq + ambiguousFreq + gapFreq + stopFreq;
		
		ArrayList<BioResidue> randomSequence = new ArrayList<BioResidue>(length);
		
		// Random generation of amino acids, with a proportion of ambiguous amino acids, gaps and stops
		for (int i = 0 ; i < length ; i++)
		{
			double p = randomizer.nextDouble();
			if (p < normUnambiguousFreq)
			{
				switch(randomizer.nextInt(22))
				{
					case  0: randomSequence.add(AminoAcid.A); break;
					case  1: randomSequence.add(AminoAcid.R); break;
					case  2: randomSequence.add(AminoAcid.N); break;
					case  3: randomSequence.add(AminoAcid.D); break;
					case  4: randomSequence.add(AminoAcid.C); break;
					case  5: randomSequence.add(AminoAcid.E); break;
					case  6: randomSequence.add(AminoAcid.Q); break;
					case  7: randomSequence.add(AminoAcid.G); break;
					case  8: randomSequence.add(AminoAcid.H); break;
					case  9: randomSequence.add(AminoAcid.I); break;
					case 10: randomSequence.add(AminoAcid.L); break;
					case 11: randomSequence.add(AminoAcid.K); break;
					case 12: randomSequence.add(AminoAcid.M); break;
					case 13: randomSequence.add(AminoAcid.F); break;
					case 14: randomSequence.add(AminoAcid.O); break;
					case 15: randomSequence.add(AminoAcid.P); break;
					case 16: randomSequence.add(AminoAcid.S); break;
					case 17: randomSequence.add(AminoAcid.T); break;
					case 18: randomSequence.add(AminoAcid.U); break;
					case 19: randomSequence.add(AminoAcid.V); break;
					case 20: randomSequence.add(AminoAcid.W); break;
					case 21: randomSequence.add(AminoAcid.Y); break;
				}
			}
			else if (p < normAmbiguousFreq)
			{
				// Without gaps
				switch(randomizer.nextInt(3))
				{
					case 0: randomSequence.add(AminoAcid.B); break;
					case 1: randomSequence.add(AminoAcid.Z); break;
					case 2: randomSequence.add(AminoAcid.J); break;
				}
			}
			else if (p < normGapFreq)
			{
				randomSequence.add(AminoAcid.GAP);
			}
			else if (p < normStopFreq)
			{
				randomSequence.add(AminoAcid.$);
			}
		}
		return 
			new ProteinSequence("Random " + randomSequence.size(), PeptideSequenceDirectionality.N_C, randomSequence);
	}
}
