/*
 * @author		Alfonso Muñoz-Pomer Fuentes,
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-11-01
 * 
 * @copyright	Copyright Biotech Vana, S.L. 2006-2011
 */

package com.biotechvana.javabiotoolkit;

import java.io.Serializable;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;

/**
 * Enumeration of all possible symbols (nucleotide bases) which can appear in a <code>{@link DnaSequence}</code>. These
 * are 4 nucleotide bases, 11 ambiguous symbols (including N) and a gap symbol. The naming conventions this class
 * follows are the ones established in <em>IUPAC-IUB symbols for nucleotide nomenclature</em>, Cornish-Bowden (1985) 
 * Nucl. Acids Res. 13: 3021-3030.
 * <p>
 * Note: RNA bases are provided in a different <code>enum</code> for convenience, despite differing in only one base.
 * 
 * @version	1.6, 2012-04-24
 * 
 * @author	<a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso Muñoz-Pomer Fuentes</a>,
 * 			<a href="http://www.biotechvana.com">Biotechvana</a>.
 * 
 * @see		DnaSequence
 * @see		RNABase
 * 
 * <style type="text/css">
 * 		table.t0 {
 * 			border:0px solid black;
 * 			border-collapse: collapse;
 * 		}
 * 		table.t0 td {
 * 			text-align: center;
 * 			padding: 4px;
 * 		}
 * 		tr.d0 td {
 * 			background-color: #FFFFFF; color: black;
 * 		}
 * 		tr.d1 td {
 * 			background-color: #DDDDDD; color: black;
 * 		}
 * </style>
 */
public enum DNABase implements BioResidue, Serializable
{
	/**
	 * Adenine.
	 */
	A('A', "aA", "Adenine"),
	/**
	 * Cytosine.
	 */
	C('C', "cC", "Cytosine"),
	/**
	 * Guanine.
	 */
	G('G', "gG", "Guanine"),
	/**
	 * Thymine.
	 */
	T('T', "tT", "Thymine"),
	/**
	 * Guanine or adenine (purine).
	 */
	R('R', "rR", "Guanine or adenine"),
	/**
	 * Thymine or cytosine (pyrimidine).
	 */
	Y('Y', "yY", "Thymine or cytosine"),
	/**
	 * Guanine or thymine (keto).
	 */
	K('K', "kK", "Guanine or thymine"),
	/**
	 * Adenine or cytosine (amino).
	 */
	M('M', "mM", "Adenine or cytosine"),
	/**
	 * Guanine or cytosine (strong bonds).
	 */
	S('S', "sS", "Guanine or cytosine"),
	/**
	 * Adenine or thymine (weak bonds).
	 */
	W('W', "wW", "Adenine or thymine"),
	/**
	 * Guanine, thymine or cytosine (all but adenine).
	 */
	B('B', "bB", "Guanine, thymine or cytosine"),
	/**
	 * Guanine, cytosine or adenine (all but thymine).
	 */
	V('V', "vV", "Guanine, cytosine or adenine"),
	/**
	 * Guanine, adenine or thymine (all but cytosine).
	 */
	D('D', "dD", "Guanine, adenine or thymine"),
	/**
	 * Adenine, cytosine or thymine (all but guanine).
	 */
	H('H', "hH", "Adenine, cytosine or thymine"),
	/**
	 * Unspecified or unknown.
	 */
	N('N', "nN", "Any"),
	/**
	 * Gap.
	 */
	GAP('-', "\\_\\-\\.\\*", "Gap"),
	;

	// Per-constant data; assigned once in the constructor and never changed afterwards.
	private final char baseChar;
	private final String regex;
	private final String fullName;

	/*
	 * Private enum constructor. Each of the enum literals has a one-character name and a regex for text processing, 
	 * plus a full name.
	 */
	private DNABase (char name, String regex, String fullName)
	{
		this.baseChar = name;
		this.regex = regex;
		this.fullName = fullName;
	}

	/*
	 * The lookup tables below live in private holder classes (initialization-on-demand holder idiom). Each table is 
	 * still built lazily, on the first call of the method that needs it, but the JVM's class initialization 
	 * guarantees that it is built exactly once and is fully populated before any thread can read it. The tables are 
	 * never modified after their holder has been initialized.
	 */

	/*
	 * Holder of the character -> base table used by valueOf(char).
	 */
	private static final class CharLookup
	{
		static final Map<Character, DNABase> MAP = build();

		private static Map<Character, DNABase> build()
		{
			Map<Character, DNABase> map = new HashMap<Character, DNABase>(33);
			for (DNABase b : DNABase.values())
			{
				// Letters are accepted in both cases
				if (Character.isUpperCase(b.baseChar))
				{
					map.put(Character.toUpperCase(b.baseChar), b);
					map.put(Character.toLowerCase(b.baseChar), b);
				}
			}
			// Non-letters: every accepted spelling of a gap
			map.put('_', GAP);
			map.put('-', GAP);
			map.put('*', GAP);
			map.put('.', GAP);
			return map;
		}
	}

	/*
	 * Holder of the base -> { base itself, unambiguous bases, narrower ambiguous bases } table used by expandToAll().
	 */
	private static final class AllExpansions
	{
		static final Map<DNABase, DNABase[]> MAP = new EnumMap<DNABase, DNABase[]>(DNABase.class);

		static
		{
			MAP.put(A, new DNABase[]{ A });
			MAP.put(C, new DNABase[]{ C });
			MAP.put(G, new DNABase[]{ G });
			MAP.put(T, new DNABase[]{ T });
			MAP.put(R, new DNABase[]{ R, G, A });
			MAP.put(Y, new DNABase[]{ Y, T, C });
			MAP.put(K, new DNABase[]{ K, G, T });
			MAP.put(M, new DNABase[]{ M, A, C });
			MAP.put(S, new DNABase[]{ S, G, C });
			MAP.put(W, new DNABase[]{ W, A, T });
			MAP.put(B, new DNABase[]{ B, G, T, C, S, K, Y });
			MAP.put(V, new DNABase[]{ V, G, C, A, R, S, M });
			MAP.put(D, new DNABase[]{ D, G, A, T, K, R, W });
			MAP.put(H, new DNABase[]{ H, A, C, T, W, M, Y });
			MAP.put(N, new DNABase[]{ N, G, T, C, A, R, Y, K, M, S, W, B, V, D, H });
			MAP.put(GAP, new DNABase[]{ GAP });
		}
	}

	/*
	 * Holder of the base -> unambiguous bases table used by expandToUnambiguous().
	 */
	private static final class UnambiguousExpansions
	{
		static final Map<DNABase, DNABase[]> MAP = new EnumMap<DNABase, DNABase[]>(DNABase.class);

		static
		{
			MAP.put(A, new DNABase[]{ A });
			MAP.put(C, new DNABase[]{ C });
			MAP.put(G, new DNABase[]{ G });
			MAP.put(T, new DNABase[]{ T });
			MAP.put(R, new DNABase[]{ G, A });
			MAP.put(Y, new DNABase[]{ T, C });
			MAP.put(K, new DNABase[]{ G, T });
			MAP.put(M, new DNABase[]{ A, C });
			MAP.put(S, new DNABase[]{ G, C });
			MAP.put(W, new DNABase[]{ A, T });
			MAP.put(B, new DNABase[]{ G, T, C });
			MAP.put(V, new DNABase[]{ G, C, A });
			MAP.put(D, new DNABase[]{ G, A, T });
			MAP.put(H, new DNABase[]{ A, C, T });
			MAP.put(N, new DNABase[]{ A, C, G, T });
			MAP.put(GAP, new DNABase[]{ GAP });
		}
	}

	/*
	 * Holder of the base -> complementary base table used by getComplementary().
	 */
	private static final class Complements
	{
		static final Map<DNABase, DNABase> MAP = new EnumMap<DNABase, DNABase>(DNABase.class);

		static
		{
			MAP.put(A, T);
			MAP.put(C, G);
			MAP.put(G, C);
			MAP.put(T, A);
			MAP.put(R, Y);
			MAP.put(Y, R);
			MAP.put(K, M);
			MAP.put(M, K);
			MAP.put(S, S);
			MAP.put(W, W);
			MAP.put(B, V);
			MAP.put(V, B);
			MAP.put(D, H);
			MAP.put(H, D);
			MAP.put(N, N);
			MAP.put(GAP, GAP);
		}
	}

	/*
	 * Holder of the DNA base -> RNA base table used by getTranscript(). Keeping it in its own holder means RNABase 
	 * is not touched until a transcript is first requested.
	 */
	private static final class Transcripts
	{
		static final Map<DNABase, RNABase> MAP = new EnumMap<DNABase, RNABase>(DNABase.class);

		static
		{
			MAP.put(A, RNABase.A);
			MAP.put(C, RNABase.C);
			MAP.put(G, RNABase.G);
			MAP.put(T, RNABase.U);
			MAP.put(R, RNABase.R);
			MAP.put(Y, RNABase.Y);
			MAP.put(K, RNABase.K);
			MAP.put(M, RNABase.M);
			MAP.put(S, RNABase.S);
			MAP.put(W, RNABase.W);
			MAP.put(B, RNABase.B);
			MAP.put(V, RNABase.V);
			MAP.put(D, RNABase.D);
			MAP.put(H, RNABase.H);
			MAP.put(N, RNABase.N);
			MAP.put(GAP, RNABase.GAP);
		}
	}

	/**
	 * Returns a <code>String</code> regex of all <code>DNABase</code> regular expressions, concatenated in the 
	 * declaration order of the constants.
	 * <p>
	 * Note: the returned <code>String</code> does not include enclosing square brackets, so it cannot be used directly 
	 * as a character class.
	 * 
	 * @return	a regular expression that matches a character representing a IUPAC-encoded DNA nucleotide.
	 * 
	 * @since	0.8
	 */
	public static String getRegexes()
	{
		StringBuilder allowed = new StringBuilder();
		for (DNABase b : values())
		{
			allowed.append(b.regex);
		}
		return allowed.toString();
	}

	/**
	 * Returns a DNA base represented by a <code>char</code> argument. Letters are accepted in upper and lower case; 
	 * the characters <code>'_'</code>, <code>'-'</code>, <code>'*'</code> and <code>'.'</code> all map to 
	 * {@link #GAP}.
	 * 
	 * @param	c	one character which may represent a DNA base.
	 * 
	 * @return	the <code>DNABase</code> represented by <code>c</code> or <code>null</code> if it does not correspond 
	 * 			to any DNA base.
	 * 
	 * @since	1.0rc2
	 */
	public static DNABase valueOf(char c)
	{
		return CharLookup.MAP.get(c);
	}

	/**
	 * Returns a descriptive <code>String</code> associated to this <code>DNABase</code>.
	 * 
	 * @return	DNA base full name.
	 * 
	 * @since	0.2
	 */
	public String getFullName()
	{
		return fullName;
	}

	/**
	 * Returns a lower case <code>char</code> value representing this base. In the case of gaps it returns 
	 * <code>'-'</code>.
	 * 
	 * @return	a lower case <code>char</code> representation of this DNA nucleotide.
	 * 
	 * @since	1.2
	 */
	public char getLowerCaseChar()
	{
		return Character.toLowerCase(baseChar);
	}

	/**
	 * Returns an upper case <code>char</code> value representing this base. In the case of gaps it returns 
	 * <code>'-'</code>.
	 * 
	 * @return	an upper case <code>char</code> representation of this DNA nucleotide.
	 * 
	 * @since	1.2
	 */
	public char getUpperCaseChar()
	{
		return Character.toUpperCase(baseChar);
	}

	/**
	 * Returns a regular expression fragment which matches a one-letter abbreviation of the receiving 
	 * <code>DNABase</code>. For letters the fragment lists both the lower and the upper case character; for 
	 * {@link #GAP} it lists the escaped gap characters.
	 * <p>
	 * Note: the returned <code>String</code> does not include enclosing square brackets, so it cannot be used directly 
	 * as a character class.
	 * 
	 * @return	one-letter regular expression.
	 * 
	 * @since	1.0rc2
	 */
	public String getRegex()
	{
		return regex;
	}

	/**
	 * Returns an array containing all the receiver&rsquo;s equivalent ambiguous and unambiguous bases. If an 
	 * unambiguous base calls this method, the result is the same as {@link #expandToUnambiguous()}. 
	 * <p>
	 * The equivalences among ambiguous bases are included in the table below.
	 * <table class = "t0">
	 * <tr class = "d0">
	 * <td>B</td><td>S, K, Y</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>V</td><td>R, S, M</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>D</td><td>K, R, W</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>H</td><td>W, M, Y</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>N</td><td>S, K, Y, R, M, W, <br />B, V, D, H</td>
	 * </tr>
	 * </table>
	 * <p>
	 * The returned array is a fresh copy on every call, so callers may modify it freely.
	 * 
	 * @return	array of equivalent bases, including this base.
	 * 
	 * @since	1.2
	 */
	public DNABase[] expandToAll()
	{
		// Defensive copy: the cached array is shared by every caller
		return AllExpansions.MAP.get(this).clone();
	}

	/**
	 * Returns an array with this <code>DNABase</code>&rsquo;s equivalent unambiguous bases. If the receiver is either 
	 * A, C, G or T, then a one-element array with this base is returned; a gap expands to a one-element array 
	 * holding {@link #GAP}.
	 * <p>
	 * Note: an ambiguous symbol expands to the bases named in the description of its constant, e.g. {@link #R} 
	 * expands to G and A, and {@link #N} expands to A, C, G and T.
	 * <p>
	 * The returned array is a fresh copy on every call, so callers may modify it freely.
	 * 
	 * @return	array of equivalent bases.
	 * 
	 * @since	1.2
	 */
	public DNABase[] expandToUnambiguous()
	{
		// Defensive copy: the cached array is shared by every caller
		return UnambiguousExpansions.MAP.get(this).clone();
	}

	/**
	 * Returns this base&rsquo;s complementary base.
	 * <p>
	 * The complementarity rules are shown in the table below; each rule applies in both directions.
	 * 
	 * <table class = "t0">
	 * <tr class = "d0">
	 * <td>A</td><td>T</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>C</td><td>G</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>R</td><td>Y</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>K</td><td>M</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>S</td><td>S</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>W</td><td>W</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>B</td><td>V</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>D</td><td>H</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>N</td><td>N</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>Gap (-)</td><td>Gap (-)</td>
	 * </tr>
	 * </table>
	 * 
	 * @return	this base&rsquo;s complementary base.
	 * 
	 * @since	1.1
	 */
	public DNABase getComplementary()
	{
		return Complements.MAP.get(this);
	}

	/**
	 * Returns this base&rsquo;s transcribed RNA base. {@link #T} maps to <code>RNABase.U</code>; every other 
	 * constant maps to the <code>RNABase</code> constant of the same name.
	 * 
	 * @return	this base&rsquo;s transcript.
	 * 
	 * @since	1.5
	 */
	public RNABase getTranscript()
	{
		return Transcripts.MAP.get(this);
	}
	
	/**
	 * Returns the one-character symbol of this base (<code>"-"</code> for a gap) as a <code>String</code>.
	 * 
	 * @return	a one-character <code>String</code> with this base&rsquo;s symbol.
	 * 
	 * @see		java.lang.Enum#toString()
	 */
	@Override
	public String toString()
	{
		return String.valueOf(baseChar);
	}
}