/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-11-01
 * 
 * @copyright	Copyright Biotech Vana, S.L. 2006-2010
 */

package com.biotechvana.javabiotoolkit;

import java.io.Serializable;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;

/**
 * Enumeration of all possible symbols (nucleotide bases) which can appear in a <code>{@link RnaSequence}</code>. These
 * are 4 nucleotide bases, 11 ambiguous symbols (including N) and a gap symbol. The naming conventions this class 
 * follows are the ones established in <em>IUPAC-IUB symbols for nucleotide nomenclature</em>, Cornish-Bowden (1985) 
 * Nucl. Acids Res. 13: 3021-3030.
 * <p>
 * Note: DNA bases are provided in a different <code>enum</code> for convenience, despite differing in only one base.
 * <p>
 * All lookup tables used by this class are built once and never modified afterwards; methods returning arrays hand 
 * out a fresh copy on every call, so callers may freely modify the result.
 * 
 * <style type="text/css">
 * 		table.t0 {
 * 			border:0px solid black;
 * 			border-collapse: collapse;
 * 		}
 * 		table.t0 td {
 * 			text-align: center;
 * 			padding: 4px;
 * 		}
 * 		tr.d0 td {
 * 			background-color: #FFFFFF; color: black;
 * 		}
 * 		tr.d1 td {
 * 			background-color: #DDDDDD; color: black;
 * 		}
 * </style>
 * 
 * @version	1.6, 2012-04-24
 * 
 * @author	<a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso Muñoz-Pomer Fuentes</a>,
 * 			<a href="http://www.biotechvana.com">Biotechvana</a>.
 * 
 * @see		RnaSequence
 * @see		DNABase
 */
public enum RNABase implements BioResidue, Serializable
{
	/**
	 * Adenine.
	 */
	A('A', "aA", "Adenine"),
	/**
	 * Cytosine.
	 */
	C('C', "cC", "Cytosine"),
	/**
	 * Guanine.
	 */
	G('G', "gG", "Guanine"),
	/**
	 * Uracil.
	 */
	U('U', "uU", "Uracil"),
	/**
	 * Guanine or adenine (purine).
	 */
	R('R', "rR", "Guanine or adenine"),
	/**
	 * Uracil or cytosine (pyrimidine).
	 */
	Y('Y', "yY", "Uracil or cytosine"),
	/**
	 * Guanine or uracil (keto).
	 */
	K('K', "kK", "Guanine or uracil"),
	/**
	 * Adenine or cytosine (amino).
	 */
	M('M', "mM", "Adenine or cytosine"),
	/**
	 * Guanine or cytosine (strong bonds).
	 */
	S('S', "sS", "Guanine or cytosine"),
	/**
	 * Adenine or uracil (weak bonds).
	 */
	W('W', "wW", "Adenine or uracil"),
	/**
	 * Guanine, uracil or cytosine (all but adenine).
	 */
	B('B', "bB", "Guanine, uracil or cytosine"),
	/**
	 * Guanine, cytosine or adenine (all but uracil).
	 */
	V('V', "vV", "Guanine, cytosine or adenine"),
	/**
	 * Guanine, adenine or uracil (all but cytosine).
	 */
	D('D', "dD", "Guanine, adenine or uracil"),
	/**
	 * Adenine, cytosine or uracil (all but guanine).
	 */
	H('H', "hH", "Adenine, cytosine or uracil"),
	/**
	 * Unspecified or unknown.
	 */
	N('N', "nN", "Any"),
	/**
	 * Gap. Recognised in text as any of <code>_</code>, <code>-</code>, <code>.</code> or <code>*</code>.
	 */
	GAP('-', "\\_\\-\\.\\*", "Gap"),
	;
	
	/*
	 * Lookup tables. They are filled exactly once in the static initializer below (which runs after all the enum 
	 * constants have been created) and are only read afterwards, so no caller can ever observe a half-built table.
	 */
	private static final Map<Character, RNABase> CHAR_TO_BASE = new HashMap<Character, RNABase>(33);
	private static final Map<RNABase, RNABase> COMPLEMENTARY = new EnumMap<RNABase, RNABase>(RNABase.class);
	private static final Map<RNABase, RNABase[]> UNAMBIGUOUS = new EnumMap<RNABase, RNABase[]>(RNABase.class);
	private static final Map<RNABase, RNABase[]> ALL_EQUIVALENT = new EnumMap<RNABase, RNABase[]>(RNABase.class);

	static
	{
		// Characters -> bases. Letters are registered in both cases...
		for (RNABase base : values())
		{
			if (Character.isUpperCase(base.baseChar))
			{
				CHAR_TO_BASE.put(Character.toUpperCase(base.baseChar), base);
				CHAR_TO_BASE.put(Character.toLowerCase(base.baseChar), base);
			}
		}
		// ... and every accepted gap symbol maps to GAP
		CHAR_TO_BASE.put('_', GAP);
		CHAR_TO_BASE.put('-', GAP);
		CHAR_TO_BASE.put('*', GAP);
		CHAR_TO_BASE.put('.', GAP);

		// Base -> complementary base
		COMPLEMENTARY.put(A, U);
		COMPLEMENTARY.put(C, G);
		COMPLEMENTARY.put(G, C);
		COMPLEMENTARY.put(U, A);
		COMPLEMENTARY.put(R, Y);
		COMPLEMENTARY.put(Y, R);
		COMPLEMENTARY.put(K, M);
		COMPLEMENTARY.put(M, K);
		COMPLEMENTARY.put(S, S);
		COMPLEMENTARY.put(W, W);
		COMPLEMENTARY.put(B, V);
		COMPLEMENTARY.put(V, B);
		COMPLEMENTARY.put(D, H);
		COMPLEMENTARY.put(H, D);
		COMPLEMENTARY.put(N, N);
		COMPLEMENTARY.put(GAP, GAP);

		// Base -> unambiguous bases it may stand for
		UNAMBIGUOUS.put(A, new RNABase[]{A});
		UNAMBIGUOUS.put(C, new RNABase[]{C});
		UNAMBIGUOUS.put(G, new RNABase[]{G});
		UNAMBIGUOUS.put(U, new RNABase[]{U});
		UNAMBIGUOUS.put(R, new RNABase[]{A, G});
		UNAMBIGUOUS.put(Y, new RNABase[]{C, U});
		UNAMBIGUOUS.put(K, new RNABase[]{G, U});
		UNAMBIGUOUS.put(M, new RNABase[]{A, C});
		UNAMBIGUOUS.put(S, new RNABase[]{G, C});
		UNAMBIGUOUS.put(W, new RNABase[]{A, U});
		UNAMBIGUOUS.put(B, new RNABase[]{C, G, U});
		UNAMBIGUOUS.put(V, new RNABase[]{A, C, G});
		UNAMBIGUOUS.put(D, new RNABase[]{A, G, U});
		UNAMBIGUOUS.put(H, new RNABase[]{A, C, U});
		UNAMBIGUOUS.put(N, new RNABase[]{A, C, G, U});
		UNAMBIGUOUS.put(GAP, new RNABase[]{GAP});

		// Base -> itself plus every (ambiguous or unambiguous) base it may stand for
		ALL_EQUIVALENT.put(A, new RNABase[]{A});
		ALL_EQUIVALENT.put(C, new RNABase[]{C});
		ALL_EQUIVALENT.put(G, new RNABase[]{G});
		ALL_EQUIVALENT.put(U, new RNABase[]{U});
		ALL_EQUIVALENT.put(R, new RNABase[]{R, G, A});
		ALL_EQUIVALENT.put(Y, new RNABase[]{Y, U, C});
		ALL_EQUIVALENT.put(K, new RNABase[]{K, G, U});
		ALL_EQUIVALENT.put(M, new RNABase[]{M, A, C});
		ALL_EQUIVALENT.put(S, new RNABase[]{S, G, C});
		ALL_EQUIVALENT.put(W, new RNABase[]{W, A, U});
		ALL_EQUIVALENT.put(B, new RNABase[]{B, G, U, C, S, K, Y});
		ALL_EQUIVALENT.put(V, new RNABase[]{V, G, C, A, R, S, M});
		ALL_EQUIVALENT.put(D, new RNABase[]{D, G, A, U, K, R, W});
		ALL_EQUIVALENT.put(H, new RNABase[]{H, A, C, U, W, M, Y});
		ALL_EQUIVALENT.put(N, new RNABase[]{N, G, U, C, A, R, Y, K, M, S, W, B, V, D, H});
		ALL_EQUIVALENT.put(GAP, new RNABase[]{GAP});
	}

	/*
	 * Holder for the RNA -> DNA table. It lives in its own nested class so that DNABase is only touched the first 
	 * time getReverseTranscript() is called (as before), rather than while RNABase itself is being initialized.
	 */
	private static final class ReverseTranscripts
	{
		private static final Map<RNABase, DNABase> TABLE = new EnumMap<RNABase, DNABase>(RNABase.class);

		static
		{
			TABLE.put(RNABase.A, DNABase.A);
			TABLE.put(RNABase.C, DNABase.C);
			TABLE.put(RNABase.G, DNABase.G);
			TABLE.put(RNABase.U, DNABase.T);
			TABLE.put(RNABase.R, DNABase.R);
			TABLE.put(RNABase.Y, DNABase.Y);
			TABLE.put(RNABase.K, DNABase.K);
			TABLE.put(RNABase.M, DNABase.M);
			TABLE.put(RNABase.S, DNABase.S);
			TABLE.put(RNABase.W, DNABase.W);
			TABLE.put(RNABase.B, DNABase.B);
			TABLE.put(RNABase.V, DNABase.V);
			TABLE.put(RNABase.D, DNABase.D);
			TABLE.put(RNABase.H, DNABase.H);
			TABLE.put(RNABase.N, DNABase.N);
			TABLE.put(RNABase.GAP, DNABase.GAP);
		}
	}

	private final char baseChar;
	private final String regex;
	private final String fullName;
	
	/*
	 * Private enum constructor. Each of the enum literals has a one-character name and a regex for text processing, 
	 * plus a full name.
	 */
	private RNABase (char name, String regex, String fullName)
	{
		this.baseChar = name;
		this.regex = regex;
		this.fullName = fullName;
	}

	/**
	 * Returns the concatenation of the regular expressions of every <code>RNABase</code>, in declaration order.
	 * <p>
	 * Note: the returned <code>String</code> does not include enclosing square brackets, so it cannot be used directly 
	 * as a character class.
	 * 
	 * @return	a regular expression fragment that matches a character representing a IUPAC-encoded RNA nucleotide 
	 * 			or a gap, once enclosed in square brackets.
	 * 
	 * @since	0.8
	 */
	public static String getRegexes()
	{
		StringBuilder allowed = new StringBuilder();
		for (RNABase base : values())
		{
			allowed.append(base.regex);
		}
		return allowed.toString();
	}

	/**
	 * Returns the RNA base represented by a <code>char</code> argument. Letters are matched regardless of case; 
	 * <code>'_'</code>, <code>'-'</code>, <code>'*'</code> and <code>'.'</code> are all recognised as {@link #GAP}.
	 * 
	 * @param	c	one character which may represent an RNA base.
	 * 
	 * @return	the <code>RNABase</code> represented by <code>c</code> or <code>null</code> if it does not correspond 
	 * 			to any RNA base.
	 * 
	 * @since	1.0rc2
	 */
	public static RNABase valueOf(char c)
	{
		return CHAR_TO_BASE.get(c);
	}

	/**
	 * Returns a descriptive <code>String</code> associated to this <code>RNABase</code>.
	 * 
	 * @return	RNA base full name.
	 * 
	 * @since	0.2
	 */
	public String getFullName()
	{
		return fullName;
	}

	/**
	 * Returns a lower case <code>char</code> value representing this base. In the case of gaps it returns 
	 * <code>'-'</code>.
	 * 
	 * @return	a lower case <code>char</code> representation of this RNA nucleotide.
	 * 
	 * @since	1.2
	 */
	public char getLowerCaseChar()
	{
		return Character.toLowerCase(baseChar);
	}

	/**
	 * Returns an upper case <code>char</code> value representing this base. In the case of gaps it returns 
	 * <code>'-'</code>.
	 * 
	 * @return	an upper case <code>char</code> representation of this RNA nucleotide.
	 * 
	 * @since	1.2
	 */
	public char getUpperCaseChar()
	{
		return Character.toUpperCase(baseChar);
	}

	/**
	 * Returns a regular expression which matches a one-letter abbreviation of the receiving <code>RNABase</code>. The 
	 * regex is case insensitive.
	 * <p>
	 * Note: the returned <code>String</code> does not include enclosing square brackets, so it cannot be used directly 
	 * as a character class.
	 * 
	 * @return	one-letter regular expression.
	 * 
	 * @since	1.0rc2
	 */
	public String getRegex()
	{
		return regex;
	}

	/**
	 * Returns an array containing all the receiver&rsquo;s equivalent ambiguous and unambiguous bases. If an 
	 * unambiguous base calls this method, the result is the same as {@link #expandToUnambiguous()}. 
	 * <p>
	 * The equivalences among ambiguous bases are included in the table below.
	 * <table class = "t0">
	 * <tr class = "d0">
	 * <td>B</td><td>S, K, Y</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>V</td><td>R, S, M</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>D</td><td>K, R, W</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>H</td><td>W, M, Y</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>N</td><td>S, K, Y, R, M, W, <br />B, V, D, H</td>
	 * </tr>
	 * </table>
	 * 
	 * @return	a newly allocated array of equivalent bases, including this base as its first element.
	 * 
	 * @since	1.2
	 */
	public RNABase[] expandToAll()
	{
		// Defensive copy: the table entry is shared by every caller and must stay intact
		return ALL_EQUIVALENT.get(this).clone();
	}

	/**
	 * Returns an array with this <code>RNABase</code>&rsquo;s equivalent unambiguous bases. If the receiver is either 
	 * A, C, G, U or a gap, then a one-element array with this base is returned.
	 * <p>
	 * Note: N expands to A, C, G and U; every other ambiguous symbol expands to the bases listed in its own 
	 * description.
	 * 
	 * @return	a newly allocated array of equivalent unambiguous bases.
	 * 
	 * @since	1.2
	 */
	public RNABase[] expandToUnambiguous()
	{
		// Defensive copy: the table entry is shared by every caller and must stay intact
		return UNAMBIGUOUS.get(this).clone();
	}

	/**
	 * Returns this base&rsquo;s complementary base.
	 * <p>
	 * The complementarity rules are shown in the table below (each rule applies in both directions).
	 * <table class = "t0">
	 * <tr class = "d0">
	 * <td>A</td><td>U</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>C</td><td>G</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>R</td><td>Y</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>K</td><td>M</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>S</td><td>S</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>W</td><td>W</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>B</td><td>V</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>D</td><td>H</td>
	 * </tr>
	 * <tr class = "d0">
	 * <td>N</td><td>N</td>
	 * </tr>
	 * <tr class = "d1">
	 * <td>Gap (-)</td><td>Gap (-)</td>
	 * </tr>
	 * </table>
	 * 
	 * @return	this base&rsquo;s complementary base.
	 * 
	 * @since	1.1
	 */
	public RNABase getComplementary()
	{
		return COMPLEMENTARY.get(this);
	}

	/**
	 * Returns this base&rsquo;s retrotranscribed DNA base: {@link #U} maps to <code>DNABase.T</code> and every other 
	 * symbol (gap included) maps to the <code>DNABase</code> constant of the same name.
	 * 
	 * @return	this base&rsquo;s reverse transcript.
	 * 
	 * @since	1.1
	 */
	public DNABase getReverseTranscript()
	{
		return ReverseTranscripts.TABLE.get(this);
	}
	
	/**
	 * Returns the one-character representation of this base: its upper case letter, or <code>"-"</code> for a gap.
	 * 
	 * @return	a one-character <code>String</code> for this base.
	 * 
	 * @see java.lang.Enum#toString()
	 */
	@Override
	public String toString()
	{
		return String.valueOf(baseChar);
	}
}
