/*
 * @author		Alfonso Muñoz-Pomer Fuentes,
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,
 *				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-11-01
 * 
 * @copyright	Copyright Biotech Vana, S.L. 2006-2011
 */

package com.biotechvana.javabiotoolkit;

import java.io.Serializable;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Enumeration of amino acid symbols for {@link ProteinSequence}s. With the exception of stop codons and the gap 
 * symbol, the <a href="http://www.iupac.org/objID/Article/pac5605x0595">IUPAC nomenclature</a> is followed for the 
 * remaining twenty-two proteinogenic amino acids and four ambiguous symbols.
 * 
 * @version	1.6, 2012-04-24
 * 
 * @author	<a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso Muñoz-Pomer Fuentes</a>,
 * 			<a href="http://www.biotechvana.com">Biotechvana</a>.
 * 
 * @see		ProteinSequence
 * 
 * <style type="text/css">
 * 		table.t0 {
 * 			border:0px solid black;
 * 			border-collapse: collapse;
 * 		}
 * 		table.t0 td {
 * 			text-align: center;
 * 			padding: 4px;
 * 		}
 * 		tr.d0 td {
 * 			background-color: #FFFFFF; color: black;
 * 		}
 * 		tr.d1 td {
 * 			background-color: #DDDDDD; color: black;
 * 		}
 * </style>
 */
public enum AminoAcid implements BioResidue, Serializable
{
	/**
	 * Alanine.
	 */
	A('A', "aA", "Ala", "Alanine"),
	/**
	 * Arginine.
	 */
	R('R', "rR", "Arg", "Arginine"),
	/**
	 * Asparagine.
	 */
	N('N', "nN", "Asn", "Asparagine"),
	/**
	 * Aspartic acid.
	 */
	D('D', "dD", "Asp", "Aspartic acid"),
	/**
	 * Cysteine.
	 */
	C('C', "cC", "Cys", "Cysteine"),
	/**
	 * Glutamic acid.
	 */
	E('E', "eE", "Glu", "Glutamic acid"),
	/**
	 * Glutamine.
	 */
	Q('Q', "qQ", "Gln", "Glutamine"),
	/**
	 * Glycine.
	 */
	G('G', "gG", "Gly", "Glycine"),
	/**
	 * Histidine.
	 */
	H('H', "hH", "His", "Histidine"),
	/**
	 * Isoleucine.
	 */
	I('I', "iI", "Ile", "Isoleucine"),
	/**
	 * Leucine.
	 */
	L('L', "lL", "Leu", "Leucine"),
	/**
	 * Lysine.
	 */
	K('K', "kK", "Lys", "Lysine"),
	/**
	 * Methionine.
	 */
	M('M', "mM", "Met", "Methionine"),
	/**
	 * Phenylalanine.
	 */
	F('F', "fF", "Phe", "Phenylalanine"),
	/**
	 * Pyrrolysine.
	 */
	O('O', "oO", "Pyl", "Pyrrolysine"),
	/**
	 * Proline.
	 */
	P('P', "pP", "Pro", "Proline"),
	/**
	 * Serine.
	 */
	S('S', "sS", "Ser", "Serine"),
	/**
	 * Threonine.
	 */
	T('T', "tT", "Thr", "Threonine"),
	/**
	 * Selenocysteine.
	 */
	U('U', "uU", "Sec", "Selenocysteine"),
	/**
	 * Valine.
	 */
	V('V', "vV", "Val", "Valine"),
	/**
	 * Tryptophan.
	 */
	W('W', "wW", "Trp", "Tryptophan"),
	/**
	 * Tyrosine.
	 */
	Y('Y', "yY", "Tyr", "Tyrosine"),
	/**
	 * Asparagine or aspartic acid.
	 */
	B('B', "bB", "Asx", "Asparagine or aspartic acid"),
	/**
	 * Glutamine or glutamic acid.
	 */
	Z('Z', "zZ", "Glx", "Glutamine or glutamic acid"),
	/**
	 * Leucine or isoleucine.
	 */
	J('J', "jJ", "Xle", "Leucine or isoleucine"),
	/**
	 * Unspecified or unknown amino acid.
	 */
	X('X', "xX", "Xaa", "Unspecified or unknown amino acid"),
	/**
	 * Special value for stop codons.
	 */
	$('*', "$\\*\\&\\#\\@", "Stp", "Stop"),
	/**
	 * Gap.
	 */
	GAP('-', "\\_\\-\\.", "Gap", "Gap"),
	;

	/*
	 * Lookup tables. They are filled exactly once by the static initializer below (which runs after all the enum 
	 * constants have been created) and are never modified afterwards, so they can be read from any thread without 
	 * further synchronization.
	 */
	private static final Map<Character, AminoAcid> CHAR_TO_AMINO_ACID = new HashMap<Character, AminoAcid>();
	private static final Map<String, AminoAcid> ABBREVIATION_TO_AMINO_ACID = new HashMap<String, AminoAcid>();
	private static final Map<AminoAcid, AminoAcid[]> UNAMBIGUOUS_EXPANSIONS =
			new EnumMap<AminoAcid, AminoAcid[]>(AminoAcid.class);
	private static final Map<AminoAcid, AminoAcid[]> FULL_EXPANSIONS =
			new EnumMap<AminoAcid, AminoAcid[]>(AminoAcid.class);
	private static final String ALL_REGEXES;

	static
	{
		StringBuilder allRegexes = new StringBuilder();
		for (AminoAcid a : values())
		{
			allRegexes.append(a.regex);

			// Every character of the regex other than the escaping backslash is an accepted symbol
			for (int i = 0 ; i < a.regex.length() ; i++)
			{
				char symbol = a.regex.charAt(i);
				if (symbol != '\\')
				{
					CHAR_TO_AMINO_ACID.put(symbol, a);
				}
			}

			ABBREVIATION_TO_AMINO_ACID.put(normalizeAbbreviation(a.abbreviation), a);

			// By default a symbol expands only to itself; the ambiguous symbols are overwritten below. The array 
			// can be shared between both tables because callers only ever receive copies
			AminoAcid[] itself = { a };
			UNAMBIGUOUS_EXPANSIONS.put(a, itself);
			FULL_EXPANSIONS.put(a, itself);
		}
		ALL_REGEXES = allRegexes.toString();

		// A few extra abbreviations
		ABBREVIATION_TO_AMINO_ACID.put(normalizeAbbreviation("Och"), $);
		ABBREVIATION_TO_AMINO_ACID.put(normalizeAbbreviation("Amb"), $);
		ABBREVIATION_TO_AMINO_ACID.put(normalizeAbbreviation("Umb"), $);
		ABBREVIATION_TO_AMINO_ACID.put(normalizeAbbreviation("---"), GAP);

		UNAMBIGUOUS_EXPANSIONS.put(B, new AminoAcid[]{ N, D });
		UNAMBIGUOUS_EXPANSIONS.put(Z, new AminoAcid[]{ Q, E });
		UNAMBIGUOUS_EXPANSIONS.put(J, new AminoAcid[]{ L, I });
		UNAMBIGUOUS_EXPANSIONS.put(
				X, new AminoAcid[]{ A, R, N, D, C, E, Q, G, H, I, L, K, M, F, O, P, S, T, U, V, W, Y });

		FULL_EXPANSIONS.put(B, new AminoAcid[]{ B, N, D });
		FULL_EXPANSIONS.put(Z, new AminoAcid[]{ Z, Q, E });
		FULL_EXPANSIONS.put(J, new AminoAcid[]{ J, L, I });
		FULL_EXPANSIONS.put(
				X, new AminoAcid[]{ X, B, Z, J, A, R, N, D, C, E, Q, G, H, I, L, K, M, F, O, P, S, T, U, V, W, Y });
	}

	private final char aminoChar;
	private final String regex;
	private final String abbreviation;
	private final String fullName;

	/*
	 * Private enum constructor. Each of the enum literals has a one-character name and a regex for text processing, 
	 * plus a three letter abbreviation and a full name.
	 */
	private AminoAcid(char aminoChar, String regex, String abbreviation, String fullName)
	{
		this.aminoChar = aminoChar;
		this.regex = regex;
		this.abbreviation = abbreviation;
		this.fullName = fullName;
	}

	/*
	 * Canonical form used for the keys of the abbreviation table: surrounding white space removed and lower case. 
	 * Locale.ROOT keeps the result independent of the default locale of the running JVM.
	 */
	private static String normalizeAbbreviation(String abbreviation)
	{
		return abbreviation.trim().toLowerCase(Locale.ROOT);
	}

	/**
	 * Returns the concatenation of the regexes of all the <code>AminoAcid</code> constants (see 
	 * {@link #getRegex()}), in declaration order. Enclosed in square brackets, the result matches any single 
	 * character that represents an <code>AminoAcid</code>.
	 * <p>
	 * Note: the returned <code>String</code> does not include enclosing square brackets, so it cannot be used directly 
	 * as a character class.
	 * 
	 * @return	the characters, some of them preceded by a backslash, which represent an amino acid, a stop codon 
	 * 			or a gap.
	 * 
	 * @since	0.5
	 */
	public static String getRegexes()
	{
		return ALL_REGEXES;
	}

	/**
	 * Returns the <code>AminoAcid</code> instance represented by the specified <code>char</code>.
	 * <p>
	 * Letters are accepted in either case. The characters <code>$</code>, <code>*</code>, <code>&amp;</code>, 
	 * <code>#</code> and <code>@</code> represent a stop codon ({@link #$}); <code>_</code>, <code>-</code> and 
	 * <code>.</code> represent a gap ({@link #GAP}).
	 * 
	 * @param	c	one character which may represent an amino acid.
	 * 
	 * @return	the <code>AminoAcid</code> represented by <code>c</code> or <code>null</code> if <code>c</code> does 
	 * 			not correspond to any amino acid.
	 * 
	 * @since	1.0rc2
	 */
	public static AminoAcid valueOf(char c)
	{
		return CHAR_TO_AMINO_ACID.get(c);
	}

	/**
	 * Returns the <code>AminoAcid</code> <code>enum</code> constant represented by a <code>String</code> abbreviation. 
	 * The abbreviation can be either a one-letter or a three-letter <code>String</code>. Case and surrounding white 
	 * space are ignored.
	 * <p>
	 * One-character arguments are resolved as in {@link #valueOf(char)}. Besides the values returned by 
	 * {@link #getAbbreviation()}, <code>Och</code>, <code>Amb</code> and <code>Umb</code> are accepted for stop 
	 * codons and <code>---</code> for gaps.
	 * 
	 * @param	abbreviation	<code>String</code> to match against an amino acid one-letter symbol or three-letter 
	 * 							abbreviation.
	 * 
	 * @return	<code>AminoAcid</code> represented by <code>abbreviation</code> or <code>null</code> if the argument 
	 * 			is <code>null</code> or does not contain a valid abbreviation.
	 * 
	 * @since	1.0rc2
	 */
	public static AminoAcid valueOfAbbreviation(String abbreviation)
	{
		if (abbreviation == null)
		{
			return null;
		}

		String key = normalizeAbbreviation(abbreviation);
		if (key.length() == 1)
		{
			// The character table already contains both the lower and the upper case symbols
			return valueOf(key.charAt(0));
		}
		return ABBREVIATION_TO_AMINO_ACID.get(key);
	}

	/**
	 * Returns this amino acid&rsquo;s three-letter abbreviation.
	 * 
	 * @return	this amino acid&rsquo;s three-letter abbreviation.
	 * 
	 * @since	0.1
	 */
	public String getAbbreviation()
	{
		return abbreviation;
	}

	/**
	 * Returns a descriptive <code>String</code> associated to the receiving <code>AminoAcid</code>.
	 * 
	 * @return	this amino acid&rsquo;s full name.
	 * 
	 * @since	0.1
	 */
	@Override
	public String getFullName()
	{
		return fullName;
	}

	/**
	 * Returns a lower case <code>char</code> value representing this amino acid. In the case of gaps and stop codons 
	 * there is no upper/lower case variant.
	 * 
	 * @return	a lower case <code>char</code> representation of this amino acid.
	 * 
	 * @since	1.2
	 */
	@Override
	public char getLowerCaseChar()
	{
		return Character.toLowerCase(aminoChar);
	}

	/**
	 * Returns an upper case <code>char</code> value representing this amino acid. In the case of gaps and stop codons 
	 * there is no upper/lower case variant.
	 * 
	 * @return	an upper case <code>char</code> representation of this amino acid.
	 * 
	 * @since	1.2
	 */
	@Override
	public char getUpperCaseChar()
	{
		return Character.toUpperCase(aminoChar);
	}

	/**
	 * Returns an array with the receiver&rsquo;s equivalent unambiguous amino acids. If this is any of the regular 
	 * amino acids, a stop codon or a gap, then a one-element array with this same value is returned.
	 * <p>
	 * The ambiguous symbols expand as follows: <code>B</code> to <code>N, D</code>; <code>Z</code> to 
	 * <code>Q, E</code>; <code>J</code> to <code>L, I</code>; and <code>X</code> to the twenty-two regular amino 
	 * acids.
	 * <p>
	 * Note: a new array is returned in every call, so the caller is free to modify it.
	 * 
	 * @return	Array of equivalent <code>AminoAcid</code>s.
	 * 
	 * @since	1.0rc1
	 */
	@Override
	public AminoAcid[] expandToUnambiguous()
	{
		// Defensive copy: the array stored in the table is shared by all the callers
		return UNAMBIGUOUS_EXPANSIONS.get(this).clone();
	}

	/**
	 * Returns an array containing all the receiver's equivalent ambiguous and unambiguous amino acids. If an 
	 * unambiguous amino acid calls this method, the result is the same as calling {@link #expandToUnambiguous()}.
	 * <p>
	 * For the ambiguous symbols <code>B</code>, <code>Z</code> and <code>J</code> the result is the receiver 
	 * followed by its unambiguous expansion. <code>X</code> expands to itself, followed by <code>B</code>, 
	 * <code>Z</code> and <code>J</code>, followed by the twenty-two regular amino acids.
	 * <p>
	 * Note: a new array is returned in every call, so the caller is free to modify it.
	 * 
	 * @return	Array of equivalent <code>AminoAcid</code>s, including the receiver.
	 * 
	 * @since	1.0rc1
	 */
	@Override
	public AminoAcid[] expandToAll()
	{
		// Defensive copy: the array stored in the table is shared by all the callers
		return FULL_EXPANSIONS.get(this).clone();
	}

	/**
	 * Returns the characters which represent this <code>AminoAcid</code>, ready to be placed inside a regular 
	 * expression character class. Letters are listed in both lower and upper case; some of the stop codon and gap 
	 * symbols are preceded by a backslash.
	 * <p>
	 * Note: the returned <code>String</code> does not include enclosing square brackets, so it cannot be used directly 
	 * as a character class.
	 * 
	 * @return	one-symbol regular expression.
	 * 
	 * @since	1.0
	 */
	@Override
	public String getRegex()
	{
		return regex;
	}

	/**
	 * Returns the one-character symbol of this amino acid as a <code>String</code>: the upper case letter for amino 
	 * acids, <code>*</code> for stop codons and <code>-</code> for gaps.
	 * 
	 * @return	one-character <code>String</code> representation of this amino acid.
	 */
	@Override
	public String toString()
	{
		return String.valueOf(aminoChar);
	}
}
