/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-09-01
 * 
 * @license		See <a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @copyright	Copyright Biotech Vana, S.L. 2006-2010
 */

package com.biotechvana.javabiotoolkit.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;

import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.NullProgressMonitor;
import org.eclipse.core.runtime.OperationCanceledException;

import com.biotechvana.javabiotoolkit.AminoAcid;
import com.biotechvana.javabiotoolkit.BioSequence;
import com.biotechvana.javabiotoolkit.DNABase;
import com.biotechvana.javabiotoolkit.DNASequence;
import com.biotechvana.javabiotoolkit.NucleotideSequenceDirectionality;
import com.biotechvana.javabiotoolkit.PeptideSequenceDirectionality;
import com.biotechvana.javabiotoolkit.ProteinSequence;
import com.biotechvana.javabiotoolkit.RNABase;
import com.biotechvana.javabiotoolkit.RNASequence;
import com.biotechvana.javabiotoolkit.exceptions.FastaReaderNotParsedException;
import com.biotechvana.javabiotoolkit.exceptions.InvalidSequenceCharacterException;
import com.biotechvana.javabiotoolkit.exceptions.SequenceTooLongException;
import com.biotechvana.javabiotoolkit.text.LineSeparatorFormat;
import com.biotechvana.javabiotoolkit.text.StringBuilderTrimmer;
import com.biotechvana.javabiotoolkit.text.UTF8BufferTrimmer;
import com.biotechvana.javabiotoolkit.utils.FASTAFileRecordNaturalOrderComparator;

/**
 * Instances of this class are reading parsers associated to plain text files with one or more biological sequences in 
 * FASTA format. A description of the format can be found at 
 * <a href="http://www.ncbi.nlm.nih.gov/blast/fasta.shtml">National Center for Biotechnology Information (NCBI) </a>.
 * <p>
 * Typically, a <code>FastaReader</code> is used in three steps:
 * <ol>
 * <li>Creation of the object, which scans the file for FASTA headers</li>
 * <li>Determine the type of sequences stored in the file</li>
 * <li>Ask the reader for <code>BioSequence</code> objects</li>
 * </ol>
 * Please note that the second step detects the type of sequences in the file based on its contents, thus ignoring 
 * certain file extensions such as <code>.fna</code>, <code>.ffn</code>, <code>.faa</code> and <code>.frn</code>.
 * <p>
 * Additionally, the file type detection is performed by a parameterized random sampling of different areas of the 
 * file which account for a total proportion of the sequence blocks (e.g. 1% of the sequence contents, in pieces of 
 * 128KB, chosen at random positions in the file). This is only an estimation for the client programmer, so that even 
 * if a file contains mainly DNA sequences and the next sequence is asked for using <code>nextSequence()</code>, and 
 * it happens to be a protein sequence, an instance of <code>ProteinSequence</code> will be returned. It is up to the 
 * client to decide whether mixed sequences are contemplated (forcing the reader to return a certain type of sequence).
 * <p>
 * Portions of files which do not conform to the FASTA format standard are ignored. Text may therefore be interspersed 
 * in the file, but sequences will be parsed correctly, nonetheless. There even could be headers with no sequence, 
 * which are considered to be FASTA headers with empty sequences.
 * 
 * @version	1.2
 * 
 * @author	Alfonso Muñoz-Pomer Fuentes, <a href="http://www.biotechvana.com">Biotechvana</a>
 * 
 * @see	DNASequence
 * @see	RNASequence
 * @see	ProteinSequence
 *
 */
public class FASTAReader
{
	
	
	/**
	 * Size in bytes of the buffers used to read the file: 2 * 1024 * 1024 bytes (2 MiB). The field is public and 
	 * not final, so client code is able to assign a different size.
	 */
	public static int I_byteBufferSize = 2 * 1024 * 1024;

	/* Formats the record count shown in progress messages while parsing. */
	private static final NumberFormat numberFormat = NumberFormat.getInstance();

	/* Sort method: records are stored in the order in which they are found in the file. */
	public static final int SORT_NO = 0;
	/* Sort method: records are stored ordered by the plain string comparison of their descriptions. */
	public static final int SORT_REGULAR = 1;
	/* Sort method: records are stored in the order given by FASTAFileRecordNaturalOrderComparator. */
	public static final int SORT_NATURAL = 2;

	/*
	 * File associated to each instance of FastaReader. Contains one or more FASTA sequences. Nucleotide and amino 
	 * acid sequences are both allowed in the same file.
	 */
	private File filePath;
	/* Character set used to decode the file and to measure the size in bytes of each character. */
	private Charset fileCharset;
	/* Line separator format handed over to every record created while parsing. */
	private LineSeparatorFormat fileLineSeparatorFormat;
	/* If false, a blank line ends the description or sequence block being parsed. */
	private boolean ignoreBlankLines;

	/* Records found by the parse methods. */
	private List<FASTAFileRecord> fastaRecords;
	/* Set to true once a parse method has scanned the whole file. */
	private boolean fileParsed;
	/* Number of invalid sequence characters counted by the parse methods. */
	private long invalidCharacters;
	/* One of SORT_NO, SORT_REGULAR or SORT_NATURAL. */
	private int sortMethod = SORT_NO;
	/* Global position for "next" methods. */
	private int position = 0;
	
	/**
	 * Creates a reader associated to a FASTA file. The file is not read here: the list of records starts empty and 
	 * it is filled when one of the <code>parse</code> methods is called.
	 * 
	 * @param	filePath				file with the FASTA records.
	 * @param	fileCharset				character set used to decode the file.
	 * @param	fileLineSeparatorFormat	line separator format, passed on to every record found while parsing.
	 * @param	ignoreBlankLines		if <code>false</code>, a blank line ends the description or sequence block 
	 * 									that is being parsed; if <code>true</code>, blank lines do not interrupt it.
	 * 
	 * @since	0.1
	 */
	public FASTAReader
	(File filePath, Charset fileCharset, LineSeparatorFormat fileLineSeparatorFormat, boolean ignoreBlankLines)
	{
		// Parsing state: nothing has been scanned yet
		this.fastaRecords = new ArrayList<FASTAFileRecord>();
		this.fileParsed = false;
		this.invalidCharacters = 0;
		
		// Description of the file to read
		this.filePath = filePath;
		this.fileCharset = fileCharset;
		this.fileLineSeparatorFormat = fileLineSeparatorFormat;
		this.ignoreBlankLines = ignoreBlankLines;
	}
	
	/**
	 * Creates a reader for a file in the given character set, using the system default line separator format and 
	 * ignoring blank lines.
	 * 
	 * @param	filePath	file with the FASTA records.
	 * @param	fileCharset	character set used to decode the file.
	 *
	 * @since	x.y.z
	 */
	public FASTAReader(File filePath, Charset fileCharset)
	{
		this(
			filePath,
			fileCharset,
			LineSeparatorFormat.SYSTEM_DEFAULT,	// line separator format
			true								// ignore blank lines
		);
	}
	
	/**
	 * Creates a reader for a file in the default character set of the Java virtual machine, using the system 
	 * default line separator format and ignoring blank lines.
	 * 
	 * @param	filePath	file with the FASTA records.
	 *
	 * @since	x.y.z
	 */
	public FASTAReader(File filePath)
	{
		// The two-argument constructor supplies the same line separator format and blank line policy
		this(filePath, Charset.defaultCharset());
	}
	
	/**
	 * Returns the file associated to this <code>FastaReader</code>.
	 * 
	 * @return	the file path stored in this reader.
	 *
	 * @since	x.y.z
	 */
	public File filePath()
	{
		return this.filePath;
	}
	
	/**
	 * Sets the order in which <code>parse(boolean)</code> stores the records it finds. The value is stored as 
	 * given, without validation.
	 * 
	 * @param	sortMethod	one of <code>SORT_NO</code>, <code>SORT_REGULAR</code> or <code>SORT_NATURAL</code>. Any 
	 * 						other value makes <code>parse(boolean)</code> keep the file order, like 
	 * 						<code>SORT_NO</code>.
	 */
	public void setSortMethod(int sortMethod)
	{
		this.sortMethod = sortMethod;
	}
	
	/**
	 * Returns the line separator format associated to this <code>FastaReader</code>.
	 * 
	 * @return	the line separator format stored in this reader.
	 *
	 * @since	x.y.z
	 */
	public LineSeparatorFormat fileLineSeparatorFormat()
	{
		return this.fileLineSeparatorFormat;
	}
	
	/**
	 * Scans the file of this <code>FastaReader</code> for FASTA records. Only the layout of each record is stored 
	 * (byte offset and length of its description and sequence blocks, plus the byte offset of one sequence 
	 * character every <code>IContigProvider.I_componentLength</code> valid characters); sequence contents are not 
	 * kept in memory. Use for very large files.
	 * <p>
	 * A record starts at every line whose first character is <code>&gt;</code>. Records are stored in file order 
	 * unless the sort method is <code>SORT_REGULAR</code> (plain string comparison of the descriptions) or 
	 * <code>SORT_NATURAL</code> (order given by <code>FASTAFileRecordNaturalOrderComparator</code>).
	 * <p> 
	 * Note: sequence characters for which <code>AminoAcid.valueOf</code> returns <code>null</code> are counted as 
	 * invalid and do not add to the sequence length. Line terminators are neither counted as valid nor as invalid.
	 * <p>
	 * The list of records is always emptied before scanning, so that records left behind by a scan which failed 
	 * halfway are not duplicated.
	 * 
	 * @param	forceParse	if <code>true</code> the file is scanned again even if it was already parsed; if 
	 * 						<code>false</code> and the file was already parsed, the stored count is returned and 
	 * 						the file is not read.
	 * 
	 * @return	number of invalid characters found in the sequence blocks.
	 * 
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * 
	 * @since	0.8
	 */
	public long parse(boolean forceParse)
	throws FileNotFoundException, IOException
	{
		if (fileParsed && !forceParse)
		{
			return invalidCharacters;
		}
		// Always start from an empty list: a previous scan that failed halfway leaves records behind, which would 
		// otherwise be stored twice
		fastaRecords.clear();
		fileParsed = false;
		
		// Choose the order of the records once, instead of once per record (null means "file order")
		final Comparator<? super FASTAFileRecord> recordOrder;
		if (sortMethod == SORT_NATURAL)
		{
			recordOrder = new FASTAFileRecordNaturalOrderComparator();
		}
		else if (sortMethod == SORT_REGULAR)
		{
			recordOrder = new Comparator<FASTAFileRecord>() {
				@Override
				public int compare(FASTAFileRecord o1, FASTAFileRecord o2) {
					return o1.getDescriptionSB().toString().compareTo(o2.getDescriptionSB().toString());
				}
			};
		}
		else
		{
			recordOrder = null;
		}
		
		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel();	// FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		// equals() rather than ==, so the check does not depend on both sides being the same Charset instance
		boolean utf8 = Charset.forName("UTF-8").equals(fileCharset);
		
		// To keep track of the offset within the file
		long fileByteOffset = 0;
		long cBytes = 0;
		// To keep track of the description block 
		long descriptionByteOffset = 0;
		long descriptionBytes = 0;
		// To keep track of the sequence block
		long sequenceByteOffset = 0;
		long sequenceBytes = 0;
		long sequenceLength = 0;
		SortedMap<Long, Long> componentsByteOffsets = null;
		// A few variables to keep track of each parsing stage
		boolean lineStart;
		boolean parseDescription = false;
		boolean parseSequence = false;
		boolean hasParsedRecord = false;
		// Pretend the file is preceded by a line break, so that its first character starts a line
		char c = '\n';
		char previousChar;
		
		try
		{
			invalidCharacters = 0;
			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1)	// IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (utf8 && inFC.size() - inFC.position() > 0)	// IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte character: go back in the file 
					// as many bytes as the trimmer reports
					inFC.position(inFC.position() - UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();
				
				while (cBuffer.hasRemaining())
				{
					previousChar = c;
					c = cBuffer.get();
					// Size in bytes of this character in the file encoding
					cBytes = fileCharset.encode(String.valueOf(c)).limit();
					
					// TODO Maybe stick to fileLineSeparator (?)
					// lineStart is "true" if c is the first character in a line
					lineStart = previousChar == '\n' ||					// Windows, Linux, Mac OS X
								(previousChar == '\r' && c != '\n');	// Mac OS Classic
					
					// New line after description initiates sequence parse
					if (lineStart && parseDescription)
					{
						parseDescription = false;
						parseSequence = true;
						sequenceByteOffset = fileByteOffset;
					}
					
					// Do we ignore blank lines or not?
					if (lineStart && (c == '\r' || c == '\n'))
					{
						if (!ignoreBlankLines)
						{
							parseDescription = false;
							parseSequence = false;
						}
					}
					// Start of new FASTA record
					else if (lineStart && c == '>')
					{
						if (hasParsedRecord)
						{
							// Close the previous record: the last entry maps its length to the end of the block
							componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
							insertRecordInOrder(
									new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
														descriptionByteOffset, descriptionBytes,
														sequenceByteOffset, sequenceBytes, componentsByteOffsets,
														sequenceLength),
									recordOrder);
						}
						hasParsedRecord = true;
						descriptionByteOffset = fileByteOffset;
						descriptionBytes = 0;
						sequenceBytes = 0;
						sequenceLength = 0;
						componentsByteOffsets = new TreeMap<Long, Long>();
						
						parseSequence = false;
						parseDescription = true;
					}
					
					// Get the rest of the description until new line
					if (parseDescription)
					{	
						descriptionBytes += cBytes;
					}
					else if (parseSequence)
					{	// Get the rest of the sequence and discard invalid characters
						sequenceBytes += cBytes;
						if (c != '\n' && c != '\r')
						{
							if (AminoAcid.valueOf(c) != null)
							{
								if (sequenceLength % IContigProvider.I_componentLength == 0)
								{
									componentsByteOffsets.put(sequenceLength, fileByteOffset);
								}
								sequenceLength++;
							}
							else
							{
								invalidCharacters++;
							}
						}
					}
					fileByteOffset += cBytes;
				}
			}
			// Add sequence that reaches EOF
			if (hasParsedRecord)
			{
				if (parseDescription)
				{
					// The last header reaches EOF without a line terminator: its (empty) sequence block is 
					// located at EOF, not at the offset left over from the previous record
					sequenceByteOffset = fileByteOffset;
				}
				componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
				insertRecordInOrder(
						new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
											descriptionByteOffset, descriptionBytes,
											sequenceByteOffset, sequenceBytes, componentsByteOffsets,
											sequenceLength),
						recordOrder);
			}
		}
		finally
		{
			inFC.close();	// IOException
		}
		fileParsed = true;
		return invalidCharacters;
	}
	
	/*
	 * Stores a record found by parse(boolean). With a null comparator the record is appended (file order); 
	 * otherwise it is inserted at the position found by a binary search, which keeps the list sorted.
	 */
	private void insertRecordInOrder(FASTAFileRecord record, Comparator<? super FASTAFileRecord> recordOrder)
	{
		if (recordOrder == null)
		{
			fastaRecords.add(record);
			return;
		}
		int pos = Collections.binarySearch(fastaRecords, record, recordOrder);
		// A negative result encodes the insertion point as (-(insertion point) - 1)
		fastaRecords.add(pos < 0 ? (-pos) - 1 : pos, record);
	}
	
	/**
	 * Scans the file for FASTA records unless it was already parsed. Equivalent to <code>parse(false)</code>.
	 * 
	 * @return	number of invalid characters found in the sequence blocks.
	 * 
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 *
	 * @since	x.y.z
	 */
	public long parse()
	throws FileNotFoundException, IOException
	{
		final boolean forceParse = false;
		return parse(forceParse);
	}
	
	/**
	 * Scans the file of this <code>FastaReader</code> for FASTA records, reporting progress to a monitor. Only the 
	 * layout of each record is stored (byte offset and length of its description and sequence blocks, plus the 
	 * byte offset of one sequence character every <code>IContigProvider.I_componentLength</code> valid 
	 * characters); sequence contents are not kept in memory. Use for very large files.
	 * <p>
	 * The monitor is told the buffer capacity as work done after each buffer is processed, and the file length 
	 * when the file was already parsed and is not scanned again. Cancellation is checked once per buffer.
	 * <p> 
	 * Note: sequence characters for which <code>AminoAcid.valueOf</code> returns <code>null</code> are counted as 
	 * invalid and do not add to the sequence length.
	 * <p>
	 * The list of records is always emptied before scanning, so that records left behind by a scan which failed 
	 * or was cancelled halfway are not duplicated.
	 * <p>
	 * NOTE(review): unlike <code>parse(boolean)</code>, this method always stores the records in file order and 
	 * does not look at the sort method. Confirm whether that is intended.
	 * 
	 * @param	forceParse		if <code>true</code> the file is scanned again even if it was already parsed.
	 * @param	progressMonitor	monitor to be informed of the parsing progress; if <code>null</code>, a 
	 * 							<code>NullProgressMonitor</code> is used.
	 * 
	 * @return	number of invalid characters found in the sequence blocks.
	 * 
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	OperationCanceledException	if the operation progress was cancelled by the user
	 * 
	 * @since	0.8
	 */
	public long parse(boolean forceParse, IProgressMonitor progressMonitor) throws FileNotFoundException, IOException, OperationCanceledException
	{
		if (progressMonitor == null)
		{
			progressMonitor = new NullProgressMonitor();
		}
		
		progressMonitor.subTask(filePath.getName());
		if (fileParsed && !forceParse)
		{
			// Clamp instead of a plain cast, which overflows for files larger than Integer.MAX_VALUE bytes
			progressMonitor.worked((int)Math.min(filePath.length(), (long)Integer.MAX_VALUE));
			return invalidCharacters;
		}
		// Always start from an empty list: a previous scan that failed or was cancelled halfway leaves records 
		// behind, which would otherwise be stored twice
		fastaRecords.clear();
		fileParsed = false;
		
		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel();	// FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		// equals() rather than ==, so the check does not depend on both sides being the same Charset instance
		boolean utf8 = Charset.forName("UTF-8").equals(fileCharset);
		
		// To keep track of the offset within the file
		long fileByteOffset = 0;
		long cBytes = 0;
		// To keep track of the description block 
		long descriptionByteOffset = 0;
		long descriptionBytes = 0;
		// To keep track of the sequence block
		long sequenceByteOffset = 0;
		long sequenceBytes = 0;
		long sequenceLength = 0;
		SortedMap<Long, Long> componentsByteOffsets = null;
		// A few variables to keep track of each parsing stage
		boolean lineStart;
		boolean parseDescription = false;
		boolean parseSequence = false;
		boolean hasParsedRecord = false;
		// Pretend the file is preceded by a line break, so that its first character starts a line
		char c = '\n';
		char previousChar;
		
		try
		{
			invalidCharacters = 0;
			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1)	// IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (utf8 && inFC.size() - inFC.position() > 0)	// IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte character: go back in the file 
					// as many bytes as the trimmer reports
					inFC.position(inFC.position() - UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();
				
				while (cBuffer.hasRemaining())
				{
					previousChar = c;
					c = cBuffer.get();
					// Size in bytes of this character in the file encoding
					cBytes = fileCharset.encode(String.valueOf(c)).limit();
					
					// lineStart is "true" if c is the first character in a line
					lineStart = previousChar == '\n' ||					// Windows, Linux, Mac OS X
								(previousChar == '\r' && c != '\n');	// Mac OS Classic
					
					// New line after description initiates sequence parse
					if (lineStart && parseDescription)
					{
						parseDescription = false;
						parseSequence = true;
						sequenceByteOffset = fileByteOffset;
					}
					
					// Do we ignore blank lines or not?
					if (lineStart && (c == '\r' || c == '\n'))
					{
						if (!ignoreBlankLines)
						{
							parseDescription = false;
							parseSequence = false;
						}
					}
					// Start of new FASTA record
					else if (lineStart && c == '>')
					{	
						if (hasParsedRecord)
						{
							// Close the previous record: the last entry maps its length to the end of the block
							componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
							fastaRecords.add(
									new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
														descriptionByteOffset, descriptionBytes,
														sequenceByteOffset, sequenceBytes, componentsByteOffsets,
														sequenceLength));
							if (fastaRecords.size() % 1000 == 0) {
								progressMonitor.subTask("FASTA records found: " + numberFormat.format(fastaRecords.size()));
							}
						}
						
						hasParsedRecord = true;
						descriptionByteOffset = fileByteOffset;
						descriptionBytes = 0;
						sequenceBytes = 0;
						componentsByteOffsets = new TreeMap<Long, Long>();
						sequenceLength = 0;
						
						parseSequence = false;
						parseDescription = true;
					}
					// Get the rest of the description until new line
					if (parseDescription)
					{	
						descriptionBytes += cBytes;
					}
					// Get the rest of the sequence and discard invalid characters
					else if (parseSequence)
					{
						sequenceBytes += cBytes;
						if (c != '\n' && c != '\r')
						{
							if (AminoAcid.valueOf(c) != null)
							{
								if (sequenceLength % IContigProvider.I_componentLength == 0)
								{
									componentsByteOffsets.put(sequenceLength, fileByteOffset);
								}
								sequenceLength++;
							}
							else
							{
								invalidCharacters++;
							}
						}
					}
					fileByteOffset += cBytes;
				}
				progressMonitor.worked(bBuffer.capacity());
				if (progressMonitor.isCanceled())
				{
					throw new OperationCanceledException();
				}
			}
			// Add sequence that reaches EOF
			if (hasParsedRecord)
			{
				if (parseDescription)
				{
					// The last header reaches EOF without a line terminator: its (empty) sequence block is 
					// located at EOF, not at the offset left over from the previous record
					sequenceByteOffset = fileByteOffset;
				}
				componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
				fastaRecords.add(
						new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
											descriptionByteOffset, descriptionBytes, 
											sequenceByteOffset, sequenceBytes, componentsByteOffsets, 
											sequenceLength));
				progressMonitor.subTask("Number of sequences found: " + fastaRecords.size());
			}
		}
		finally
		{
			inFC.close();	// IOException
		}
		fileParsed = true;
		return invalidCharacters;
	}

	/**
	 * Returns the starting and ending byte offsets of the sequence block of every record stored in this 
	 * <code>FastaReader</code>.
	 *
	 * @return	a new <code>List</code> with two elements per record, in the order in which the records are stored 
	 * 			(the list is not sorted). For the <code>i</code>th record, element <code>2*i</code> is the starting 
	 * 			byte (inclusive) and element <code>2*i + 1</code> is the ending byte (exclusive) of its sequence 
	 * 			block.
	 * 
	 * @since	1.0rc3
	 */
	private List<Long> sequenceByteRanges()
	{
		int recordCount = fastaRecords.size();
		List<Long> offsets = new ArrayList<Long>(recordCount * 2);
		
		for (FASTAFileRecord record : fastaRecords)
		{
			// Start of the sequence block, then its end (start plus length)
			offsets.add(record.sequenceByteOffset);
			offsets.add(record.sequenceByteOffset + record.sequenceBytes);
		}
		return offsets;
	}
	
	/**
	 * Returns a series of starting and ending sequence byte offsets that total a specified number of bytes, 
	 * beginning at a byte offset. Only bytes that belong to sequence blocks are selected; description blocks are 
	 * skipped.
	 * <p>
	 * The sequence block ranges are sorted before they are searched, so the method also works when the records 
	 * are stored sorted by name instead of in file order.
	 *
	 * @param byteOffsetStart	byte offset to start reading. If this byte is part of a comment, it will be adjusted 
	 * 							to the next nearest sequence block starting byte.
	 * @param nBytes	number of bytes to read.
	 *
	 * @return	Pairs of byte offsets in ascending order: each even index is a starting byte (inclusive) and each 
	 * 			odd index is the matching ending byte (exclusive). The list is <code>[0, 0]</code> if there are no 
	 * 			records, and it is empty if every sequence block is empty, if there is no sequence block at or 
	 * 			after <code>byteOffsetStart</code> or if <code>nBytes</code> is not positive. If the sequence 
	 * 			blocks from <code>byteOffsetStart</code> onwards hold fewer than <code>nBytes</code> bytes, the 
	 * 			ranges stop at the last block and total fewer bytes than requested.
	 * 
	 * @throws	IllegalArgumentException	if <code>byteOffsetStart</code> is negative or larger than the file size; 
	 * 										if <code>nBytes</code> is larger than the total number of sequence block 
	 * 										bytes.
	 *  
	 * @since	1.0rc3
	 */
	private List<Long> sequenceByteRanges(long byteOffsetStart, int nBytes)
	throws IllegalArgumentException
	{	
		List<Long> toReadByteRanges = new ArrayList<Long>();
		
		if (fastaRecords.size() == 0)
		{
			toReadByteRanges.add(0L);
			toReadByteRanges.add(0L);
			return toReadByteRanges;
		}
		
		if (byteOffsetStart < 0 || byteOffsetStart > filePath.length() - 1)
		{
			throw new IllegalArgumentException(byteOffsetStart + ": the " +
					"starting offset must be a value between 0 and (file size - 1)");
		}
		if (nBytes > totalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of " +
					"bytes to read is larger than the file size");
		}
		
		// Get the ranges that store the sequence blocks...
		List<Long> sequenceByteRanges = sequenceByteRanges();
		// ... in ascending order, as the binary search below requires. The list follows the order of the stored 
		// records, which is not the file order when they are sorted by name. Sequence blocks do not overlap, so 
		// sorting the flat list keeps every start next to its end. The list is a fresh copy: nothing else is 
		// affected.
		Collections.sort(sequenceByteRanges);
		int rangeCount = sequenceByteRanges.size();
		
		// ... check there are actual bytes that can be read...
		boolean somethingToRead = false;
		for (int i = 0 ; i < rangeCount - 1; i = i + 2)
		{
			if (sequenceByteRanges.get(i + 1) - sequenceByteRanges.get(i) > 0)
			{
				somethingToRead = true;
				break;
			}
		}
		if (!somethingToRead)
		{
			return toReadByteRanges;
		}
		
		// ... find the offset position in the list
		int byteOffsetStartPosition = Collections.binarySearch(sequenceByteRanges, byteOffsetStart);
		
		if (byteOffsetStartPosition < 0)
		{	// If it's not there (very likely), where is it?
			byteOffsetStartPosition = -(byteOffsetStartPosition + 1);
			
			if (byteOffsetStartPosition % 2 != 0)
			{   // If it's in a sequence block, adjust position to start
				byteOffsetStartPosition--;
			}
			else if (byteOffsetStartPosition < rangeCount)
			{	// If it's a comment block, set offset to the start offset
				byteOffsetStart = sequenceByteRanges.get(byteOffsetStartPosition);
			}
		}
		else if (byteOffsetStartPosition % 2 != 0)
		{	// The offset is the (exclusive) end of a block: adjust to the start of the next one
			byteOffsetStartPosition++;
			if (byteOffsetStartPosition < rangeCount)
			{
				byteOffsetStart = sequenceByteRanges.get(byteOffsetStartPosition);
			}
		}
		if (byteOffsetStartPosition >= rangeCount)
		{	// The offset lies after the last sequence block: there is nothing to read from there
			return toReadByteRanges;
		}
		// byteOffsetStart is now in the middle or start of a sequence block

		// Read nBytes starting at the given offset, only sequence block ranges
		long currentBlockStartOffset = byteOffsetStart;
		long currentBlockEndOffset = sequenceByteRanges.get(byteOffsetStartPosition + 1);
		while (nBytes > 0)
		{	// While there are bytes to be read: add the current start position
			toReadByteRanges.add(currentBlockStartOffset);
			// If nBytes is greater than the end, add the end...
			if (currentBlockStartOffset + nBytes >= currentBlockEndOffset)
			{
				toReadByteRanges.add(currentBlockEndOffset);
				// ... decrement the amount of bytes read...
				nBytes -= currentBlockEndOffset - currentBlockStartOffset;
			}
			else
			{
				toReadByteRanges.add(currentBlockStartOffset + nBytes);
				break;
			}
			
			// ... stop if that was the last sequence block (going round again would add the same block once 
			// more, and never end if the block is empty) ...
			if (byteOffsetStartPosition + 2 >= rangeCount)
			{
				break;
			}
			// ... otherwise skip (the comment) to the next sequence range
			byteOffsetStartPosition += 2;
			currentBlockStartOffset = sequenceByteRanges.get(byteOffsetStartPosition);
			currentBlockEndOffset = sequenceByteRanges.get(byteOffsetStartPosition + 1);
		}
		return toReadByteRanges;
	}
	
	/**
	 * Fills a <code>ByteBuffer</code> with portions of the file associated to this <code>FastaReader</code>. The 
	 * contents are specified in pairs of non-overlapping, ascending, byte offsets. The bytes are appended at the 
	 * current position of the buffer.
	 * <p>
	 * Every range is read until it is complete or the end of the file is reached, as a single read of the channel 
	 * may return fewer bytes than requested.
	 * 
	 * @param	bBuffer		Buffer to read file contents to.
	 * @param	byteRanges	Pairs of non-overlapping, ascending, byte offsets (equal consecutive values are 
	 * 						accepted). Even positions (starting offsets) are inclusive and odd position (ending 
	 * 						offsets) are exclusive.
	 * 
	 * @throws	IllegalArgumentException	if <code>byteRanges</code> has an odd number of elements, if they are 
	 * 										not in ascending order or if the sum of bytes exceeds the 
	 * 										<code>bBuffer</code>'s capacity.
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException	if there's an I/O error reading the file.
	 * 
	 * @since	1.0rc3
	 */
	private void readByteRanges(ByteBuffer bBuffer, List<Long> byteRanges)
	throws FileNotFoundException, IOException
	{
		if (byteRanges.size() % 2 != 0)
		{
			throw new IllegalArgumentException("Size of ranges list is odd");
		}
		// Check validity of ranges and that buffer is big enough
		long nBytesTotal = 0;
		for (int i = 0 ; i < byteRanges.size() - 1 ; i++)
		{
			if (byteRanges.get(i) > byteRanges.get(i + 1))
			{
				throw new IllegalArgumentException("Ranges not valid at " + i);
			}
			if (i % 2 == 0)
			{	// Even index: i and i + 1 are the start and end of the same range
				nBytesTotal += byteRanges.get(i + 1) - byteRanges.get(i);
			}
		}
		if (nBytesTotal > bBuffer.capacity())
		{
			throw new IllegalArgumentException(nBytesTotal + ": the number " +
					"of bytes to read exceeds the buffer size " + bBuffer.capacity());
		}
		
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel();	// FileNotFoundException
		try
		{
			for (int i = 0 ; i < byteRanges.size() - 1 ; i += 2)
			{
				// The cast is safe: no range is longer than the total, which fits in the buffer capacity
				ByteBuffer middleBuffer = ByteBuffer.allocate((int)(byteRanges.get(i + 1) - byteRanges.get(i)));
				inFC.position(byteRanges.get(i));	// IOException
				// One read is not guaranteed to fill the buffer: insist until the range is complete or EOF
				while (middleBuffer.hasRemaining() && inFC.read(middleBuffer) != -1)	// IOException
				{
					// Nothing else to do, the channel position advances with each read
				}
				middleBuffer.flip();
				bBuffer.put(middleBuffer);
			}
		}
		finally
		{
			inFC.close();	// IOException
		}
	}
	
	/**
	 * Adds up the sizes in bytes of the sequence blocks of all the records stored in this 
	 * <code>FastaReader</code>. 
	 *  
	 * @return	Sum of <code>(end - start)</code> over every pair returned by <code>sequenceByteRanges()</code>. 
	 * 			Possibly <code>0</code>.
	 * 
	 * @since	1.0rc3
	 */
	private long totalSequenceBytes()
	{
		List<Long> ranges = sequenceByteRanges();
		long total = 0;
		// Walk the odd positions (block ends); the element right before each one is the matching block start
		for (int end = 1 ; end < ranges.size() ; end += 2)
		{
			total += ranges.get(end) - ranges.get(end - 1);
		}
		return total;
	}
	
	/**
	 * Returns a byte offset at which reading a determined amount of sequence bytes in this file will not 
	 * fail due to reaching EOF. Sequence blocks are consumed from the end of the file backwards until 
	 * <code>byteCount</code> bytes are covered; the returned offset is one byte before the position, within the 
	 * last block consumed, from which exactly <code>byteCount</code> sequence bytes remain.
	 * <p>
	 * The sequence block ranges are sorted first, so the method also works when the records are stored sorted by 
	 * name instead of in file order.
	 * 
	 * @param	byteCount	number of bytes to read.
	 * 
	 * @return	byte offset at which the call
	 * 			<br /><code>readByteRanges(..., sequenceByteRanges(offset, ...))</code><br />
	 * 			will not fail because of reaching EOF. If <code>byteCount</code> is <code>0</code>, the offset of 
	 * 			the last byte in the file (<code>-1</code> for an empty file).
	 * 
	 * @throws	IllegalArgumentException	if <code>byteCount</code> is negative or greater than the value returned 
	 * 			by <code>totalSequenceBytes()</code>.
	 * 
	 * @since	1.0rc3
	 */
	private long countSequenceBytesBackwards(long byteCount)
	throws IllegalArgumentException
	{
		// Computed once: every call goes through all the records
		long totalBytes = totalSequenceBytes();
		if (byteCount < 0 || byteCount > totalBytes)
		{
			throw new IllegalArgumentException(
					byteCount + ": the amount of bytes to read must be between 0 and " + totalBytes);
		}
		
		if (byteCount == 0)
		{
			return filePath.length() - 1;
		}
		
		List<Long> byteRanges = sequenceByteRanges();
		// Counting backwards from the end of the file needs the ranges in file order, which is not the order of 
		// the list when the records are stored sorted by name. Sequence blocks do not overlap, so sorting the 
		// flat list keeps every start next to its end. The list is a fresh copy: nothing else is affected.
		Collections.sort(byteRanges);
		
		// i is always the index of a block end (odd), so i - 1 is the start of the same block
		int i = byteRanges.size() - 1;
		while (byteCount > 0 && i >= 0)
		{
			byteCount -= byteRanges.get(i) - byteRanges.get(i - 1);
			i = i - 2;
		}
		// i + 1 is the start of the last block consumed and -byteCount is the number of bytes of that block 
		// that were not needed
		return byteRanges.get(i + 1) - byteCount - 1;
	}
	
	/**
	 * Determines if a specified amount of bytes in a FASTA file contents (i.e. excluding the comment lines) 
	 * correspond to DNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets. The file needs to be parsed first in order to analyze its contents.
	 * 
	 * @param nBytes	number of sequence bytes to probe. It is rounded up to a multiple of the whole number of 
	 * 					bytes per character of the file encoding.
	 * @param threshold	minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * @param includeN	if <code>true</code>, <code>DNABase.N</code> is a matching character.
	 * @param includeAmbiguous	if <code>true</code>, the bases B, D, H, K, M, R, S, V, W and Y are matching 
	 * 							characters.
	 * @param includeGaps	if <code>true</code>, <code>DNABase.GAP</code> is a matching character.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. Matching characters are the 
	 * 			bases returned by <code>DNABase.N.expandToUnambiguous()</code>, the ones enabled by the 
	 * 			<code>include</code> parameters, and line terminators. <code>false</code> if no character was 
	 * 			read at all.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	FastaReaderNotParsedException	if the file has not been parsed. 
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>.
	 * 
	 * @since	1.0rc3
	 */
	private boolean isDnaFile(long nBytes, double threshold, 
							  boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException 
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		if (nBytes < 0 || nBytes > totalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of read bytes must be between 0 and file size");
		}
		// Adjust nBytes to be a multiple of the encoding. The average is truncated to a whole number of bytes; 
		// never let it be 0, which would make the modulo operations fail
		int bytesPerChar = Math.max(1, (int)fileCharset.newEncoder().averageBytesPerChar());
		while (nBytes % bytesPerChar != 0)
		{
			nBytes++;
		}
		
		// Until which byte can we read without reaching EOF?
		long readLimit = countSequenceBytesBackwards(nBytes);
		Random randomizer = new Random();
		
		// tally[0] counts matching characters, tally[1] non matching ones (long: nBytes may exceed the int range)
		long[] tally = new long[2];
		// The buffer size is public and not final: read it once so the buffer and the chunks always agree
		int chunkBytes = I_byteBufferSize;
		ByteBuffer bBuffer = ByteBuffer.allocate(chunkBytes);

		List<DNABase> goodBases = new ArrayList<DNABase>(Arrays.asList(DNABase.N.expandToUnambiguous()));
		if (includeN)
		{
			goodBases.add(DNABase.N);
		}
		if (includeAmbiguous)
		{
			// NOTE(review): C is presumably already among the unambiguous bases; it is kept as in the original
			goodBases.add(DNABase.B); goodBases.add(DNABase.C); goodBases.add(DNABase.D); goodBases.add(DNABase.H);
			goodBases.add(DNABase.K); goodBases.add(DNABase.M); goodBases.add(DNABase.R); goodBases.add(DNABase.S); 
			goodBases.add(DNABase.V); goodBases.add(DNABase.W); goodBases.add(DNABase.Y);
		}
		if (includeGaps)
		{
			goodBases.add(DNABase.GAP);
		}

		// Read in chunks of chunkBytes bytes...
		while (nBytes > chunkBytes)
		{
			tallyRandomDnaSample(randomizer, readLimit, bytesPerChar, chunkBytes, bBuffer, goodBases, tally);
			nBytes -= chunkBytes;
		}
		// ... and whatever is left, which fits in one chunk
		tallyRandomDnaSample(randomizer, readLimit, bytesPerChar, (int)nBytes, bBuffer, goodBases, tally);
		
		// 0/0 is NaN, which compares false: a probe that read no characters does not identify a DNA file
		return ((double)tally[0] / (double)(tally[0] + tally[1])) >= threshold;
	}
	
	/*
	 * Reads sampleBytes sequence bytes starting at a random offset between 0 and readLimit and adds the number of 
	 * matching and non matching characters to tally[0] and tally[1], respectively. Line terminators count as 
	 * matching characters.
	 */
	private void tallyRandomDnaSample(Random randomizer, long readLimit, int bytesPerChar, int sampleBytes, 
									  ByteBuffer bBuffer, List<DNABase> goodBases, long[] tally)
	throws FileNotFoundException, IOException
	{
		bBuffer.clear();
		// Find a random starting position. nextDouble() is never negative, unlike Math.abs(nextLong()), which 
		// is negative for Long.MIN_VALUE
		long randomSafeByte = (long)(randomizer.nextDouble() * (double)readLimit);
		// Adjust to "fall" into place
		while (randomSafeByte % bytesPerChar != 0)
		{
			randomSafeByte--;
		}
		readByteRanges(bBuffer, sequenceByteRanges(randomSafeByte, sampleBytes));
		bBuffer.flip();
		CharBuffer cBuffer = fileCharset.decode(bBuffer);
		
		while (cBuffer.hasRemaining())
		{
			char c = cBuffer.get();
			if (goodBases.contains(DNABase.valueOf(c)) || c == '\n' || c == '\r')
			{
				tally[0]++;
			}
			else
			{
				tally[1]++;
			}
		}
	}

	/**
	 * Determines if a specified amount of bytes in a FASTA file contents (i.e. excluding the comment lines) 
	 * correspond to RNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.

	 * @param nBytes	Number of sequence bytes to probe. 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a RNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>; if <code>threshold</code> is not a 
	 * 										value between 0.0 and 1.0.
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	private boolean isRnaFile(long nBytes, double threshold, 
							  boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Probing relies on information gathered while parsing, so refuse to run on an unparsed file
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		if (nBytes < 0 || nBytes > totalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of read bytes must be between 0 and file size");
		}
		
		// Adjust nBytes to be a multiple of the encoding. The average is truncated to an int, so clamp it to 1 to
		// avoid a division by zero for encoders reporting less than one byte per character.
		int bytesPerChar = Math.max(1, (int)fileCharset.newEncoder().averageBytesPerChar());
		while (nBytes % bytesPerChar != 0)
		{
			nBytes++;
		}
		
		// Until which byte can we read without reaching EOF?
		long readableLimit = countSequenceBytesBackwards(nBytes);
		Random randomizer = new Random();
		
		// long counters: a probe may cover more characters than an int can count
		long matchingChars = 0;
		long nonMatchingChars = 0;
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
	
		// Symbols that count as a match, according to the include* flags
		List<RNABase> goodBases = new ArrayList<RNABase>(Arrays.asList(RNABase.N.expandToUnambiguous()));
		if (includeN)
		{
			goodBases.add(RNABase.N);
		}
		if (includeAmbiguous)
		{
			goodBases.add(RNABase.B); goodBases.add(RNABase.C); goodBases.add(RNABase.D); goodBases.add(RNABase.H);
			goodBases.add(RNABase.K); goodBases.add(RNABase.M); goodBases.add(RNABase.R); goodBases.add(RNABase.S); 
			goodBases.add(RNABase.V); goodBases.add(RNABase.W); goodBases.add(RNABase.Y);
		}
		if (includeGaps)
		{
			goodBases.add(RNABase.GAP);
		}
		
		// Read in chunks of at most I_byteBufferSize bytes, each one starting at a random offset. At least one
		// (possibly empty) chunk is always read, exactly as the former loop-plus-tail did.
		long remainingBytes = nBytes;
		do
		{
			int chunkBytes = (int)Math.min(remainingBytes, (long)I_byteBufferSize);
			bBuffer.clear();
			// Find a random starting position. nextDouble() is never negative, unlike Math.abs(nextLong()),
			// which yields a negative value for Long.MIN_VALUE.
			long randomSafeByte = (long)(randomizer.nextDouble() * (double)readableLimit);
			// Adjust to "fall" into place
			while (randomSafeByte % bytesPerChar != 0)
			{
				randomSafeByte--;
			}
			readByteRanges(bBuffer, sequenceByteRanges(randomSafeByte, chunkBytes));
			bBuffer.flip();
			CharBuffer cBuffer = fileCharset.decode(bBuffer);
	
			while (cBuffer.hasRemaining())
			{
				char c = cBuffer.get();
				// Line terminators are part of the layout, not of the sequence, so they count as matches
				if (goodBases.contains(RNABase.valueOf(c)) || c == '\n' || c == '\r')
				{
					matchingChars++;
				}
				else
				{
					nonMatchingChars++;
				}
			}
			remainingBytes -= chunkBytes;
		}
		while (remainingBytes > 0);
		
		// With no characters read the ratio is NaN and the comparison yields false
		return ((double)matchingChars / (double)(matchingChars + nonMatchingChars)) >= threshold;
	}

	/**
	 * Determines if a specified amount of bytes in a FASTA file contents (i.e. excluding the comment lines) 
	 * correspond to protein amino acid sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.

	 * @param nBytes	number of sequence bytes to probe. 
	 * @param threshold	minimum proportion of correctly read characters to positively identify this file as a protein file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching 
	 * 			characters are A, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>; if <code>threshold</code> is not a 
	 * 										value between 0.0 and 1.0.
	 * 
	 * @since	1.0rc3
	 */
	private boolean isProteinFile(long nBytes, double threshold, 
								  boolean includeX, boolean includeAmbiguous, boolean includeGaps, boolean includeStops)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Probing relies on information gathered while parsing, so refuse to run on an unparsed file
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		if (nBytes < 0 || nBytes > totalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of read bytes must be between 0 and file size");
		}
		
		// Adjust nBytes to be a multiple of the encoding. The average is truncated to an int, so clamp it to 1 to
		// avoid a division by zero for encoders reporting less than one byte per character.
		int bytesPerChar = Math.max(1, (int)fileCharset.newEncoder().averageBytesPerChar());
		while (nBytes % bytesPerChar != 0)
		{
			nBytes++;
		}
		
		// Until which byte can we read without reaching EOF?
		long readableLimit = countSequenceBytesBackwards(nBytes);
		Random randomizer = new Random();
		
		// long counters: a probe may cover more characters than an int can count
		long matchingChars = 0;
		long nonMatchingChars = 0;
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		
		// Symbols that count as a match. Built once, from the include* flags, and used for every chunk; the last 
		// chunk used to ignore the flags and always accept X, gaps and stops (and never B, Z or J).
		List<AminoAcid> goodAminos = new ArrayList<AminoAcid>(Arrays.asList(AminoAcid.X.expandToUnambiguous()));
		if (includeX)
		{
			goodAminos.add(AminoAcid.X);
		}
		if (includeAmbiguous)
		{
			goodAminos.add(AminoAcid.B); goodAminos.add(AminoAcid.Z); goodAminos.add(AminoAcid.J);
		}
		if (includeGaps)
		{
			goodAminos.add(AminoAcid.GAP);
		}
		if (includeStops)
		{
			goodAminos.add(AminoAcid.$);
		}
		
		// Read in chunks of at most I_byteBufferSize bytes, each one starting at a random offset. At least one
		// (possibly empty) chunk is always read, exactly as the former loop-plus-tail did.
		long remainingBytes = nBytes;
		do
		{
			int chunkBytes = (int)Math.min(remainingBytes, (long)I_byteBufferSize);
			bBuffer.clear();
			// Find a random starting position. nextDouble() is never negative, unlike Math.abs(nextLong()),
			// which yields a negative value for Long.MIN_VALUE.
			long randomSafeByte = (long)(randomizer.nextDouble() * (double)readableLimit);
			// Adjust to "fall" into place
			while (randomSafeByte % bytesPerChar != 0)
			{
				randomSafeByte--;
			}
			readByteRanges(bBuffer, sequenceByteRanges(randomSafeByte, chunkBytes));
			bBuffer.flip();
			CharBuffer cBuffer = fileCharset.decode(bBuffer);
			
			while (cBuffer.hasRemaining())
			{
				char c = cBuffer.get();
				// Line terminators are part of the layout, not of the sequence, so they count as matches
				if (goodAminos.contains(AminoAcid.valueOf(c)) || c == '\n' || c == '\r')
				{
					matchingChars++;
				}
				else
				{
					nonMatchingChars++;
				}
			}
			remainingBytes -= chunkBytes;
		}
		while (remainingBytes > 0);
		
		// With no characters read the ratio is NaN and the comparison yields false
		return ((double)matchingChars / (double)(matchingChars + nonMatchingChars)) >= threshold;
	}
	
	/**
	 * Determines if a specified proportion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * DNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.
	 * 
	 * @param contentsRatio	Proportion of sequence bytes to probe. 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>; if <code>threshold</code> is not a 
	 * 										value between 0.0 and 1.0.
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isDnaFile(double contentsRatio, double threshold, 
							 boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Written as a negated range test so that NaN, which fails every comparison, is rejected as well
		if (!(contentsRatio >= 0.0 && contentsRatio <= 1.0))
		{
			throw new IllegalArgumentException(
					contentsRatio + ": the proportion of the file to be read must be a value between 0.0 and 1.0");
		}
		
		// Translate the requested proportion into a byte count and delegate to the byte-based probe
		long nBytes = (long) (totalSequenceBytes() * contentsRatio);
		return isDnaFile(nBytes, threshold, includeN, includeAmbiguous, includeGaps);
	}
	
	/**
	 * 
	 * @param contentsRatio
	 * @param threshold
	 * @return
	 * @throws FileNotFoundException
	 * @throws IOException
	 * @throws FastaReaderNotParsedException
	 * @throws IllegalArgumentException
	 *
	 * @since	x.y.z
	 */
	public boolean isDnaFile(double contentsRatio, double threshold)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Defaults of the short form: N and the ambiguity codes are accepted, gaps are not
		final boolean acceptN = true;
		final boolean acceptAmbiguous = true;
		final boolean acceptGaps = false;
		
		return isDnaFile(contentsRatio, threshold, acceptN, acceptAmbiguous, acceptGaps);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to DNA 
	 * nucleotide sequences with a given probability.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to a default buffer size (e.g. 512KB).
	 * 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	IllegalArgumentException	if <code>threshold</code> is not a value between 0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isDnaFile(double threshold)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		long sequenceBytes = totalSequenceBytes();
		
		// Probe all the sequence contents, but never more than one buffer's worth
		long probeBytes = sequenceBytes > I_byteBufferSize ? (long)I_byteBufferSize : sequenceBytes;
		// Files no larger than 1/512 of the buffer are additionally allowed to contain gaps
		boolean tinyFile = sequenceBytes <= I_byteBufferSize / 512;
		
		return isDnaFile(probeBytes, threshold, true, true, tinyFile);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to DNA 
	 * nucleotide sequences with a probability of 0.95.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than 0.95. In this case, matching characters are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isDnaFile()
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Default acceptance threshold: 95% of the probed characters must match
		final double defaultThreshold = 0.95;
		return isDnaFile(defaultThreshold);
	}
		
	/**
	 * Determines if a specified proportion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * RNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.
	 * 
	 * @param contentsRatio	Proportion of sequence bytes to probe. 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a RNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	IllegalArgumenException	if <code>contentsRatio</code> or <code>threshold</code> is not a value between 
	 * 			0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isRnaFile(double contentsRatio, double threshold, 
							 boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Written as a negated range test so that NaN, which fails every comparison, is rejected as well
		if (!(contentsRatio >= 0.0 && contentsRatio <= 1.0))
		{
			throw new IllegalArgumentException(
					contentsRatio + ": the proportion of the file to be read must be a value between 0.0 and 1.0");
		}

		// Translate the requested proportion into a byte count and delegate to the byte-based probe
		long nBytes = (long) (totalSequenceBytes() * contentsRatio);
		return isRnaFile(nBytes, threshold, includeN, includeAmbiguous, includeGaps);
	}
	
	/**
	 * 
	 * @param contentsRatio
	 * @param threshold
	 * @return
	 * @throws FastaReaderNotParsedException
	 * @throws IllegalArgumentException
	 * @throws FileNotFoundException
	 * @throws IOException
	 *
	 * @since	x.y.z
	 */
	public boolean isRnaFile(double contentsRatio, double threshold)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Defaults of the short form: N is accepted, the other ambiguity codes and gaps are not
		final boolean acceptN = true;
		final boolean acceptAmbiguous = false;
		final boolean acceptGaps = false;
		
		return isRnaFile(contentsRatio, threshold, acceptN, acceptAmbiguous, acceptGaps);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to RNA 
	 * nucleotide sequences with a given probability.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a RNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	IllegalArgumenException	if <code>threshold</code> is not a value between 0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isRnaFile(double threshold)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		long sequenceBytes = totalSequenceBytes();
		
		// Probe all the sequence contents, but never more than one buffer's worth
		long probeBytes = sequenceBytes > I_byteBufferSize ? (long)I_byteBufferSize : sequenceBytes;
		// Files no larger than 1/512 of the buffer are additionally allowed ambiguity codes and gaps
		boolean tinyFile = sequenceBytes <= I_byteBufferSize / 512;
		
		return isRnaFile(probeBytes, threshold, true, tinyFile, tinyFile);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to RNA 
	 * nucleotide sequences with a probability of 0.95.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than 0.95. In this case, matching characters are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isRnaFile()
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Default acceptance threshold: 95% of the probed characters must match
		final double defaultThreshold = 0.95;
		return isRnaFile(defaultThreshold);
	}
		
	/**
	 * Determines if a specified proportion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * protein amino acid sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.
	 * 
	 * @param	contentsRatio	Proportion of sequence bytes to probe. 
	 * @param	threshold	Minimum proportion of correctly read characters to positively identify this file as an 
	 * 						amino acid sequence file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	IllegalArgumenException	if <code>contentsRatio</code> or <code>threshold</code> is not a value between 
	 * 									0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isProteinFile(double contentsRatio, double threshold, 
								 boolean includeX, boolean includeAmbiguous, boolean includeGaps, boolean includeStops)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Written as a negated range test so that NaN, which fails every comparison, is rejected as well
		if (!(contentsRatio >= 0.0 && contentsRatio <= 1.0))
		{
			throw new IllegalArgumentException(
					contentsRatio + ": the proportion of the file to be read must be a value between 0.0 and 1.0");
		}
		
		// Translate the requested proportion into a byte count and delegate to the byte-based probe
		long nBytes = (long) (totalSequenceBytes() * contentsRatio);
		return isProteinFile(nBytes, threshold, includeX, includeAmbiguous, includeGaps, includeStops);
	}
	
	/**
	 * 
	 * @param contentsRatio
	 * @param threshold
	 * @return
	 * @throws FastaReaderNotParsedException
	 * @throws IllegalArgumentException
	 * @throws FileNotFoundException
	 * @throws IOException
	 *
	 * @since	x.y.z
	 */
	public boolean isProteinFile(double contentsRatio, double threshold)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Defaults of the short form: X, gaps and stops are accepted, the ambiguity codes are not
		final boolean acceptX = true;
		final boolean acceptAmbiguous = false;
		final boolean acceptGaps = true;
		final boolean acceptStops = true;
		
		return isProteinFile(contentsRatio, threshold, acceptX, acceptAmbiguous, acceptGaps, acceptStops);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * protein amino acid sequences with a given probability.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as an amino 
	 * 					acid sequence file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	IllegalArgumenException	if <code>threshold</code> is not a value between 0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isProteinFile(double threshold)
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		long sequenceBytes = totalSequenceBytes();
		
		// Probe all the sequence contents, but never more than one buffer's worth
		long probeBytes = sequenceBytes > I_byteBufferSize ? (long)I_byteBufferSize : sequenceBytes;
		// Files no larger than 1/512 of the buffer are additionally allowed the ambiguity codes
		boolean tinyFile = sequenceBytes <= I_byteBufferSize / 512;
		
		return isProteinFile(probeBytes, threshold, true, tinyFile, true, true);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * protein amino acid sequences with a probability of 0.95.
	 * <p>
	 * Note: the amount of probed bytes is the file's sequence contents, up to an implementation, system-dependent 
	 * value.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than 0.95. In this case, matching characters are A, C, D, E, F, 
	 * 			G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isProteinFile()
	throws FastaReaderNotParsedException, IllegalArgumentException, FileNotFoundException, IOException
	{
		// Default acceptance threshold: 95% of the probed characters must match
		final double defaultThreshold = 0.95;
		return isProteinFile(defaultThreshold);
	}
	
	/**
	 * 
	 * @return
	 */
	public Charset charset()
	{
		// The charset this reader uses to decode the file
		return this.fileCharset;
	}
	
	/**
	 * Returns the index of the <code>FastaRecord</code> in this file which will be returned by a call to 
	 * <code>nextSequence()</code>.
	 * 
	 * @return	A value between 0 and <code>getFastaRecords().size()</code>
	 * 
	 * @since	1.0rc2
	 */
	public int getPosition()
	{
		// Current value of the reading index
		return this.position;
	}
	
	/**
	 * Set the reading index to the specified value.
	 * 
	 * @param	position	New position of the reading index.
	 * 
	 * @throws	IllegalArgumentException	if <code>position</code> is negative or greater than the number of records 
	 * 			in the file associated to this <code>FastaReader</code>.
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc2	
	 */
	public void setPosition(int position)
	throws IllegalArgumentException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		// Valid targets run from 0 up to and including the number of records (the "past the end" position)
		boolean withinBounds = position >= 0 && position <= fastaRecords.size();
		if (!withinBounds)
		{
			throw new IllegalArgumentException(
					position + ": position must be a value between 0 and the number of FASTA records");
		}
		
		this.position = position;
	}
	
	/**
	 *  
	 * @return
	 * 
	 * @param	index
	 * @param	removeGaps
	 * @throws SequenceTooLongException 
	 * @throws FileNotFoundException 
	 * 
	 * @throws	IndexOutOfBoundsException
	 * @throws	IllegalArgumentException
	 * @throws	IOException
	 * @throws InvalidSequenceCharacterException 
	 * @throws FastaReaderNotParsedException 
	 * @throws RecordTooLongException 
	 * 
	 *  @since	1.0rc2
	 */
	public BioSequence getSequence(int index)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException, 
		   FastaReaderNotParsedException
	{
		// Records only exist once the file has been parsed
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		// Only indices of existing records are accepted
		if (index < 0 || index >= fastaRecords.size())
		{
			throw new IndexOutOfBoundsException(
					index + ": index must be a positive value smaller than fastaRecords.size()");
		}
		
		return fastaRecords.get(index).retrieveBioSequence();
	}

	/**
	 * 
	 * 
	 * @return
	 * 
	 * @param	index
	 * @param	removeGaps
	 * @throws SequenceTooLongException 
	 * @throws InvalidSequenceCharacterException 
	 * @throws FileNotFoundException 
	 * 
	 * @throws	IndexOutOfBoundsException
	 * @throws	IllegalArgumentException
	 * @throws	IOException
	 * @throws FastaReaderNotParsedException 
	 * @throws RecordTooLongException 
	 * 
	 *  @since	1.0rc2
	 */
	public DNASequence getDnaSequence(int index)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException, FastaReaderNotParsedException
	{
		// Records only exist once the file has been parsed
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		// Only indices of existing records are accepted
		if (index < 0 || index >= fastaRecords.size())
		{
			throw new IndexOutOfBoundsException(
					index + ": index must be a positive value smaller than fastaRecords.size()");
		}
		
		DNASequence dnaSequence = fastaRecords.get(index).retrieveDnaSequence(true);
		return dnaSequence;
	}

	/**
	 * 
	 * 
	 * @return
	 * 
	 * @param	index
	 * @param	removeGaps
	 * @throws SequenceTooLongException 
	 * @throws InvalidSequenceCharacterException 
	 * @throws FileNotFoundException 
	 * 
	 * @throws	IndexOutOfBoundsException
	 * @throws	IllegalArgumentException
	 * @throws	IOException
	 * @throws FastaReaderNotParsedException 
	 * @throws RecordTooLongException 
	 * 
	 *  @since	1.0rc2
	 */
	public RNASequence getRnaSequence(int index)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException,
	FastaReaderNotParsedException
	{
		// Records only exist once the file has been parsed
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		// Only indices of existing records are accepted
		if (index < 0 || index >= fastaRecords.size())
		{
			throw new IndexOutOfBoundsException(
					index + ": index must be a positive value smaller than fastaRecords.size()");
		}
		
		RNASequence rnaSequence = fastaRecords.get(index).retrieveRnaSequence(true);
		return rnaSequence;
	}

	/**
	 * 
	 * 
	 * @return
	 * 
	 * @param	index
	 * @param	removeGaps
	 * @throws InvalidSequenceCharacterException 
	 * @throws FileNotFoundException 
	 * 
	 * @throws	IndexOutOfBoundsException
	 * @throws	IllegalArgumentException
	 * @throws	IOException
	 * @throws FastaReaderNotParsedException 
	 * @throws RecordTooLongException 
	 * 
	 *  @since	1.0rc2
	 */
	public ProteinSequence getProteinSequence(int index)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, FastaReaderNotParsedException
	{
		// Records only exist once the file has been parsed
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		// Only indices of existing records are accepted
		if (index < 0 || index >= fastaRecords.size())
		{
			throw new IndexOutOfBoundsException(
					index + ": index must be a positive value smaller than fastaRecords.size()");
		}

		ProteinSequence proteinSequence = fastaRecords.get(index).retrieveProteinSequence(true);
		return proteinSequence;
	}
	
	/**
	 * 
	 * @param index
	 * @return
	 * @throws FastaReaderNotParsedException 
	 * @throws IOException 
	 * @throws InvalidSequenceCharacterException 
	 * @throws SequenceTooLongException 
	 * @throws FileNotFoundException 
	 *
	 * @since	x.y.z
	 */
	/*
	public DnaContig getDnaContig(int index)
	throws FastaReaderNotParsedException, FileNotFoundException, SequenceTooLongException, 
		   InvalidSequenceCharacterException, IOException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		if (index < 0 || index > fastaRecords.size() - 1)
		{
			throw new IndexOutOfBoundsException(
					index + ": index must be a positive value smaller than fastaRecords.size()");
		}

		return new DnaContig(fastaRecords.get(index).contigProvider);
	}
	*/

	/**
	 * 
	 * @param forceBuffering
	 * @throws FileNotFoundException, IOException 
	 * @throws IOException
	 *
	 * @since	x.y.z
	 */
	public void loadDescriptionsIntoRecords(boolean force) throws FileNotFoundException, IOException
	{
		// Reads the description line of every record from the file and stores it in the record's headerLineSB.
		// Records which already hold a description are skipped unless force is true.
		
		// Allocate before opening the file, so that a failed allocation cannot leak the channel
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		// Charset instances are compared with equals(); identity is not guaranteed
		Charset utf8 = Charset.forName("UTF-8");
		FileChannel inFC = new FileInputStream(this.filePath).getChannel();	// FileNotFoundException
		try
		{
			for (FASTAFileRecord fastaRecord : fastaRecords)
			{
				if (!force && fastaRecord.headerLineSB != null)
				{
					continue;
				}
				
				// To keep track of the offset within the file
				long fileByteOffset = fastaRecord.descriptionByteOffset;
				long descriptionEnd = fastaRecord.descriptionByteOffset + fastaRecord.descriptionBytes;

				StringBuilder descriptionSB = new StringBuilder();
				// Start every record with an empty buffer, whatever state the previous record left it in
				bBuffer.clear();
				inFC.position(fastaRecord.descriptionByteOffset);	// IOException
				
				// Keep reading until the whole description has been consumed. The offset is tested first so no 
				// read is issued once the description is complete; descriptions longer than one buffer are no 
				// longer cut short.
				while (fileByteOffset < descriptionEnd && inFC.read(bBuffer) != -1)
				{
					bBuffer.flip();
					// If not EOF and encoding is UTF-8...				// v IOException v
					if (utf8.equals(fileCharset) && inFC.size() - inFC.position() > 0)
					{
						// ... maybe the buffer ends at an incomplete multiple byte character 
						inFC.position(inFC.position() - UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
					}
					CharBuffer cBuffer = fileCharset.decode(bBuffer);
					bBuffer.clear();
					
					// Nothing could be decoded: stop rather than loop without making progress
					if (!cBuffer.hasRemaining())
					{
						break;
					}

					while (cBuffer.hasRemaining() && fileByteOffset < descriptionEnd)
					{
						char c = cBuffer.get();
						descriptionSB.append(c);
						// Advance the offset by the encoded size of the character just consumed
						fileByteOffset += fileCharset.encode(String.valueOf(c)).limit();
					}
				}
				
				// Drop the leading '>' marker; an empty description has nothing to strip
				if (descriptionSB.length() > 0 && descriptionSB.charAt(0) == '>')
				{
					descriptionSB.deleteCharAt(0);
				}
				StringBuilderTrimmer.trim(descriptionSB, fileLineSeparatorFormat);
				fastaRecord.headerLineSB = descriptionSB;
			}
		}
		finally
		{
			inFC.close();
		}
	}

	/**
	 * Returns the next sequence, or <code>null</code> if there are no more
	 * sequences.
	 * 
	 * @param	removeGaps
	 * @throws SequenceTooLongException 
	 * @throws InvalidSequenceCharacterException 
	 * @throws FileNotFoundException 
	 * 
	 * @throws	IllegalArgumentException 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * @throws RecordTooLongException 
	 * 
	 * @since	1.0rc2
	 */
	/*
	public BioSequence nextSequence(boolean removeGaps)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		while (position < fastaRecords.size())
		{
			position++;
			
			BioSequence bs = fastaRecords.get(position - 1).loadSequence();
			bs.setDescription(fastaRecords.get(position - 1).loadDescription());
			return bs;
		}
		return null;
	}
	*/

	/**
	 * Retrieves the record at the reader's current position as a DNA sequence
	 * and advances the position by one record.
	 * <p>
	 * NOTE(review): <code>removeGaps</code> is not forwarded to the record; a
	 * literal <code>true</code> is always passed. Confirm whether that is
	 * intended.
	 * 
	 * @param	removeGaps	not read by this method
	 * 
	 * @return	the DNA sequence retrieved from the record at the current
	 * 			position, or <code>null</code> if the position is already past
	 * 			the last record
	 * 
	 * @throws	FileNotFoundException				propagated from the record retrieval
	 * @throws	IOException							propagated from the record retrieval
	 * @throws	InvalidSequenceCharacterException	propagated from the record retrieval
	 * @throws	SequenceTooLongException			propagated from the record retrieval
	 * @throws	FastaReaderNotParsedException		if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public DNASequence nextDnaSequence(boolean removeGaps)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException,
		   FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		if (position >= fastaRecords.size())
		{
			return null;
		}
		
		// The position is advanced before retrieving, so it moves on even if the retrieval throws
		int current = position++;
		return fastaRecords.get(current).retrieveDnaSequence(true);
	}

	/**
	 * Retrieves the record at the reader's current position as an RNA sequence
	 * and advances the position by one record.
	 * <p>
	 * NOTE(review): <code>removeGaps</code> is not forwarded to the record; a
	 * literal <code>true</code> is always passed. Confirm whether that is
	 * intended.
	 * 
	 * @param	removeGaps	not read by this method
	 * 
	 * @return	the RNA sequence retrieved from the record at the current
	 * 			position, or <code>null</code> if the position is already past
	 * 			the last record
	 * 
	 * @throws	FileNotFoundException				propagated from the record retrieval
	 * @throws	IOException							propagated from the record retrieval
	 * @throws	InvalidSequenceCharacterException	propagated from the record retrieval
	 * @throws	SequenceTooLongException			propagated from the record retrieval
	 * @throws	FastaReaderNotParsedException		if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public RNASequence nextRnaSequence(boolean removeGaps)
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException,
		   FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		if (position >= fastaRecords.size())
		{
			return null;
		}
		
		// The position is advanced before retrieving, so it moves on even if the retrieval throws
		int current = position++;
		return fastaRecords.get(current).retrieveRnaSequence(true);
	}

	/**
	 * Retrieves the record at the reader's current position as a protein
	 * sequence and advances the position by one record. The record is always
	 * asked with a literal <code>true</code> argument.
	 * 
	 * @return	the protein sequence retrieved from the record at the current
	 * 			position, or <code>null</code> if the position is already past
	 * 			the last record
	 * 
	 * @throws	FileNotFoundException				propagated from the record retrieval
	 * @throws	IOException							propagated from the record retrieval
	 * @throws	InvalidSequenceCharacterException	propagated from the record retrieval
	 * @throws	SequenceTooLongException			propagated from the record retrieval
	 * @throws	FastaReaderNotParsedException		if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public ProteinSequence nextProteinSequence()
	throws FileNotFoundException, IOException, InvalidSequenceCharacterException, SequenceTooLongException,
		   FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		if (position >= fastaRecords.size())
		{
			return null;
		}
		
		// The position is advanced before retrieving, so it moves on even if the retrieval throws
		int current = position++;
		return fastaRecords.get(current).retrieveProteinSequence(true);
	}
	
	/*
	public DnaContig nextDnaContig()
	throws FastaReaderNotParsedException, FileNotFoundException, SequenceTooLongException,
		   InvalidSequenceCharacterException, IOException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		while (position < fastaRecords.size())
		{
			position++;
			return new DnaContig(fastaRecords.get(position - 1).contigProvider);
		}
		return null;
	}
	*/
	
	/**
	 * Gives access to the records of the parsed file.
	 * 
	 * @return	the record list held by this reader (the list itself, not a
	 * 			copy)
	 * 
	 * @throws	FastaReaderNotParsedException	if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public List<FASTAFileRecord> getFastaRecords() throws FastaReaderNotParsedException
	{
		if (fileParsed)
		{
			return fastaRecords;
		}
		
		throw new FastaReaderNotParsedException("File has not been successfully parsed.");
	}
	
	/**
	 * Scans every record and collects those whose description is equal to the
	 * given one.
	 * <p>
	 * For each record the description is taken from its cached header line,
	 * unless <code>forceRetrieve</code> is set or no header line is cached, in
	 * which case it is obtained through <code>retrieveDescriptionSB()</code>.
	 * The description is resolved only once per record.
	 * 
	 * @param	description		description to look for; a <code>null</code>
	 * 							value matches no record
	 * @param	caseInsensitive	if <code>true</code>, compare with 
	 * 							<code>equalsIgnoreCase</code>; otherwise with
	 * 							<code>equals</code>
	 * @param	forceRetrieve	if <code>true</code>, always retrieve the
	 * 							description instead of using the cached header
	 * 							line
	 * 
	 * @return	the matching records, in the order in which they appear in the
	 * 			reader's record list; empty if none match
	 * 
	 * @throws	FileNotFoundException			propagated from the description retrieval
	 * @throws	IOException						propagated from the description retrieval
	 * @throws	FastaReaderNotParsedException	if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public List<FASTAFileRecord> findRecords(String description, boolean caseInsensitive, boolean forceRetrieve)
	throws FileNotFoundException, IOException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		List<FASTAFileRecord> matchingRecords = new ArrayList<FASTAFileRecord>();
		for (FASTAFileRecord fastaRecord : fastaRecords)
		{
			// Resolve the header exactly once; a second retrieval for the same record is wasted work
			boolean mustRetrieve = forceRetrieve || fastaRecord.headerLineSB == null;
			String header = mustRetrieve
					? fastaRecord.retrieveDescriptionSB().toString()
					: fastaRecord.headerLineSB.toString();
			
			// equals() implies equalsIgnoreCase(), so one comparison per mode is enough
			boolean matches = caseInsensitive ? header.equalsIgnoreCase(description) : header.equals(description);
			if (matches)
			{
				matchingRecords.add(fastaRecord);
			}
		}
		return matchingRecords;
	}
	
	/**
	 * Convenience overload: case-sensitive search that does not force the
	 * descriptions to be retrieved again.
	 * 
	 * @param	description	description to look for
	 * 
	 * @return	the result of 
	 * 			<code>findRecords(description, false, false)</code>
	 * 
	 * @throws	FileNotFoundException			propagated from the delegate
	 * @throws	IOException						propagated from the delegate
	 * @throws	FastaReaderNotParsedException	propagated from the delegate
	 */
	public List<FASTAFileRecord> findRecords(String description)
	throws FileNotFoundException, IOException, FastaReaderNotParsedException
	{
		final boolean caseInsensitive = false;
		final boolean forceRetrieve = false;
		
		return findRecords(description, caseInsensitive, forceRetrieve);
	}
	
	/**
	 * Finds the records whose description equals the given one by sorting the
	 * reader's record list by description and running a binary search on it.
	 * <p>
	 * The record list is sorted in place. It is only sorted again with 
	 * <code>FASTAFileRecord.sequenceByteOffsetComparator</code> before
	 * returning when <code>resortByOffset</code> is <code>true</code>;
	 * otherwise it is left in description order.
	 * 
	 * @param	description		description to look for
	 * @param	caseInsensitive	selects the case-insensitive description 
	 * 							comparator and <code>equalsIgnoreCase</code>
	 * 							instead of the case-sensitive comparator and
	 * 							<code>equals</code>
	 * @param	resortByOffset	whether to sort the record list with the
	 * 							sequence byte offset comparator once the search
	 * 							is done
	 * 
	 * @return	the matching records: first the one located by the binary
	 * 			search, then its equal neighbours towards the start of the
	 * 			sorted list, then its equal neighbours towards the end; empty
	 * 			if the search finds nothing
	 * 
	 * @throws	FileNotFoundException			propagated from loading the descriptions
	 * @throws	IOException						propagated from loading the descriptions
	 * @throws	FastaReaderNotParsedException	if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public List<FASTAFileRecord> findSortedRecords
	(String description, boolean caseInsensitive, boolean resortByOffset)
	throws FileNotFoundException, IOException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		Comparator<FASTAFileRecord> order;
		if (caseInsensitive)
		{
			order = FASTAFileRecord.descriptionComparatorCaseInsensitive;
		}
		else
		{
			order = FASTAFileRecord.descriptionComparator;
		}
		
		// Every record needs its description loaded before the list can be ordered by it
		loadDescriptionsIntoRecords(false);
		Collections.sort(fastaRecords, order);
		
		// Probe record that only carries the description being searched for
		FASTAFileRecord probe = new FASTAFileRecord(null, null, null, 0, 0, 0, 0, null, 0);
		probe.headerLineSB = new StringBuilder(description);
		
		List<FASTAFileRecord> hits = new ArrayList<FASTAFileRecord>();
		int hitIndex = Collections.binarySearch(fastaRecords, probe, order);
		
		if (hitIndex >= 0)
		{
			hits.add(fastaRecords.get(hitIndex));
			
			// The binary search may land anywhere inside a run of equal descriptions: walk towards the start...
			int i = hitIndex - 1;
			while (i >= 0)
			{
				FASTAFileRecord candidate = fastaRecords.get(i);
				String header = candidate.headerLineSB.toString();
				boolean same = caseInsensitive ? header.equalsIgnoreCase(description) : header.equals(description);
				if (!same)
				{
					break;
				}
				hits.add(candidate);
				i--;
			}
			
			// ... and then towards the end
			i = hitIndex + 1;
			while (i < fastaRecords.size())
			{
				FASTAFileRecord candidate = fastaRecords.get(i);
				String header = candidate.headerLineSB.toString();
				boolean same = caseInsensitive ? header.equalsIgnoreCase(description) : header.equals(description);
				if (!same)
				{
					break;
				}
				hits.add(candidate);
				i++;
			}
		}
		
		if (resortByOffset)
		{
			Collections.sort(fastaRecords, FASTAFileRecord.sequenceByteOffsetComparator);
		}
		
		return hits;
	}
	
	/**
	 * Runs <code>findRecords(String, boolean, boolean)</code> once for each of
	 * the given descriptions and gathers the results in a map.
	 * 
	 * @param	descriptions	descriptions to look for
	 * @param	caseInsensitive	passed on unchanged to each single search
	 * @param	forceRetrieve	passed on unchanged to each single search
	 * 
	 * @return	a map from each description to the list of records found for
	 * 			it; a description that appears more than once keeps the result
	 * 			of its last search
	 * 
	 * @throws	FileNotFoundException			propagated from the single searches
	 * @throws	IOException						propagated from the single searches
	 * @throws	FastaReaderNotParsedException	if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public Map<String, List<FASTAFileRecord>> findRecords
	(List<String> descriptions, boolean caseInsensitive, boolean forceRetrieve)
	throws FileNotFoundException, IOException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		Map<String, List<FASTAFileRecord>> recordsByDescription = new HashMap<String, List<FASTAFileRecord>>();
		for (String wanted : descriptions)
		{
			List<FASTAFileRecord> found = findRecords(wanted, caseInsensitive, forceRetrieve);
			recordsByDescription.put(wanted, found);
		}
		
		return recordsByDescription;
	}
	
	/**
	 * Convenience overload: case-sensitive search for several descriptions
	 * that does not force the descriptions to be retrieved again.
	 * 
	 * @param	descriptions	descriptions to look for
	 * 
	 * @return	the result of 
	 * 			<code>findRecords(descriptions, false, false)</code>
	 * 
	 * @throws	FileNotFoundException			propagated from the delegate
	 * @throws	IOException						propagated from the delegate
	 * @throws	FastaReaderNotParsedException	propagated from the delegate
	 */
	public Map<String, List<FASTAFileRecord>> findRecords(List<String> descriptions)
	throws FileNotFoundException, IOException, FastaReaderNotParsedException
	{
		final boolean caseInsensitive = false;
		final boolean forceRetrieve = false;
		
		return findRecords(descriptions, caseInsensitive, forceRetrieve);
	}
	
	/**
	 * Scans every record and collects those whose whole description matches
	 * the given regular expression.
	 * <p>
	 * For each record the description is taken from its cached header line,
	 * unless <code>forceRetrieve</code> is set or no header line is cached, in
	 * which case it is obtained through <code>retrieveDescriptionSB()</code>.
	 * The regular expression is compiled once for the whole scan.
	 * 
	 * @param	descriptionRegex	regular expression that the complete
	 * 								description must match
	 * @param	forceRetrieve		if <code>true</code>, always retrieve the
	 * 								description instead of using the cached
	 * 								header line
	 * 
	 * @return	the matching records, in the order in which they appear in the
	 * 			reader's record list; empty if none match
	 * 
	 * @throws	IOException						propagated from the description retrieval
	 * @throws	FastaReaderNotParsedException	if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public List<FASTAFileRecord> matchRecords(String descriptionRegex, boolean forceRetrieve) 
	throws IOException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		List<FASTAFileRecord> matchingRecords = new ArrayList<FASTAFileRecord>();
		if (fastaRecords.isEmpty())
		{
			// Nothing to test. Returning here keeps the regex from being compiled (and validated) when there
			// are no records, exactly as when it was only evaluated inside the loop.
			return matchingRecords;
		}
		
		// String.matches() recompiles the expression on every call; compile it once for all records.
		// Fully qualified because java.util.regex is not among this file's imports.
		java.util.regex.Pattern descriptionPattern = java.util.regex.Pattern.compile(descriptionRegex);
		
		for (FASTAFileRecord fastaRecord : fastaRecords)
		{
			boolean mustRetrieve = forceRetrieve || fastaRecord.headerLineSB == null;
			String header = mustRetrieve
					? fastaRecord.retrieveDescriptionSB().toString()
					: fastaRecord.headerLineSB.toString();
			
			if (descriptionPattern.matcher(header).matches())
			{
				matchingRecords.add(fastaRecord);
			}
		}
		return matchingRecords;
	}
	
	/**
	 * Convenience overload: regular expression search that does not force the
	 * descriptions to be retrieved again.
	 * 
	 * @param	descriptionRegex	regular expression to match descriptions
	 * 								against
	 * 
	 * @return	the result of <code>matchRecords(descriptionRegex, false)</code>
	 * 
	 * @throws	IOException						propagated from the delegate
	 * @throws	FastaReaderNotParsedException	propagated from the delegate
	 */
	public List<FASTAFileRecord> matchRecords(String descriptionRegex) 
	throws IOException, FastaReaderNotParsedException
	{
		final boolean forceRetrieve = false;
		
		return matchRecords(descriptionRegex, forceRetrieve);
	}
	
	/**
	 * Runs <code>matchRecords(String, boolean)</code> once for each of the
	 * given regular expressions and gathers the results in a map.
	 * 
	 * @param	descriptionRegexes	regular expressions to match descriptions
	 * 								against
	 * @param	forceRetrieve		passed on unchanged to each single search
	 * 
	 * @return	a map from each regular expression to the list of records found
	 * 			for it; an expression that appears more than once keeps the
	 * 			result of its last search
	 * 
	 * @throws	IOException						propagated from the single searches
	 * @throws	FastaReaderNotParsedException	if the file has not been successfully parsed
	 * 
	 * @since	1.0rc2
	 */
	public Map<String, List<FASTAFileRecord>> matchRecords(List<String> descriptionRegexes, boolean forceRetrieve)
	throws IOException, FastaReaderNotParsedException
	{
		if (!fileParsed)
		{
			throw new FastaReaderNotParsedException("File has not been successfully parsed.");
		}
		
		Map<String, List<FASTAFileRecord>> recordsByRegex = new HashMap<String, List<FASTAFileRecord>>();
		for (String regex : descriptionRegexes)
		{
			List<FASTAFileRecord> found = matchRecords(regex, forceRetrieve);
			recordsByRegex.put(regex, found);
		}
		
		return recordsByRegex;
	}
	
	/**
	 * Convenience overload: regular expression search for several expressions
	 * that does not force the descriptions to be retrieved again.
	 * 
	 * @param	descriptionRegexes	regular expressions to match descriptions
	 * 								against
	 * 
	 * @return	the result of 
	 * 			<code>matchRecords(descriptionRegexes, false)</code>
	 * 
	 * @throws	IOException						propagated from the delegate
	 * @throws	FastaReaderNotParsedException	propagated from the delegate
	 */
	public Map<String, List<FASTAFileRecord>> matchRecords(List<String> descriptionRegexes)
	throws IOException, FastaReaderNotParsedException
	{
		final boolean forceRetrieve = false;
		
		return matchRecords(descriptionRegexes, forceRetrieve);
	}
	
	/**
	 * Placeholder for adding an empty sequence record; not implemented yet.
	 * The argument is not read and nothing is modified.
	 * 
	 * @param	newSeqName	name intended for the new sequence (currently 
	 * 						ignored)
	 * 
	 * @return	always <code>null</code> in the current implementation
	 */
	public FASTAFileRecord addEmptySeq(String newSeqName) {
		// TODO Auto-generated method stub
		// Intended behaviour: add a new line to the file and save it,
		// then adjust the parameters for the new record
		
		
		return null;
	}
	
	
	
	/**
	 * Ad-hoc parse benchmark: parses one FASTA file 20 times with a fresh
	 * reader each time, printing the time of every parse and, at the end, the
	 * average over all runs.
	 * <p>
	 * A failed parse has its stack trace printed and the benchmark moves on to
	 * the next run. If the thread is interrupted during the pause between runs
	 * the interrupt flag is restored and the benchmark stops without printing
	 * the average.
	 * 
	 * @param	varg	optional; when a first argument is present it is used as
	 * 					the path of the file to parse, otherwise the built-in
	 * 					default path is used
	 */
	public static void main(String[] varg ) {
		final String defaultFile = "/home/ahmed/gpro_data/probisearch/5MYC1-DNA2_N0867_Assembly.fa";
		final int runs = 20;
		
		// Allow benchmarking another file without editing the source
		String file = (varg != null && varg.length > 0) ? varg[0] : defaultFile;
		File sequenceFile = new File(file);
		NullProgressMonitor progressMonitor = new NullProgressMonitor();
		
		long time = 0;
		for (int j = 0; j < runs; j++) {
			FASTAReader fastaReader = new FASTAReader(sequenceFile, Charset.defaultCharset());
			try {
				long startMillis = System.currentTimeMillis();
				fastaReader.parse(true, progressMonitor);
				long endMillis = System.currentTimeMillis();
				
				long diffMillis = endMillis - startMillis;
				time += diffMillis;
				System.out.println("Parse Time (" + (FASTAReader.I_byteBufferSize/(1024)) + "kB) "+  (diffMillis) + "ms");
			} catch (OperationCanceledException | IOException e) {
				// Report the failed run and carry on with the next one
				e.printStackTrace();
			}
			
			// Drop the reader and give the collector a chance to run before the next measurement
			fastaReader = null;
			System.gc();
			try {
				Thread.sleep(10);
			} catch (InterruptedException e) {
				// Do not swallow the interruption: restore the flag and stop benchmarking
				Thread.currentThread().interrupt();
				return;
			}
		}
		System.out.println("Parse Time (" + (FASTAReader.I_byteBufferSize/(1024*1024)) + "MB) "+  (time/runs) + "ms");
	}
	
}
