/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-09-01
 * 
 * @license		See <a href="http://www.biotechvana.com></a>
 *
 * @copyright	Copyright Biotech Vana, S.L. 2006-2010
 */

package com.biotechvana.javabiotoolkit.io;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.SortedMap;
import java.util.TreeMap;

import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.NullProgressMonitor;
import org.eclipse.core.runtime.OperationCanceledException;

import com.biotechvana.javabiotoolkit.AminoAcid;
import com.biotechvana.javabiotoolkit.BioSequence;
import com.biotechvana.javabiotoolkit.DNABase;
import com.biotechvana.javabiotoolkit.DNASequence;
import com.biotechvana.javabiotoolkit.NucleotideSequenceDirectionality;
import com.biotechvana.javabiotoolkit.PeptideSequenceDirectionality;
import com.biotechvana.javabiotoolkit.ProteinSequence;
import com.biotechvana.javabiotoolkit.RNABase;
import com.biotechvana.javabiotoolkit.RNASequence;
import com.biotechvana.javabiotoolkit.exceptions.FastaReaderNotParsedException;
import com.biotechvana.javabiotoolkit.exceptions.InvalidSequenceCharacterException;
import com.biotechvana.javabiotoolkit.exceptions.SequenceTooLongException;
import com.biotechvana.javabiotoolkit.text.LineSeparatorFormat;
import com.biotechvana.javabiotoolkit.text.StringBuilderTrimmer;
import com.biotechvana.javabiotoolkit.text.UTF8BufferTrimmer;
import com.biotechvana.javabiotoolkit.utils.FASTAFileRecordNaturalOrderComparator;

/**
 * Instances of this class are reading parsers associated to plain text files with one or more biological sequences in 
 * FASTA format. A description of the format can be found at 
 * <a href="http://www.ncbi.nlm.nih.gov/blast/fasta.shtml">National Center for Biotechnology Information (NCBI) </a>.
 * <p>
 * Typically, a <code>FastaReader</code> is used in three steps:
 * <ol>
 * <li>Creation of the object, which scans the file for FASTA headers</li>
 * <li>Determine the type of sequences stored the file</li>
 * <li>Ask the reader for <code>BioSequence</code> objects</li>
 * </ol>
 * Please note that the second step detects the type of sequences in the file based on its contents, thus ignoring 
 * certain file extensions such as <code>.fna</code>, <code>.ffn</code>, <code>.faa</code> and <code>.frn</code>.
 * <p>
 * Additionally, the file type detection is performed by a parameterized random sampling of different areas of the 
 * file which account for a total proportion of the sequence blocks (e.g. 1% of the sequence contents, in pieces of 
 * 128KB, chosen at random positions in the file). This is only an estimation for the client programmer, so that even 
 * if a file contains mainly DNA sequences and the next sequence is asked for using <code>nextSequence()</code>, and 
 * it happens to be a protein sequence, an instance of <code>ProteinSequence</code> will be returned. It is up to the 
 * client to decide whether mixed sequences are contemplated (forcing the reader to return a certain type of sequence).
 * <p>
 * Portions of files which do not conform to the FASTA format standard are ignored. Text may therefore be interspersed 
 * in the file, but sequences will be parsed correctly, nonetheless. There even could be headers with no sequence, 
 * which are considered to be FASTA headers with empty sequences.
 * 
 * @version	1.2
 * 
 * @author	Alfonso Muñoz-Pomer Fuentes, <a href="http://www.biotechvana.com">Biotechvana</a>
 * 
 * @see	DNASequence
 * @see	RNASequence
 * @see	ProteinSequence
 *
 */
public class FASTAReader2
{
	
	
	static public  int I_byteBufferSize = 2*1024*1024; // 20*512KB = 20M
	private static final NumberFormat numberFormat = NumberFormat.getInstance();
	
	/**
	 * 
	 * @param retrieveDescriptionSB
	 * @return
	 */
	

	


	
	private int sortMethod = FASTAReader.SORT_NO;


	private long invalidCharacters;
	
	
	
	FASTAFileHandler fileHandler;
	
	
	/**
	 * Constructor that 
	 * 
	 * @param filePath
	 * @param bufferedSequences
	 * @param fileCharset
	 * @param bufferSize
	 * 
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * 
	 * @since	0.1
	 */
	public FASTAReader2 (FASTAFileHandler fileHandler)
	{
		this. fileHandler = fileHandler;
		
//		this.fileCharset = fileHandler.getFileCharset();
//		this.fileLineSeparatorFormat = fileHandler.getFileLineSeparatorFormat();
		this.invalidCharacters = 0;
//		this.ignoreBlankLines = fileHandler.getIgnoreBlankLines();
	}
	


	

	

	
	/**
	 * Parse a UTF-8 encoded FASTA file and scan it for FASTA records. This method loads only the header into the 
	 * calling <code>FastaReader</code>. Use for very large files. Afterwards, you may use 
	 * <code>FastaRecord</code>&rsquo;s <code>readSequenceFromFile</code> to load the sequence on demand.
	 * <p> 
	 * Note: invalid characters in the sequence blocks are discarded.
	 * 
	 * @param	bufferSize	size in bytes of the buffer used to read the file.
	 * 
  	 * @return	number of invalid characters discarded from sequence blocks.
	 * 
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * 
	 * @since	0.8
	 */
	public long parse() throws FileNotFoundException, IOException
	{
		
		
		Charset fileCharset;
		LineSeparatorFormat fileLineSeparatorFormat;
		boolean ignoreBlankLines;
		
		fileCharset = fileHandler.getFileCharset();
		fileLineSeparatorFormat = fileHandler.getFileLineSeparatorFormat();
		ignoreBlankLines = fileHandler.getIgnoreBlankLines();
		File filePath = fileHandler.getFilePath();
		
		FastaRecordCollection fastaRecords  = new FastaRecordCollection();
		
		
		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel();	// FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		
		// To keep track of the offset within the file
		long fileByteOffset = 0;
		long cBytes = 0;
		//long fileCharOffset = 0;
		// To keep track of the description block 
		long descriptionByteOffset = 0;
		long descriptionBytes = 0;
		// To keep track of the sequence block
		long sequenceByteOffset = 0;
		long sequenceBytes = 0;
		SortedMap<Long, Long> componentsByteOffsets = null;
		// A few variables to keep track of each parsing stage
		boolean lineStart = true;
		boolean parseDescription = false;
		boolean parseSequence = false;
		boolean hasParsedRecord = false;
		char c = '\n';
		char previousChar;
		FASTAFileRecordNaturalOrderComparator naturalComparator = new FASTAFileRecordNaturalOrderComparator();
		
		long sequenceLength = 0;
		StringBuilder descLineCache = new StringBuilder();
		try
		{
			invalidCharacters = 0;
			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1)
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (fileCharset == Charset.forName("UTF-8") && inFC.size() - inFC.position() > 0)	// IOException
				{
					// ... maybe the buffer ends at an incomplete muliple byte character 
					inFC.position(inFC.position() - UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();
				
				while(cBuffer.hasRemaining())
				{
					previousChar = c;
					c = cBuffer.get();
					cBytes = fileCharset.encode(String.valueOf(c)).limit();
					
					// TODO Maybe stick to fileLineSeparator (?)
					// lineStart is "true" if c is the first character in a line
					if (previousChar == '\n' ||					// Windows, Linux, Mac OS X
						(previousChar == '\r' && c != '\n'))	// Mac OS Classic
					{
						lineStart = true;
					}
					else
					{
						lineStart = false;
					}
					
					// New line after description initiates sequence parse
					if (lineStart && parseDescription)
					{
						parseDescription = false;
						parseSequence = true;
						sequenceByteOffset = fileByteOffset;
					}
					
					// Do we ignore blank lines or not?
					if (lineStart && (c == '\r' || c == '\n'))
					{
						if (!ignoreBlankLines)
						{
							parseDescription = false;
							parseSequence = false;
						}
					}
					// Start of new FASTA record
					else if (lineStart && c == '>')
					{
						if (hasParsedRecord)
						{
							componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
							
							FASTAFileRecord rec = new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
									descriptionByteOffset, descriptionBytes,
									sequenceByteOffset, sequenceBytes, componentsByteOffsets,
									sequenceLength,descLineCache);
									
							if (sortMethod == FASTAReader.SORT_NATURAL) {
								// Ordered by natural sort of sequence names
								int pos = Collections.binarySearch(fastaRecords, rec, naturalComparator);
								if (pos < 0) {
									fastaRecords.add((-pos)-1, rec);
								}
								else {
									fastaRecords.add(pos, rec);
								}
							}
							else if (sortMethod == FASTAReader.SORT_REGULAR) {
								// Ordered straight
								int pos = Collections.binarySearch(fastaRecords, rec, new Comparator<FASTAFileRecord>() {
									@Override
									public int compare(FASTAFileRecord o1, FASTAFileRecord o2) {
										return o1.getDescriptionSB().toString().compareTo(o2.getDescriptionSB().toString());
									}
								});
								if (pos < 0) {
									fastaRecords.add((-pos)-1, rec);
								}
								else {
									fastaRecords.add(pos, rec);
								}
							}
							else {
								// Ordered like input file
								fastaRecords.add(rec);
							}
							
							descLineCache = new StringBuilder();
						}
						hasParsedRecord = true;
						descriptionByteOffset = fileByteOffset;
						descriptionBytes = 0;
						sequenceBytes = 0;
						sequenceLength = 0;
						componentsByteOffsets = new TreeMap<Long, Long>();
						
						parseSequence = false;
						parseDescription = true;
					}
										
					// Get the rest of the description until new line
					if (parseDescription)
					{	
						descriptionBytes += cBytes;
						descLineCache.append(c);
					}
					else if (parseSequence)
					{	// Get the rest of the sequence and discard invalid characters
						sequenceBytes += cBytes;
						if (c != '\n' && c != '\r')
						{
							if (AminoAcid.valueOf(c) != null)
							{
								if (sequenceLength % IContigProvider.I_componentLength == 0)
								{
									componentsByteOffsets.put(sequenceLength, fileByteOffset);
								}
								sequenceLength++;
							}
							else
							{
								invalidCharacters++;
							}
						}
					}
					fileByteOffset += cBytes;
					//fileCharOffset++;
				}
			}
			// Add sequence that reaches EOF
			if (hasParsedRecord)
			{
				componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
				FASTAFileRecord rec = new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
						descriptionByteOffset, descriptionBytes,
						sequenceByteOffset, sequenceBytes, componentsByteOffsets,
						sequenceLength);
				// TODO move this to the collection implementaion
				if (sortMethod == FASTAReader.SORT_NATURAL) {
					// Ordered by natural sort of sequence names
					int pos = Collections.binarySearch(fastaRecords, rec, naturalComparator);
					if (pos < 0) {
						fastaRecords.add((-pos)-1, rec);
					}
					else {
						fastaRecords.add(pos, rec);
					}
				}
				else if (sortMethod == FASTAReader.SORT_REGULAR) {
					// Ordered straight
					int pos = Collections.binarySearch(fastaRecords, rec, new Comparator<FASTAFileRecord>() {
						@Override
						public int compare(FASTAFileRecord o1, FASTAFileRecord o2) {
							return o1.getDescriptionSB().toString().compareTo(o2.getDescriptionSB().toString());
						}
					});
					if (pos < 0) {
						fastaRecords.add((-pos)-1, rec);
					}
					else {
						fastaRecords.add(pos, rec);
					}
				}
				else {
					// Ordered like input file
					fastaRecords.add(rec);
				}
			}
		}
		finally
		{
			inFC.close();
		}
		return invalidCharacters;
	}
	

	
	/**
	 * Parse a UTF-8 encoded FASTA file and scan it for FASTA records. This method loads only the header into the 
	 * calling <code>FastaReader</code>. Use for very large files. Afterwards, you may use 
	 * <code>FastaRecord</code>&rsquo;s <code>readSequenceFromFile</code> to load the sequence on demand.
	 * <p> 
	 * Note: invalid characters in the sequence blocks are ignored.
	 * 
	 * @param	bufferSize		size in bytes of the buffer used to read the file.
	 * @param	progressMonitor	monitor to be informed of the parsing progress.
	 * 
	 * @return	number of discarded invalid characters
	 * 
	 * @throws	FileNotFoundException	if the file path stored in the <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	OperationCanceledException	if the operation progress was cancelled by the user
	 * 
	 * @since	0.8
	 */
	public long parse(IProgressMonitor progressMonitor) throws FileNotFoundException, IOException, OperationCanceledException
	{
		if (progressMonitor == null) 
			progressMonitor = new NullProgressMonitor();
		
		Charset fileCharset;
		LineSeparatorFormat fileLineSeparatorFormat;
		boolean ignoreBlankLines;
		
		fileCharset = fileHandler.getFileCharset();
		fileLineSeparatorFormat = fileHandler.getFileLineSeparatorFormat();
		ignoreBlankLines = fileHandler.getIgnoreBlankLines();
		File filePath = fileHandler.getFilePath();
		FastaRecordCollection fastaRecords  = fileHandler.getFastaRecords();

		
		progressMonitor.subTask(filePath.getName());
		
		
		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel();	// FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		
		// To keep track of the offset within the file
		long fileByteOffset = 0;
		long cBytes = 0;
		long fileCharOffset = 0;
		// To keep track of the description block 
		long descriptionByteOffset = 0;
		long descriptionBytes = 0;
		// To keep track of the sequence block
		long sequenceByteOffset = 0;
		long sequenceBytes = 0;
		SortedMap<Long, Long> componentsByteOffsets = null;
		// A few variables to keep track of each parsing stage
		boolean lineStart = true;
		boolean parseDescription = false;
		boolean parseSequence = false;
		boolean hasParsedRecord = false;
		char c = '\n';
		char previousChar;
		
		long sequenceLength = 0;
		StringBuilder descLineCache = new StringBuilder();

		try
		{
			invalidCharacters = 0;
			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1)	// IOException
			{
				// FIXME :: Java 11 Issues: https://stackoverflow.com/questions/61267495/exception-in-thread-main-java-lang-nosuchmethoderror-java-nio-bytebuffer-flip
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (fileCharset == Charset.forName("UTF-8") && inFC.size() - inFC.position() > 0)	// IOException
				{
					// ... maybe the buffer ends at an incomplete muliple byte character 
					inFC.position(inFC.position() - UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();
				
				while(cBuffer.hasRemaining())
				{
					previousChar = c;
					c = cBuffer.get();
					cBytes = fileCharset.encode(String.valueOf(c)).limit();
					
					// lineStart is "true" if c is the first character in a line
					if (previousChar == '\n' ||					// Windows, Linux, Mac OS X
						(previousChar == '\r' && c != '\n'))	// Mac OS Classic
					{
						lineStart = true;
					}
					else
					{
						lineStart = false;
					}
					
					// New line after description initiates sequence parse
					if (lineStart && parseDescription)
					{
						parseDescription = false;
						parseSequence = true;
						sequenceByteOffset = fileByteOffset;
					}
					
					// Do we ignore blank lines or not?
					if (lineStart && (c == '\r' || c == '\n'))
					{
						if (!ignoreBlankLines)
						{
							parseDescription = false;
							parseSequence = false;
						}
					}
					// Start of new FASTA record
					else if (lineStart && c == '>')
					{	
						if (hasParsedRecord)
						{
							componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
							fastaRecords.add(
									new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
														descriptionByteOffset, descriptionBytes,
														sequenceByteOffset, sequenceBytes, componentsByteOffsets,
														sequenceLength,descLineCache));
							if (fastaRecords.size() % 1000 == 0) {
								progressMonitor.subTask("FASTA records found: " + numberFormat.format(fastaRecords.size()));
							}
						}
						
						hasParsedRecord = true;
						descriptionByteOffset = fileByteOffset;
						descriptionBytes = 0;
						sequenceBytes = 0;
						componentsByteOffsets = new TreeMap<Long, Long>();
						sequenceLength = 0;
						
						parseSequence = false;
						parseDescription = true;
						descLineCache= new StringBuilder();
					}
					// Get the rest of the description until new line
					if (parseDescription)
					{	
						descriptionBytes += cBytes;
						descLineCache.append(c);
					}
					// Get the rest of the sequence and discard invalid characters
					else if (parseSequence)
					{
						// Get the rest of the sequence and discard invalid characters
						sequenceBytes += cBytes;
						if (c != '\n' && c != '\r')
						{
							if (AminoAcid.valueOf(c) != null)
							{
								if (sequenceLength % IContigProvider.I_componentLength == 0)
								{
									componentsByteOffsets.put(sequenceLength, fileByteOffset);
								}
								sequenceLength++;
							}
							else
							{
								invalidCharacters++;
							}
						}
					}
					fileByteOffset += cBytes;
					fileCharOffset++;
					
					
					if (progressMonitor.isCanceled())
					{
						throw new OperationCanceledException();
					}
				}
				progressMonitor.worked(bBuffer.capacity());
				
			}
			// Add sequence that reaches EOF
			if (hasParsedRecord)
			{
				componentsByteOffsets.put(sequenceLength, sequenceByteOffset + sequenceBytes);
				fastaRecords.add(
						new FASTAFileRecord(filePath, fileCharset, fileLineSeparatorFormat,
											descriptionByteOffset, descriptionBytes, 
											sequenceByteOffset, sequenceBytes, componentsByteOffsets, 
											sequenceLength,descLineCache));
				progressMonitor.subTask("Number of sequences found: " + fastaRecords.size());
			}
		}
		finally
		{
			inFC.close();	// IOException
		}
		return invalidCharacters;
	}

	
	

	
	/**
	 * Fills a <code>ByteBuffer</code> with portions of the file associated to this <code>FastaReader</code>. The 
	 * contents are specified in pairs of non-overlapping, strictly ascending, byte offsets.
	 * 
	 * @param	bBuffer		Buffer to read file contents to.
	 * @param	byteRanges	Pairs of non-overlapping, strictly ascending, byte offsets. Even positions (starting 
	 * 						offsets) are inclusive and odd position (ending offsets) are exclusive.
	 * 
	 * @throws	IllegalArgumentException	if <code>byteRanges</code> has an odd number of elements, if they are in 
	 * 										non-increasing order or if the sum of bytes exceeds the 
	 * 										<code>bBuffer</code>'s capacity.
	 * @throws	IOException	if there's an I/O error reading the file.
	 * 
	 * @since	1.0rc3
	 */
	private void readByteRanges(ByteBuffer bBuffer, List<Long> byteRanges)
	throws FileNotFoundException, IOException
	{
		if (byteRanges.size() % 2 != 0)
		{
			throw new IllegalArgumentException("Size of ranges list is odd");
		}
		// Check validity of ranges and that buffer is big enough
		long nBytesTotal = 0;
		for (int i = 0 ; i < byteRanges.size() - 1 ; i ++)
		{
			if (byteRanges.get(i) <= byteRanges.get(i+1))
			{
				if (i % 2 == 0)
				{
					nBytesTotal += byteRanges.get(i + 1) - byteRanges.get(i);
				}
				continue;
			}
			else
			{
				throw new IllegalArgumentException("Ranges not valid at " + i);
			}
		}
		if (nBytesTotal > bBuffer.capacity())
		{
			throw new IllegalArgumentException(nBytesTotal + ": the number " +
					"of bytes to read exceeds the buffer size " + bBuffer.capacity());
		}
		
		FileChannel inFC = new RandomAccessFile(fileHandler.getFilePath(), "r").getChannel();	// FileNotFoundException
		try
		{
			for (int i = 0 ; i < byteRanges.size() - 1 ; i += 2)
			{
				ByteBuffer middleBuffer = ByteBuffer.allocate((int)(byteRanges.get(i + 1) - byteRanges.get(i)));
				middleBuffer.clear();
				inFC.position(byteRanges.get(i));	// IOException
				inFC.read(middleBuffer);			// IOException
				middleBuffer.flip();
				bBuffer.put(middleBuffer);
			}
		}
		finally
		{
			inFC.close();
		}
	}
	
	
	
	
	
	/**
	 * Determines if a specified amount of bytes in a FASTA file contents (i.e. excluding the comment lines) 
	 * correspond to DNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets. The file needs to be parsed first in order to analyze its contents.
	 * 
	 * @param nBytes	number of sequence bytes to probe. 
	 * @param threshold	minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching 
	 * 			characters are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	FastaReaderNotParsedException	if the file has not been parsed. 
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>.
	 * 
	 * @since	1.0rc3
	 */
	private boolean isDnaFile(long nBytes, double threshold, 
							  boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws  IllegalArgumentException, FileNotFoundException, IOException 
	{
		
		
		if (nBytes < 0 || nBytes > fileHandler.getTotalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of read bytes must be between 0 and file size");
		}
		// Adjust nBytes to be a multiple of the encoding
		int bytesPerChar = (int)fileHandler.getFileCharset().newEncoder().averageBytesPerChar();
		while (nBytes % bytesPerChar != 0)
		{
			nBytes++;
		}
		
		// Until which byte can we read without reaching EOF?
		long readLimit = fileHandler.countSequenceBytesBackwards(nBytes);
		Random randomizer = new Random();
		
		int matchingChars = 0;
		int nonMatchingChars = 0;
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

		List<DNABase> goodBases = new ArrayList<DNABase>(Arrays.asList(DNABase.N.expandToUnambiguous()));
		if (includeN)
		{
			goodBases.add(DNABase.N);
		}
		if (includeAmbiguous)
		{
			goodBases.add(DNABase.B); goodBases.add(DNABase.C); goodBases.add(DNABase.D); goodBases.add(DNABase.H);
			goodBases.add(DNABase.K); goodBases.add(DNABase.M); goodBases.add(DNABase.R); goodBases.add(DNABase.S); 
			goodBases.add(DNABase.V); goodBases.add(DNABase.W); goodBases.add(DNABase.Y);
		}
		if (includeGaps)
		{
			goodBases.add(DNABase.GAP);
		}

		// Read in chunks of I_bufferSize bytes
		while (nBytes > I_byteBufferSize)
		{
			bBuffer.clear();
			// Find a random starting position
			long randomSafeByte = 
				(long)(((double)Math.abs(randomizer.nextLong()) / (double)Long.MAX_VALUE * (double)readLimit));
			// Adjust to "fall" into place
			while (randomSafeByte % bytesPerChar != 0)
			{
				randomSafeByte--;
			}
			readByteRanges(bBuffer, fileHandler.getSequenceByteRanges(randomSafeByte, I_byteBufferSize));
			bBuffer.flip();
			CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);
	
			while (cBuffer.hasRemaining())
			{
				char c = cBuffer.get();
				if (goodBases.contains(DNABase.valueOf(c)) || c == '\n' || c == '\r')
				{
					matchingChars++;
				}
				else
				{
					nonMatchingChars++;
				}
			}
			nBytes -= I_byteBufferSize;
		}
		bBuffer.clear();
		// Find a random starting position
		long randomSafeByte =
			(long)(((double)Math.abs(randomizer.nextLong()) / (double)Long.MAX_VALUE * (double)readLimit));
		// Adjust to "fall" into place
		while (randomSafeByte % bytesPerChar != 0)
		{
			randomSafeByte--;
		}
	
		readByteRanges(bBuffer, fileHandler.getSequenceByteRanges(randomSafeByte, (int)nBytes));
		bBuffer.flip();
		CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);
	
		while (cBuffer.hasRemaining())
		{
			char c = cBuffer.get();
			if (goodBases.contains(DNABase.valueOf(c)) || c == '\n' || c == '\r')
			{
				matchingChars++;
			}
			else
			{
				nonMatchingChars++;
			}
		}
		
		return ((double)matchingChars / (double)(matchingChars + nonMatchingChars)) >= threshold;
	}

	/**
	 * Determines if a specified amount of bytes in a FASTA file contents (i.e. excluding the comment lines) 
	 * correspond to RNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.

	 * @param nBytes	Number of sequence bytes to probe. 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a RNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>; if <code>threshold</code> is not a 
	 * 										value between 0.0 and 1.0.
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	private boolean isRnaFile(long nBytes, double threshold, 
							  boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		
		
		if (nBytes < 0 || nBytes > fileHandler.getTotalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of read bytes must be between 0 and file size");
		}
		
		// Adjust nBytes to be a multiple of the encoding
		int bytesPerChar = (int)fileHandler.getFileCharset().newEncoder().averageBytesPerChar();
		while (nBytes % bytesPerChar != 0)
		{
			nBytes++;
		}
		
		// Until which byte can we read without reaching EOF?
		long readableLimit = fileHandler.countSequenceBytesBackwards(nBytes);
		Random randomizer = new Random();
		
		int matchingChars = 0;
		int nonMatchingChars = 0;
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
	
		List<RNABase> goodBases = new ArrayList<RNABase>(Arrays.asList(RNABase.N.expandToUnambiguous()));
		if (includeN)
		{
			goodBases.add(RNABase.N);
		}
		if (includeAmbiguous)
		{
			goodBases.add(RNABase.B); goodBases.add(RNABase.C); goodBases.add(RNABase.D); goodBases.add(RNABase.H);
			goodBases.add(RNABase.K); goodBases.add(RNABase.M); goodBases.add(RNABase.R); goodBases.add(RNABase.S); 
			goodBases.add(RNABase.V); goodBases.add(RNABase.W); goodBases.add(RNABase.Y);
		}
		if (includeGaps)
		{
			goodBases.add(RNABase.GAP);
		}
		
		// Read in chunks of I_bufferSize bytes
		while (nBytes > I_byteBufferSize)
		{
			bBuffer.clear();
			// Find a random starting position
			long randomSafeByte = (long)(((double)Math.abs(randomizer.nextLong()) / (double)Long.MAX_VALUE * 
					 					  (double)readableLimit));
			// Adjust to "fall" into place
			while (randomSafeByte % bytesPerChar != 0)
			{
				randomSafeByte--;
			}
			readByteRanges(bBuffer, fileHandler.getSequenceByteRanges(randomSafeByte, I_byteBufferSize));
			bBuffer.flip();
			CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);
	
			while (cBuffer.hasRemaining())
			{
				char c = cBuffer.get();
				if (goodBases.contains(RNABase.valueOf(c)) || c == '\n' || c == '\r')
				{
					matchingChars++;
				}
				else
				{
					nonMatchingChars++;
				}
			}
			nBytes -= I_byteBufferSize;
		}
		bBuffer.clear();
		// Find a random starting position
		long randomSafeByte = (long)(((double)Math.abs(randomizer.nextLong()) / (double)Long.MAX_VALUE * 
				 					  (double)readableLimit));
		// Adjust to "fall" into place
		while (randomSafeByte % bytesPerChar != 0)
		{
			randomSafeByte--;
		}
	
		readByteRanges(bBuffer, fileHandler.getSequenceByteRanges(randomSafeByte, (int)nBytes));
		bBuffer.flip();
		CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);
	
		while (cBuffer.hasRemaining())
		{
			char c = cBuffer.get();
			if (goodBases.contains(RNABase.valueOf(c)) || c == '\n' || c == '\r')
			{
				matchingChars++;
			}
			else
			{
				nonMatchingChars++;
			}
		}
		return ((double)matchingChars / (double)(matchingChars + nonMatchingChars)) >= threshold;
	}

	/**
	 * Determines if a specified amount of bytes in a FASTA file contents (i.e. excluding the comment lines) 
	 * correspond to protein amino acid sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.

	 * @param nBytes	number of sequence bytes to probe. 
	 * @param threshold	minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching 
	 * 			characters are A, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>; if <code>threshold</code> is not a 
	 * 										value between 0.0 and 1.0.
	 * 
	 * @since	1.0rc3
	 */
	private boolean isProteinFile(long nBytes, double threshold, 
								  boolean includeX, boolean includeAmbiguous, boolean includeGaps, boolean includeStops)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
//		File filePath = fileHandler.getFilePath();

		if (nBytes < 0 || nBytes > fileHandler.getTotalSequenceBytes())
		{
			throw new IllegalArgumentException(nBytes + ": the number of read bytes must be between 0 and file size");
		}
		
		// Adjust nBytes to be a multiple of the encoding
		int bytesPerChar = (int)fileHandler.getFileCharset().newEncoder().averageBytesPerChar();
		while (nBytes % bytesPerChar != 0)
		{
			nBytes++;
		}
		
		// Until which byte can we read without reaching EOF?
		long readableLimit = fileHandler.countSequenceBytesBackwards(nBytes);
		Random randomizer = new Random();
		
		int matchingChars = 0;
		int nonMatchingChars = 0;
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		
		// Read in chunks of I_bufferSize bytes
		while (nBytes > I_byteBufferSize)
		{
			bBuffer.clear();
			// Find a random starting position
			long randomSafeByte = (long)(((double)Math.abs(randomizer.nextLong()) / (double)Long.MAX_VALUE * 
					 					  (double)readableLimit));
			// Adjust to "fall" into place
			while (randomSafeByte % bytesPerChar != 0)
			{
				randomSafeByte--;
			}
			readByteRanges(bBuffer, fileHandler.getSequenceByteRanges(randomSafeByte, I_byteBufferSize));
			bBuffer.flip();
			CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);

			List<AminoAcid> goodAminos = new ArrayList<AminoAcid>(Arrays.asList(AminoAcid.X.expandToUnambiguous()));
			if (includeX)
			{
				goodAminos.add(AminoAcid.X);
			}
			if (includeAmbiguous)
			{
				goodAminos.add(AminoAcid.B); goodAminos.add(AminoAcid.Z); goodAminos.add(AminoAcid.J);
			}
			if (includeGaps)
			{
				goodAminos.add(AminoAcid.GAP);
			}
			if (includeStops)
			{
				goodAminos.add(AminoAcid.$);
			}
			
			while (cBuffer.hasRemaining())
			{
				char c = cBuffer.get();
				if (goodAminos.contains(AminoAcid.valueOf(c)) || c == '\n' || c == '\r')
				{
					matchingChars++;
				}
				else
				{
					nonMatchingChars++;
				}
			}
			nBytes -= I_byteBufferSize;
		}
		bBuffer.clear();
		// Find a random starting position
		long randomSafeByte = (long)(((double)Math.abs(randomizer.nextLong()) / (double)Long.MAX_VALUE * 
				 					  (double)readableLimit));
		// Adjust to "fall" into place
		while (randomSafeByte % bytesPerChar != 0)
		{
			randomSafeByte--;
		}
	
		readByteRanges(bBuffer, fileHandler.getSequenceByteRanges(randomSafeByte, (int)nBytes));
		bBuffer.flip();
		CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);

		List<AminoAcid> unambiguousAminos = new ArrayList<AminoAcid>(Arrays.asList(AminoAcid.X.expandToUnambiguous()));
		unambiguousAminos.add(AminoAcid.X);
		unambiguousAminos.add(AminoAcid.GAP);
		unambiguousAminos.add(AminoAcid.$);
		while (cBuffer.hasRemaining())
		{
			char c = cBuffer.get();
			if (unambiguousAminos.contains(AminoAcid.valueOf(c)) || c == '\n' || c == '\r')
			{
				matchingChars++;
			}
			else
			{
				nonMatchingChars++;
			}
		}
		return ((double)matchingChars / (double)(matchingChars + nonMatchingChars)) >= threshold;
	}
	
	/**
	 * Determines if a specified proportion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * DNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.
	 * 
	 * @param contentsRatio	Proportion of sequence bytes to probe. 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	FileNotFoundException	if the file path stored in this <code>FastaReader</code> cannot be found.
	 * @throws	IOException				if the file denoted by this file path could not be read or closed.
	 * @throws	IllegalArgumentException	if <code>nBytes</code> is negative or greater than 
	 * 										<code>totalSequenceBytes()</code>; if <code>threshold</code> is not a 
	 * 										value between 0.0 and 1.0.
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isDnaFile(double contentsRatio, double threshold, 
							 boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		if (contentsRatio < 0 || contentsRatio > 1.0)
		{
			throw new IllegalArgumentException(
					contentsRatio + ": the proportion of the file to be read must be a value between 0.0 and 1.0");
		}
		
		long nBytes = (long) (fileHandler.getTotalSequenceBytes() * contentsRatio);
		return isDnaFile(nBytes, threshold, includeN, includeAmbiguous, includeGaps);
	}
	
	/**
	 * 
	 * @param contentsRatio
	 * @param threshold
	 * @return
	 * @throws FileNotFoundException
	 * @throws IOException
	 * @throws FastaReaderNotParsedException
	 * @throws IllegalArgumentException
	 *
	 * @since	x.y.z
	 */
	public boolean isDnaFile(double contentsRatio, double threshold)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		return isDnaFile(contentsRatio, threshold, true, true, false);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to DNA 
	 * nucleotide sequences with a given probability.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to a default buffer size (e.g. 512KB).
	 * 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a DNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	IllegalArgumenException	if <code>threshold</code> is not a value between 0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isDnaFile(double threshold)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		long nBytes = fileHandler.getTotalSequenceBytes();
		
		if (nBytes > I_byteBufferSize)
		{
			return isDnaFile(I_byteBufferSize, threshold, true, true, true);
		}
		else if (nBytes > I_byteBufferSize / 512)
		{
			return isDnaFile(nBytes, threshold, true, true, true);
		}
		else
		{
			return isDnaFile(nBytes, threshold, true, true, true);
		}
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to DNA 
	 * nucleotide sequences with a probability of 0.95.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than 0.95. In this case, matching characters are A, C, G and T.
	 * 
	 * @see		DNABase
	 * 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isDnaFile()
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		return isDnaFile(0.95);
	}
		
	/**
	 * Determines if a specified proportion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * RNA nucleotide sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.
	 * 
	 * @param contentsRatio	Proportion of sequence bytes to probe. 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a RNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	IllegalArgumenException	if <code>contentsRatio</code> or <code>threshold</code> is not a value between 
	 * 			0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isRnaFile(double contentsRatio, double threshold, 
							 boolean includeN, boolean includeAmbiguous, boolean includeGaps)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		if (contentsRatio < 0 || contentsRatio > 1.0)
		{
			throw new IllegalArgumentException(
					contentsRatio + ": the proportion of the file to be read must be a value between 0.0 and 1.0");
		}

		long nBytes = (long) (fileHandler.getTotalSequenceBytes() * contentsRatio);
		return isRnaFile(nBytes, threshold, includeN, includeAmbiguous, includeGaps);
	}
	
	/**
	 * 
	 * @param contentsRatio
	 * @param threshold
	 * @return
	 * @throws FastaReaderNotParsedException
	 * @throws IllegalArgumentException
	 * @throws FileNotFoundException
	 * @throws IOException
	 *
	 * @since	x.y.z
	 */
	public boolean isRnaFile(double contentsRatio, double threshold)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		return isRnaFile(contentsRatio, threshold, true, false, false);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to DNA 
	 * nucleotide sequences with a given probability.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as a RNA file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	IllegalArgumenException	if <code>threshold</code> is not a value between 0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isRnaFile(double threshold)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		long nBytes = fileHandler.getTotalSequenceBytes();
		
		if (nBytes > I_byteBufferSize)
		{
			return isRnaFile(I_byteBufferSize, threshold, true, false, false);
		}
		else if (nBytes > I_byteBufferSize / 512)
		{
			return isRnaFile(nBytes, threshold, true, false, false);
		}
		else
		{
			return isRnaFile(nBytes, threshold, true, true, true);
		}
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to RNA 
	 * nucleotide sequences with a probability of 0.95.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than 0.95. In this case, matching characters are A, C, G and U.
	 * 
	 * @see		RNABase
	 * 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isRnaFile() throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		return isRnaFile(0.95);
	}
		
	/**
	 * Determines if a specified proportion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * protein amino acid sequences with a given probability.
	 * <p>
	 * Note: the bytes are divided into a fixed buffer size, starting at random (possibly overlapping) starting 
	 * offsets.
	 * 
	 * @param	contentsRatio	Proportion of sequence bytes to probe. 
	 * @param	threshold	Minimum proportion of correctly read characters to positively identify this file as an 
	 * 						amino acid sequence file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	IllegalArgumenException	if <code>contentsRatio</code> or <code>threshold</code> is not a value between 
	 * 									0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isProteinFile(double contentsRatio, double threshold, 
								 boolean includeX, boolean includeAmbiguous, boolean includeGaps, boolean includeStops)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		if (contentsRatio < 0 || contentsRatio > 1.0)
		{
			throw new IllegalArgumentException(
					contentsRatio + ": the proportion of the file to be read must be a value between 0.0 and 1.0");
		}
		
		long nBytes = (long) (fileHandler.getTotalSequenceBytes() * contentsRatio);
		return isProteinFile(nBytes, threshold, includeX, includeAmbiguous, includeGaps, includeStops);
	}
	
	/**
	 * 
	 * @param contentsRatio
	 * @param threshold
	 * @return
	 * @throws FastaReaderNotParsedException
	 * @throws IllegalArgumentException
	 * @throws FileNotFoundException
	 * @throws IOException
	 *
	 * @since	x.y.z
	 */
	public boolean isProteinFile(double contentsRatio, double threshold)
	throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		return isProteinFile(contentsRatio, threshold, true, false, true, true);
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * protein amino acid sequences with a given probability.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contents up to an implementation, 
	 * system-dependent value.
	 * 
	 * @param threshold	Minimum proportion of correctly read characters to positively identify this file as an amino 
	 * 					acid sequence file.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than <code>threshold</code>. In this case, matching characters 
	 * 			are A, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	IllegalArgumenException	if <code>threshold</code> is not a value between 0.0 and 1.0 (both inclusive).
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isProteinFile(double threshold) throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		long nBytes = fileHandler.getTotalSequenceBytes();
		
		if (nBytes > I_byteBufferSize)
		{
			return isProteinFile(I_byteBufferSize, threshold, true, false, true, true);
		}
		else if (nBytes > I_byteBufferSize / 512)
		{
			return isProteinFile(nBytes, threshold, true, false, true, true);
		}
		else
		{
			return isProteinFile(nBytes, threshold, true, true, true, true);
		}
	}
	
	/**
	 * Determines if a fixed portion of a FASTA file contents (i.e. excluding the comment lines) corresponds to 
	 * protein amino acid sequences with a probability of 0.95.
	 * <p>
	 * Note: the amount of probed bytes is 1% of the file's sequence contentsup to an implementation, system-dependent 
	 * value.
	 * 
	 * @return	<code>true</code> if <code>(matching characters)/(matching characters + non matching 
	 * 			characters)</code> is equal or greater than 0.95. In this case, matching characters are A, C, D, E, F, 
	 * 			G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W and Y.
	 * 
	 * @see		AminoAcid
	 * 
	 * @throws	IOException 
	 * @throws FastaReaderNotParsedException 
	 * 
	 * @since	1.0rc3
	 */
	public boolean isProteinFile() throws  IllegalArgumentException, FileNotFoundException, IOException
	{
		return isProteinFile(0.95);
	}
	
	

	
	
	

	/**
	 * 
	 * @param forceBuffering
	 * @throws FileNotFoundException, IOException 
	 * @throws IOException
	 *
	 * @since	x.y.z
	 */
	public void loadDescriptionsIntoRecords(boolean force) throws FileNotFoundException, IOException
	{
		List<FASTAFileRecord> fastaRecords = fileHandler.getFastaRecords();
		File filePath = fileHandler.getFilePath();
		
		FileChannel inFC = new FileInputStream(filePath).getChannel();	// FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);
		// Get the channel from the File argument and allocate byte buffer
		try
		{
			for (FASTAFileRecord fastaRecord : fastaRecords)
			{
				if (force || fastaRecord.headerLineSB == null)
				{
					// To keep track of the offset within the file
					long fileByteOffset = fastaRecord.descriptionByteOffset;
					long cBytes = 0;
					char c;
	
					StringBuilder descriptionSB = new StringBuilder();
					inFC.position(fastaRecord.descriptionByteOffset);	// IOException
					while (inFC.read(bBuffer) != -1 &&
							fileByteOffset < fastaRecord.descriptionByteOffset + fastaRecord.descriptionBytes)
					{
						bBuffer.flip();
						// If not EOF and encoding is UTF-8...				// v IOException v
						if (fileHandler.getFileCharset() == Charset.forName("UTF-8") && inFC.size() - inFC.position() > 0)
						{
							// ... maybe the buffer ends at an incomplete muliple byte character 
							inFC.position(inFC.position() - UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
						}
						CharBuffer cBuffer = fileHandler.getFileCharset().decode(bBuffer);
						bBuffer.clear();
	
						// The sequence starts/is in this buffer
						while (cBuffer.hasRemaining() &&
								fileByteOffset < fastaRecord.descriptionByteOffset + fastaRecord.descriptionBytes)
						{
							c = cBuffer.get();
							cBytes = fileHandler.getFileCharset().encode(String.valueOf(c)).limit();
	
							descriptionSB.append(c);
	
							// Update current byte and character count
							fileByteOffset += cBytes;
						}
						break;
					}
					if (descriptionSB.charAt(0) == '>')
					{
						descriptionSB.deleteCharAt(0);
					}
					StringBuilderTrimmer.trim(descriptionSB, fileHandler.getFileLineSeparatorFormat());
					fastaRecord.headerLineSB = descriptionSB;
				}
			}
		}
		finally
		{
			inFC.close();
		}
	}



	

	

	
	
	
	
	
	
	
	
	

	
}
