/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2010-09-01
 * 
 * @license		See <a href="http://www.biotechvana.com">http://www.biotechvana.com</a>
 *
 * @copyright	Copyright Biotech Vana, S.L. 2006-2010
 */

package com.biotechvana.javabiotoolkit.io;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.biotechvana.javabiotoolkit.AminoAcid;
import com.biotechvana.javabiotoolkit.BioSequence;
import com.biotechvana.javabiotoolkit.DNABase;
import com.biotechvana.javabiotoolkit.DNASequence;
import com.biotechvana.javabiotoolkit.NucleotideSequenceDirectionality;
import com.biotechvana.javabiotoolkit.PeptideSequenceDirectionality;
import com.biotechvana.javabiotoolkit.ProteinSequence;
import com.biotechvana.javabiotoolkit.exceptions.GenBankFeaturesException;
import com.biotechvana.javabiotoolkit.exceptions.GenBankOriginException;
import com.biotechvana.javabiotoolkit.exceptions.GenBankOriginSequenceTooLongException;
import com.biotechvana.javabiotoolkit.exceptions.GenBankPreambleException;
import com.biotechvana.javabiotoolkit.exceptions.GenBankSectionException;
import com.biotechvana.javabiotoolkit.exceptions.InvalidSequenceCharacterException;
import com.biotechvana.javabiotoolkit.text.LineSeparatorFormat;
import com.biotechvana.javabiotoolkit.text.UTF8BufferTrimmer;

/**
 * Instances of <code>GenBankReader</code> are associated to GenBank files and
 * can extract information stored in them. Once the object is constructed it can
 * be asked to parse any of the three main sections of the file: HEADER,
 * FEATURES or ORIGIN, in order to search for any of the available fields either
 * by location or by feature.
 * <p>
 * NOTE: For the remainder of this document, the fields preceding the FEATURES
 * (namely, LOCUS, DEFINITION, ACCESSION, VERSION, KEYWORDS, SOURCE, REFERENCE
 * and COMMENT) are referred to as the preamble of the file.
 * <p>
 * <code>GenBankReader</code> follows GenBank format&rsquo;s features and
 * qualifiers as defined in <a
 * href="ftp://ftp.ncbi.nih.gov/genbank/docs/FTv8_3.html">The DDBJ/EMBL/GenBank
 * Feature Table, version 8.3 released in April of 2010</a> with the additions
 * provided in the <a
 * href="http://www.bio.net/bionet/mm/genbankb/2010-December/000328.html"
 * >GenBank release 181.0</a>, and conserving old qualifiers.
 * <p>
 * There is a sample GenBank record in NCBI at <a
 * href="http://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html">
 * http://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html</a>.
 * 
 * @version 0.5, 2011-02-01
 * 
 * @author <a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso
 *         Muñoz-Pomer Fuentes</a>, <a
 *         href="http://www.biotechvana.com">Biotechvana</a>.
 */
public class GenBankReader {
	// Capacity, in bytes, of the buffer allocated for every pass over the file;
	// the file is read and decoded in chunks of at most this size
	private static final int I_byteBufferSize = 512 * 1024; // 512KB

	/**
	 * Names of the fields recognised in the preamble of a GenBank file.
	 * <p>
	 * The list is filled exactly once, when the class is initialised, so it is
	 * already populated for code that reads it before any
	 * <code>GenBankReader</code> has been constructed.
	 */
	public static final List<String> preambleFields = new ArrayList<String>();
	static {
		preambleFields.add("LOCUS");
		preambleFields.add("DEFINITION");
		preambleFields.add("ACCESSION");
		preambleFields.add("VERSION");
		preambleFields.add("KEYWORDS");
		preambleFields.add("DBSOURCE");
		preambleFields.add("SOURCE");
		preambleFields.add("REFERENCE");
		preambleFields.add("COMMENT");
	}

	// List of feature keys for the FEATURES section. Filled once, in a static
	// initialiser, so that it does not depend on an instance having been
	// created and is not re-checked on every construction.
	private static final List<String> featureKeys = new ArrayList<String>();
	static {
		featureKeys.add("attenuator");
		featureKeys.add("C_region");
		featureKeys.add("CAAT_signal");
		featureKeys.add("CDS");
		featureKeys.add("conflict");
		featureKeys.add("D-loop");
		featureKeys.add("D_segment");
		featureKeys.add("enhancer");
		featureKeys.add("exon");
		featureKeys.add("gap");
		featureKeys.add("GC_signal");
		featureKeys.add("gene");
		featureKeys.add("iDNA");
		featureKeys.add("intron");
		featureKeys.add("J_segment");
		featureKeys.add("LTR");
		featureKeys.add("mat_peptide");
		featureKeys.add("misc_binding");
		featureKeys.add("misc_difference");
		featureKeys.add("misc_feature");
		featureKeys.add("misc_recomb");
		featureKeys.add("misc_RNA");
		featureKeys.add("misc_signal");
		featureKeys.add("misc_structure");
		// New in 180.0
		// http://www.bio.net/bionet/mm/genbankb/2010-October/000326.html
		featureKeys.add("mobile_element");
		featureKeys.add("modified_base");
		featureKeys.add("mRNA");
		featureKeys.add("ncRNA");
		featureKeys.add("N_region");
		featureKeys.add("old_sequence");
		featureKeys.add("operon");
		featureKeys.add("oriT");
		featureKeys.add("polyA_signal");
		featureKeys.add("polyA_site");
		featureKeys.add("precursor_RNA");
		featureKeys.add("prim_transcript");
		featureKeys.add("primer_bind");
		featureKeys.add("promoter");
		featureKeys.add("protein_bind");
		featureKeys.add("RBS");
		featureKeys.add("repeat_region");
		featureKeys.add("rep_origin");
		featureKeys.add("rRNA");
		featureKeys.add("S_region");
		featureKeys.add("sig_peptide");
		featureKeys.add("source");
		featureKeys.add("stem_loop");
		featureKeys.add("STS");
		featureKeys.add("TATA_signal");
		featureKeys.add("terminator");
		featureKeys.add("tmRNA");
		featureKeys.add("transit_peptide");
		featureKeys.add("tRNA");
		featureKeys.add("unsure");
		featureKeys.add("V_region");
		featureKeys.add("V_segment");
		featureKeys.add("variation");
		featureKeys.add("3'UTR");
		featureKeys.add("5'UTR");
		featureKeys.add("-10_signal");
		featureKeys.add("-35_signal");
	}
	// List of qualifier keys for the features
	private static final List<String> qualifierKeys = new ArrayList<String>();
	{
		if (qualifierKeys.size() == 0) {
			qualifierKeys.add("/allele=");
			qualifierKeys.add("/anticodon=");
			qualifierKeys.add("/artificial_location");
			qualifierKeys.add("/bio_material=");
			qualifierKeys.add("/bound_moiety=");
			qualifierKeys.add("/cell_line=");
			qualifierKeys.add("/cell_type=");
			qualifierKeys.add("/chromosome=");
			qualifierKeys.add("/citation=");
			qualifierKeys.add("/clone=");
			qualifierKeys.add("/clone_lib=");
			qualifierKeys.add("/codon=");
			qualifierKeys.add("/codon_start=");
			qualifierKeys.add("/collected_by=");
			qualifierKeys.add("/collection_date=");
			qualifierKeys.add("/compare=");
			qualifierKeys.add("/country=");
			qualifierKeys.add("/cultivar=");
			qualifierKeys.add("/culture_collection=");
			qualifierKeys.add("/db_xref=");
			qualifierKeys.add("/dev_stage=");
			qualifierKeys.add("/direction=");
			qualifierKeys.add("/EC_number=");
			qualifierKeys.add("/ecotype=");
			qualifierKeys.add("/environmental_sample=");
			qualifierKeys.add("/estimated_length=");
			qualifierKeys.add("/exception=");
			qualifierKeys.add("/experiment=");
			qualifierKeys.add("/focus=");
			qualifierKeys.add("/frequency=");
			qualifierKeys.add("/function=");
			qualifierKeys.add("/gene=");
			qualifierKeys.add("/gene_synonym=");
			qualifierKeys.add("/germline=");
			qualifierKeys.add("/haplogroup=");
			qualifierKeys.add("/haplotype=");
			qualifierKeys.add("/host=");
			qualifierKeys.add("/identified_by=");
			qualifierKeys.add("/inference=");
			qualifierKeys.add("/isolate=");
			qualifierKeys.add("/isolation_source=");
			qualifierKeys.add("/label=");
			qualifierKeys.add("/lab_host=");
			qualifierKeys.add("/lat_lon=");
			qualifierKeys.add("/locus_tag=");
			qualifierKeys.add("/map=");
			qualifierKeys.add("/macronuclear");
			qualifierKeys.add("/mating_type=");
			qualifierKeys.add("/mobile_element=");
			qualifierKeys.add("/mobile_element_type="); // New in 180.0
			qualifierKeys.add("/mod_base=");
			qualifierKeys.add("/mol_type=");
			qualifierKeys.add("/ncRNA_class=");
			qualifierKeys.add("/note=");
			qualifierKeys.add("/number=");
			qualifierKeys.add("/old_locus_tag=");
			qualifierKeys.add("/operon=");
			qualifierKeys.add("/organelle=");
			qualifierKeys.add("/organism=");
			qualifierKeys.add("/partial");
			qualifierKeys.add("/PCR_conditions=");
			qualifierKeys.add("/PCR_primers=");
			qualifierKeys.add("/phenotype=");
			qualifierKeys.add("/pop_variant=");
			qualifierKeys.add("/plasmid=");
			qualifierKeys.add("/product=");
			qualifierKeys.add("/protein_id=");
			qualifierKeys.add("/proviral");
			qualifierKeys.add("/pseudo");
			qualifierKeys.add("/rearranged");
			qualifierKeys.add("/replace=");
			qualifierKeys.add("/ribosomal_slippage");
			qualifierKeys.add("/rpt_family=");
			qualifierKeys.add("/rpt_type=");
			qualifierKeys.add("/rpt_unit_range=");
			qualifierKeys.add("/rpt_unit_seq=");
			qualifierKeys.add("/satellite=");
			qualifierKeys.add("/segment=");
			qualifierKeys.add("/serotype=");
			qualifierKeys.add("/serovar=");
			qualifierKeys.add("/sex=");
			qualifierKeys.add("/specimen_voucher=");
			qualifierKeys.add("/standard_name=");
			qualifierKeys.add("/strain=");
			qualifierKeys.add("/sub_clone=");
			qualifierKeys.add("/sub_species=");
			qualifierKeys.add("/sub_strain=");
			qualifierKeys.add("/tag_peptide=");
			qualifierKeys.add("/tissue_lib=");
			qualifierKeys.add("/tissue_type=");
			qualifierKeys.add("/transgenic=");
			qualifierKeys.add("/translation=");
			qualifierKeys.add("/transl_except=");
			qualifierKeys.add("/transl_table=");
			qualifierKeys.add("/trans_splicing");
			qualifierKeys.add("/variety=");
			qualifierKeys.add("/whole_replicon="); // New in 181.0
		}
	}

	// TODO (maybe?) Transform static lists to enums
	// enum PreambleField
	// {
	// }
	// enum FeatureKey
	// {
	// }
	// enum Qualifier
	// {
	// }

	/**
	 * Inner class of GenBankReader that represents each of the entries found
	 * in the preamble.
	 * 
	 * @version 0.1
	 * 
	 * @author <a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso
	 *         Muñoz-Pomer Fuentes</a>, <a
	 *         href="http://www.biotechvana.com">Biotechvana</a>.
	 */
	public class GenBankPreambleEntry {
		private String name;
		private List<String> fieldData;

		/**
		 * Builds a preamble entry from its name and its raw text block.
		 * <p>
		 * The block is read line by line. A line that starts with the
		 * continuation indent is glued, after a single space, to the detail
		 * being assembled; any other line closes that detail and opens a new
		 * one. Every line is trimmed before it is stored.
		 * 
		 * @param name
		 *            name of the preamble field; surrounding whitespace is
		 *            discarded.
		 * @param dataString
		 *            block of text with all the details corresponding to this
		 *            entry.
		 * 
		 * @throws GenBankPreambleException
		 *             declared for callers; the visible body does not raise it.
		 * 
		 * @since 0.3
		 */
		private GenBankPreambleEntry(String name, String dataString)
				throws GenBankPreambleException {
			this.name = name.trim();
			this.fieldData = new ArrayList<String>();

			StringBuilder detail = new StringBuilder();
			String[] lines = dataString.split("\\n");
			for (int i = 0; i < lines.length; i++) {
				String line = lines[i];
				// Only a detail that already has text can be continued or
				// closed; an empty one is simply started by this line
				if (detail.length() > 0) {
					if (line.startsWith("            ")) {
						// Continuation line: join with one space
						detail.append(" ");
					} else {
						// A new detail begins: store the finished one
						fieldData.add(detail.toString());
						detail.setLength(0);
					}
				}
				detail.append(line.trim());
			}
			// Whatever is left over is the last detail
			if (detail.length() > 0) {
				fieldData.add(detail.toString());
			}
		}

		/**
		 * Gives the name of this preamble entry.
		 * 
		 * @return the trimmed field name this entry was built with.
		 * 
		 * @since 0.5
		 */
		public String name() {
			return name;
		}

		/**
		 * Gives the details of this preamble entry.
		 * 
		 * @return the details as a <code>List</code> of <code>String</code>s,
		 *         one element per detail, each one flattened into a single
		 *         line. The list is the one held by this entry, not a copy.
		 * 
		 * @since 0.5
		 */
		public List<String> fieldData() {
			return fieldData;
		}
	}

	/**
	 * Inner class of GenBankReader that represents each of the features
	 * found in the FEATURES section.
	 * 
	 * @version 0.2
	 * 
	 * @author <a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso
	 *         Muñoz-Pomer Fuentes</a>, <a
	 *         href="http://www.biotechvana.com">Biotechvana</a>.
	 */
	public class GenBankFeatureRecord {
		private String featureKey;
		private String locationString;
		private int locationStart;
		private int locationEnd;
		private List<String> qualifiers;

		/**
		 * Parameterized constructor for feature records.
		 * <p>
		 * The location is stored with all whitespace removed. Its start and
		 * end are the first and the last number that appear in it once every
		 * character other than digits and the separators <code>.</code>,
		 * <code>,</code> and <code>^</code> has been discarded.
		 * <p>
		 * NOTE(review): digits that belong to non-positional text inside the
		 * location (for instance an accession prefix) survive that filter and
		 * are read as positions; confirm whether such locations can reach this
		 * constructor.
		 * 
		 * @param featureKey
		 *            the feature key (see GenBankReader description for a list
		 *            of allowed values).
		 * @param locationString
		 *            the text block which contains the location data of this
		 *            feature.
		 * @param qualifiers
		 *            the text block with qualifiers and values of this feature,
		 *            one qualifier per line (values may continue on following
		 *            lines).
		 * 
		 * @throws GenBankFeaturesException
		 *             thrown if no start and end position can be read from the
		 *             location, or if the start is greater than the end.
		 * 
		 * @since 0.1
		 */
		private GenBankFeatureRecord(String featureKey, String locationString,
				String qualifiers) throws GenBankFeaturesException {
			this.featureKey = featureKey;

			// Remove whitespaces; this is the form handed back by location()
			locationString = locationString.replaceAll("\\s+", "");
			this.locationString = locationString;

			// Remove all text and parentheses, then turn every separator
			// ("..", "," and "^") into a single "." to split on
			String positions = locationString.replaceAll("[^\\d\\.\\,\\^]", "");
			positions = positions.replaceAll("\\.\\.|\\,|\\^", "\\.");
			String[] locationStrings = positions.split("\\.");

			// A location without usable numbers used to escape as an unchecked
			// exception; report it through the documented checked exception
			if (locationStrings.length == 0) {
				throw new GenBankFeaturesException("\"" + this.locationString
						+ "\" is an invalid location");
			}
			try {
				locationStart = Integer.parseInt(locationStrings[0]);
				locationEnd = Integer
						.parseInt(locationStrings[locationStrings.length - 1]);
			} catch (NumberFormatException exception) {
				throw new GenBankFeaturesException("\"" + this.locationString
						+ "\" is an invalid location");
			}

			if (locationStart > locationEnd || locationStart < 0
					|| locationEnd < 0) {
				throw new GenBankFeaturesException(locationStart + ".."
						+ locationEnd + " is an invalid location");
			}

			this.qualifiers = new ArrayList<String>();
			// A qualifier starts at a "/" that opens a line (after its
			// indentation). Splitting only there keeps slashes that are part
			// of a value, such as /note="A/B", inside that value.
			for (String qualifier : qualifiers.split("(?m)^\\s*/")) {
				// Remove leading whitespaces (leaving only one in subsequent
				// lines)
				qualifier = qualifier.replaceAll("\\s{21}", "");
				qualifier = qualifier.trim();
				if (!qualifier.equals("")) {
					this.qualifiers.add("/" + qualifier);
				}
			}
		}

		/**
		 * Returns this feature&rsquo;s key name.
		 * 
		 * @return this feature key name.
		 * 
		 * @since 0.1
		 */
		public String featureKey() {
			return featureKey;
		}

		/**
		 * Returns the start of this feature&rsquo;s location.
		 * 
		 * @return the first number found in the location text.
		 */
		public int locationStart() {
			return locationStart;
		}

		/**
		 * Returns the end of this feature&rsquo;s location.
		 * 
		 * @return the last number found in the location text.
		 */
		public int locationEnd() {
			return locationEnd;
		}

		/**
		 * Returns this feature&rsquo;s location as written in the file.
		 * 
		 * @return the location text with all whitespace removed; operators
		 *         and parentheses are kept.
		 */
		public String location() {
			return locationString;
		}

		/**
		 * Returns this feature&rsquo;s qualifiers.
		 * 
		 * @return this feature&rsquo;s qualifiers as a <code>List</code> of
		 *         <code>String</code>s. Each <code>String</code> corresponds to
		 *         each qualifier, parsed into a single line.
		 * 
		 * @since 0.1
		 */
		public List<String> qualifiers() {
			return qualifiers;
		}

		/**
		 * Parse this feature&rsquo;s qualifiers and retrieve a subset specified
		 * by a qualifier key.
		 * 
		 * @param qualifierKey
		 *            a qualifier key, including the <code>/</code> prefix. The
		 *            <code>=</code> suffix should be included if it&rsquo;s a
		 *            valued qualifier. The key is compared without regard to
		 *            case against the known keys.
		 * 
		 * @return a (possibly empty) <code>List</code> with the text that
		 *         follows the key in every qualifier that matches
		 *         <code>qualifierKey</code>. Each <code>String</code> element
		 *         comes from one qualifier; for a qualifier that consists of
		 *         the key alone the element is the empty string.
		 * 
		 * @throws GenBankFeaturesException
		 *             thrown if <code>qualifierKey</code> does not match any of
		 *             the keys defined in the GenBank format.
		 * 
		 * @since 0.2
		 */
		public List<String> findQualifiers(String qualifierKey)
				throws GenBankFeaturesException {
			// Check that the qualifier is good (case insensitive) and switch
			// to the spelling used in the key list
			String canonicalKey = null;
			for (String qk : qualifierKeys) {
				if (qualifierKey.equalsIgnoreCase(qk)) {
					canonicalKey = qk;
					break;
				}
			}
			if (canonicalKey == null) {
				throw new GenBankFeaturesException(qualifierKey
						+ " is an invalid GenBank qualifier");
			}

			boolean valuedKey = canonicalKey.endsWith("=");
			List<String> qualifierValues = new ArrayList<String>();
			for (String qualifier : qualifiers) {
				if (!qualifier.startsWith(canonicalKey)) {
					continue;
				}
				String remainder = qualifier.substring(canonicalKey.length());
				// A flag key (no "=") must end where the qualifier name ends,
				// otherwise "/pseudo" would also pick up a longer name that
				// merely begins with it
				if (!valuedKey && remainder.length() > 0) {
					char next = remainder.charAt(0);
					if (Character.isLetterOrDigit(next) || next == '_') {
						continue;
					}
				}
				qualifierValues.add(remainder);
			}
			return qualifierValues;
		}
	}

	// End of inner classes. GenBankReader fields start here

	// File associated with this object and the format information needed to
	// decode it (charset and line separator), as given to the constructor
	private File filePath;
	private Charset fileCharset;
	private LineSeparatorFormat fileLineSeparatorFormat;
	// Stored by the constructor; marked unused because nothing reads it yet
	@SuppressWarnings("unused")
	private boolean ignoreBlankLines;
	// Byte and char offsets of the three major section headers (LOCUS,
	// FEATURES and ORIGIN), filled in by parseSections
	private long locusByteOffset;
	private long locusCharOffset;
	private long featuresByteOffset;
	private long featuresCharOffset;
	private long originByteOffset;
	private long originCharOffset;
	// To keep track of the sections found (set by parseSections) and of
	// their parsed status (set by the parser of each section)
	private boolean locusFound = false;
	private boolean featuresFound = false;
	private boolean originFound = false;
	private boolean preambleParsed = false;
	private boolean featuresParsed = false;
	private boolean originParsed = false;

	// Sequential byte and char offsets of each field in the preamble
	private List<Long> preambleFieldsByteOffsets;
	private List<Long> preambleFieldsCharOffsets;
	// Association of each preamble field name with the offsets where it
	// appears; the constructor creates one (initially empty) list per name
	private Map<String, List<Long>> preambleFieldsByteOffsetsMap;
	private Map<String, List<Long>> preambleFieldsCharOffsetsMap;

	// Sequential byte and char offsets of each feature
	private List<Long> featureKeysByteOffsets;
	private List<Long> featureKeysCharOffsets;
	// Association of each feature key with the offsets where it appears; the
	// constructor creates one (initially empty) list per key
	private Map<String, List<Long>> featureKeysByteOffsetsMap;
	private Map<String, List<Long>> featureKeysCharOffsetsMap;

	// Set to 0 by the constructor; presumably the length of the sequence in
	// the ORIGIN section once it has been parsed -- TODO confirm against the
	// ORIGIN parsing code
	private long originSequenceLength;

	/**
	 * Constructs a <code>GenBankReader</code> and associates it to a file using
	 * a specified encoding and new line format.
	 * 
	 * @param filePath
	 *            the GenBank file to read.
	 * @param fileCharset
	 *            the charset in which the file is encoded.
	 * @param fileLineSeparatorFormat
	 *            the line separator used in the file.
	 * @param ignoreBlankLines
	 *            if <code>true</code> blank lines will be ignored; if
	 *            <code>false</code> a blank line will be interpreted as a
	 *            syntax error.
	 * 
	 * @since 0.1
	 */
	public GenBankReader(File filePath, Charset fileCharset,
			LineSeparatorFormat fileLineSeparatorFormat,
			boolean ignoreBlankLines) {
		// Remember the file and how it has to be read
		this.ignoreBlankLines = ignoreBlankLines;
		this.fileLineSeparatorFormat = fileLineSeparatorFormat;
		this.fileCharset = fileCharset;
		this.filePath = filePath;

		// Nothing has been read from ORIGIN yet
		this.originSequenceLength = 0;

		// Preamble bookkeeping: the sequential offset lists plus one empty
		// offset list per known field name
		this.preambleFieldsByteOffsets = new ArrayList<Long>();
		this.preambleFieldsCharOffsets = new ArrayList<Long>();
		this.preambleFieldsByteOffsetsMap = new HashMap<String, List<Long>>();
		this.preambleFieldsCharOffsetsMap = new HashMap<String, List<Long>>();
		for (int i = 0; i < preambleFields.size(); i++) {
			String fieldName = preambleFields.get(i);
			this.preambleFieldsByteOffsetsMap.put(fieldName,
					new ArrayList<Long>());
			this.preambleFieldsCharOffsetsMap.put(fieldName,
					new ArrayList<Long>());
		}

		// Features bookkeeping: same layout, keyed by feature key
		this.featureKeysByteOffsets = new ArrayList<Long>();
		this.featureKeysCharOffsets = new ArrayList<Long>();
		this.featureKeysByteOffsetsMap = new HashMap<String, List<Long>>();
		this.featureKeysCharOffsetsMap = new HashMap<String, List<Long>>();
		for (int i = 0; i < featureKeys.size(); i++) {
			String key = featureKeys.get(i);
			this.featureKeysByteOffsetsMap.put(key, new ArrayList<Long>());
			this.featureKeysCharOffsetsMap.put(key, new ArrayList<Long>());
		}
	}

	/**
	 * Constructs a <code>GenBankReader</code> and associates it to a file using
	 * a specified encoding and the system default line separator.
	 * 
	 * @param filePath
	 *            the GenBank file to read.
	 * @param fileCharset
	 *            the charset in which the file is encoded.
	 * 
	 * @since 0.1
	 */
	public GenBankReader(File filePath, Charset fileCharset) {
		// Delegate to the full constructor with the system default line
		// separator and with blank lines ignored
		this(filePath, fileCharset, LineSeparatorFormat.SYSTEM_DEFAULT, true);
	}

	/**
	 * Constructs a <code>GenBankReader</code> and associates it to a file using
	 * the system default encoding and line separator.
	 * 
	 * @param filePath
	 *            the GenBank file to read.
	 * 
	 * @since 0.1
	 */
	public GenBankReader(File filePath) {
		// The two-argument constructor already supplies the system default
		// line separator and ignores blank lines; only the charset is added
		this(filePath, Charset.defaultCharset());
	}

	/**
	 * Returns the file which the receiver is associated to.
	 * 
	 * @return the file which this object is associated to.
	 * 
	 * @since 0.5
	 */
	public File filePath() {
		// Hands back the File given to the constructor
		return filePath;
	}

	/**
	 * Scans a GenBank file for the LOCUS, FEATURES and ORIGIN sections. This
	 * method checks that they are all found and in the correct relative order.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before parsing any
	 * particular section.
	 * 
	 * @param forceParse
	 *            whether the file should be parsed even if it has been properly
	 *            parsed before. If <code>true</code>, the file is forced to be
	 *            parsed; if <code>false</code> and the file has already been
	 *            successfully parsed it keeps the previous positions of each
	 *            section.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankSectionException
	 *             thrown if the GenBank sections are not formatted properly,
	 *             all present and in the correct order.
	 * 
	 * @since 0.1
	 */
	/*
	 * This method needs a lot of extra checks, buffer transformations, etc.
	 * because the usual Java IO framework doesn't allow to count bytes and
	 * characters when reading lines.
	 */
	public void parseSections(boolean forceParse) throws FileNotFoundException,
			IOException, GenBankSectionException {
		// Lazy mode: a previous scan already located the three sections
		if (!forceParse && locusFound && featuresFound && originFound) {
			return;
		}

		// Get the channel from the File argument and allocate a byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

		// To keep track of the offset within the file
		long fileByteOffset = 0;
		long fileCharOffset = 0;
		// Last read character and number of bytes used to encode it
		char c;
		long cBytes = 0;

		// We could include \\s* at the beginning to make the format a bit more
		// flexible
		Matcher locusMatcher = Pattern.compile("^LOCUS\\s+", Pattern.MULTILINE)
				.matcher("");
		Matcher featuresMatcher = Pattern.compile("^FEATURES\\s+",
				Pattern.MULTILINE).matcher("");
		Matcher originMatcher = Pattern.compile("^ORIGIN\\s+",
				Pattern.MULTILINE).matcher("");

		// Charsets are compared by value; nothing guarantees that two lookups
		// of the same charset yield the same object
		boolean utf8 = Charset.forName("UTF-8").equals(fileCharset);

		try {
			// The file could be opened, so this is a real (re)scan: forget
			// what an earlier scan recorded. Without this a forced parse
			// would skip every search below and keep stale positions.
			locusFound = false;
			featuresFound = false;
			originFound = false;
			locusByteOffset = 0;
			locusCharOffset = 0;
			featuresByteOffset = 0;
			featuresCharOffset = 0;
			originByteOffset = 0;
			originCharOffset = 0;

			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1) // IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (utf8 && inFC.size() - inFC.position() > 0) // IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte
					// character
					inFC.position(inFC.position()
							- UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				// Set file channel position to last newline character to ensure
				// LOCUS, FEATURES or ORIGIN aren't split
				if (inFC.size() - inFC.position() > 0) {
					int discard = UTF8BufferTrimmer.endTrimNewLine(bBuffer,
							fileCharset, fileLineSeparatorFormat);
					inFC.position(inFC.position() - discard);
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();

				// If no sections found yet...
				if (!locusFound && !featuresFound && !originFound) {
					// ... look for LOCUS
					locusMatcher.reset(cBuffer);
					if (locusMatcher.find()) {
						locusFound = true;
						locusCharOffset = fileCharOffset + locusMatcher.start();
					}
				}
				// If only LOCUS found...
				if (locusFound && !featuresFound && !originFound) {
					// ... look for FEATURES
					featuresMatcher.reset(cBuffer);
					if (featuresMatcher.find()) {
						featuresFound = true;
						featuresCharOffset = fileCharOffset
								+ featuresMatcher.start();
					}
				}
				// If LOCUS and FEATURES found...
				if (locusFound && featuresFound && !originFound) {
					// ... look for ORIGIN
					originMatcher.reset(cBuffer);
					if (originMatcher.find()) {
						originFound = true;
						originCharOffset = fileCharOffset
								+ originMatcher.start();
					}
				}

				// Walk the buffer one char at a time to keep the char and byte
				// counts in step and translate the section char offsets found
				// above into byte offsets.
				// NOTE(review): each char is encoded on its own, so a
				// character stored as a surrogate pair may not be counted with
				// its real byte length -- confirm whether such input can occur.
				while (cBuffer.hasRemaining()) {
					c = cBuffer.get();
					cBytes = fileCharset.encode(String.valueOf(c)).limit();

					// Check for the section char offsets to see if we're there
					// and update the byte offsets
					if (fileCharOffset == locusCharOffset) {
						locusByteOffset = fileByteOffset;
					}
					if (fileCharOffset == featuresCharOffset) {
						featuresByteOffset = fileByteOffset;
					}
					if (fileCharOffset == originCharOffset) {
						originByteOffset = fileByteOffset;
					}

					// Update current byte and character count
					fileByteOffset += cBytes;
					fileCharOffset++;
				}
			}
		} finally {
			// Read errors are no longer swallowed here: they reach the caller
			// as the declared IOException instead of being printed and then
			// reported as a missing section
			inFC.close();
		}

		// Check for correctness (at this level, of course) in the GenBank
		// file...
		if (!locusFound) {
			throw new GenBankSectionException("LOCUS field not found");
		} else if (!featuresFound) {
			throw new GenBankSectionException("FEATURES field not found");
		} else if (!originFound) {
			throw new GenBankSectionException("ORIGIN field not found");
		} else if (!(locusByteOffset < featuresByteOffset && featuresByteOffset < originByteOffset)) {
			throw new GenBankSectionException(
					"The sections LOCUS, FEATURES and ORIGIN are not in the correct order");
		}
	}

	/**
	 * Utility method that does a lazy parse (see above) of the file. If the
	 * file has been previously and successfully parsed, this method has no
	 * effect and returns immediately.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before parsing any
	 * particular section.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankSectionException
	 *             thrown if the GenBank sections are not formatted properly,
	 *             all present and in the correct order.
	 * 
	 * @since 0.4
	 */
	public void parseSections() throws FileNotFoundException, IOException,
			GenBankSectionException {
		// Lazy variant: never force a new scan
		parseSections(false);
	}

	/**
	 * Parses a GenBank file&rsquo;s preamble section (see above). Using this
	 * method <code>GenBankReader</code> objects can keep track of the byte and
	 * char offset of each entry found in the preamble.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before retrieving any
	 * data in the preamble entries.
	 * 
	 * @param forceParse
	 *            whether the preamble should be parsed even if it has been
	 *            properly parsed before. If <code>true</code>, the preamble is
	 *            forced to be parsed; if <code>false</code> and the preamble
	 *            has already been successfully parsed it keeps the previous
	 *            positions of each <code>GenBankPreambleEntry</code>.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankPreambleException
	 *             thrown if the file sections have not been successfully
	 *             parsed, if the preamble cannot be found (<code>LOCUS</code>
	 *             field is missing), or if a preamble entry is found and it
	 *             does not match the GenBank specification.
	 * 
	 * @since 0.1
	 */
	public void parsePreamble(boolean forceParse)
			throws GenBankPreambleException, FileNotFoundException, IOException {
		if (!forceParse && preambleParsed) {
			return;
		}

		if (!locusFound || !featuresFound) {
			throw new GenBankPreambleException(
					"The file has not been successfully parsed.");
		}

		if ((locusByteOffset < originByteOffset && originByteOffset < featuresByteOffset)
				|| (locusByteOffset > featuresByteOffset)) {
			throw new GenBankPreambleException(
					"The preamble cannot be found before the FEATURES section.");
		}

		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

		// To keep track of the offset within the file (and advance to FEATURES)
		long fileByteOffset = locusByteOffset;
		long fileCharOffset = locusCharOffset;
		inFC.position(fileByteOffset); // IOException

		// Last read character and number of bytes used to encode it
		char c;
		long cBytes = 0;

		boolean localLocusFound = false;
		Matcher preambleFieldMatcher = Pattern.compile("^(\\S+)\\s+(.*)$",
				Pattern.MULTILINE).matcher("");
		try {
			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1
					&& fileByteOffset < featuresByteOffset) // IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (fileCharset == Charset.forName("UTF-8")
						&& inFC.size() - inFC.position() > 0) // IOException
				{
					// ... maybe the buffer ends at an incomplete muliple byte
					// character
					inFC.position(inFC.position()
							- UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				// Set file channel position to last newline character to ensure
				// a preamble field is not split up
				if (inFC.size() - inFC.position() > 0) {
					int discard = UTF8BufferTrimmer.endTrimNewLine(bBuffer,
							fileCharset, fileLineSeparatorFormat);
					inFC.position(inFC.position() - discard);
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();

				// Make sure the header is there (done only in the first
				// iteration)
				if (!localLocusFound) {
					Matcher preambleHeaderMatcher = Pattern.compile(
							"^\\s*LOCUS\\s+(.*)$", Pattern.MULTILINE).matcher(
							cBuffer);
					if (preambleHeaderMatcher.lookingAt()) {
						localLocusFound = true;
					} else {
						throw new GenBankPreambleException(
								"LOCUS field is missing.");
					}
				}

				// Everything's fine. Proceed and find fields in this buffer
				List<Long> thisBufferPreambleFieldCharOffsets = new ArrayList<Long>();
				List<String> thisBufferPreambleFieldStrings = new ArrayList<String>();
				preambleFieldMatcher.reset(cBuffer);
				// Find all the preamble fields in this buffer as char offsets
				while (preambleFieldMatcher.find()) {
					thisBufferPreambleFieldStrings.add(preambleFieldMatcher
							.group(1).trim());
					thisBufferPreambleFieldCharOffsets.add(fileCharOffset
							+ preambleFieldMatcher.start());
				}

				// Keep char and byte count in the file
				while (cBuffer.hasRemaining()
						&& fileByteOffset < featuresByteOffset) {
					c = cBuffer.get();
					cBytes = fileCharset.encode(String.valueOf(c)).limit();

					// Check for the feature key char offsets to see if we're
					// there and update the byte offsets
					int preambleFieldIndex = Collections.binarySearch(
							thisBufferPreambleFieldCharOffsets, fileCharOffset);
					if (preambleFieldIndex > -1) {
						String preambleField = thisBufferPreambleFieldStrings
								.remove(preambleFieldIndex);
						// If this feature is not recognised get out
						if (preambleFieldsByteOffsetsMap.get(preambleField) == null) {
							throw new GenBankPreambleException(preambleField
									+ " is not a GenBank 181.0 field");
						}
						// Retrieve this feature char/byte offset and add it to
						// the hash table
						long preambleFieldCharOffset = thisBufferPreambleFieldCharOffsets
								.remove(preambleFieldIndex);
						// Do a case insensitive match against the HashMap
						for (String pf : preambleFields) {
							if (preambleField.equalsIgnoreCase(pf)) {
								preambleField = pf;
								break;
							}
						}
						preambleFieldsCharOffsetsMap.get(preambleField).add(
								preambleFieldCharOffset);
						preambleFieldsByteOffsetsMap.get(preambleField).add(
								fileByteOffset);
					}
					// Update current byte and character count
					fileByteOffset += cBytes;
					fileCharOffset++;
				}
			}
			preambleParsed = true;
		} catch (IOException exception) {
			exception.printStackTrace();
		} finally {
			inFC.close();
			/*
			 * Ordering might not be necessary if we have a *guarantee* that the
			 * map entries are kept in the same order that they are found. It's
			 * reasonable to think they are, but the extra low cost of the
			 * operation is worth the safety.
			 */
			// Fore each preamble field...
			for (String pf : preambleFields) {
				// ... order the maps where that field appears using the byte
				// offset...
				Collections.sort(preambleFieldsByteOffsetsMap.get(pf));
				Collections.sort(preambleFieldsCharOffsetsMap.get(pf));
				// ... and add the offsets to the general lists
				preambleFieldsByteOffsets.addAll(preambleFieldsByteOffsetsMap
						.get(pf));
				preambleFieldsCharOffsets.addAll(preambleFieldsCharOffsetsMap
						.get(pf));
			}
			// Order the lists and add a final offset where ORIGIN begins
			Collections.sort(preambleFieldsByteOffsets);
			preambleFieldsByteOffsets.add(featuresByteOffset - 1);
			Collections.sort(preambleFieldsCharOffsets);
			preambleFieldsCharOffsets.add(featuresByteOffset - 1);
		}
	}

	/**
	 * Does a lazy parse (see above) of the preamble. If the preamble has been
	 * previously and successfully parsed, this method has no effect and returns
	 * immediately.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before retrieving any
	 * data in the preamble entries.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankPreambleException
	 *             thrown if the file sections have not been successfully
	 *             parsed, if the preamble cannot be found (<code>LOCUS</code>
	 *             field is missing), or if a preamble entry is found and it
	 *             does not match the GenBank specification.
	 * 
	 * @since 0.4
	 */
	public void parsePreamble() throws FileNotFoundException,
			GenBankPreambleException, IOException {
		// Lazy variant: delegate to the flag-taking overload with the force
		// flag switched off.
		final boolean forceReparse = false;
		parsePreamble(forceReparse);
	}

	/**
	 * Returns a <code>List</code> with all the preamble entries in the same
	 * order as they are found in the GenBank file.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankPreambleException
	 *             thrown if the preamble has not been successfully parsed or if
	 *             a GenBank preamble entry is found and it does not match the
	 *             GenBank specification.
	 * 
	 * @see GenBankPreambleEntry
	 * 
	 * @since 0.1
	 */
	public List<GenBankPreambleEntry> getPreambleEntries()
			throws FileNotFoundException, IOException, GenBankPreambleException {
		if (!preambleParsed) {
			// preambleParsed guarantees the correctness of the preamble, no
			// more checks needed
			throw new GenBankPreambleException(
					"The preamble has not been parsed yet.");
		}

		List<GenBankPreambleEntry> preambleEntries = new ArrayList<GenBankPreambleEntry>();
		// Group 1: field name (first run of non-blank characters); group 2:
		// everything after it. DOTALL lets group 2 span continuation lines.
		Matcher preambleFieldEntryMatcher = Pattern.compile("^(\\S+)\\s+(.*)$",
				Pattern.DOTALL).matcher("");

		// Get the channel from the File argument
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			// The last element of preambleFieldsByteOffsets is only the end
			// marker of the final entry, hence the "- 1"
			for (int i = 0; i < preambleFieldsByteOffsets.size() - 1; i++) {
				long entryByteOffset = preambleFieldsByteOffsets.get(i);
				// An entry spans from its own offset to the next offset in
				// the list
				int fieldEntryByteSize = (int) (preambleFieldsByteOffsets
						.get(i + 1) - entryByteOffset);

				// Read exactly the bytes of this entry. A single read() may
				// return fewer bytes than requested, so loop until the buffer
				// is full or the file ends.
				ByteBuffer entryBytes = ByteBuffer.allocate(fieldEntryByteSize);
				inFC.position(entryByteOffset); // IOException
				while (entryBytes.hasRemaining()
						&& inFC.read(entryBytes) != -1) { // IOException
					// keep filling entryBytes
				}
				entryBytes.flip();
				// Decode the whole entry in one go so that a multi-byte
				// character is never split between two decode calls
				String fieldEntry = fileCharset.decode(entryBytes).toString();

				// Match the text against a regex to capture the field and data
				preambleFieldEntryMatcher.reset(fieldEntry);
				if (!preambleFieldEntryMatcher.matches()
						|| !preambleFields.contains(preambleFieldEntryMatcher
								.group(1).trim())) {
					throw new GenBankPreambleException(fieldEntry
							+ " does not look like a GenBank preamble entry.");
				}
				preambleEntries.add(new GenBankPreambleEntry(
						preambleFieldEntryMatcher.group(1).trim(),
						preambleFieldEntryMatcher.group(2).trim()));
			}
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would hand back a silently truncated
			// list
			inFC.close(); // IOException
		}
		return preambleEntries;
	}

	/**
	 * Returns a <code>List</code> with all the preamble entries in the same
	 * order as they are found in the GenBank file which match a preamble field
	 * name.
	 * 
	 * @param preambleField
	 *            name of the field entries that are returned.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankPreambleException
	 *             thrown if the preamble has not been successfully parsed, if
	 *             <code>preambleField</code> does not match the GenBank
	 *             specification or if a GenBank preamble entry is found and it
	 *             does not match the GenBank specification.
	 * 
	 * @see GenBankPreambleEntry
	 * 
	 * @since 0.1
	 */
	public List<GenBankPreambleEntry> getPreambleEntries(String preambleField)
			throws IOException, GenBankPreambleException {
		if (!preambleParsed) {
			throw new GenBankPreambleException(
					"The preamble has not been parsed yet.");
		}

		if (!preambleFields.contains(preambleField)) {
			throw new GenBankPreambleException(preambleField
					+ " is not a GenBank field");
		}

		List<GenBankPreambleEntry> preambleEntries = new ArrayList<GenBankPreambleEntry>();
		// Group 1: the requested field name, matched literally (quoted so no
		// character of the name is interpreted as regex syntax) and ignoring
		// case; group 2: the data. DOTALL lets group 2 span continuation
		// lines.
		Matcher preambleFieldEntryMatcher = Pattern.compile(
				"^(" + Pattern.quote(preambleField) + ")\\s+(.*)$",
				Pattern.CASE_INSENSITIVE | Pattern.DOTALL).matcher("");

		// Get the channel from the File argument
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			for (long pfbo : preambleFieldsByteOffsetsMap.get(preambleField)) {
				// Locate this entry in the general offset list; the element
				// that follows it marks where the entry ends
				int fieldIndex = preambleFieldsByteOffsets.indexOf(pfbo);
				int fieldEntryByteSize = (int) (preambleFieldsByteOffsets
						.get(fieldIndex + 1) - preambleFieldsByteOffsets
						.get(fieldIndex));

				// Read exactly the bytes of this entry. A single read() may
				// return fewer bytes than requested, so loop until the buffer
				// is full or the file ends.
				ByteBuffer entryBytes = ByteBuffer.allocate(fieldEntryByteSize);
				inFC.position(pfbo); // IOException
				while (entryBytes.hasRemaining()
						&& inFC.read(entryBytes) != -1) { // IOException
					// keep filling entryBytes
				}
				entryBytes.flip();
				// Decode the whole entry in one go so that a multi-byte
				// character is never split between two decode calls
				String fieldEntry = fileCharset.decode(entryBytes).toString();

				// Match the text against a regex to capture the field and data
				preambleFieldEntryMatcher.reset(fieldEntry);
				if (!preambleFieldEntryMatcher.matches()) {
					throw new GenBankPreambleException(fieldEntry
							+ " does not look like a GenBank field");
				}
				preambleEntries.add(new GenBankPreambleEntry(
						preambleFieldEntryMatcher.group(1).trim(),
						preambleFieldEntryMatcher.group(2).trim()));
			}
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would hand back a silently truncated
			// list
			inFC.close(); // IOException
		}
		return preambleEntries;
	}

	/**
	 * Returns a preamble entry in the preamble.
	 * 
	 * @param fieldIndex
	 *            position of the returned entry within the preamble.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankPreambleException
	 *             thrown if the preamble has not been successfully parsed, if
	 *             <code>fieldIndex</code> is negative or over the number of
	 *             entries (-1) or if a GenBank preamble entry is found and it
	 *             does not match the GenBank specification.
	 * 
	 * @see GenBankPreambleEntry
	 * 
	 * @since 0.1
	 */
	public GenBankPreambleEntry getPreambleEntry(int fieldIndex)
			throws IOException, GenBankPreambleException {
		if (!preambleParsed) {
			throw new GenBankPreambleException(
					"The preamble has not been parsed yet.");
		}

		// The last element of preambleFieldsByteOffsets is only the end marker
		// of the final entry, so the highest valid index is size() - 2
		if (fieldIndex < 0 || fieldIndex > preambleFieldsByteOffsets.size() - 2) {
			throw new GenBankPreambleException(fieldIndex
					+ " is not a valid field index");
		}

		long entryByteOffset = preambleFieldsByteOffsets.get(fieldIndex);
		// An entry spans from its own offset to the next offset in the list
		int fieldEntryByteSize = (int) (preambleFieldsByteOffsets
				.get(fieldIndex + 1) - entryByteOffset);

		// Get the channel from the File argument
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			// Positioning happens inside the try block so the channel is
			// closed even if it fails
			inFC.position(entryByteOffset); // IOException

			// Read exactly the bytes of this entry. A single read() may return
			// fewer bytes than requested, so loop until the buffer is full or
			// the file ends.
			ByteBuffer entryBytes = ByteBuffer.allocate(fieldEntryByteSize);
			while (entryBytes.hasRemaining() && inFC.read(entryBytes) != -1) { // IOException
				// keep filling entryBytes
			}
			entryBytes.flip();
			// Decode the whole entry in one go so that a multi-byte character
			// is never split between two decode calls
			String fieldEntry = fileCharset.decode(entryBytes).toString();

			// Match the text against a regex to capture the field and data
			Matcher preambleFieldEntryMatcher = Pattern.compile(
					"^(\\S+)\\s+(.*)$",
					Pattern.CASE_INSENSITIVE | Pattern.DOTALL).matcher(
					fieldEntry);
			if (!preambleFieldEntryMatcher.matches()) {
				throw new GenBankPreambleException(fieldEntry
						+ " does not look like a GenBank field");
			}
			return new GenBankPreambleEntry(preambleFieldEntryMatcher
					.group(1).trim(), preambleFieldEntryMatcher.group(2)
					.trim());
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would make this method return null
			inFC.close(); // IOException
		}
	}

	/**
	 * Parses a GenBank file&rsquo;s FEATURES section (see above). Using this
	 * method <code>GenBankReader</code> objects can keep track of the byte and
	 * char offset of each feature and their data.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before retrieving any
	 * feature or qualifier.
	 * 
	 * @param forceReparse
	 *            whether the FEATURES should be parsed even if they have been
	 *            properly parsed before. If <code>true</code>, the FEATURES
	 *            are forced to be parsed; if <code>false</code> and they have
	 *            already been successfully parsed it keeps the previous
	 *            positions of each <code>GenBankFeature</code>.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankFeaturesException
	 *             thrown if the file sections have not been successfully parsed
	 *             or if the FEATURES field cannot be found before ORIGIN.
	 * 
	 * @since 0.1
	 */
	public void parseFeatures(boolean forceReparse)
			throws GenBankFeaturesException, FileNotFoundException, IOException {
		// Lazy behaviour: keep the result of a previous successful parse
		// unless the caller explicitly asks for a reparse
		if (!forceReparse && featuresParsed) {
			return;
		}

		if (!featuresFound || !originFound) {
			throw new GenBankFeaturesException(
					"The file has not been successfully parsed.");
		}

		if ((locusByteOffset > featuresByteOffset && locusByteOffset < originByteOffset)
				|| (originByteOffset < featuresByteOffset)) {
			throw new GenBankFeaturesException(
					"The FEATURES cannot be found before the ORIGIN section.");
		}

		// Start from a clean slate. Offsets are only ever appended below, so
		// without this a forced reparse (or a retry after a failed parse)
		// would leave duplicated offsets and several end markers in the lists.
		featuresParsed = false;
		for (String fk : featureKeys) {
			featureKeysByteOffsetsMap.get(fk).clear();
			featureKeysCharOffsetsMap.get(fk).clear();
		}
		featureKeysByteOffsets.clear();
		featureKeysCharOffsets.clear();

		// Patterns are loop invariant: compile them once, not once per buffer
		Pattern featuresHeaderPattern = Pattern.compile(
				"^\\s*FEATURES\\s+Location/Qualifiers\\s*$",
				Pattern.MULTILINE);
		Pattern featureKeyPattern = Pattern.compile(
				"(^\\s{5}\\S+)\\s+(.*)$", Pattern.MULTILINE);
		Charset utf8 = Charset.forName("UTF-8");

		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

		// To keep track of the offset within the file (and advance to FEATURES)
		long fileByteOffset = featuresByteOffset;
		long fileCharOffset = featuresCharOffset;
		// Last read character and number of bytes used to encode it
		char c;
		long cBytes = 0;

		// Whether the FEATURES header line has been seen in this run (kept
		// apart from the featuresFound field checked above)
		boolean headerFound = false;

		try {
			// Positioning happens inside the try block so the channel is
			// closed even if it fails
			inFC.position(fileByteOffset); // IOException

			// Read-decode the file's ByteBuffer in the loop
			while (inFC.read(bBuffer) != -1
					&& fileByteOffset < originByteOffset) // IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (utf8.equals(fileCharset)
						&& inFC.size() - inFC.position() > 0) // IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte
					// character
					inFC.position(inFC.position()
							- UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				// Set file channel position to last newline character to ensure
				// a feature is not split up
				if (inFC.size() - inFC.position() > 0) {
					int discard = UTF8BufferTrimmer.endTrimNewLine(bBuffer,
							fileCharset, fileLineSeparatorFormat);
					inFC.position(inFC.position() - discard);
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();

				// Make sure the header is there (only done in the first
				// iteration)
				if (!headerFound) {
					Matcher featuresHeaderMatcher = featuresHeaderPattern
							.matcher(cBuffer);
					if (featuresHeaderMatcher.lookingAt()) {
						headerFound = true;
					} else {
						throw new GenBankFeaturesException(
								"FEATURES header is missing.");
					}
				}

				// Everything's fine. Proceed and find feature keys in this
				// buffer
				List<Long> thisBufferFeatureKeyCharOffsets = new ArrayList<Long>();
				List<String> thisBufferFeatureKeyStrings = new ArrayList<String>();
				Matcher featureKeyMatcher = featureKeyPattern.matcher(cBuffer);
				// Find all the features in this buffer as char offsets
				while (featureKeyMatcher.find()) {
					thisBufferFeatureKeyStrings.add(featureKeyMatcher.group(1)
							.trim());
					thisBufferFeatureKeyCharOffsets.add(fileCharOffset
							+ featureKeyMatcher.start());
				}

				// Keep char and byte count in the file
				while (cBuffer.hasRemaining()
						&& fileByteOffset < originByteOffset) {
					c = cBuffer.get();
					// NOTE(review): chars are encoded one at a time, so a
					// character outside the BMP (a surrogate pair) would
					// presumably be miscounted here - confirm if such input
					// must be supported
					cBytes = fileCharset.encode(String.valueOf(c)).limit();

					// Check for the feature key char offsets to see if we're
					// there and update the byte offsets
					int featureKeyIndex = Collections.binarySearch(
							thisBufferFeatureKeyCharOffsets, fileCharOffset);
					if (featureKeyIndex > -1) {
						String feature = thisBufferFeatureKeyStrings
								.remove(featureKeyIndex);
						// If this feature is not recognised get out
						if (featureKeysByteOffsetsMap.get(feature) == null) {
							throw new GenBankFeaturesException(
									feature
											+ " is not a GenBank 181.0 standard feature");
						}
						// Retrieve this feature char/byte offset and add it to
						// the hash table
						long featureCharOffset = thisBufferFeatureKeyCharOffsets
								.remove(featureKeyIndex);
						// Do a case insensitive match against the HashMap
						for (String fk : featureKeys) {
							if (feature.equalsIgnoreCase(fk)) {
								feature = fk;
								break;
							}
						}
						featureKeysCharOffsetsMap.get(feature).add(
								featureCharOffset);
						featureKeysByteOffsetsMap.get(feature).add(
								fileByteOffset);
					}
					// Update current byte and character count
					fileByteOffset += cBytes;
					fileCharOffset++;
				}
			}
			featuresParsed = true;
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would hide a failed parse from the
			// caller
			inFC.close();
			/*
			 * Ordering might not be necessary if we have a *guarantee* that the
			 * map entries are inserted in the same order that they are found.
			 * It's reasonable to think they are, but the extra cost is worth
			 * the safety.
			 */
			// For each feature key...
			for (String fk : featureKeys) {
				// ... order the maps where it appears using the byte offset...
				Collections.sort(featureKeysByteOffsetsMap.get(fk));
				Collections.sort(featureKeysCharOffsetsMap.get(fk));
				// ... and add the offsets to the general lists
				featureKeysByteOffsets
						.addAll(featureKeysByteOffsetsMap.get(fk));
				featureKeysCharOffsets
						.addAll(featureKeysCharOffsetsMap.get(fk));
			}
			// Order the lists and add a final offset where ORIGIN begins
			Collections.sort(featureKeysByteOffsets);
			featureKeysByteOffsets.add(originByteOffset - 1);
			Collections.sort(featureKeysCharOffsets);
			featureKeysCharOffsets.add(originCharOffset - 1);
		}
	}

	/**
	 * Does a lazy parse of a GenBank file&rsquo;s FEATURES section (see above).
	 * Using this method <code>GenBankReader</code> objects can keep track of
	 * the byte and char offset of each feature and their data.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before retrieving any
	 * feature or qualifier.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankFeaturesException
	 *             thrown if the file sections have not been successfully parsed
	 *             or if the FEATURES field cannot be found before ORIGIN.
	 * 
	 * @since 0.1
	 */
	public void parseFeatures() throws FileNotFoundException,
			GenBankFeaturesException, IOException {
		// Lazy variant: with the force flag off, the delegate returns at once
		// when the FEATURES have already been parsed successfully.
		final boolean forceReparse = false;
		parseFeatures(forceReparse);
	}

	/**
	 * Returns a <code>List</code> with all the features in the same order as
	 * they are found in the GenBank file.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankFeaturesException
	 *             thrown if the FEATURES have not been successfully parsed or
	 *             if a GenBank feature is found and it does not match the
	 *             GenBank specification.
	 * 
	 * @see GenBankFeature
	 * 
	 * @since 0.1
	 */
	public List<GenBankFeatureRecord> getFeatures() throws IOException,
			GenBankFeaturesException {
		if (!featuresParsed) {
			throw new GenBankFeaturesException(
					"The features have not been parsed yet.");
		}

		List<GenBankFeatureRecord> featureRecords = new ArrayList<GenBankFeatureRecord>();
		// Group 1: feature key after the five leading blanks; group 2:
		// location (everything up to the first '/'); group 3: the rest of the
		// feature text. DOTALL lets the groups span several lines.
		Matcher featureMatcher = Pattern.compile(
				"^\\s{5}(\\S+)\\s+([^/]+)(.*)$", Pattern.DOTALL).matcher("");

		// Get the channel from the File argument
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			// The last element of featureKeysByteOffsets is only the end
			// marker of the final feature, hence the "- 1"
			for (int i = 0; i < featureKeysByteOffsets.size() - 1; i++) {
				long featureByteOffset = featureKeysByteOffsets.get(i);
				// A feature spans from its own offset to the next offset in
				// the list
				int featureByteSize = (int) (featureKeysByteOffsets
						.get(i + 1) - featureByteOffset);

				// Read exactly the bytes of this feature. A single read() may
				// return fewer bytes than requested, so loop until the buffer
				// is full or the file ends.
				ByteBuffer featureBytes = ByteBuffer.allocate(featureByteSize);
				inFC.position(featureByteOffset); // IOException
				while (featureBytes.hasRemaining()
						&& inFC.read(featureBytes) != -1) { // IOException
					// keep filling featureBytes
				}
				featureBytes.flip();
				// Decode the whole feature in one go so that a multi-byte
				// character is never split between two decode calls
				String featureText = fileCharset.decode(featureBytes)
						.toString();

				// Match the text against a regex to capture the feature,
				// location and qualifiers
				featureMatcher.reset(featureText);
				if (!featureMatcher.matches()
						|| !featureKeys
								.contains(featureMatcher.group(1).trim())) {
					throw new GenBankFeaturesException(featureText
							+ " does not look like a GenBank feature");
				}
				featureRecords.add(new GenBankFeatureRecord(
						featureMatcher.group(1).trim(),
						featureMatcher.group(2).trim(),
						featureMatcher.group(3).trim()));
			}
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would hand back a silently truncated
			// list
			inFC.close(); // IOException
		}
		return featureRecords;
	}

	/**
	 * Returns a <code>List</code> with all the features in the same order as
	 * they are found in the GenBank file which match a feature key.
	 * 
	 * @param featureKey
	 *            key of the features that are returned.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankFeaturesException
	 *             thrown if the FEATURES have not been successfully parsed, if
	 *             <code>featureKey</code> does not match the GenBank
	 *             specification or if a GenBank feature is found and it does
	 *             not match the GenBank specification.
	 * 
	 * @see GenBankFeature
	 * 
	 * @since 0.1
	 */
	public List<GenBankFeatureRecord> getFeatures(String featureKey)
			throws IOException, GenBankFeaturesException {
		if (!featuresParsed) {
			throw new GenBankFeaturesException(
					"The features have not been parsed yet.");
		}

		if (!featureKeys.contains(featureKey)) {
			throw new GenBankFeaturesException(featureKey
					+ " is not a GenBank feature");
		}

		List<GenBankFeatureRecord> featureRecords = new ArrayList<GenBankFeatureRecord>();
		// Group 1: the requested feature key, matched literally (quoted so no
		// character of the key is interpreted as regex syntax) and ignoring
		// case; group 2: location (everything up to the first '/'); group 3:
		// the rest of the feature text. DOTALL lets the groups span several
		// lines.
		Matcher featureMatcher = Pattern.compile(
				"^\\s{5}(" + Pattern.quote(featureKey) + ")\\s+([^/]+)(.*)$",
				Pattern.CASE_INSENSITIVE | Pattern.DOTALL).matcher("");

		// Get the channel from the File argument
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			for (long fkbo : featureKeysByteOffsetsMap.get(featureKey)) {
				// Locate this feature in the general offset list; the element
				// that follows it marks where the feature ends
				int featureIndex = featureKeysByteOffsets.indexOf(fkbo);
				int featureByteSize = (int) (featureKeysByteOffsets
						.get(featureIndex + 1) - featureKeysByteOffsets
						.get(featureIndex));

				// Read exactly the bytes of this feature. A single read() may
				// return fewer bytes than requested, so loop until the buffer
				// is full or the file ends.
				ByteBuffer featureBytes = ByteBuffer.allocate(featureByteSize);
				inFC.position(fkbo); // IOException
				while (featureBytes.hasRemaining()
						&& inFC.read(featureBytes) != -1) { // IOException
					// keep filling featureBytes
				}
				featureBytes.flip();
				// Decode the whole feature in one go so that a multi-byte
				// character is never split between two decode calls
				String featureText = fileCharset.decode(featureBytes)
						.toString();

				// Match the text against a regex to capture the feature,
				// location and qualifiers
				featureMatcher.reset(featureText);
				if (!featureMatcher.matches()) {
					throw new GenBankFeaturesException(featureText
							+ " does not look like a GenBank feature");
				}
				featureRecords.add(new GenBankFeatureRecord(
						featureMatcher.group(1).trim(),
						featureMatcher.group(2).trim(),
						featureMatcher.group(3).trim()));
			}
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would hand back a silently truncated
			// list
			inFC.close(); // IOException
		}
		return featureRecords;
	}

	/**
	 * Returns a feature from the FEATURES section.
	 * 
	 * @param featureIndex
	 *            position of the returned feature within the FEATURES section.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankFeaturesException
	 *             thrown if the FEATURES have not been successfully parsed, if
	 *             <code>featureIndex</code> is negative or over the number of
	 *             features (-1) or if a GenBank feature is found and it does
	 *             not match the GenBank specification.
	 * 
	 * @see GenBankFeature
	 * 
	 * @since 0.1
	 */
	public GenBankFeatureRecord getFeature(int featureIndex)
			throws FileNotFoundException, IOException, GenBankFeaturesException {
		if (!featuresParsed) {
			throw new GenBankFeaturesException(
					"The features have not been parsed yet.");
		}

		// The last element of featureKeysByteOffsets is only the end marker of
		// the final feature, so the highest valid index is size() - 2
		if (featureIndex < 0
				|| featureIndex > featureKeysByteOffsets.size() - 2) {
			throw new GenBankFeaturesException(featureIndex
					+ " is not a valid feature index");
		}

		long featureByteOffset = featureKeysByteOffsets.get(featureIndex);
		// A feature spans from its own offset to the next offset in the list
		int featureByteSize = (int) (featureKeysByteOffsets
				.get(featureIndex + 1) - featureByteOffset);

		// Group 1: feature key after the five leading blanks; group 2:
		// location (everything up to the first '/'); group 3: the rest of the
		// feature text. DOTALL lets the groups span several lines.
		Matcher featureMatcher = Pattern.compile(
				"^\\s{5}(\\S+)\\s+([^/]+)(.*)$", Pattern.DOTALL).matcher("");

		// Get the channel from the File argument
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			// Positioning happens inside the try block so the channel is
			// closed even if it fails
			inFC.position(featureByteOffset); // IOException

			// Read exactly the bytes of this feature. A single read() may
			// return fewer bytes than requested, so loop until the buffer is
			// full or the file ends.
			ByteBuffer featureBytes = ByteBuffer.allocate(featureByteSize);
			while (featureBytes.hasRemaining()
					&& inFC.read(featureBytes) != -1) { // IOException
				// keep filling featureBytes
			}
			featureBytes.flip();
			// Decode the whole feature in one go so that a multi-byte
			// character is never split between two decode calls
			String featureText = fileCharset.decode(featureBytes).toString();

			// Match the text against a regex to capture the feature, location
			// and qualifiers
			featureMatcher.reset(featureText);
			if (!featureMatcher.matches()
					|| !featureKeys.contains(featureMatcher.group(1).trim())) {
				throw new GenBankFeaturesException(featureText
						+ " does not look like a GenBank feature");
			}
			return new GenBankFeatureRecord(featureMatcher.group(1).trim(),
					featureMatcher.group(2).trim(), featureMatcher.group(3)
							.trim());
		} finally {
			// IOExceptions are deliberately not caught: the method declares
			// them, and swallowing one would make this method return null
			inFC.close(); // IOException
		}
	}

	/**
	 * Parses a GenBank file&rsquo;s ORIGIN section (see above). Using this
	 * method <code>GenBankReader</code> objects can keep track of the byte and
	 * char offset of the beginning of the sequence.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before retrieving any
	 * sequence data.
	 * 
	 * @param forceReparse
	 *            whether the ORIGIN should be parsed even if it has been
	 *            properly parsed before. If <code>true</code>, the ORIGIN is
	 *            forced to be parsed; if <code>false</code> and the ORIGIN has
	 *            already been successfully parsed it keeps the previous
	 *            starting and ending positions of the sequence.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankOriginException
	 *             thrown if the ORIGIN header is missing, if a line is not a
	 *             valid GenBank ORIGIN sequence line, if the sequence length
	 *             in ORIGIN does not match the length announced in
	 *             <code>LOCUS</code>, or if the preamble cannot be read to
	 *             obtain the <code>LOCUS</code> entry.
	 * 
	 * @since 0.1
	 */
	public void parseOrigin(boolean forceReparse)
			throws GenBankOriginException, FileNotFoundException, IOException {
		if (!forceReparse && originParsed) {
			return;
		}
		// From here on the ORIGIN is (re)parsed from scratch: if a forced
		// reparse fails, the reader must not keep reporting the ORIGIN as
		// successfully parsed
		originParsed = false;

		boolean originHeaderFound = false;
		boolean endFound = false;
		String lastLine = "";

		// Matchers are created once and reset for every line
		Matcher originHeaderMatcher = Pattern.compile(
				"^\\s*ORIGIN\\s*(.*)$").matcher("");
		Matcher originSequenceMatcher = Pattern.compile("^\\s*(\\d+)\\s(.*$)")
				.matcher("");
		Matcher endMatcher = Pattern.compile("^\\s*//\\s*$").matcher("");
		// Compare charsets by value, not by reference
		boolean isUtf8 = Charset.forName("UTF-8").equals(fileCharset);

		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			// Positioning happens inside the try so the channel is closed if
			// it fails
			inFC.position(originByteOffset);
			ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

			// Read-decode the file's ByteBuffer in the loop
			while (inFC.position() < inFC.size() && inFC.read(bBuffer) != -1) // IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (isUtf8 && inFC.size() - inFC.position() > 0) // IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte
					// character: rewind the channel by the value returned by
					// the trimmer (presumably the number of bytes it removed)
					inFC.position(inFC.position()
							- UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				// Set file channel position to last newline character to ensure
				// a line is not split up between two buffers
				if (inFC.size() - inFC.position() > 0) {
					int discard = UTF8BufferTrimmer.endTrimNewLine(bBuffer,
							fileCharset, fileLineSeparatorFormat);
					inFC.position(inFC.position() - discard);
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();

				// Proceed and check sequence lines in this buffer
				for (String line : cBuffer.toString().split("(?m)$")) {
					// Make sure the header is there (done only for the first
					// line of the section)
					if (!originHeaderFound) {
						originHeaderMatcher.reset(line);
						if (!originHeaderMatcher.matches()) {
							throw new GenBankOriginException(
									"ORIGIN header is missing. "
											+ "Did you remember to call scanFields() first?");
						}
						originHeaderFound = true;
						continue;
					}

					// Check each line...
					originSequenceMatcher.reset(line);
					if (originSequenceMatcher.matches()) {
						// Keep the last line to verify that the length matches
						// with the header info
						lastLine = line;
					} else if (line.trim().length() != 0) {
						// The only non-blank, non-sequence line allowed is a
						// single "//" terminator
						endMatcher.reset(line);
						if (!endFound && endMatcher.matches()) {
							endFound = true;
						} else {
							throw new GenBankOriginException(
									line
											+ " is not a correct GenBank ORIGIN sequence line");
						}
					}
					// else -- ignore blank lines
				}
			}

			// Compare length of the sequence in ORIGIN with the length
			// announced in LOCUS
			originSequenceMatcher.reset(lastLine);
			if (!originSequenceMatcher.matches()) {
				// This should never happen...
				throw new GenBankOriginException(
						"Last line in ORIGIN is not a valid GenBank sequence line");
			}
			// Length = number at the start of the last line + residues in
			// that line - 1
			originSequenceLength = Integer.parseInt(originSequenceMatcher
					.group(1))
					+ originSequenceMatcher.group(2).replaceAll("\\s", "")
							.trim().length() - 1;

			// Stays 0 if LOCUS states neither a "bp" nor an "aa" length
			int locusSequenceLength = 0;
			String locusData = getPreambleEntries("LOCUS").get(0).fieldData
					.get(0);
			Matcher locusBPLengthMatcher = Pattern.compile(
					"^.*\\s+(\\d+)\\s+bp\\s+.*$").matcher(locusData);
			Matcher locusAALengthMatcher = Pattern.compile(
					"^.*\\s+(\\d+)\\s+aa\\s+.*$").matcher(locusData);
			if (locusBPLengthMatcher.matches()) {
				locusSequenceLength = Integer.parseInt(locusBPLengthMatcher
						.group(1));
			} else if (locusAALengthMatcher.matches()) {
				locusSequenceLength = Integer.parseInt(locusAALengthMatcher
						.group(1));
			}

			if (originSequenceLength != locusSequenceLength) {
				throw new GenBankOriginException(
						"Sequence length in LOCUS and length in ORIGIN do not match");
			}
			// Sequence lengths match, all fine
			originParsed = true;
		} catch (GenBankPreambleException exception) {
			throw new GenBankOriginException("From GenBankPreambleException: "
					+ exception.getMessage());
		} finally {
			inFC.close();
		}
	}

	/**
	 * Does a lazy parse of a GenBank file&rsquo;s ORIGIN section (see above).
	 * Using this method <code>GenBankReader</code> objects can keep track of
	 * the byte and char offset of the beginning of the sequence.
	 * <p>
	 * Note: this method <strong>must</strong> be invoked before retrieving any
	 * sequence data.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws GenBankOriginException
	 *             thrown if the ORIGIN header is missing, if a line is not a
	 *             valid GenBank ORIGIN sequence line, if the sequence length
	 *             in ORIGIN does not match the length announced in
	 *             <code>LOCUS</code>, or if the preamble cannot be read to
	 *             obtain the <code>LOCUS</code> entry.
	 * 
	 * @since 0.4
	 */
	public void parseOrigin() throws FileNotFoundException,
			GenBankOriginException, IOException {
		// Lazy variant: an ORIGIN that has already been parsed successfully
		// is left untouched
		final boolean forceReparse = false;
		parseOrigin(forceReparse);
	}

	/**
	 * Returns the sequence found in the ORIGIN section.
	 * 
	 * @throws FileNotFoundException
	 *             thrown if the GenBank file cannot be found.
	 * @throws IOException
	 *             thrown if the GenBank file cannot be read or closed after
	 *             reading it.
	 * @throws InvalidSequenceCharacterException
	 *             thrown if an invalid character is found in the ORIGIN
	 *             sequence.
	 * @throws GenBankOriginException
	 *             thrown if the ORIGIN has not been successfully parsed or if
	 *             an incorrectly formatted line is found.
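	 * @throws GenBankPreambleException
	 *             thrown if the <code>LOCUS</code> entry does not state the
	 *             sequence type (<code>bp</code> or <code>aa</code>) after
	 *             its length.
	 * @throws GenBankOriginSequenceTooLongException
	 *             thrown if the ORIGIN sequence is too long to be returned,
	 *             e.g. longer than <code>Integer.MAX_VALUE</code>.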
	 * 
	 * @see DNASequence
	 * @see ProteinSequence
	 * 
	 * @since 0.3
	 */
	public BioSequence getOriginSequence()
			throws InvalidSequenceCharacterException, FileNotFoundException,
			IOException, GenBankOriginException, GenBankPreambleException,
			GenBankOriginSequenceTooLongException {
		if (!originParsed) {
			throw new GenBankOriginException("ORIGIN has not been parsed yet");
		}

		if (originSequenceLength > Integer.MAX_VALUE) {
			throw new GenBankOriginSequenceTooLongException(
					originSequenceLength + " ORIGIN sequence too long.");
		}

		String description = getPreambleEntries("DEFINITION").get(0).fieldData
				.get(0);
		String locusData = getPreambleEntries("LOCUS").get(0).fieldData.get(0);

		// The unit following the length in LOCUS tells the sequence type:
		// "bp" for nucleotides, "aa" for amino acids
		boolean isNucleotide = Pattern.compile("^.*\\s+(\\d+)\\s+bp\\s+.*$")
				.matcher(locusData).matches();
		boolean isPeptide = !isNucleotide
				&& Pattern.compile("^.*\\s+(\\d+)\\s+aa\\s+.*$")
						.matcher(locusData).matches();
		if (!isNucleotide && !isPeptide) {
			throw new GenBankPreambleException(
					"The sequence type is not featured in the preamble following its length");
		}

		// Sequences over the component length would have to be returned as
		// contigs, which is not implemented yet. Report this to the caller
		// through the declared exception rather than terminating the JVM.
		// TODO return a DNA/protein contig backed by a GenBankContigProvider
		if (originSequenceLength > IContigProvider.I_componentLength) {
			throw new GenBankOriginSequenceTooLongException(
					originSequenceLength + " ORIGIN sequence too long ("
							+ "maximum supported length is "
							+ IContigProvider.I_componentLength + ").");
		}

		BioSequence sequence;
		if (isNucleotide) {
			sequence = originToDnaSequence();
		} else {
			sequence = originToPeptideSequence();
		}
		sequence.setDescription(new StringBuilder(description));

		return sequence;
	}

	/*
	 * Used by getOriginSequence() to extract a DNA sequence from the file.
	 * 
	 * @since 0.3
	 */
	private DNASequence originToDnaSequence() throws FileNotFoundException,
			IOException, InvalidSequenceCharacterException,
			GenBankOriginException {
		// Matchers are created once and reset for every line
		Matcher originSequenceMatcher = Pattern.compile("^\\s*(\\d+)\\s(.*)$")
				.matcher("");
		// Same header pattern that parseOrigin(boolean) accepts, so a file
		// that has been parsed successfully is not rejected here (e.g. an
		// ORIGIN line without trailing whitespace)
		Matcher originMatcher = Pattern.compile("^\\s*ORIGIN\\s*(.*)$")
				.matcher("");
		Matcher endMatcher = Pattern.compile("^\\s*//\\s*$").matcher("");
		// Compare charsets by value, not by reference
		boolean isUtf8 = Charset.forName("UTF-8").equals(fileCharset);
		boolean originFound = false;
		boolean endFound = false;

		DNASequence ds = new DNASequence("",
				NucleotideSequenceDirectionality.C5_C3, "");

		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			inFC.position(originByteOffset);
			ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

			// Read-decode the file's ByteBuffer in the loop
			while (inFC.position() < inFC.size() && inFC.read(bBuffer) != -1) // IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (isUtf8 && inFC.size() - inFC.position() > 0) // IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte
					// character
					inFC.position(inFC.position()
							- UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				// Set file channel position to last newline character to
				// ensure a sequence line is not split up between two buffers
				if (inFC.size() - inFC.position() > 0) {
					int discard = UTF8BufferTrimmer.endTrimNewLine(bBuffer,
							fileCharset, fileLineSeparatorFormat);
					inFC.position(inFC.position() - discard);
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();

				// Proceed and check sequence lines in this buffer
				for (String line : cBuffer.toString().split("(?m)$")) {
					if (!originFound) {
						// Nothing but blank lines may precede the header
						originMatcher.reset(line);
						if (originMatcher.matches()) {
							originFound = true;
						} else if (!line.trim().equals("")) {
							throw new GenBankOriginException(
									line
											+ " is not a correct GenBank ORIGIN sequence line");
						}
						// else -- ignore blank lines
						continue;
					}

					// Check each line...
					originSequenceMatcher.reset(line);
					if (originSequenceMatcher.matches()) {
						// Compact residues string and insert into sequence
						String compactedOriginSequenceString = originSequenceMatcher
								.group(2).replaceAll("\\s+", "");
						for (int i = 0; i < compactedOriginSequenceString
								.length(); i++) {
							ds.add(DNABase
									.valueOf(compactedOriginSequenceString
											.charAt(i)));
						}
					} else if (line.trim().length() != 0) {
						// The only non-blank, non-sequence line allowed is a
						// single "//" terminator
						endMatcher.reset(line);
						if (!endFound && endMatcher.matches()) {
							endFound = true;
						} else {
							throw new GenBankOriginException(
									line
											+ " is not a correct GenBank ORIGIN sequence line");
						}
					}
					// else -- ignore blank lines
				}
			}
		} finally {
			// Release the file whether or not reading succeeded
			inFC.close();
		}
		return ds;
	}

	/*
	 * Used by getOriginSequence() to extract a protein sequence from the file.
	 * 
	 * @since 0.3
	 */
	private ProteinSequence originToPeptideSequence()
			throws FileNotFoundException, IOException,
			InvalidSequenceCharacterException, GenBankOriginException {
		// Matchers are created once and reset for every line
		Matcher originSequenceMatcher = Pattern.compile("^\\s*(\\d+)\\s(.*)$")
				.matcher("");
		// Same header pattern that parseOrigin(boolean) accepts, so a file
		// that has been parsed successfully is not rejected here (e.g. an
		// ORIGIN line without trailing whitespace)
		Matcher originMatcher = Pattern.compile("^\\s*ORIGIN\\s*(.*)$")
				.matcher("");
		Matcher endMatcher = Pattern.compile("^\\s*//\\s*$").matcher("");
		// Compare charsets by value, not by reference
		boolean isUtf8 = Charset.forName("UTF-8").equals(fileCharset);
		boolean originFound = false;
		boolean endFound = false;

		ProteinSequence ps = new ProteinSequence("",
				PeptideSequenceDirectionality.N_C, "");

		// Get the channel from the File argument and allocate byte buffer
		FileChannel inFC = new RandomAccessFile(filePath, "r").getChannel(); // FileNotFoundException
		try {
			inFC.position(originByteOffset);
			ByteBuffer bBuffer = ByteBuffer.allocate(I_byteBufferSize);

			// Read-decode the file's ByteBuffer in the loop
			while (inFC.position() < inFC.size() && inFC.read(bBuffer) != -1) // IOException
			{
				bBuffer.flip();
				// If not EOF and encoding is UTF-8...
				if (isUtf8 && inFC.size() - inFC.position() > 0) // IOException
				{
					// ... maybe the buffer ends at an incomplete multiple byte
					// character
					inFC.position(inFC.position()
							- UTF8BufferTrimmer.endTrimUTF8Characters(bBuffer));
				}
				// Set file channel position to last newline character to
				// ensure a sequence line is not split up between two buffers
				if (inFC.size() - inFC.position() > 0) {
					int discard = UTF8BufferTrimmer.endTrimNewLine(bBuffer,
							fileCharset, fileLineSeparatorFormat);
					inFC.position(inFC.position() - discard);
				}
				CharBuffer cBuffer = fileCharset.decode(bBuffer);
				bBuffer.clear();

				// Proceed and check sequence lines in this buffer
				for (String line : cBuffer.toString().split("(?m)$")) {
					if (!originFound) {
						// Nothing but blank lines may precede the header
						originMatcher.reset(line);
						if (originMatcher.matches()) {
							originFound = true;
						} else if (!line.trim().equals("")) {
							throw new GenBankOriginException(
									line
											+ " is not a correct GenBank ORIGIN sequence line");
						}
						// else -- ignore blank lines
						continue;
					}

					// Check each line...
					originSequenceMatcher.reset(line);
					if (originSequenceMatcher.matches()) {
						// Compact residues string and append to the sequence
						String compactedOriginSequenceString = originSequenceMatcher
								.group(2).replaceAll("\\s+", "");
						for (int i = 0; i < compactedOriginSequenceString
								.length(); i++) {
							ps.add(ps.getLength(), AminoAcid
									.valueOf(compactedOriginSequenceString
											.charAt(i)));
						}
					} else if (line.trim().length() != 0) {
						// The only non-blank, non-sequence line allowed is a
						// single "//" terminator
						endMatcher.reset(line);
						if (!endFound && endMatcher.matches()) {
							endFound = true;
						} else {
							throw new GenBankOriginException(
									line
											+ " is not a correct GenBank ORIGIN sequence line");
						}
					}
					// else -- ignore blank lines
				}
			}
		} finally {
			// Release the file whether or not reading succeeded
			inFC.close();
		}
		return ps;
	}

	/* Method left here to compare against NIO (above) */
	/*
	 * public void parseFeaturesIO() throws FileNotFoundException, IOException,
	 * GenBankFeaturesException { // Get the reader for the file and skip until
	 * FEATURES and check that it's there BufferedReader bReader = new
	 * BufferedReader(new FileReader(filePath));
	 * bReader.skip(featuresCharOffset); String line = bReader.readLine(); if
	 * (line == null ||
	 * !line.matches("^\\s*FEATURES\\s+Location/Qualifiers\\s*$")) {
	 * bReader.close(); throw new
	 * GenBankFeaturesException("FEATURES header is missing"); }
	 * 
	 * // Everything's OK: proceed with feature and qualifier lines Pattern
	 * featureKeyPattern = Pattern.compile("^(\\s{5}\\S+\\s+)(.*)$"); Pattern
	 * qualifierPattern = Pattern.compile("^\\s{21}(.*)$"); Matcher
	 * featureKeyMatcher = featureKeyPattern.matcher(""); Matcher
	 * qualifierMatcher = qualifierPattern.matcher("");
	 * 
	 * while ((line = bReader.readLine()) != null) { // Break when we reach the
	 * end of FEATURES if (line.matches("^ORIGIN.*$")) { break; }
	 * 
	 * featureKeyMatcher.reset(line); // A wild feature appears! if
	 * (featureKeyMatcher.matches()) { String featureKey =
	 * featureKeyMatcher.group(1).trim(); StringBuilder location = new
	 * StringBuilder(featureKeyMatcher.group(2).trim()); // Use while to read
	 * all the qualifiers... while ((line = bReader.readLine()) != null) {
	 * featureKeyMatcher.reset(line); // A new feature starts... if
	 * (featureKeyMatcher.matches()) {
	 * 
	 * } } }
	 * 
	 * qualifierMatcher.reset(line); String feature; StringBuilder location;
	 * List<String> qualifiers; if (featureKeyMatcher.matches()) {
	 * System.out.println("Feature: " + featureKeyMatcher.group(1).trim());
	 * System.out.println("1st qualifier: " +
	 * featureKeyMatcher.group(2).trim()); } else if
	 * (qualifierMatcher.matches()) { System.out.println("Qualifier: " + line);
	 * } else { System.out.println("None: " + line); System.in.read(); } } }
	 */
}
