/*
 * @author		Alfonso Muñoz-Pomer Fuentes, 
 * 				<a href="mailto:alfonso.munozpomer@biotechvana.com">
 * 				alfonso.munozpomer@biotechvana.com</a>,  
 * 				<a href="http://www.biotechvana.com">Biotechvana</a>
 *
 * @date		2011-10-11
 * 
 * @license		<a href="http://creativecommons.org/licenses/by-nc-sa/3.0/">
 * 				Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License</a>
 *
 * @copyright	Copyright Biotech Vana, S.L. 2006-2011
 */
package com.biotechvana.javabiotoolkit.text;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;

/**
 * Text utilities for UTF-8-encoded <code>String</code>s.
 * 
 * @author	<a href="mailto:alfonso.munozpomer@biotechvana.com">Alfonso Muñoz-Pomer Fuentes</a>,
 * 			<a href="http://www.biotechvana.com">Biotechvana</a>.
 *
 * @version	0.1, 2010-12-01
 */
public class UTF8BufferTrimmer
{
	/**
	 * Smallest lead byte that marks an incomplete character when found one, two or three bytes before the 
	 * <code>limit</code> (index 0, 1 and 2 respectively). In UTF-8 the lead bytes <code>0xC2</code> to 
	 * <code>0xDF</code> start two byte characters, <code>0xE0</code> to <code>0xEF</code> three byte characters and 
	 * <code>0xF0</code> to <code>0xF4</code> four byte characters.
	 */
	private static final int[] MIN_LEAD_BYTE = { 0xC2, 0xE0, 0xF0 };

	/**
	 * Largest byte value that can start a UTF-8 encoded character.
	 */
	private static final int MAX_LEAD_BYTE = 0xF4;

	/**
	 * Given a <code>ByteBuffer</code> representing a UTF-8 encoded <code>CharBuffer</code>, this method sets the 
	 * <code>limit</code> of the <code>ByteBuffer</code> to the end of the last complete character. The value returned 
	 * is the number of bytes which have been left out, i.e. the difference between the old and the new 
	 * <code>limit</code>.
	 * <p> 
	 * This method scans for the lead byte of any two, three or four byte incomplete UTF-8 character in the last, 
	 * second last and third last byte, respectively, in the buffer. Such values are:
	 * <ul>
	 * <li>Between <code>0xC2</code> and <code>0xF4</code> (both inclusive) in 
	 *     the last byte</li>
	 * <li>Between <code>0xE0</code> and <code>0xF4</code> (both inclusive) in 
	 *     the second last byte</li>
	 * <li>Between <code>0xF0</code> and <code>0xF4</code> (both inclusive) in 
	 *     the third last byte</li>
	 * </ul>
	 * The positions are examined in that order and the first match wins. Only lead bytes are inspected: the bytes 
	 * which follow them are not checked to be continuation bytes, so the result is only meaningful for well-formed 
	 * UTF-8 which has been cut at an arbitrary byte.
	 * <p>
	 * Buffers whose <code>limit</code> is lower than three (including empty buffers) are supported; positions before 
	 * the start of the buffer are simply not examined.
	 * 
	 * @param	bBuffer	buffer containing UTF-8 encoded characters.
	 * 
	 * @return	number of bytes removed from the end of the buffer by lowering its <code>limit</code>. A value 
	 * 			between 0 and 3 (both inclusive).	
	 * 
	 * @since 1.1, 2010-11-01
	 */
	public static int endTrimUTF8Characters(ByteBuffer bBuffer)
	{
		int limit = bBuffer.limit();
		int incompleteBytes = 0;
		// Never look further back than the first byte of the buffer
		int maxLookBack = Math.min(limit, MIN_LEAD_BYTE.length);
		for (int lookBack = 1 ; lookBack <= maxLookBack ; lookBack++)
		{
			// Java bytes are signed; mask to compare against the unsigned UTF-8 ranges
			int unsignedByte = bBuffer.get(limit - lookBack) & 0xFF;
			if (unsignedByte >= MIN_LEAD_BYTE[lookBack - 1] && unsignedByte <= MAX_LEAD_BYTE)
			{
				incompleteBytes = lookBack;
				break;
			}
		}
		bBuffer.limit(limit - incompleteBytes);
		return incompleteBytes;
	}
	
	/**
	 * Sets a buffer <code>limit</code> right after the last new line it contains and returns the number of bytes 
	 * which have been left out.
	 * <p>
	 * Only line separators which are encoded as exactly one byte in <code>fileCharset</code> are currently supported. 
	 * The whole buffer is scanned backwards, from the byte before the <code>limit</code> down to index 0.
	 * 
	 * @param	bBuffer				<code>Buffer</code> to process.
	 * @param	fileCharset			<code>Charset</code> used to encode the buffer.
	 * @param	lineSeparatorFormat	the format used to encode new lines.
	 * 
	 * @return	<ul>
	 * 			<li>the number of bytes between the new <code>limit</code> and the old one, if a line separator was 
	 * 			    found;</li>
	 * 			<li>0, leaving the buffer untouched, if <code>lineSeparatorFormat</code> is 
	 * 			    <code>LineSeparatorFormat.MIXED</code> or its line separator is not encoded as exactly one 
	 * 			    byte (not implemented yet);</li>
	 * 			<li>the current <code>limit</code>, leaving the buffer untouched, if the buffer contains no line 
	 * 			    separator.</li>
	 * 			</ul>
	 * 
	 * @since	1.1, 2010-12-01
	 */
	public static int endTrimNewLine(ByteBuffer bBuffer, Charset fileCharset, LineSeparatorFormat lineSeparatorFormat)
	{
		// Decided before encoding the separator, so nothing is requested from a format which is not handled
		if (lineSeparatorFormat == LineSeparatorFormat.MIXED)
		{
			// TODO check all possibilities
			return 0;
		}

		byte[] lineSeparatorBytes = lineSeparatorFormat.lineSeparator().getBytes(fileCharset);
		if (lineSeparatorBytes.length != 1)
		{
			// TODO Like below, but with additional chars
			// An empty separator also ends here instead of failing on lineSeparatorBytes[0]
			return 0;
		}

		byte newLineByte = lineSeparatorBytes[0];
		int limit = bBuffer.limit();
		for (int i = limit - 1 ; i >= 0 ; i--)
		{
			if (bBuffer.get(i) == newLineByte)
			{
				// Keep the separator itself inside the buffer
				bBuffer.limit(i + 1);
				return limit - i - 1;
			}
		}
		// No separator at all: the limit is not moved
		return limit;
	}
}
