
/* ************************************************************************************************

FILE NAME:	js_lib_string.js
PURPOSE:	custom functions to modify strings

INPUT:		as required for each function
OUTPUT:		as determined by each function

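CONTENT:	findTag ( String str, String regex[, boolean debug] )
			findEndTag ( String str, String tagName, Object startTag )
			stripHTML ( String str )
			stripAlpha ( String str )
			stripNumbers ( String str )
			stripScript ( String str )
			Trim ( String str ) / trim ( String str )
			LTrim ( String str )
			RTrim ( String str )
			isEmail ( String str )
			isHex ( String str )
			getAbsoluteURLFromFullyQualified ( String str )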


---------------------------------------------------------------------------------------------------
VERSION			DATE				AUTHOR				NOTES
---------------------------------------------------------------------------------------------------
1.0				3.12.2002		Rob Wilkerson		initial creation : many of these functions have
													existed for some time, but are just now being
													compiled into a single library file.

************************************************************************************************ */

/**
 * Retrieves information about every tag matching a start-tag pattern, together
 * with its content.  Useful for extracting or removing a tag from a given context.
 *
 * Usage :
 * To remove all <span> tags with empty class values without removing the content itself, for
 * example, the following code could be used within a JavaScript block:
 *
 * var matchingTags	= findTag ( myString, "<span[^>]*class=\"[\\s]*\"[^>]*>" );
 * for ( var i = 0; i < matchingTags.length; i++ ) {
 * 		myString = myString.replace ( matchingTags[i].outerML, matchingTags[i].innerML );
 * }
 *
 * @param		str			The string to be searched
 * @param		regex		A string or RegExp that represents the opening tag of a
 *							markup language.  It is always applied case-insensitively
 *							and globally; flags on a RegExp argument are ignored.
 * @param		debug		(optional) When truthy, every property of every match is
 *							shown through alert().
 * @return					An array of objects, one per matching start tag, in the
 *							order the start tags occur in str:
 *								innerML				the content within the tag
 *								outerML				tag content including the tag itself
 *								startTag.tagStr		the start tag text
 *								startTag.index		index of the first character in the
 *													start tag
 *								startTag.lastIndex	index of the first character after
 *													the start tag
 *								endTag				whatever findEndTag() returned for
 *													this start tag (tagStr, index,
 *													lastIndex)
 */
function findTag ( str, regex, debug )
{
	var pattern		= ( regex instanceof RegExp ) ? regex.source : regex;
	var	re			= new RegExp ( pattern, "ig" );
	var matches		= [];
	var startTag;

	/** BEGIN : find all matching tags */
	while ( ( startTag = re.exec ( str ) ) !== null ) {
		/**
		 * a zero-length match does not advance a global regex, so step over
		 * it by hand or the loop would never terminate.
		 */
		if ( startTag[0] === "" ) {
			re.lastIndex++;
			continue;
		}

		/**
		 * the element name stops at whitespace, "/" or ">" so that "<br/>"
		 * yields "br" rather than "br/".  If the match does not look like a
		 * tag at all, the whole match is passed on as the name.
		 */
		var nameMatch	= /<([^\s\/>]+)/.exec ( startTag[0] );
		var tagName		= nameMatch ? nameMatch[1] : startTag[0];
		var tag			= {};

		tag.startTag			= {};
		tag.startTag.tagStr		= startTag[0];
		tag.startTag.index		= startTag.index;
		/**
		 * the end offset of a match lives on the RegExp object; the array
		 * returned by exec() only carries "index".
		 */
		tag.startTag.lastIndex	= re.lastIndex;

		tag.endTag				= findEndTag ( str, tagName, tag.startTag );

		/** derive the end offset if the end tag came back without one */
		if ( typeof tag.endTag.lastIndex != "number" ) {
			tag.endTag.lastIndex = tag.endTag.index + tag.endTag.tagStr.length;
		}

		tag.outerML				= str.slice ( tag.startTag.index, tag.endTag.lastIndex );
		tag.innerML				= str.slice ( tag.startTag.lastIndex, tag.endTag.index );

		matches.push ( tag );
	}
	/** END : find all matching tags */

	/** BEGIN : display debug info if requested */
	if ( debug ) {
		for ( var i = 0; i < matches.length; i++ ) {
			for ( var key in matches[i] ) {
				var value = matches[i][key];

				if ( typeof value == "object" ) {
					for ( var sub in value ) {
						alert ( "tag[" + i + "]." + key + "[" + sub + "] = " + value[sub] );
					}
				}
				else {
					alert ( "tag[" + i + "]." + key + " = " + value );
				}
			}
		}
	}
	/** END : display debug info if requested */

	return matches;
}

/**
 * Finds the correct end tag for a specific element.  For example,
 * given the location of an open tag (e.g. <div>) its matching end
 * tag will be returned.  Nested tags of the same name are handled.
 *
 * Assumes that the end tag format is "</[tagName]>".
 *
 * @param		str			The string to be searched
 * @param		tagName		The element itself (e.g. "div" or "span")
 *							without the tag syntax or attributes
 * @param		startTag	Object describing the open tag:
 *								tagStr		the open tag text
 *								index		index of its first character
 *								lastIndex	index of the first character after
 *											it; derived from index and tagStr
 *											when it is not a number
 * @return					An object with tagStr (the end tag text), index
 *							(index of the end tag's "<") and lastIndex (index
 *							of the first character after the end tag).  For an
 *							empty tag (e.g. <img ... />) the start tag's own
 *							values are returned.  If no matching end tag exists,
 *							tagStr is "" and both indexes equal str.length, i.e.
 *							the element is treated as running to the end of str.
 */
function findEndTag ( str, tagName, startTag )
{
	/**
	 * drop a trailing "/" (left over from a name taken from "<br/>") and
	 * escape regex metacharacters so the name is matched literally.
	 */
	var name	= String ( tagName ).replace ( /\/+$/, "" ).replace ( /[.*+?^${}()|[\]\\]/g, "\\$&" );

	/**
	 * the lookahead makes the name end at whitespace, "/" or ">" so that a
	 * search for "b" does not pick up <br> or <body>.
	 */
	var re		= new RegExp ( "</?" + name + "(?=[\\s/>])[^>]*>", "ig" );
	var reEmpty	= new RegExp ( "<" + name + "(?=[\\s/>])[^>]*/>", "i" );
	var endTag	= {};

	/** the end offset of a match is not part of an exec() result, so it may be missing */
	var startEnd = ( typeof startTag.lastIndex == "number" )
		? startTag.lastIndex
		: startTag.index + startTag.tagStr.length;

	/** BEGIN : test for an empty tag
	 * if the start tag represents an empty tag (e.g. <img ... />)
	 * then the same string represents both tags.
	 */
	if ( reEmpty.test ( startTag.tagStr ) ) {
		endTag.tagStr		= startTag.tagStr;
		endTag.index		= startTag.index;
		endTag.lastIndex	= startEnd;

		return endTag;
	}
	/** END : test for an empty tag */

	/**
	 * searching starts after the original match, so the depth counter
	 * begins at 1 for the start tag that has already been found.
	 */
	var depth	= 1;
	var hit;
	re.lastIndex = startEnd;

	/** BEGIN : find the end tag
	 * every nested start tag increments the counter and every end tag
	 * decrements it; the end tag that brings it to 0 is the match.  The
	 * loop also stops when the string is exhausted, because a global regex
	 * resets lastIndex to 0 after a failed exec() and would otherwise
	 * rescan the string forever.
	 */
	while ( depth > 0 && ( hit = re.exec ( str ) ) !== null ) {
		if ( hit[0].charAt ( 1 ) == "/" ) {
			depth--;

			if ( depth == 0 ) {
				endTag.tagStr		= hit[0];
				endTag.index		= hit.index;
				endTag.lastIndex	= re.lastIndex;
			}
		}
		else if ( !/\/>$/.test ( hit[0] ) ) {
			/** a nested empty tag (<div/>) opens nothing, so only count real openers */
			depth++;
		}
	}
	/** END : find the end tag */

	/** no matching end tag : treat the element as running to the end of the string */
	if ( depth > 0 ) {
		endTag.tagStr		= "";
		endTag.index		= str.length;
		endTag.lastIndex	= str.length;
	}

	return endTag;
}

/**
 * Strip every "<...>" sequence (markup tags) out of a string.
 *
 * @param		str			the string to be modified
 * @return					a new string with the tags removed; if str cannot
 *							be processed (e.g. it is not a string) it is
 *							returned untouched
 */
function stripHTML ( str )
{
	var result = str;

	try {
		result = str.replace ( /<[^>]*>/ig, "" );
	}
	catch ( ignored ) {
		/** best effort : fall through and hand the input back as-is */
	}

	return result;
}

/**
 * Keep only the digits 0-9 of a string, discarding everything else.
 *
 * @param		str		the string to be modified
 * @return				a new string containing only the digits of str; if
 *						str cannot be processed (e.g. it is not a string) it
 *						is returned untouched
 */
function stripAlpha ( str )
{
	var digitsOnly = str;

	try {
		digitsOnly = str.replace ( /[^0-9]/ig, "" );
	}
	catch ( ignored ) {
		/** best effort : fall through and hand the input back as-is */
	}

	return digitsOnly;
}

/**
 * Discard the digits 0-9 from a string, keeping everything else.
 *
 * @param		str		the string to be modified
 * @return				a new string with all digits removed; if str cannot
 *						be processed (e.g. it is not a string) it is returned
 *						untouched
 */
function stripNumbers ( str )
{
	var withoutDigits = str;

	try {
		withoutDigits = str.replace ( /[0-9]/ig, "" );
	}
	catch ( ignored ) {
		/** best effort : fall through and hand the input back as-is */
	}

	return withoutDigits;
}

/**
 * Remove the characters < > $ % { } from a string.  These are commonly
 * found in scripting languages; the intent is to disable attempts to insert
 * scripts on the server.
 *
 * @param		str		the string to be modified
 * @return				a new string stripped of the selected characters; if
 *						str cannot be processed (e.g. it is not a string) it
 *						is returned untouched
 */
function stripScript ( str )
{
	var cleaned = str;

	try {
		cleaned = str.replace ( /[<>$%{}]/ig, "" );
	}
	catch ( ignored ) {
		/** best effort : fall through and hand the input back as-is */
	}

	return cleaned;
}

/**
 * Remove whitespace from the beginning and end of a string.  Whitespace
 * inside the string is left alone.
 *
 * @param		str		the string to trim
 * @return				the trimmed string; if str cannot be processed (e.g.
 *						it is not a string) it is returned untouched
 */
function Trim ( str )
{
	try {
		/**
		 * strip the leading and trailing runs separately; a single pattern
		 * that captures only non-whitespace in the middle fails to match
		 * (and so trims nothing) when the text contains interior whitespace.
		 */
		return str.replace ( /^\s+|\s+$/g, "" );
	}
	catch ( e ) {
		return str;
	}
}

/**
 * Lowercase alias for Trim(): forwards its argument to Trim() and returns
 * whatever Trim() returns.
 *
 * @param		str		the string to trim
 * @return				the value returned by Trim ( str )
 */
function trim ( str )
{
	var trimmed = Trim ( str );

	return trimmed;
}

/**
 * Remove leading whitespace from a string; trailing whitespace is kept.
 *
 * @param		str		the string to trim
 * @return				the left-trimmed string; if str cannot be processed
 *						(e.g. it is not a string) it is returned untouched
 */
function LTrim ( str )
{
	var trimmed = str;

	try {
		trimmed = str.replace ( /^\s+/g, "" );
	}
	catch ( ignored ) {
		/** best effort : fall through and hand the input back as-is */
	}

	return trimmed;
}

/**
 * Remove trailing whitespace from a string; leading whitespace is kept.
 *
 * @param		str		the string to trim
 * @return				the right-trimmed string; if str cannot be processed
 *						(e.g. it is not a string) it is returned untouched
 */
function RTrim ( str )
{
	var trimmed = str;

	try {
		trimmed = str.replace ( /\s+$/g, "" );
	}
	catch ( ignored ) {
		/** best effort : fall through and hand the input back as-is */
	}

	return trimmed;
}

/**
 * Validate email address syntax.  This function does not validate the actual
 * existence of the address mailbox, only that the address has the shape
 * local-part@domain.tld, where:
 *		- the local part is one or more dot-separated runs of letters, digits
 *		  and the characters _ ! # $ % & ' * + / = ? ^ ` { | } ~ -
 *		- the domain is one or more dot-separated labels of letters, digits
 *		  and hyphens
 *		- the top-level domain is either 2-3 letters or one of aero, coop,
 *		  info, museum, name, arpa
 *
 * NOTE(review): the list of long top-level domains is fixed; addresses under
 * any other TLD longer than three letters are rejected.  Confirm whether the
 * list should be extended.
 *
 * @param		str		the email address to check
 * @return				whether the address syntax is valid
 */
function isEmail ( str )
{
	var regex = /^[_A-Za-z0-9!#$%&'*+\/=?^`{|}~-]+(\.[_A-Za-z0-9!#$%&'*+\/=?^`{|}~-]+)*@[A-Za-z0-9-]+(\.[A-Za-z0-9-]+)*\.(([A-Za-z]{2,3})|(aero|coop|info|museum|name|arpa))$/;

	return ( regex.test ( str ) );
}

/**
 * Check whether a value is a six-digit hexadecimal color code, with or
 * without a leading "#" (e.g. "#FFaa00" or "ffaa00").
 *
 * @param		str		the color hex value
 * @return				true when str is an optional "#" followed by exactly
 *						six hexadecimal digits, false otherwise
 */
function isHex ( str )
{
	return /^#?[0-9a-fA-F]{6}$/.test ( str );
}

/**
 * Return the server-relative part of a fully qualified URL, i.e. everything
 * after "scheme://host[:port]".
 *
 *		http://example.com/a/b?c=1		->	/a/b?c=1
 *		http://example.com				->	/
 *		http://example.com?next=/home	->	/?next=/home
 *
 * @param		str		the fully qualified url string
 * @return				the absolute url string, always starting with "/".
 *						A value that does not start with "scheme://host"
 *						(including a non-string) is returned unchanged.
 */
function getAbsoluteURLFromFullyQualified ( str )
{
	/**
	 * the host stops at the first "/", "?" or "#" so that a slash inside a
	 * query string or fragment is not mistaken for the start of the path.
	 */
	var regExp	= /^[a-zA-Z][a-zA-Z0-9+.\-]*:\/\/[^\/?#]+([\/?#].*)?$/;
	var parts	= regExp.exec ( str );

	if ( !parts ) {
		return str;
	}

	/** the group is empty/undefined when nothing follows the host */
	var rest = parts[1] || "";

	/** a URL without an explicit path still refers to the site root */
	return ( rest.charAt ( 0 ) == "/" ) ? rest : "/" + rest;
}
