/* * @(#)URI.java 0.3-3 06/05/2001 * * This file is part of the HTTPClient package * Copyright (C) 1996-2001 Ronald Tschalär * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307, USA * * For questions, suggestions, bug-reports, enhancement-requests etc. * I may be contacted at: * * ronald@innovation.ch * * The HTTPClient's home page is located at: * * http://www.innovation.ch/java/HTTPClient/ * */ package HTTPClient; import java.net.URL; import java.net.MalformedURLException; import java.util.BitSet; import java.util.Hashtable; /** * This class represents a generic URI, as defined in RFC-2396. * This is similar to java.net.URL, with the following enhancements: *
The elements are always stored in escaped form. * *
While RFC-2396 distinguishes between just two forms of URI's, those that * follow the generic syntax and those that don't, this class knows about a * third form, named semi-generic, used by quite a few popular schemes. * Semi-generic syntax treats the path part as opaque, i.e. has the form * <scheme>://<authority>/<opaque> . Relative URI's of this * type are only resolved as far as absolute paths - relative paths do not * exist. * *
Ideally, java.net.URL should subclass URI. * * @see rfc-2396 * @version 0.3-3 06/05/2001 * @author Ronald Tschalär * @since V0.3-1 */ public class URI { /** * If true, then the parser will resolve certain URI's in backwards * compatible (but technically incorrect) manner. Example: * *
* base = http://a/b/c/d;p?q * rel = http:g * result = http:g (correct) * result = http://a/b/c/g (backwards compatible) ** * See rfc-2396, section 5.2, step 3, second paragraph. */ public static final boolean ENABLE_BACKWARDS_COMPATIBILITY = true; protected static final Hashtable defaultPorts = new Hashtable(); protected static final Hashtable usesGenericSyntax = new Hashtable(); protected static final Hashtable usesSemiGenericSyntax = new Hashtable(); /* various character classes as defined in the draft */ protected static final BitSet alphanumChar; protected static final BitSet markChar; protected static final BitSet reservedChar; protected static final BitSet unreservedChar; protected static final BitSet uricChar; protected static final BitSet pcharChar; protected static final BitSet userinfoChar; protected static final BitSet schemeChar; protected static final BitSet hostChar; protected static final BitSet opaqueChar; protected static final BitSet reg_nameChar; /* These are not directly in the spec, but used for escaping and * unescaping parts */ /** list of characters which must not be unescaped when unescaping a scheme */ public static final BitSet resvdSchemeChar; /** list of characters which must not be unescaped when unescaping a userinfo */ public static final BitSet resvdUIChar; /** list of characters which must not be unescaped when unescaping a host */ public static final BitSet resvdHostChar; /** list of characters which must not be unescaped when unescaping a path */ public static final BitSet resvdPathChar; /** list of characters which must not be unescaped when unescaping a query string */ public static final BitSet resvdQueryChar; /** list of characters which must not be escaped when escaping a path */ public static final BitSet escpdPathChar; /** list of characters which must not be escaped when escaping a query string */ public static final BitSet escpdQueryChar; /** list of characters which must not be escaped when escaping a fragment 
identifier */ public static final BitSet escpdFragChar; static { defaultPorts.put("http", new Integer(80)); defaultPorts.put("shttp", new Integer(80)); defaultPorts.put("http-ng", new Integer(80)); defaultPorts.put("coffee", new Integer(80)); defaultPorts.put("https", new Integer(443)); defaultPorts.put("ftp", new Integer(21)); defaultPorts.put("telnet", new Integer(23)); defaultPorts.put("nntp", new Integer(119)); defaultPorts.put("news", new Integer(119)); defaultPorts.put("snews", new Integer(563)); defaultPorts.put("hnews", new Integer(80)); defaultPorts.put("smtp", new Integer(25)); defaultPorts.put("gopher", new Integer(70)); defaultPorts.put("wais", new Integer(210)); defaultPorts.put("whois", new Integer(43)); defaultPorts.put("whois++", new Integer(63)); defaultPorts.put("rwhois", new Integer(4321)); defaultPorts.put("imap", new Integer(143)); defaultPorts.put("pop", new Integer(110)); defaultPorts.put("prospero", new Integer(1525)); defaultPorts.put("irc", new Integer(194)); defaultPorts.put("ldap", new Integer(389)); defaultPorts.put("nfs", new Integer(2049)); defaultPorts.put("z39.50r", new Integer(210)); defaultPorts.put("z39.50s", new Integer(210)); defaultPorts.put("vemmi", new Integer(575)); defaultPorts.put("videotex", new Integer(516)); defaultPorts.put("cmp", new Integer(829)); usesGenericSyntax.put("http", Boolean.TRUE); usesGenericSyntax.put("https", Boolean.TRUE); usesGenericSyntax.put("shttp", Boolean.TRUE); usesGenericSyntax.put("coffee", Boolean.TRUE); usesGenericSyntax.put("ftp", Boolean.TRUE); usesGenericSyntax.put("file", Boolean.TRUE); usesGenericSyntax.put("nntp", Boolean.TRUE); usesGenericSyntax.put("news", Boolean.TRUE); usesGenericSyntax.put("snews", Boolean.TRUE); usesGenericSyntax.put("hnews", Boolean.TRUE); usesGenericSyntax.put("imap", Boolean.TRUE); usesGenericSyntax.put("wais", Boolean.TRUE); usesGenericSyntax.put("nfs", Boolean.TRUE); usesGenericSyntax.put("sip", Boolean.TRUE); usesGenericSyntax.put("sips", Boolean.TRUE); 
usesGenericSyntax.put("sipt", Boolean.TRUE); usesGenericSyntax.put("sipu", Boolean.TRUE); /* Note: schemes which definitely don't use the generic-URI syntax * and must therefore never appear in the above list: * "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax", * "modem", "eid", "cid", "mid", "data", "ldap" */ usesSemiGenericSyntax.put("ldap", Boolean.TRUE); usesSemiGenericSyntax.put("irc", Boolean.TRUE); usesSemiGenericSyntax.put("gopher", Boolean.TRUE); usesSemiGenericSyntax.put("videotex", Boolean.TRUE); usesSemiGenericSyntax.put("rwhois", Boolean.TRUE); usesSemiGenericSyntax.put("whois++", Boolean.TRUE); usesSemiGenericSyntax.put("smtp", Boolean.TRUE); usesSemiGenericSyntax.put("telnet", Boolean.TRUE); usesSemiGenericSyntax.put("prospero", Boolean.TRUE); usesSemiGenericSyntax.put("pop", Boolean.TRUE); usesSemiGenericSyntax.put("vemmi", Boolean.TRUE); usesSemiGenericSyntax.put("z39.50r", Boolean.TRUE); usesSemiGenericSyntax.put("z39.50s", Boolean.TRUE); usesSemiGenericSyntax.put("stream", Boolean.TRUE); usesSemiGenericSyntax.put("cmp", Boolean.TRUE); alphanumChar = new BitSet(128); for (int ch='0'; ch<='9'; ch++) alphanumChar.set(ch); for (int ch='A'; ch<='Z'; ch++) alphanumChar.set(ch); for (int ch='a'; ch<='z'; ch++) alphanumChar.set(ch); markChar = new BitSet(128); markChar.set('-'); markChar.set('_'); markChar.set('.'); markChar.set('!'); markChar.set('~'); markChar.set('*'); markChar.set('\''); markChar.set('('); markChar.set(')'); reservedChar = new BitSet(128); reservedChar.set(';'); reservedChar.set('/'); reservedChar.set('?'); reservedChar.set(':'); reservedChar.set('@'); reservedChar.set('&'); reservedChar.set('='); reservedChar.set('+'); reservedChar.set('$'); reservedChar.set(','); unreservedChar = new BitSet(128); unreservedChar.or(alphanumChar); unreservedChar.or(markChar); uricChar = new BitSet(128); uricChar.or(unreservedChar); uricChar.or(reservedChar); uricChar.set('%'); pcharChar = new BitSet(128); pcharChar.or(unreservedChar); 
pcharChar.set('%'); pcharChar.set(':'); pcharChar.set('@'); pcharChar.set('&'); pcharChar.set('='); pcharChar.set('+'); pcharChar.set('$'); pcharChar.set(','); userinfoChar = new BitSet(128); userinfoChar.or(unreservedChar); userinfoChar.set('%'); userinfoChar.set(';'); userinfoChar.set(':'); userinfoChar.set('&'); userinfoChar.set('='); userinfoChar.set('+'); userinfoChar.set('$'); userinfoChar.set(','); // this actually shouldn't contain uppercase letters... schemeChar = new BitSet(128); schemeChar.or(alphanumChar); schemeChar.set('+'); schemeChar.set('-'); schemeChar.set('.'); opaqueChar = new BitSet(128); opaqueChar.or(uricChar); hostChar = new BitSet(128); hostChar.or(alphanumChar); hostChar.set('-'); hostChar.set('.'); reg_nameChar = new BitSet(128); reg_nameChar.or(unreservedChar); reg_nameChar.set('$'); reg_nameChar.set(','); reg_nameChar.set(';'); reg_nameChar.set(':'); reg_nameChar.set('@'); reg_nameChar.set('&'); reg_nameChar.set('='); reg_nameChar.set('+'); resvdSchemeChar = new BitSet(128); resvdSchemeChar.set(':'); resvdUIChar = new BitSet(128); resvdUIChar.set('@'); resvdHostChar = new BitSet(128); resvdHostChar.set(':'); resvdHostChar.set('/'); resvdHostChar.set('?'); resvdHostChar.set('#'); resvdPathChar = new BitSet(128); resvdPathChar.set('/'); resvdPathChar.set(';'); resvdPathChar.set('?'); resvdPathChar.set('#'); resvdQueryChar = new BitSet(128); resvdQueryChar.set('#'); escpdPathChar = new BitSet(128); escpdPathChar.or(pcharChar); escpdPathChar.set('%'); escpdPathChar.set('/'); escpdPathChar.set(';'); escpdQueryChar = new BitSet(128); escpdQueryChar.or(uricChar); escpdQueryChar.clear('#'); escpdFragChar = new BitSet(128); escpdFragChar.or(uricChar); } /* our uri in pieces */ protected static final int OPAQUE = 0; protected static final int SEMI_GENERIC = 1; protected static final int GENERIC = 2; protected int type; protected String scheme; protected String opaque; protected String userinfo; protected String host; protected int port = -1; 
protected String path; protected String query; protected String fragment; /* cache the java.net.URL */ protected URL url = null; // Constructors /** * Constructs a URI from the given string representation. The string * must be an absolute URI. * * @param uri a String containing an absolute URI * @exception ParseException if no scheme can be found or a specified * port cannot be parsed as a number */ public URI(String uri) throws ParseException { this((URI) null, uri); } /** * Constructs a URI from the given string representation, relative to * the given base URI. * * @param base the base URI, relative to which rel_uri * is to be parsed * @param rel_uri a String containing a relative or absolute URI * @exception ParseException if base is null and * rel_uri is not an absolute URI, or * if base is not null and the scheme * is not known to use the generic syntax, or * if a given port cannot be parsed as a number */ public URI(URI base, String rel_uri) throws ParseException { /* Parsing is done according to the following RE: * * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? * 12 3 4 5 6 7 8 9 * * 2: scheme * 4: authority * 5: path * 7: query * 9: fragment */ char[] uri = rel_uri.toCharArray(); int pos = 0, idx, len = uri.length; // trim() while (pos < len && Character.isWhitespace(uri[pos])) pos++; while (len > 0 && Character.isWhitespace(uri[len-1])) len--; // strip the special "url" or "uri" scheme if (pos < len-3 && uri[pos+3] == ':' && (uri[pos+0] == 'u' || uri[pos+0] == 'U') && (uri[pos+1] == 'r' || uri[pos+1] == 'R') && (uri[pos+2] == 'i' || uri[pos+2] == 'I' || uri[pos+2] == 'l' || uri[pos+2] == 'L')) pos += 4; // get scheme: (([^:/?#]+):)? idx = pos; while (idx < len && uri[idx] != ':' && uri[idx] != '/' && uri[idx] != '?' 
&& uri[idx] != '#') idx++; if (idx < len && uri[idx] == ':') { scheme = rel_uri.substring(pos, idx).trim().toLowerCase(); pos = idx + 1; } // check and resolve scheme String final_scheme = scheme; if (scheme == null) { if (base == null) throw new ParseException("No scheme found"); final_scheme = base.scheme; } // check for generic vs. opaque type = usesGenericSyntax(final_scheme) ? GENERIC : usesSemiGenericSyntax(final_scheme) ? SEMI_GENERIC : OPAQUE; if (type == OPAQUE) { if (base != null && scheme == null) throw new ParseException("Can't resolve relative URI for " + "scheme " + final_scheme); opaque = escape(rel_uri.substring(pos), opaqueChar, true); if (opaque.length() > 0 && opaque.charAt(0) == '/') opaque = "%2F" + opaque.substring(1); return; } // get authority: (//([^/?#]*))? if (pos+1 < len && uri[pos] == '/' && uri[pos+1] == '/') { pos += 2; idx = pos; while (idx < len && uri[idx] != '/' && uri[idx] != '?' && uri[idx] != '#') idx++; parse_authority(rel_uri.substring(pos, idx), final_scheme); pos = idx; } // handle semi-generic and generic uri's if (type == SEMI_GENERIC) { path = escape(rel_uri.substring(pos), uricChar, true); if (path.length() > 0 && path.charAt(0) != '/') path = '/' + path; } else { // get path: ([^?#]*) idx = pos; while (idx < len && uri[idx] != '?' && uri[idx] != '#') idx++; path = escape(rel_uri.substring(pos, idx), escpdPathChar, true); pos = idx; // get query: (\?([^#]*))? if (pos < len && uri[pos] == '?') { pos += 1; idx = pos; while (idx < len && uri[idx] != '#') idx++; this.query = escape(rel_uri.substring(pos, idx), escpdQueryChar, true); pos = idx; } // get fragment: (#(.*))? 
if (pos < len && uri[pos] == '#') this.fragment = escape(rel_uri.substring(pos+1, len), escpdFragChar, true); } // now resolve the parts relative to the base if (base != null) { if (scheme != null && // resolve scheme !(scheme.equals(base.scheme) && ENABLE_BACKWARDS_COMPATIBILITY)) return; scheme = base.scheme; if (host != null) // resolve authority return; userinfo = base.userinfo; host = base.host; port = base.port; if (type == SEMI_GENERIC) // can't resolve relative paths return; if (path.length() == 0 && query == null) // current doc { path = base.path; query = base.query; return; } if (path.length() == 0 || path.charAt(0) != '/') // relative path { idx = (base.path != null) ? base.path.lastIndexOf('/') : -1; if (idx < 0) path = '/' + path; else path = base.path.substring(0, idx+1) + path; path = canonicalizePath(path); } } } /** * Remove all "/../" and "/./" from path, where possible. Leading "/../"'s * are not removed. * * @param path the path to canonicalize * @return the canonicalized path */ public static String canonicalizePath(String path) { int idx, len = path.length(); if (!((idx = path.indexOf("/.")) != -1 && (idx == len-2 || path.charAt(idx+2) == '/' || (path.charAt(idx+2) == '.' && (idx == len-3 || path.charAt(idx+3) == '/')) ))) return path; char[] p = new char[path.length()]; // clean path path.getChars(0, p.length, p, 0); int beg = 0; for (idx=1; idx
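/**
 * Constructs a URI from the given URL.
 *
 * @param url the URL
 * @exception ParseException if <code>url.toExternalForm()</code> generates
 *                           an invalid string representation
 */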
public URI(URL url) throws ParseException
{
    // Hand the URL's external form to the single-string constructor,
    // which parses it without a base URI.
    this(url.toExternalForm());
}
/**
 * Builds a URI from a scheme, a host and a path. No explicit port is
 * given (<code>-1</code> is passed on), so the default port for the
 * scheme (if known) applies. Userinfo, query and fragment are left
 * unset. The parts must be in unescaped form.
 *
 * @param scheme the scheme (sometimes known as protocol)
 * @param host   the host
 * @param path   the path part
 * @exception ParseException if scheme is null
 */
public URI(String scheme, String host, String path) throws ParseException
{
    // Chain through the (scheme, host, port, path) constructor with the
    // "no port" marker; it fills in the remaining parts with null.
    this(scheme, host, -1, path);
}
/**
 * Constructs a URI from the given scheme, host, port and path. The
 * userinfo, query and fragment are passed on as <code>null</code>. The
 * parts must be in unescaped form.
 *
 * @param scheme the scheme (sometimes known as protocol)
 * @param host   the host
 * @param port   the port
 * @param path   the path part
 * @exception ParseException if scheme is null
 */
public URI(String scheme, String host, int port, String path)
throws ParseException
{
// delegate to the full constructor; userinfo, query and fragment are null
this(scheme, null, host, port, path, null, null);
}
/**
* Constructs a URI from the given parts. Any part except for the
* the scheme may be null. The parts must be in unescaped form.
*
* @param scheme the scheme (sometimes known as protocol)
* @param userinfo the userinfo
* @param host the host
* @param port the port
* @param path the path part
* @param query the query string
* @param fragment the fragment identifier
* @exception ParseException if scheme is null
*/
public URI(String scheme, String userinfo, String host, int port,
String path, String query, String fragment)
throws ParseException
{
if (scheme == null)
throw new ParseException("missing scheme");
this.scheme = escape(scheme.trim().toLowerCase(), schemeChar, true);
if (userinfo != null)
this.userinfo = escape(userinfo.trim(), userinfoChar, true);
if (host != null)
{
host = host.trim();
this.host = isIPV6Addr(host) ? host : escape(host, hostChar, true);
}
if (port != defaultPort(scheme))
this.port = port;
if (path != null)
this.path = escape(path.trim(), escpdPathChar, true); // ???
if (query != null)
this.query = escape(query.trim(), escpdQueryChar, true);
if (fragment != null)
this.fragment = escape(fragment.trim(), escpdFragChar, true);
type = usesGenericSyntax(scheme) ? GENERIC : SEMI_GENERIC;
}
private static final boolean isIPV6Addr(String host)
{
if (host.indexOf(':') < 0)
return false;
for (int idx=0; idxIn general URI are split into two categories: opaque-URI and * generic-URI. The generic-URI syntax is the syntax most are familiar * with from URLs such as ftp- and http-URLs, which is roughly: *
* generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ] ** (see RFC-2396 for exact syntax). Only URLs using the generic-URI syntax * can be used to create and resolve relative URIs. * *
Whether a given scheme is parsed according to the generic-URI * syntax or whether it is treated as opaque is determined by an internal * table of URI schemes. * * @see rfc-2396 */ public boolean isGenericURI() { return (type == GENERIC); } /** * Does the scheme specific part of this URI use the semi-generic-URI syntax? * *
<p>Many schemes which don't follow the full generic syntax actually
 * follow a reduced form where the path part is treated as opaque. This
 * is used for example by ldap, smtp, pop, etc, and is roughly
 * <pre>
 *   generic-URI = scheme ":" [ "//" server ] [ "/" [ opaque_path ] ]
 * </pre>
 * I.e. parsing is identical to the generic-syntax, except that the path
 * part is not further parsed. URLs using the semi-generic-URI syntax can
 * be used to create and resolve relative URIs with the restriction that
 * all paths are treated as absolute.
 *
 * <p>Whether a given scheme is parsed according to the semi-generic-URI
 * syntax is determined by an internal table of URI schemes.
 *
 * @see #isGenericURI()
 */
public boolean isSemiGenericURI()
{
    // true exactly when this URI was classified as semi-generic
    return SEMI_GENERIC == type;
}
/**
 * Tries to build a java.net.URL equivalent to this URI. The URL is
 * created on the first successful call and kept in the <code>url</code>
 * field; later calls return that same object.
 *
 * <p>An opaque URI is converted by parsing <code>scheme:opaque</code>.
 * Otherwise the URL is built from the scheme, the host (prefixed by
 * <code>userinfo@</code> if userinfo is set), the port, and the escaped
 * path, query and fragment, with an empty path rendered as "/".
 *
 * @return the URL
 * @exception MalformedURLException if no handler is available for the
 *            scheme
 */
public URL toURL() throws MalformedURLException
{
    if (url == null)
    {
        if (opaque != null)
            url = new URL(scheme + ":" + opaque);
        else
        {
            // fold the userinfo, if any, into the host part
            String hostinfo = host;
            if (userinfo != null)
                hostinfo = userinfo + "@" + (host != null ? host : "");

            StringBuffer file = new StringBuffer(100);
            assemblePath(file, true, true, false);

            url = new URL(scheme, hostinfo, port, file.toString());
        }
    }

    return url;
}
/**
 * Appends the path, the query (preceded by '?') and optionally the
 * fragment (preceded by '#') of this URI to the given buffer.
 *
 * @param buf         the buffer to append to
 * @param printEmpty  if true, a '/' is emitted when the path is null or
 *                    empty
 * @param incFragment if true, the fragment (when set) is appended too
 * @param unescape    if true, each part is passed through
 *                    <code>unescapeNoPE()</code> before being appended;
 *                    otherwise the stored (escaped) form is used
 */
private final void assemblePath(StringBuffer buf, boolean printEmpty,
                                boolean incFragment, boolean unescape)
{
    final boolean have_path = (path != null);

    if (printEmpty && (!have_path || path.length() == 0))
        buf.append('/');

    if (have_path)
    {
        if (unescape)
            buf.append(unescapeNoPE(path, resvdPathChar));
        else
            buf.append(path);
    }

    if (query != null)
    {
        buf.append('?');
        if (unescape)
            buf.append(unescapeNoPE(query, resvdQueryChar));
        else
            buf.append(query);
    }

    if (incFragment && fragment != null)
    {
        buf.append('#');
        if (unescape)
            buf.append(unescapeNoPE(fragment, null));
        else
            buf.append(fragment);
    }
}
/**
 * Assembles the string form of this URI from its parts.
 *
 * <p>For an opaque URI the result is <code>scheme:opaque</code>. For all
 * others the authority is introduced by "//" whenever userinfo, host or
 * port is set, a host containing a ':' is emitted inside square brackets
 * (and never unescaped), and the path, query and fragment are appended
 * via <code>assemblePath()</code> without forcing a "/" for an empty
 * path.
 *
 * @param unescape if true the parts are unescaped before assembly,
 *                 otherwise they are emitted in their stored form
 * @return the assembled string
 */
private final String stringify(boolean unescape)
{
    StringBuffer out = new StringBuffer(100);

    if (scheme != null)
    {
        String s = unescape ? unescapeNoPE(scheme, resvdSchemeChar) : scheme;
        out.append(s).append(':');
    }

    // opaque URIs consist of nothing but scheme and opaque part
    if (opaque != null)
    {
        String o = unescape ? unescapeNoPE(opaque, null) : opaque;
        return out.append(o).toString();
    }

    boolean has_authority = (userinfo != null || host != null || port != -1);
    if (has_authority)
        out.append("//");

    if (userinfo != null)
    {
        String ui = unescape ? unescapeNoPE(userinfo, resvdUIChar) : userinfo;
        out.append(ui).append('@');
    }

    if (host != null)
    {
        boolean has_colon = (host.indexOf(':') >= 0);
        if (has_colon)
            out.append('[').append(host).append(']');
        else
            out.append(unescape ? unescapeNoPE(host, resvdHostChar) : host);
    }

    if (port != -1)
        out.append(':').append(port);

    assemblePath(out, false, true, unescape);

    return out.toString();
}
/**
 * Returns this URI with all elements left in their stored, escaped form.
 *
 * @return a string representation of this URI suitable for use in
 *         links, headers, etc.
 */
public String toExternalForm()
{
    final boolean unescape = false;     // keep every element escaped
    return stringify(unescape);
}
/**
 * Returns the URI as a string with all elements unescaped before
 * assembly, which is what distinguishes it from toExternalForm(). The
 * result is not suitable for passing to other apps or for use in header
 * fields and such, and is usually not what you want.
 *
 * @return the URI as a string
 * @see #toExternalForm()
 */
public String toString()
{
    final boolean unescape = true;      // unescape every element
    return stringify(unescape);
}
/**
 * Compares this URI with another URI or with a java.net.URL.
 *
 * <p>Against a URI the schemes must be equal and, depending on the type
 * of this URI, either the opaque parts (opaque), or userinfo, host
 * (case-insensitively), port and path (semi-generic), or additionally
 * query and fragment with the path compared segment-wise (generic).
 *
 * <p>Against a URL the protocol is compared case-insensitively with the
 * scheme, the URL's host with this URI's host (prefixed by
 * <code>userinfo@</code> if userinfo is set), the URL's file with this
 * URI's path-and-query, and the URL's ref with the fragment. The ports
 * match if they are identical, or if this URI has no port recorded
 * (<code>-1</code>) and the URL names the default port of the scheme.
 * A URI with an explicit non-default port therefore never equals a URL
 * carrying the default port.
 *
 * @param other the object to compare with
 * @return true if <var>other</var> is either a URI or URL and it
 *         matches the current URI
 */
public boolean equals(Object other)
{
    if (other instanceof URI)
    {
        URI o = (URI) other;
        return (scheme.equals(o.scheme) &&
                (
                 type == OPAQUE && areEqual(opaque, o.opaque) ||

                 type == SEMI_GENERIC &&
                  areEqual(userinfo, o.userinfo) &&
                  areEqualIC(host, o.host) &&
                  port == o.port &&
                  areEqual(path, o.path) ||

                 type == GENERIC &&
                  areEqual(userinfo, o.userinfo) &&
                  areEqualIC(host, o.host) &&
                  port == o.port &&
                  pathsEqual(path, o.path) &&
                  areEqual(query, o.query) &&
                  areEqual(fragment, o.fragment)
                ));
    }

    if (other instanceof URL)
    {
        URL o = (URL) other;
        String h, f;

        if (userinfo != null)
            h = userinfo + "@" + host;
        else
            h = host;

        f = getPathAndQuery();

        // The default-port clause only applies when this URI has no port
        // of its own; otherwise any URL on the default port would match
        // a URI on a different, explicit port.
        return (scheme.equalsIgnoreCase(o.getProtocol()) &&
                (type == OPAQUE && opaque.equals(o.getFile()) ||

                 type == SEMI_GENERIC &&
                  areEqualIC(h, o.getHost()) &&
                  (port == o.getPort() ||
                   port == -1 && o.getPort() == defaultPort(scheme)) &&
                  areEqual(f, o.getFile()) ||

                 type == GENERIC &&
                  areEqualIC(h, o.getHost()) &&
                  (port == o.getPort() ||
                   port == -1 && o.getPort() == defaultPort(scheme)) &&
                  pathsEqual(f, o.getFile()) &&
                  areEqual(fragment, o.getRef())
                )
               );
    }

    return false;
}
/**
 * Null-tolerant string comparison: two nulls are equal, a null and a
 * non-null are not, and two non-null strings are equal if they match
 * either as given or after both have been passed through
 * <code>unescapeNoPE()</code>.
 *
 * @param s1 the first string, may be null
 * @param s2 the second string, may be null
 * @return true if the two are considered equal
 */
private static final boolean areEqual(String s1, String s2)
{
    if (s1 == null || s2 == null)
        return (s1 == null && s2 == null);

    if (s1.equals(s2))
        return true;

    // differing spellings may still denote the same unescaped text
    String plain1 = unescapeNoPE(s1, null);
    String plain2 = unescapeNoPE(s2, null);
    return plain1.equals(plain2);
}
/**
 * Case-insensitive variant of the null-tolerant comparison: two nulls
 * are equal, a null and a non-null are not, and two non-null strings are
 * equal if they match ignoring case, either as given or after both have
 * been passed through <code>unescapeNoPE()</code>.
 *
 * @param s1 the first string, may be null
 * @param s2 the second string, may be null
 * @return true if the two are considered equal
 */
private static final boolean areEqualIC(String s1, String s2)
{
    if (s1 == null || s2 == null)
        return (s1 == null && s2 == null);

    if (s1.equalsIgnoreCase(s2))
        return true;

    // differing spellings may still denote the same unescaped text
    String plain1 = unescapeNoPE(s1, null);
    String plain2 = unescapeNoPE(s2, null);
    return plain1.equalsIgnoreCase(plain2);
}
/**
 * Compares two paths. Two nulls are equal; a null and a non-null are
 * not. Otherwise the paths are equal if they are identical strings or
 * if, split at the delimiters '/' and ';', they have the same delimiters
 * in the same order and each pair of segments matches either literally
 * or after both have been passed through <code>unescapeNoPE()</code>.
 *
 * @param p1 the first path, may be null
 * @param p2 the second path, may be null
 * @return true if the paths are considered equal
 */
private static final boolean pathsEqual(String p1, String p2)
{
    if (p1 == null && p2 == null)
        return true;
    if (p1 == null || p2 == null)
        return false;
    if (p1.equals(p2))
        return true;

    // ok, so it wasn't that simple. Let's split into parts and compare
    // unescaped.
    int pos1 = 0, end1 = p1.length(), pos2 = 0, end2 = p2.length();
    while (pos1 < end1 && pos2 < end2)
    {
        int start1 = pos1, start2 = pos2;

        // advance each cursor to the next delimiter or to the end
        char ch;
        while (pos1 < end1 && (ch = p1.charAt(pos1)) != '/' && ch != ';')
            pos1++;
        while (pos2 < end2 && (ch = p2.charAt(pos2)) != '/' && ch != ';')
            pos2++;

        // one path ended while the other goes on, or delimiters differ
        if (pos1 == end1 && pos2 < end2 ||
            pos2 == end2 && pos1 < end1 ||
            pos1 < end1 && pos2 < end2 && p1.charAt(pos1) != p2.charAt(pos2))
            return false;

        // segments must match literally or, failing that, unescaped
        if ((!p1.regionMatches(start1, p2, start2, pos1-start1) ||
             (pos1-start1) != (pos2-start2)) &&
            !unescapeNoPE(p1.substring(start1, pos1), null).equals(
                unescapeNoPE(p2.substring(start2, pos2), null)))
            return false;

        // step over the delimiter (or one past the end)
        pos1++;
        pos2++;
    }

    // A cursor sits at end when its path finished with a delimiter and at
    // end+1 when the last segment ran up to the end of the string; both
    // mean the path was consumed. Testing for == end alone wrongly
    // rejected equal paths that do not end in a delimiter.
    return (pos1 >= end1 && pos2 >= end2);
}
/** cached hash code; -1 means it has not been computed yet */
private int hashCode = -1;

/**
 * The hash code is calculated over the unescaped scheme plus, for
 * opaque URIs, the unescaped opaque part, and for all other URIs the
 * unescaped host (lower-cased), path and query. Parts which are null
 * contribute nothing. The value is computed on demand and cached.
 *
 * @return the hash code
 */
public int hashCode()
{
    if (hashCode != -1)
        return hashCode;

    int h = 0;
    if (scheme != null)
        h = unescapeNoPE(scheme, null).hashCode();

    if (type == OPAQUE)
    {
        if (opaque != null)
            h += unescapeNoPE(opaque, null).hashCode() * 7;
    }
    else
    {
        if (host != null)
            h += unescapeNoPE(host, null).toLowerCase().hashCode() * 7;
        if (path != null)
            h += unescapeNoPE(path, null).hashCode() * 13;
        if (query != null)
            h += unescapeNoPE(query, null).hashCode() * 17;
    }

    hashCode = h;
    return hashCode;
}
/**
 * Escapes the characters of a string which are not in the given
 * character class. This is a convenience wrapper which converts the
 * string to a character array, hands it to the <code>char[]</code>
 * variant of this method, and turns the result back into a string.
 *
 * <p>NOTE(review): the original documentation states that characters
 * greater 255 are always escaped but leaves the scheme used for that
 * unspecified ("???"); confirm against the <code>char[]</code> variant.
 *
 * @param elem         the string to escape
 * @param allowed_char the BitSet of all allowed characters
 * @param utf8         if true, will first UTF-8 encode unallowed characters
 * @return the string with all characters not in allowed_char
 *         escaped
 */
public static String escape(String elem, BitSet allowed_char, boolean utf8)
{
    char[] raw     = elem.toCharArray();
    char[] escaped = escape(raw, allowed_char, utf8);
    return new String(escaped);
}
/**
* Escape any character not in the given character class. Characters
* greater 255 are always escaped according to ??? .
*
* @param elem the array of characters to escape
* @param allowed_char the BitSet of all allowed characters
* @param utf8 if true, will first UTF-8 encode unallowed characters
* @return the elem array with all characters not in allowed_char
* escaped
*/
public static char[] escape(char[] elem, BitSet allowed_char, boolean utf8)
{
int cnt=0;
for (int idx=0; idx