|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--spider.util.Helper
A bunch of static helper functions
Constructor Summary | |
Helper()
|
Method Summary | |
static java.lang.String |
escapeText(java.lang.String s)
escapes special characters in XML |
static java.lang.String |
escapeURL(java.lang.String s)
escapes special characters in URLs through URL encoding |
static java.lang.String |
getCanonical(java.lang.String url)
returns the canonical URL |
static double |
getCosine(double[] v1,
double[] v2)
cosine of the angle between two vectors |
static java.lang.String |
getDomainName(java.lang.String url)
get the second level domain name from a given url |
static java.lang.String |
getHostName(java.lang.String url)
get the host name from the given URL |
static java.lang.String |
getHostNameWithPort(java.lang.String url)
get host name with port from a given URL |
static java.util.Hashtable |
getLinkContextWords(java.lang.String content,
int noWords)
provides links with context; noWords is the number of words around a link text used for context |
static double |
getSim(java.lang.String text1,
java.lang.String text2)
cosine similarity between two strings (without idf) |
static double |
getSim(java.lang.String text1,
java.lang.String text2,
java.util.Hashtable df,
int noDocs)
cosine similarity between two strings - SMART atc - idf included |
static double |
getSimInQuerySpace(java.lang.String query,
java.lang.String text)
cosine similarity computed by projecting the text onto the query space |
static java.lang.String |
getURLPath(java.lang.String url)
get the path from the given URL |
static java.lang.String |
join(java.lang.String seperator,
java.lang.String[] parts)
joins the text in a string array with a given separator |
static java.lang.String |
join(java.lang.String seperator,
java.lang.String[] parts,
int start,
int end)
joins the text in a part of a string array with a given separator |
static void |
runSystemCmd(java.lang.String cmd)
|
Methods inherited from class java.lang.Object |
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
public Helper()
Method Detail |
public static java.lang.String join(java.lang.String seperator, java.lang.String[] parts)
public static java.lang.String join(java.lang.String seperator, java.lang.String[] parts, int start, int end)
public static java.lang.String getHostName(java.lang.String url)
public static java.lang.String getDomainName(java.lang.String url)
url
-
public static java.lang.String getHostNameWithPort(java.lang.String url)
public static java.lang.String getURLPath(java.lang.String url)
public static java.lang.String getCanonical(java.lang.String url)
public static java.lang.String escapeText(java.lang.String s)
public static java.lang.String escapeURL(java.lang.String s)
public static double getSim(java.lang.String text1, java.lang.String text2)
public static double getCosine(double[] v1, double[] v2)
public static double getSimInQuerySpace(java.lang.String query, java.lang.String text)
public static double getSim(java.lang.String text1, java.lang.String text2, java.util.Hashtable df, int noDocs)
public static java.util.Hashtable getLinkContextWords(java.lang.String content, int noWords)
public static void runSystemCmd(java.lang.String cmd)
|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |