spider.util
Class Helper

java.lang.Object
  |
  +--spider.util.Helper

public class Helper
extends java.lang.Object

A bunch of static helper functions

Author:
Gautam Pant

Constructor Summary
Helper()
           
 
Method Summary
static java.lang.String escapeText(java.lang.String s)
          escapes special characters in XML
static java.lang.String escapeURL(java.lang.String s)
          escapes special characters in URLs through URL encoding
static java.lang.String getCanonical(java.lang.String url)
          returns the canonical URL
static double getCosine(double[] v1, double[] v2)
          cosine of the angle between two vectors
static java.lang.String getDomainName(java.lang.String url)
          get the second level domain name from a given url
static java.lang.String getHostName(java.lang.String url)
          get the host name from the given URL
static java.lang.String getHostNameWithPort(java.lang.String url)
          get host name with port from a given URL
static java.util.Hashtable getLinkContextWords(java.lang.String content, int noWords)
          provides links with context; noWords is the number of words around the link text used for context
static double getSim(java.lang.String text1, java.lang.String text2)
          cosine similarity between two strings (without idf)
static double getSim(java.lang.String text1, java.lang.String text2, java.util.Hashtable df, int noDocs)
          cosine similarity between two strings - SMART atc - idf included
static double getSimInQuerySpace(java.lang.String query, java.lang.String text)
          cosine similarity computed by projecting the text onto the query space
static java.lang.String getURLPath(java.lang.String url)
          get the path from the given URL
static java.lang.String join(java.lang.String seperator, java.lang.String[] parts)
          joins the text in a string array with a given separator
static java.lang.String join(java.lang.String seperator, java.lang.String[] parts, int start, int end)
          joins the text in a part of a string array with a given separator
static void runSystemCmd(java.lang.String cmd)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

Helper

public Helper()
Method Detail

join

public static java.lang.String join(java.lang.String seperator,
                                    java.lang.String[] parts)
joins the text in a string array with a given separator


join

public static java.lang.String join(java.lang.String seperator,
                                    java.lang.String[] parts,
                                    int start,
                                    int end)
joins the text in a part of a string array with a given separator


getHostName

public static java.lang.String getHostName(java.lang.String url)
get the host name from the given URL


getDomainName

public static java.lang.String getDomainName(java.lang.String url)
get the second level domain name from a given url

Parameters:
url - the URL from which to get the second-level domain name
Returns:
the second-level domain name

getHostNameWithPort

public static java.lang.String getHostNameWithPort(java.lang.String url)
get host name with port from a given URL


getURLPath

public static java.lang.String getURLPath(java.lang.String url)
get the path from the given URL


getCanonical

public static java.lang.String getCanonical(java.lang.String url)
returns the canonical URL


escapeText

public static java.lang.String escapeText(java.lang.String s)
escapes special characters in XML


escapeURL

public static java.lang.String escapeURL(java.lang.String s)
escapes special characters in URLs through URL encoding


getSim

public static double getSim(java.lang.String text1,
                            java.lang.String text2)
cosine similarity between two strings (without idf)


getCosine

public static double getCosine(double[] v1,
                               double[] v2)
cosine of the angle between two vectors


getSimInQuerySpace

public static double getSimInQuerySpace(java.lang.String query,
                                        java.lang.String text)
cosine similarity computed by projecting the text onto the query space


getSim

public static double getSim(java.lang.String text1,
                            java.lang.String text2,
                            java.util.Hashtable df,
                            int noDocs)
cosine similarity between two strings - SMART atc - idf included


getLinkContextWords

public static java.util.Hashtable getLinkContextWords(java.lang.String content,
                                                      int noWords)
provides links with context; noWords is the number of words around the link text used for context


runSystemCmd

public static void runSystemCmd(java.lang.String cmd)