00001 #ifndef URLREGEX_H 00002 #define URLREGEX_H 00003 00004 #include <abuse/abuse.h> 00005 #include <abuse/RegEx.h> 00006 #include <string> 00007 00008 00009 #define OVECCOUNT 30 // handles 10 results 00010 00011 // Hostname permits whitespace after but not preceding 00012 00013 // This class implements Jeffrey E.F. Friedl's Url validator routines. 00014 class UrlAddress 00015 { 00016 public: 00017 UrlAddress(); 00018 UrlAddress(std::string sUrl); 00019 ~UrlAddress(); 00020 const std::string& HostName()const; 00021 const std::string& Domain()const; 00022 bool IsValid()const; 00023 const std::string GetVersion()const; 00024 00026 bool Parse(const std::string& sUrl); 00027 00028 private: 00029 bool initRegex(); 00030 void buildRegexObject(); 00031 00032 // static so that it's shared accross multiple instances. 00033 static pcre* pPcre; 00034 int ovector[OVECCOUNT]; 00035 int hostnameIndex; 00036 int domainIndex; 00037 00038 // Local variables 00039 std::string UrlRegexStr; // Holds reular expression 00040 std::string m_hostname; // Parsed hostname 00041 std::string m_domain; // Parsed domain name 00042 bool m_bIsValid; // True if successful parse of passed string 00043 int m_captureCount; 00044 int m_PcreError; 00045 }; 00046 #endif