C/C++编程:url解析

深碍√TFBOYSˉ_ 2022-09-02 14:58 413阅读 0赞

url解析

源码

  1. //
  2. // Created by oceanstar on 2021/8/6.
  3. //
  4. #ifndef OCEANSTAR_HTTP_HTTP_UTILS_H
  5. #define OCEANSTAR_HTTP_HTTP_UTILS_H
  6. #include <cstdio>
  7. #include "global.h"
  8. namespace oceanstar {
  /**
   * Stateless helpers for pulling the server address out of an HTTP url.
   */
  class http_utils {
  public:
      http_utils() {}
      ~http_utils() {}

      /**
       * Obtain the web server's address and port number from a full url.
       * @param url {const char*} HTTP url, must not be null
       * @param domain {char*} buffer that receives the domain
       * @param size {size_t} size of the domain buffer
       * @param port {unsigned short*} receives the port number
       * @return {bool} whether the address was obtained
       */
      static bool get_addr(const char* url, char* domain, size_t size,
          unsigned short* port);

      /**
       * Obtain the web server's address from a full url, formatted as
       * domain:port.
       * @param url {const char*} HTTP url, must not be null
       * @param addr {char*} receives the result, formatted as domain:port
       * @param size {size_t} size of the addr buffer
       * @return {bool} whether the address was obtained
       */
      static bool get_addr(const char* url, char* addr, size_t size);
  };
  34. };
  35. namespace oceanstar{
  36. using namespace std;
  /**
   * Holds the pieces of a parsed HTTP url: protocol, domain, port,
   * path and query parameters.
   */
  class http_url {
  public:
      http_url();
      ~http_url() {}

      /**
       * Parse the given url into this object.
       * @param url {const char*} url to parse
       * @return {bool} whether parsing succeeded
       */
      bool parse(const char* url);

  public:
      /**
       * Protocol found in the URL: http or https.
       * @return {const char*}
       */
      const char* get_proto() const { return proto_; }

      /**
       * Domain field of the URL.
       * @return {const char*} an empty string means the field is absent
       */
      const char* get_domain() const { return domain_.c_str(); }

      /**
       * Server port extracted from the URL; the internal default is 80.
       * @return {unsigned short}
       */
      unsigned short get_port() const { return port_; }

      /**
       * Relative path extracted from the URL (without the parameters
       * that follow '?').
       * @return {const char*}
       */
      const char* get_url_path() const { return url_path_.c_str(); }

      /**
       * Parameter field extracted from the URL.
       * @return {const char*}
       */
      const char* get_url_params() const { return url_params_.c_str(); }

      /**
       * Clear the intermediate parse state so the object can be reused
       * to parse the next URL.
       */
      void reset();

  private:
      char proto_[16];
      std::string domain_;
      unsigned short port_;
      std::string url_path_;
      std::string url_params_;

      bool parse_url_part(const char* url);
      const char* parse_domain(const char* url);
  };
  92. };
  93. #endif //OCEANSTAR_HTTP_HTTP_UTILS_H
  94. //
  95. // Created by oceanstar on 2021/8/6.
  96. //
  97. #include <iostream>
  98. #include <string.h>
  99. #include "http_utils.h"
  100. namespace oceanstar{
  101. #define HTTP_PREFIX "http://"
  102. #define HTTPS_PREFIX "https://"
  103. bool http_utils::get_addr(const char* url, char* domain, size_t size,
  104. unsigned short* pport)
  105. {
  106. const char* ptr;
  107. unsigned short default_port;
  108. if (!strncasecmp(url, HTTP_PREFIX, sizeof(HTTP_PREFIX) - 1)) {
  109. ptr = url + sizeof(HTTP_PREFIX) - 1;
  110. default_port = 80;
  111. } else if (!strncasecmp(url, HTTPS_PREFIX, sizeof(HTTPS_PREFIX) - 1)) {
  112. ptr = url + sizeof(HTTPS_PREFIX) - 1;
  113. default_port = 443;
  114. } else {
  115. logger_error("invalid url: %s", url);
  116. return false;
  117. }
  118. if (*ptr == 0) {
  119. logger_error("invalid url: %s", url);
  120. return false;
  121. }
  122. char buf[256];
  123. strncpy(buf, ptr, sizeof(buf));
  124. char* slash = strchr(buf, '/');
  125. if (slash) {
  126. *slash = 0;
  127. }
  128. unsigned short port;
  129. char* col = strchr(buf, ':');
  130. if (col == NULL) {
  131. port = default_port;
  132. } else {
  133. *col++ = 0;
  134. port = (unsigned short) atoi(col);
  135. if (port == 0 || port == 65535) {
  136. port = default_port;
  137. }
  138. }
  139. if (pport) {
  140. *pport = port;
  141. }
  142. strncpy(domain, buf, size);
  143. return true;
  144. }
  145. bool http_utils::get_addr(const char* url, char* addr, size_t size)
  146. {
  147. char buf[256];
  148. unsigned short port;
  149. if (!get_addr(url, buf, sizeof(buf), &port)) {
  150. return false;
  151. }
  152. snprintf(addr, size, "%s:%d", buf, port);
  153. return true;
  154. }
  155. }
  156. namespace oceanstar{
  157. http_url::http_url(void) {
  158. strncpy(proto_, "http", sizeof(proto_));
  159. port_ = 80;
  160. }
  161. void http_url::reset(void) {
  162. strncpy(proto_, "http", sizeof(proto_));
  163. port_ = 80;
  164. domain_.clear();
  165. url_path_.clear();
  166. url_params_.clear();
  167. }
  168. const char* http_url::parse_domain(const char *url) {
  169. if (*url == '/') {
  170. logger_error("invalid url: %s", url);
  171. return NULL;
  172. }
  173. const char* ptr = strchr(url, '/');
  174. if (ptr == NULL) {
  175. domain_ = url;
  176. return NULL;
  177. }
  178. char buf[256];
  179. size_t size = ptr - url + 1;
  180. if (size > sizeof(buf)) {
  181. // xxx: sanity check
  182. size = sizeof(buf);
  183. }
  184. strncpy(buf, url, size);
  185. // fixme: Is it error if buf contains IPV6 Addr ---zsx
  186. char* col = strchr(buf, ':');
  187. if (col != NULL) {
  188. *col++ = 0;
  189. port_ = (unsigned short) atoi(col);
  190. if (port_ == 0 || port_ == 65535) {
  191. port_ = 80;
  192. }
  193. }
  194. domain_ = buf;
  195. return ptr;
  196. }
  197. bool http_url::parse(const char *url) {
  198. const char* ptr;
  199. if (!strncasecmp(url, HTTP_PREFIX, sizeof(HTTP_PREFIX) - 1)) {
  200. ptr = url + sizeof(HTTP_PREFIX) - 1;
  201. } else if (!strncasecmp(url, HTTPS_PREFIX, sizeof(HTTPS_PREFIX) - 1)) {
  202. ptr = url + sizeof(HTTPS_PREFIX) - 1;
  203. port_ = 443;
  204. strncpy(proto_, "https", sizeof(proto_));
  205. } else if (*url == '/'){
  206. ptr = url;
  207. } else {
  208. logger_error("invalid url: %s", url);
  209. return false;
  210. }
  211. if (*ptr == 0) {
  212. logger_error("invalid url: %s", url);
  213. return false;
  214. }
  215. if (ptr == url) {
  216. // 说明是仅含相对路径的 url
  217. return parse_url_part(url);
  218. } else {
  219. // 说明包含有完整路径的 url,下面先提取域名字段,再提取相对 url
  220. ptr = parse_domain(ptr);
  221. if (ptr == NULL) {
  222. url_path_ = "/";
  223. return true;
  224. }
  225. return parse_url_part(ptr);
  226. }
  227. }
  228. #define SKIP_WHILE(cond, ptr) {
  229. while(*ptr && (cond)) ptr++; }
  230. bool http_url::parse_url_part(const char *url) {
  231. if (*url != '/') {
  232. logger_error("invalid url: %s", url);
  233. return false;
  234. }
  235. const char* ptr = url;
  236. SKIP_WHILE(*ptr == '/', ptr);
  237. if (*ptr == 0) {
  238. url_path_ = "/";
  239. return true;
  240. }
  241. const char* qm = strchr(ptr, '?');
  242. if (qm == NULL) {
  243. url_path_ = url;
  244. return true;
  245. }
  246. char buf[strlen(url)];
  247. strncpy(buf, url, qm - url);
  248. url_path_ = buf;
  249. ++qm;
  250. if (*qm != 0) {
  251. url_params_ = qm;
  252. }
  253. return true;
  254. }
  255. }

使用

例子

  1. #include <cstring>
  2. #include "http_utils.h"
  3. using namespace oceanstar;
  4. int main(int argc, char *argv[])
  5. {
  6. unsigned short port ;
  7. char host[256];
  8. memset(host, 0, sizeof(host));
  9. std::string url = "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 ";
  10. if(http_utils::get_addr(url.c_str(), host, sizeof(host), &port)){
  11. logger_info("host: %s, port: %d", host, port);
  12. }else{
  13. logger_error("s(%d): url invalid ", __FUNCTION__ , __LINE__);
  14. }
  15. char addr[256];
  16. memset(addr, 0, sizeof(addr));
  17. if(http_utils::get_addr(url.c_str(), addr, sizeof(addr))){
  18. logger_info("addr: %s", addr);
  19. }else{
  20. logger_error("s(%d): url invalid ", __FUNCTION__ , __LINE__);
  21. }
  22. return (0);
  23. }

在这里插入图片描述

例子

  1. #include <cstring>
  2. #include "http_utils.h"
  3. using namespace oceanstar;
  4. int main(int argc, char *argv[])
  5. {
  6. unsigned short port ;
  7. char host[256];
  8. memset(host, 0, sizeof(host));
  9. std::string url = "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 ";
  10. http_url httpUrl;
  11. if(!httpUrl.parse(url.c_str())){
  12. exit(0);
  13. }
  14. logger_info("proto: %s", httpUrl.get_proto());
  15. logger_info("domain: %s", httpUrl.get_domain());
  16. logger_info("port: %d", httpUrl.get_port());
  17. logger_info("url_path: %s", httpUrl.get_url_path());
  18. logger_info("url_params: %s", httpUrl.get_url_params());
  19. printf("\n\n");
  20. httpUrl.reset();
  21. url = "http://127.0.0.1:443/path/test.cgi";
  22. if(!httpUrl.parse(url.c_str())){
  23. exit(0);
  24. }
  25. logger_info("proto: %s", httpUrl.get_proto());
  26. logger_info("domain: %s", httpUrl.get_domain());
  27. logger_info("port: %d", httpUrl.get_port());
  28. logger_info("url_path: %s", httpUrl.get_url_path());
  29. logger_info("url_params: %s", httpUrl.get_url_params());
  30. return (0);
  31. }

在这里插入图片描述

从形如 127.0.0.1:80 的地址字符串中解析出 host 和 port

  1. #include <cstring>
  2. #include "http_utils.h"
  3. using namespace oceanstar;
  /**
   * Cut s in two at the last occurrence of delimiter.
   *
   * s is modified in place (the delimiter is overwritten with NUL), so it
   * must point to writable memory and not to a string literal.
   *
   * @return pointer to the text after the delimiter, or NULL when the
   *  delimiter does not occur in s
   */
  static char *acl_split_at_right(char *s, int delimiter)
  {
      char *cp = strrchr(s, delimiter);
      if (cp != NULL) {
          *cp = 0;
          cp++;
      }
      return cp;
  }

  /**
   * Split "host:port", "host:", "host" or ":port" into its two parts.
   *
   * buf is modified in place. *host is set to the host text, or to def_host
   * when the host text is empty. *port is set to the port text, or to NULL
   * when there is no port text.
   *
   * @return NULL on success, otherwise a static error message
   */
  static const char *acl_host_port(char *buf, char **host, char *def_host, char **port)
  {
      char *after_colon = acl_split_at_right(buf, ':');

      if (after_colon != NULL) {
          /* host:port, host:, :port */
          *host = *buf ? buf : def_host;
          *port = *after_colon ? after_colon : NULL;
      } else {
          /* host only: without this branch the outputs were never assigned */
          *host = *buf ? buf : def_host;
          *port = NULL;
      }

      if (*host == 0)
          return "missing host information";
      return NULL;
  }
  26. static int host_port(char *buf, char **host, char **port)
  27. {
  28. char *def_host = "";
  29. const char *ptr = acl_host_port(buf, host, def_host, port);
  30. if (ptr != NULL) {
  31. logger_error("%s(%d), %s: invalid addr %s, %s",
  32. __FILE__, __LINE__, __FUNCTION__, buf, ptr);
  33. return -1;
  34. }
  35. if (*port != NULL && atoi(*port) < 0) {
  36. logger_error("%s(%d), %s: invalid port: %s, addr: %s",
  37. __FILE__, __LINE__, __FUNCTION__,
  38. *port ? *port : "null", buf);
  39. return -1;
  40. }
  41. if (*host && **host == 0)
  42. *host = 0;
  43. if (*host == NULL)
  44. *host = "0";
  45. return 0;
  46. }
  47. int main(int argc, char *argv[])
  48. {
  49. char *addr = strdup("127.0.0.0:80");
  50. char *host = NULL, *port = NULL;
  51. if (host_port(addr, &host, &port) < 0) {
  52. printf("formet error");
  53. free(addr);
  54. return NULL;
  55. }
  56. printf("host: %s, port: %s", host, port);
  57. free(addr);
  58. return (0);
  59. }

通过url获取host【版本1】

  1. #include <iostream>
  2. /* data: "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 */
  /**
   * Copy the "host[:port]" part of url into buf.
   *
   * An optional "http://" or "https://" prefix (case-insensitive) is
   * skipped; the copy stops at the first '/' or at the end of the string.
   * buf always ends up NUL-terminated; the text is truncated to size - 1
   * characters when it does not fit. buf becomes the empty string when url
   * is null, empty after the prefix, or starts with '/' after the prefix.
   *
   * @param buf destination buffer; nothing is done when null
   * @param size capacity of buf in bytes; nothing is done when 0
   * @param url url to inspect
   */
  static void __get_host_from_url(char *buf, size_t size, const char *url)
  {
      if (buf == NULL || size == 0)
          return;
      buf[0] = 0;
      if (url == NULL)
          return;

      const char *begin;
      if (strncasecmp(url, "http://", sizeof("http://") - 1) == 0)
          begin = url + sizeof("http://") - 1;
      else if (strncasecmp(url, "https://", sizeof("https://") - 1) == 0)
          begin = url + sizeof("https://") - 1;
      else
          begin = url;

      if (*begin == 0 || *begin == '/')
          return;

      const char *end = strchr(begin, '/');
      size_t n = end != NULL ? static_cast<size_t>(end - begin) : strlen(begin);

      // Keep one byte for the terminator, which strncpy never wrote here.
      if (n > size - 1)
          n = size - 1;
      memcpy(buf, begin, n);
      buf[n] = 0;
  }
  26. int main() {
  27. char host[256];
  28. memset(host, 0, sizeof(host));
  29. __get_host_from_url(host, sizeof(host), "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 ");
  30. std::cout << host << std::endl;
  31. }

在这里插入图片描述

发表评论

表情:
评论列表 (有 0 条评论,413人围观)

还没有评论,来说两句吧...

相关阅读