url解析
源码
//
// Created by oceanstar on 2021/8/6.
//
#ifndef OCEANSTAR_HTTP_HTTP_UTILS_H
#define OCEANSTAR_HTTP_HTTP_UTILS_H
#include <cstdio>
#include "global.h"
namespace oceanstar {

/**
 * Collection of static helpers that pull the server address out of an
 * HTTP/HTTPS URL. The class carries no state.
 */
class http_utils {
public:
    http_utils() {}
    ~http_utils() {}

    /**
     * Obtain the web server host and port number from a complete URL.
     * @param url {const char*} HTTP URL, must not be NULL
     * @param domain {char*} buffer that receives the domain
     * @param size {size_t} size of the domain buffer in bytes
     * @param port {unsigned short*} receives the port number
     * @return {bool} whether the address was obtained successfully
     */
    static bool get_addr(const char* url, char* domain, size_t size,
        unsigned short* port);

    /**
     * Obtain the web server address from a complete URL, formatted as
     * domain:port.
     * @param url {const char*} HTTP URL, must not be NULL
     * @param addr {char*} buffer that receives the result as domain:port
     * @param size {size_t} size of the addr buffer in bytes
     * @return {bool} whether the address was obtained successfully
     */
    static bool get_addr(const char* url, char* addr, size_t size);
};

} // namespace oceanstar
namespace oceanstar {

// NOTE(review): a using-directive in a header leaks std into every file that
// includes it; it is kept here only so existing includers keep compiling.
using namespace std;

/**
 * Splits a URL into protocol, domain, port, path and query parameters.
 * Call reset() before parsing another URL with the same object.
 */
class http_url {
public:
    http_url();
    ~http_url() {}

    /**
     * Parse the given URL and store its components in this object.
     * @param url {const char*} the URL to parse
     * @return {bool} false if the URL was rejected
     */
    bool parse(const char* url);

    /**
     * Protocol found in the URL: http or https.
     * @return {const char*}
     */
    const char* get_proto() const { return proto_; }

    /**
     * Domain field of the URL.
     * @return {const char*} an empty string means the field is absent
     */
    const char* get_domain() const { return domain_.c_str(); }

    /**
     * Server port extracted from the URL; the internal default is 80.
     * @return {unsigned short}
     */
    unsigned short get_port() const { return port_; }

    /**
     * Relative path part of the URL (without the parameters after '?').
     * @return {const char*}
     */
    const char* get_url_path() const { return url_path_.c_str(); }

    /**
     * Parameter field extracted from the URL.
     * @return {const char*}
     */
    const char* get_url_params() const { return url_params_.c_str(); }

    /**
     * Clear the state left by a previous parse so that the object can be
     * reused for the next URL.
     */
    void reset();

private:
    char proto_[16];
    std::string domain_;
    unsigned short port_;
    std::string url_path_;
    std::string url_params_;

    bool parse_url_part(const char* url);
    const char* parse_domain(const char* url);
};

} // namespace oceanstar
#endif //OCEANSTAR_HTTP_HTTP_UTILS_H
//
// Created by oceanstar on 2021/8/6.
//
#include <iostream>
#include <string.h>
#include "http_utils.h"
namespace oceanstar{
// Scheme prefixes recognised by the parsers below. They stay string-literal
// macros because callers use sizeof(PREFIX) - 1 as the prefix length.
#define HTTP_PREFIX "http://"
#define HTTPS_PREFIX "https://"
/**
 * Extract the host and port from an absolute http:// or https:// URL.
 *
 * The port defaults to 80 for http and 443 for https. An explicit port that
 * is not in the range 1..65534 is replaced by the scheme default.
 *
 * @param url {const char*} URL to inspect
 * @param domain {char*} receives the NUL-terminated host; may be NULL (or
 *  size may be 0) when only the port is wanted
 * @param size {size_t} size of the domain buffer in bytes
 * @param pport {unsigned short*} receives the port; may be NULL
 * @return {bool} false when the scheme is unknown, nothing follows the
 *  scheme, or the host[:port] part does not fit the internal buffer
 */
bool http_utils::get_addr(const char* url, char* domain, size_t size,
    unsigned short* pport)
{
    if (url == NULL) {
        logger_error("invalid url: %s", "(null)");
        return false;
    }

    const char* ptr;
    unsigned short default_port;
    if (!strncasecmp(url, HTTP_PREFIX, sizeof(HTTP_PREFIX) - 1)) {
        ptr = url + sizeof(HTTP_PREFIX) - 1;
        default_port = 80;
    } else if (!strncasecmp(url, HTTPS_PREFIX, sizeof(HTTPS_PREFIX) - 1)) {
        ptr = url + sizeof(HTTPS_PREFIX) - 1;
        default_port = 443;
    } else {
        logger_error("invalid url: %s", url);
        return false;
    }
    if (*ptr == 0) {
        logger_error("invalid url: %s", url);
        return false;
    }

    // Only the part before the first '/' is of interest. Measuring it first
    // lets us copy exactly that span and always NUL-terminate, instead of
    // copying a fixed 256 bytes that may lack a terminator.
    char buf[256];
    const size_t len = strcspn(ptr, "/");
    if (len >= sizeof(buf)) {
        logger_error("invalid url: %s", url);
        return false;
    }
    memcpy(buf, ptr, len);
    buf[len] = 0;

    unsigned short port = default_port;
    char* col = strchr(buf, ':');
    if (col != NULL) {
        *col++ = 0;
        // Range-check as int before narrowing so that values such as 65536
        // or -1 cannot wrap into a different, seemingly valid port.
        const int n = atoi(col);
        if (n > 0 && n < 65535) {
            port = (unsigned short) n;
        }
    }

    if (pport) {
        *pport = port;
    }
    if (domain != NULL && size > 0) {
        // strncpy does not terminate on truncation, so do it explicitly.
        strncpy(domain, buf, size - 1);
        domain[size - 1] = 0;
    }
    return true;
}
/**
 * Format the server address of a URL as "domain:port".
 * @param url {const char*} URL handed to the four-argument get_addr()
 * @param addr {char*} receives the formatted address
 * @param size {size_t} size of the addr buffer in bytes
 * @return {bool} result of the four-argument get_addr(); addr is written
 *  only on success
 */
bool http_utils::get_addr(const char* url, char* addr, size_t size)
{
    unsigned short port;
    char host[256];

    const bool ok = get_addr(url, host, sizeof(host), &port);
    if (ok) {
        snprintf(addr, size, "%s:%d", host, port);
    }
    return ok;
}
}
namespace oceanstar{
/** Start out as a plain http URL on port 80. */
http_url::http_url(void)
: port_(80)
{
    // strncpy zero-fills the remainder of proto_.
    strncpy(proto_, "http", sizeof(proto_));
}
/** Drop every parsed component and restore the http / port 80 defaults. */
void http_url::reset(void) {
    url_params_.clear();
    url_path_.clear();
    domain_.clear();

    port_ = 80;
    strncpy(proto_, "http", sizeof(proto_));
}
/**
 * Parse the "host[:port]" part that follows the scheme and store it in
 * domain_ / port_.
 *
 * port_ is overwritten only when an explicit port in the range 1..65534 is
 * present; otherwise the value already held by the object is kept.
 *
 * @param url {const char*} text immediately after "http://" or "https://"
 * @return {const char*} pointer to the first '/' (start of the path), or
 *  NULL when there is no path or when url starts with '/' (empty host)
 */
const char* http_url::parse_domain(const char *url) {
    if (*url == '/') {
        logger_error("invalid url: %s", url);
        return NULL;
    }

    const char* slash = strchr(url, '/');
    const size_t len = slash != NULL ? (size_t) (slash - url) : strlen(url);

    // Work on the [url, url + len) span directly instead of copying it into
    // a fixed buffer; the old strncpy copy was never NUL-terminated.
    // NOTE(review): a bracketed IPv6 literal such as [::1]:8080 is not
    // handled, the first ':' is taken as the port separator.
    const char* colon = (const char*) memchr(url, ':', len);
    if (colon == NULL) {
        domain_.assign(url, len);
        return slash;
    }

    domain_.assign(url, (size_t) (colon - url));

    // atoi stops at the '/' (or the end of the string) that follows the port.
    const int n = atoi(colon + 1);
    if (n > 0 && n < 65535) {
        port_ = (unsigned short) n;
    }
    return slash;
}
/**
 * Parse an absolute http:// / https:// URL or a relative URL that starts
 * with '/'.
 *
 * Components that the URL does not mention are left as they were, so call
 * reset() before reusing the object for another URL.
 *
 * @param url {const char*} the URL to parse
 * @return {bool} false when the URL has an unknown scheme, has nothing after
 *  the scheme, has an empty host, or has a malformed path
 */
bool http_url::parse(const char *url) {
    const char* ptr;

    if (!strncasecmp(url, HTTP_PREFIX, sizeof(HTTP_PREFIX) - 1)) {
        ptr = url + sizeof(HTTP_PREFIX) - 1;
    } else if (!strncasecmp(url, HTTPS_PREFIX, sizeof(HTTPS_PREFIX) - 1)) {
        ptr = url + sizeof(HTTPS_PREFIX) - 1;
        port_ = 443;
        strncpy(proto_, "https", sizeof(proto_));
    } else if (*url == '/') {
        ptr = url;
    } else {
        logger_error("invalid url: %s", url);
        return false;
    }

    if (*ptr == 0) {
        logger_error("invalid url: %s", url);
        return false;
    }

    if (ptr == url) {
        // The URL holds a relative path only.
        return parse_url_part(url);
    }

    // Absolute URL. An empty host ("http:///path") must be rejected here:
    // parse_domain() returns NULL both for that error and for "no path",
    // so the two cases cannot be told apart afterwards.
    if (*ptr == '/') {
        logger_error("invalid url: %s", url);
        return false;
    }

    // Extract the domain first, then the relative URL.
    ptr = parse_domain(ptr);
    if (ptr == NULL) {
        url_path_ = "/";
        return true;
    }
    return parse_url_part(ptr);
}
// Advance ptr while it has not reached the terminating NUL and cond holds.
// The definition spans two lines, so the first one must end in a backslash.
#define SKIP_WHILE(cond, ptr) { \
    while (*(ptr) && (cond)) (ptr)++; }
/**
 * Split the relative part of a URL into url_path_ and url_params_.
 *
 * A path made only of '/' characters is stored as "/". url_params_ is
 * written only when a non-empty query string follows the '?'.
 *
 * @param url {const char*} relative URL, must start with '/'
 * @return {bool} false when url does not start with '/'
 */
bool http_url::parse_url_part(const char *url) {
    if (*url != '/') {
        logger_error("invalid url: %s", url);
        return false;
    }

    // Skip the leading run of slashes to see whether anything follows it.
    const char* ptr = url;
    while (*ptr == '/') {
        ++ptr;
    }
    if (*ptr == 0) {
        url_path_ = "/";
        return true;
    }

    const char* qm = strchr(ptr, '?');
    if (qm == NULL) {
        url_path_ = url;
        return true;
    }

    // Copy exactly the bytes before '?'. The previous variable-length array
    // plus strncpy left the buffer without a NUL terminator.
    url_path_.assign(url, (size_t) (qm - url));

    ++qm;
    if (*qm != 0) {
        url_params_ = qm;
    }
    return true;
}
}
使用
例子
#include <cstring>
#include "http_utils.h"
using namespace oceanstar;
/**
 * Demo: print the host and port of a URL, then the combined "host:port"
 * address, using both http_utils::get_addr() overloads.
 */
int main(int argc, char *argv[])
{
    unsigned short port ;
    char host[256];
    memset(host, 0, sizeof(host));
    std::string url = "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 ";
    if(http_utils::get_addr(url.c_str(), host, sizeof(host), &port)){
        logger_info("host: %s, port: %d", host, port);
    }else{
        // The format needs "%s" for __FUNCTION__; the former "s(%d)" fed the
        // function-name pointer to %d.
        logger_error("%s(%d): url invalid ", __FUNCTION__ , __LINE__);
    }
    char addr[256];
    memset(addr, 0, sizeof(addr));
    if(http_utils::get_addr(url.c_str(), addr, sizeof(addr))){
        logger_info("addr: %s", addr);
    }else{
        logger_error("%s(%d): url invalid ", __FUNCTION__ , __LINE__);
    }
    return (0);
}

例子
#include <cstring>
#include "http_utils.h"
using namespace oceanstar;
int main(int argc, char *argv[])
{
unsigned short port ;
char host[256];
memset(host, 0, sizeof(host));
std::string url = "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 ";
http_url httpUrl;
if(!httpUrl.parse(url.c_str())){
exit(0);
}
logger_info("proto: %s", httpUrl.get_proto());
logger_info("domain: %s", httpUrl.get_domain());
logger_info("port: %d", httpUrl.get_port());
logger_info("url_path: %s", httpUrl.get_url_path());
logger_info("url_params: %s", httpUrl.get_url_params());
printf("\n\n");
httpUrl.reset();
url = "http://127.0.0.1:443/path/test.cgi";
if(!httpUrl.parse(url.c_str())){
exit(0);
}
logger_info("proto: %s", httpUrl.get_proto());
logger_info("domain: %s", httpUrl.get_domain());
logger_info("port: %d", httpUrl.get_port());
logger_info("url_path: %s", httpUrl.get_url_path());
logger_info("url_params: %s", httpUrl.get_url_params());
return (0);
}

从 127.0.0.1:80 形式的地址中解析出 host 和 port
#include <cstring>
#include "http_utils.h"
using namespace oceanstar;
// Split s in place at the last occurrence of delimiter: the delimiter is
// overwritten with NUL and a pointer to the text after it is returned.
// Returns NULL (s untouched) when the delimiter does not occur.
// s is written to, so it must point to writable memory.
static char *acl_split_at_right(char *s, int delimiter)
{
    char *last = strrchr(s, delimiter);
    if (last == NULL) {
        return NULL;
    }
    *last = 0;
    return last + 1;
}
// Split buf in place into host and port at the last ':'.
// Accepted forms: "host:port", "host:", "host", ":port".
//   *host - the text before ':' (or all of buf when there is no ':');
//           def_host when that text is empty
//   *port - the text after ':'; NULL when it is empty or there is no ':'
// Both outputs are always written, so callers need not pre-initialise them.
// Returns NULL on success, or an error message when no host is available
// (i.e. the host text is empty and def_host is NULL).
static const char *acl_host_port(char *buf, char **host, char *def_host,char **port)
{
    char *cp = acl_split_at_right(buf, ':');

    // After the split buf holds only the host part; without a ':' the whole
    // string is the host.
    *host = *buf ? buf : def_host;
    *port = (cp != NULL && *cp) ? cp : NULL;

    if (*host == 0)
        return "missing host information";
    return NULL;
}
// Split buf in place into *host and *port via acl_host_port() and validate
// the result. An empty or missing host is replaced by the string "0".
// NOTE(review): "0" presumably stands for "any address" - confirm with caller.
// Returns 0 on success, -1 when the address is rejected (an error is logged).
static int host_port(char *buf, char **host, char **port)
{
    // Writable arrays: a string literal cannot be converted to char* in
    // standard C++11 and later.
    static char empty_host[] = "";
    static char any_host[] = "0";

    const char *err = acl_host_port(buf, host, empty_host, port);
    if (err != NULL) {
        logger_error("%s(%d), %s: invalid addr %s, %s",
            __FILE__, __LINE__, __FUNCTION__, buf, err);
        return -1;
    }

    if (*port != NULL && atoi(*port) < 0) {
        logger_error("%s(%d), %s: invalid port: %s, addr: %s",
            __FILE__, __LINE__, __FUNCTION__, *port, buf);
        return -1;
    }

    if (*host == NULL || **host == 0)
        *host = any_host;
    return 0;
}
/**
 * Demo: split a "host:port" string with host_port() and print both parts.
 * Exits with status 1 when the copy cannot be allocated or the address is
 * rejected.
 */
int main(int argc, char *argv[])
{
    // host_port() modifies its input, so work on a heap copy of the literal.
    char *addr = strdup("127.0.0.0:80");
    if (addr == NULL) {
        printf("out of memory");
        return 1;
    }

    char *host = NULL, *port = NULL;
    if (host_port(addr, &host, &port) < 0) {
        printf("formet error");
        free(addr);
        // main returns int; the former "return NULL" exited with status 0.
        return 1;
    }
    printf("host: %s, port: %s", host, port);
    free(addr);
    return (0);
}
通过url获取host【版本1】
#include <iostream>
/*
 * Copy the "host[:port]" part of url into buf.
 *
 * An optional leading "http://" or "https://" (case-insensitive) is skipped
 * and everything up to the first '/' is copied. The result is always
 * NUL-terminated and truncated to size - 1 characters if necessary. buf
 * becomes the empty string when url is NULL, empty after the scheme, or
 * starts with '/'. Nothing is written when buf is NULL or size is 0.
 *
 * Example input:
 *   http://www.gmail.com:443/path/test.cgi?name=value&name2=value2
 * yields "www.gmail.com:443".
 */
static void __get_host_from_url(char *buf, size_t size, const char *url)
{
    const char *ptr1, *ptr2;
    size_t n;

    if (buf == NULL || size == 0)
        return;
    buf[0] = 0;
    if (url == NULL)
        return;

    if (strncasecmp(url, "http://", sizeof("http://") - 1) == 0)
        ptr1 = url + sizeof("http://") - 1;
    else if (strncasecmp(url, "https://", sizeof("https://") - 1) == 0)
        ptr1 = url + sizeof("https://") - 1;
    else
        ptr1 = url;

    if (*ptr1 == 0 || *ptr1 == '/')
        return;

    ptr2 = strchr(ptr1, '/');
    if (ptr2)
        n = (size_t) (ptr2 - ptr1);
    else
        n = strlen(ptr1);

    // Leave room for the terminator: strncpy writes none when it stops
    // after n characters of a longer source.
    if (n >= size)
        n = size - 1;
    strncpy(buf, ptr1, n);
    buf[n] = 0;
}
// Demo: extract the host part of a sample URL and print it on its own line.
int main() {
    const char *sample = "http://www.gmail.com:443/path/test.cgi?name=value&name2=value2 ";

    // Zero-filled so the buffer is a valid empty string before the call.
    char host[256] = { 0 };
    __get_host_from_url(host, sizeof(host), sample);

    std::cout << host << std::endl;
}

还没有评论,来说两句吧...