Tinyhttpd 源码分析

/*
 * Read one line from a socket into buf, one byte at a time.
 *
 * A line ends at "\n", "\r\n" or a lone "\r"; whichever terminator is seen is
 * stored in buf as a single '\n'.  Reading also stops once size - 1 bytes
 * have been stored, or when recv() returns 0 or an error (in that case no
 * '\n' is appended).  buf is NUL-terminated whenever size > 0.
 *
 * sock: connected socket descriptor to read from
 * buf:  destination buffer
 * size: capacity of buf in bytes
 *
 * Returns the number of bytes stored in buf, not counting the trailing NUL.
 * Returns 0 without touching buf when buf is NULL or size <= 0.
 */
int get_line(int sock, char *buf, int size)
{
    int i = 0;
    char c = '\0';
    int n;

    /* With no capacity there is no room even for the terminator; writing
     * buf[0] here would be out of bounds. */
    if (buf == NULL || size <= 0)
        return 0;

    while ((i < size - 1) && (c != '\n'))
    {
        n = recv(sock, &c, 1, 0);
        if (n <= 0)
            break;          /* peer closed the connection or recv failed */

        if (c == '\r')
        {
            /* MSG_PEEK looks at the next byte without removing it from the
             * socket, so a byte that is not '\n' stays queued for the next
             * call. */
            n = recv(sock, &c, 1, MSG_PEEK);
            if ((n > 0) && (c == '\n'))
                recv(sock, &c, 1, 0);   /* consume the '\n' of "\r\n" */
            else
                c = '\n';               /* lone '\r': treat as end of line */
        }
        buf[i] = c;
        i++;
    }
    buf[i] = '\0';

    return(i);
}

/*
 * Handle one HTTP request on a client socket and then close the socket.
 *
 * The request line (e.g. "GET /index.html HTTP/1.1") is split into method
 * and URL.  Only GET and POST are accepted; anything else is answered via
 * unimplemented().  The URL is mapped to a file under "htdocs"; a missing
 * file is answered via not_found(), a regular file via serve_file(), and a
 * CGI target via execute_cgi().  A request is treated as CGI when it is a
 * POST, when a GET carries a '?' query string, or when the target file has
 * any execute permission bit set.
 *
 * pclient: pointer to an int holding the connected client socket.  The
 *          void * in/out signature presumably lets this function be used as
 *          a thread start routine (e.g. with pthread_create) - confirm
 *          against the caller.
 *
 * Always returns NULL.
 */
void* accept_request(void* pclient)
{
	int client = *(int*) pclient;
	char buf[1024];
	int numchars;
	size_t len;       /* number of valid bytes in buf */
	char method[255];
	char url[255];
	char path[512];
	size_t i, j;
	struct stat st;
	int found;
	int cgi = 0;      /* becomes true if server decides this is a CGI
                    * program */
	char *query_string = NULL;

	/* Read the request line, e.g. "GET /index.html HTTP/1.1". */
	numchars = get_line(client, buf, sizeof(buf));
	len = (numchars > 0) ? (size_t)numchars : 0;

	/* 1. Copy the method token.  The index is bounded by len because
	 * ISspace('\0') is false, so the terminator alone would not stop the
	 * loop and stale bytes of buf would be copied. */
	i = 0; j = 0;
	while ((j < len) && !ISspace(buf[j]) && (i < sizeof(method) - 1))
	{
		method[i] = buf[j];
		i++; j++;
	}
	method[i] = '\0';

	/* Only GET and POST are supported. */
	if (strcasecmp(method, "GET") && strcasecmp(method, "POST"))
	{
		unimplemented(client);
		close(client);  /* do not leak the socket on this early exit */
		return NULL;
	}

	/* A POST is always handled by a CGI program. */
	if (strcasecmp(method, "POST") == 0)
		cgi = 1;

	/* 2. Skip the separating whitespace and copy the URL token. */
	i = 0;
	while ((j < len) && ISspace(buf[j]))
		j++;
	while ((j < len) && !ISspace(buf[j]) && (i < sizeof(url) - 1))
	{
		url[i] = buf[j];
		i++; j++;
	}
	url[i] = '\0';

	/* 3. For GET, split "path?query" in place: the '?' is overwritten with
	 * a terminator so url holds only the path and query_string points at
	 * the parameters.  A query string means the request needs a script. */
	if (strcasecmp(method, "GET") == 0)
	{
		query_string = url;
		while ((*query_string != '?') && (*query_string != '\0'))
			query_string++;
		if (*query_string == '?')
		{
			cgi = 1;
			*query_string = '\0';
			query_string++;
		}
	}

	/* Map the URL onto the document root.  url is expected to start with
	 * '/', so no separator is inserted.
	 * NOTE(review): url comes straight from the client and is not checked
	 * for ".." components, so files outside htdocs can be addressed -
	 * consider rejecting such URLs. */
	snprintf(path, sizeof(path), "htdocs%s", url);

	/* A path ending in '/' defaults to index.html.  path holds at most
	 * 6 + 254 bytes here, so the appends below fit in its 512 bytes. */
	if (path[strlen(path) - 1] == '/')
		strcat(path, "index.html");

	found = (stat(path, &st) != -1);
	if (found && ((st.st_mode & S_IFMT) == S_IFDIR))
	{
		/* A directory requested without a trailing '/': serve its
		 * index.html.  stat() is repeated so that the checks below look at
		 * the file itself and not at the directory's permission bits. */
		strcat(path, "/index.html");
		found = (stat(path, &st) != -1);
	}

	if (!found) {
		/* Read and discard the remaining request headers, then reply 404. */
		while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
			numchars = get_line(client, buf, sizeof(buf));
		not_found(client);
	}
	else
	{
		/* Any execute bit (user, group or other) marks the file as CGI. */
		if ((st.st_mode & S_IXUSR) ||
				(st.st_mode & S_IXGRP) ||
				(st.st_mode & S_IXOTH)    )
			cgi = 1;

		if (!cgi)
			serve_file(client, path); 	/* send the file contents */
		else
			execute_cgi(client, path, method, query_string); 	/* run the script */
	}

	close(client);  	/* one request per connection */
	return NULL;
}

/*
 * Send a regular file to the client as the HTTP response.
 *
 * The remaining request headers are read from the socket and discarded
 * first.  If the file cannot be opened the reply is produced by
 * not_found(); otherwise headers() sends the response headers and cat()
 * sends the file contents.
 *
 * client:   connected client socket
 * filename: path of the file to send
 */
void serve_file(int client, const char *filename)
{
	FILE *resource = NULL;
	int numchars = 1;
	char buf[1024];

	/* Seed buf with something other than "\n" so the loop runs at least
	 * once. */
	buf[0] = 'A'; buf[1] = '\0';
	while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
		numchars = get_line(client, buf, sizeof(buf));

	resource = fopen(filename, "r");
	if (resource == NULL)
	{
		/* Nothing was opened, so there is nothing to close: calling
		 * fclose(NULL) is undefined behaviour. */
		not_found(client);
		return;
	}

	headers(client, filename);  /* response headers */
	cat(client, resource);      /* response body: the file contents */
	fclose(resource);
}

/*
 * Parse the value part of a Content-Length header: optional blanks followed
 * by decimal digits.  Returns the value, or -1 when no digit is present or
 * the value exceeds 2^31 - 1.
 */
static int cgi_parse_content_length(const char *s)
{
	const long limit = 0x7fffffffL;
	long value = 0;
	int digits = 0;

	while (*s == ' ' || *s == '\t')
		s++;
	while (*s >= '0' && *s <= '9') {
		int d = *s - '0';

		if (value > (limit - d) / 10)
			return -1;      /* would overflow */
		value = value * 10 + d;
		digits++;
		s++;
	}
	return (digits > 0) ? (int)value : -1;
}

/*
 * Run a CGI program and relay its output to the client.
 *
 * For GET the remaining request headers are discarded.  For POST they are
 * scanned for Content-Length; a missing or invalid length is answered via
 * bad_request().  A child process is forked with its stdin/stdout connected
 * to two pipes and REQUEST_METHOD plus QUERY_STRING (GET) or CONTENT_LENGTH
 * (POST) set in its environment, then it executes `path`.  The parent sends
 * the status line, forwards the POST body to the child's stdin, copies the
 * child's stdout to the client and waits for the child to exit.  Failure to
 * create the pipes or to fork is answered via cannot_execute().
 *
 * client:       connected client socket
 * path:         path of the CGI program to execute
 * method:       request method, compared case-insensitively with "GET"/"POST"
 * query_string: text after '?' in the URL; only used for GET
 */
void execute_cgi(int client, const char *path,
                 const char *method, const char *query_string)
{
	static const char status_line[] = "HTTP/1.0 200 OK\r\n";
	char buf[1024];
	int cgi_output[2];
	int cgi_input[2];
	pid_t pid;
	int status;
	int i;
	char c;
	int numchars = 1;
	int content_length = -1;

	/* Seed buf with something other than "\n" so the GET loop runs. */
	buf[0] = 'A'; buf[1] = '\0';
	if (strcasecmp(method, "GET") == 0)
		while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
			numchars = get_line(client, buf, sizeof(buf));
	else    /* POST */
	{
		/* Scan the headers for Content-Length.  The prefix is compared
		 * without modifying buf, so the value is parsed correctly with or
		 * without a space after the colon. */
		numchars = get_line(client, buf, sizeof(buf));
		while ((numchars > 0) && strcmp("\n", buf))
		{
			if (strncasecmp(buf, "Content-Length:", 15) == 0)
				content_length = cgi_parse_content_length(buf + 15);
			numchars = get_line(client, buf, sizeof(buf));
		}
		if (content_length < 0) {
			bad_request(client);
			return;
		}
	}

	/* cgi_output carries the script's stdout to the parent. */
	if (pipe(cgi_output) < 0) {
		cannot_execute(client);
		return;
	}
	/* cgi_input carries the request body to the script's stdin. */
	if (pipe(cgi_input) < 0) {
		close(cgi_output[0]);
		close(cgi_output[1]);
		cannot_execute(client);
		return;
	}

	if ( (pid = fork()) < 0 ) {
		close(cgi_output[0]);
		close(cgi_output[1]);
		close(cgi_input[0]);
		close(cgi_input[1]);
		cannot_execute(client);
		return;
	}

	if (pid == 0)  /* child: CGI script */
	{
		char meth_env[255];
		char query_env[512];   /* "QUERY_STRING=" plus a query of URL length */
		char length_env[255];

		dup2(cgi_output[1], 1);		/* stdout -> write end of cgi_output */
		dup2(cgi_input[0], 0);		/* stdin  <- read end of cgi_input */
		/* Close the pipe ends that belong to the parent. */
		close(cgi_output[0]);
		close(cgi_input[1]);

		/* putenv() keeps a pointer to these buffers; they stay valid
		 * because the process either execs or exits right below. */
		snprintf(meth_env, sizeof(meth_env), "REQUEST_METHOD=%s", method);
		putenv(meth_env);
		if (strcasecmp(method, "GET") == 0) {
			snprintf(query_env, sizeof(query_env), "QUERY_STRING=%s",
			         query_string ? query_string : "");
			putenv(query_env);
		}
		else {   /* POST */
			snprintf(length_env, sizeof(length_env), "CONTENT_LENGTH=%d",
			         content_length);
			putenv(length_env);
		}

		execl(path, path, (char *)NULL);
		/* Only reached when execl() failed.  _exit() skips the stdio
		 * flush and atexit handlers inherited from the parent. */
		_exit(1);
	} else {    /* parent */
		/* Close the pipe ends that belong to the child. */
		close(cgi_output[1]);
		close(cgi_input[0]);

		/* Sent only once the script has been started, so a setup failure
		 * above does not produce a 200 followed by an error reply. */
		send(client, status_line, sizeof(status_line) - 1, 0);

		/* Forward the POST body to the script's stdin; stop early if the
		 * client goes away or the script stops reading. */
		if (strcasecmp(method, "POST") == 0)
			for (i = 0; i < content_length; i++) {
				if (recv(client, &c, 1, 0) <= 0)
					break;
				if (write(cgi_input[1], &c, 1) <= 0)
					break;
			}
		/* Closing the write end gives the script EOF on stdin; a script
		 * that reads until EOF would otherwise never produce output. */
		close(cgi_input[1]);

		/* Relay everything the script writes to the client. */
		while (read(cgi_output[0], &c, 1) > 0)
			send(client, &c, 1, 0);

		close(cgi_output[0]);
		/* Reap the child. */
		waitpid(pid, &status, 0);
	}
}

总结一下，整个流程其实并不算复杂，调用流程大致为：main -> startup -> accept_request -> execute_cgi 或 serve_file。
小声bb：太久没更博客，主要是前段时间太忙（其实是太懒了：），立下flag，开始持续更新了！！！