本实验分为三个部分：设置代理、并发、缓存
准备部分：安装netstat工具

1	sudo apt install net-tools

1. 实现一个顺序的代理服务器

概述

在第一部分，设置代理来接受传入连接，读取和解析请求，转发请求到Web服务器，读取服务器的响应，并将这些响应转发给相应的客户端(用于处理HTTP/1.0的GET请求)
启动时，你的代理应该监听在命令行指定的端口上接收传入连接。一旦建立了连接，代理应该从客户端读取整个请求并解析请求。它应该确定客户端是否发送了有效的 HTTP 请求；如果是，则可以建立自己的连接到适当的 Web 服务器，然后请求客户端指定的对象。最后，代理应该读取服务器的响应并将其转发给客户端

代码实现

首先包含必须的库

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

#include "csapp.h"

做一些基本定义

// 定义基本常量
#define MAXLINE 8192 // 最大行长度
#define MAXBUF 8192 // 缓冲区
#define LISTENQ 1024 // 监听队列最大长度

// 类型定义
typedef struct sockaddr SA;

// 标识客户端软件信息
static const char *user_agent_hdr = "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n";
// 控制网络连接的管理
static const char *conn_hdr = "Connection: close\r\n";
// 专门用于代理连接的头部
static const char *proxy_conn_hdr = "Proxy-Connection: close\r\n";

实现核心处理函数

void doit(int fd){
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    int serverfd; // 连接到目标server的套接字
    int n; // 读取的字节数
    rio_t client_rio, server_rio;
    char hostname[MAXLINE], port[MAXLINE], path[MAXLINE], newreq[MAXLINE];

    Rio_readinitb(&client_rio, fd); // 初始化client的I/O结构
    if(!Rio_readlineb(&client_rio, buf, MAXLINE)) return;

    sscanf(buf, "%s %s %s", method, uri, version); // 解析请求行
    parse_uri(uri, hostname, port, path); // 解析URI
    build_reqheader(&client_rio, newreq, method, hostname, port, path); // 构造新的请求头

    serverfd = Open_clientfd(hostname, port); // 连接到目标服务器
    if(serverfd < 0){
        fprintf(stderr, "connect to real server err");
        return;
    }

    Rio_readinitb(&server_rio, serverfd); // 初始化server的I/O
    Rio_writen(serverfd, newreq, strlen(newreq)); // 发送请求到目标server

    // 接收响应并转发给clinet
    while((n = Rio_readlineb(&server_rio, buf, MAXLINE))!=0){ // 当Rio_readlineb()返回0时，表示服务器已关闭连接
        printf("get %d bytes from server\n", n);
        Rio_writen(fd, buf, n);
    }
    Close(serverfd); // 关闭到目标server的连接
}

实现URI解析函数

void parse_uri(char *uri, char *hostname, char *port, char *path){
    char *hostpos = strstr(uri, "//");
    if(hostpos == NULL){ // 没有"//",可能时相对路径
        char *pathpos = strstr(uri, "/");
        if(pathpos != NULL){
            strcpy(path, pathpos); // 复制路径
            strcpy(port, "80"); // 使用默认端口
            return; // hostname为空
        }
        hostpos = uri; // 可能是"hostname:port"格式
    }else{
        hostpos += 2; // 跳过"//"
    }

    char *portpos = strstr(hostpos, ":");
    if(portpos != NULL){ // 有明确端口号
        int portnum;
        char *pathpos = strstr(portpos, "/");
        if(pathpos != NULL){ // 有路径
            sscanf(portpos+1, "%d%s", &portnum, path);
        }else{ // 无路径
            sscanf(portpos+1, "%d", &portnum);
        }
        sprintf(port, "%d", portnum);
        *portpos = '\0'; // 截断字符串，分离主机名
    }else{ // 无明确端口号
        char *pathpos = strstr(hostpos, "/");
        if(pathpos != NULL){ // 有路径
            strcpy(path, pathpos);
            *pathpos = '\0';
        }
        strcpy(port, "80");
    }
    strcpy(hostname, hostpos); // 默认http端口

    return;
}

实现HTTP请求头构造函数

void build_reqheader(rio_t *rp, char *newreq, char *method, char *hostname, char *port, char *path){
    sprintf(newreq, "%s %s HTTP/1.0\r\n", method, path); // 构造请求行

    char buf[MAXLINE];
    
    // 遍历并过滤客户端头部
    while(Rio_readlineb(rp, buf, MAXLINE) > 0){
        if(!strcmp(buf, "\r\n")) break; // 遇到空行结束头部
        if(strstr(buf, "Host:")!=NULL) continue; // 将用新的主机名和端口替换
        if(strstr(buf, "User-Agent:")!=NULL) continue; // 使用代理服务器预定义的用户代理
        if(strstr(buf, "Connection:")!=NULL) continue; //  使用代理服务器定义的连接策略
        if(strstr(buf, "Proxy-Connection:")!=NULL) continue; // 使用代理服务器定义的代理连接策略

        sprintf(newreq, "%s%s", newreq, buf);
    }

    sprintf(newreq, "%sHost: %s:%s\r\n", newreq, hostname, port); // 添加Host头部
    sprintf(newreq, "%s%s", newreq, user_agent_hdr); // 添加预定义的用户代理
    sprintf(newreq, "%s%s", newreq, conn_hdr); // 添加连接控制头部
    sprintf(newreq, "%s%s", newreq, proxy_conn_hdr);
    sprintf(newreq, "%s\r\n", newreq); // 结束头部
}

main函数实现监听

int main(int argc, char** argv)
{
    // printf("%s", user_agent_hdr);
    // return 0;

    int listenfd,connfd; // 套接字描述符
    socklen_t clinetlen;
    char hostname[MAXLINE], port[MAXLINE]; // 存储客户端的主机名和端口
    struct sockaddr_storage clientaddr;

    // 程序运行时必带一个端口号参数
    if(argc != 2){
        fprintf(stderr,"usage :%s <port> \n", argv[0]);
        exit(1);
    }
    signal(SIGPIPE, SIG_IGN); // 防止客户端断开连接后服务器继续写入会导致程序崩溃

    //创建监听套接字
    listenfd = Open_listenfd(argv[1]);
    // 死循环监听端口，如果有请求则提供服务
    while(1){
        clinetlen = sizeof(clientaddr);
        connfd = Accept(listenfd, (SA *)&clientaddr, &clinetlen); // 阻塞等待客户端连接
        Getnameinfo((SA *) &clientaddr, clinetlen, hostname, MAXLINE, port, MAXLINE, 0); // 获取客户端信息
        printf("Accepted connection from (%s, %s)\n", hostname, port);
        doit(connfd); // 处理请求
        Close(connfd);
    }
    return 0;
}

检测

1 2	make ./driver.sh

运行结果
最后的totalScore为40/70

2. 处理多个并发请求

概述

采用多线程方式实现服务器并发：
1. 主线程负责监听端口
2. 每有一个客户端连接时，分配一个新线程单独为这个客户端提供服务
3. 实现：main死循环监听端口，每当有一个连接进入时，创建并分配一个新线程进行服务，服务结束后这个线程回收
为了减少线程创建销毁的效率，采用预线程化：
1. 服务器初始化时就创建一定数量的线程(处于挂起的状态)
2. 主线程监听端口，当有连接进入时，激活一个挂起的线程进行服务
3. 服务完成后，线程接着挂起
4. 如果连接进来时所有线程都忙碌，则将连接的描述符插入缓冲区中

代码实现

添加新的库

#include <pthread.h>
#include <semaphore.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>

定义新常量

// 并发相关常量
#define NTHEARDS 4 // 线程数量
#define SBUFSIZE 16 // 缓冲区大小

// 共享缓冲区(实现线程安全的有限缓冲区)
typedef struct{
    int *buf; // 缓冲区数组
    int n; // 最大槽位数
    int front,rear; // 第一个/最后槽位
    sem_t mutex;  // 保护缓冲区访问的互斥锁
    sem_t slots; // 可用槽位数
    sem_t items; // 可用项目数
}sbuf_t;

// 全局缓冲区
sbuf_t sbuf;

在main函数中添加多线程处理的相关函数(生产者-消费者模型)

pthread_t tid; // 线程标识符变量

sbuf_init(&sbuf, SBUFSIZE); // 初始化并发缓冲区

for(int i = 0;i < NTHEARDS;i++){ // 创建一定数量的工作线程
    Pthread_create(&tid, NULL, thread, NULL);
}

sbuf_insert(&sbuf, connfd); // 将连接描述符插入共享缓冲区
// doit(connfd); // 处理请求
// Close(connfd);

增加线程工作函数

void *thread(void *vargp){
    Pthread_detach(Pthread_self()); //分离线程，方便系统回收
    while(1){
        int connfd = sbuf_remove(&sbuf); // 从缓冲区获取连接
        doit(connfd);
        Close(connfd);
    }
}

SBUF(共享缓存区)结构和函数的实现

void sbuf_init(sbuf_t *sp, int n){
    sp->buf = (int *)calloc(n, sizeof(int));
    sp->n = n;
    sp->front = sp->rear = 0;
    sem_init(&sp->mutex, 0, 1);
    sem_init(&sp->slots, 0, n);
    sem_init(&sp->items, 0, 0);
}

void sbuf_insert(sbuf_t *sp, int item){
    sem_wait(&sp->slots); 
    sem_wait(&sp->mutex);
    sp->buf[(++sp->rear) % (sp->n)] = item;
    sem_post(&sp->mutex);
    sem_post(&sp->items); 
}

int sbuf_remove(sbuf_t *sp) {
    int item;
    sem_wait(&sp->items);
    sem_wait(&sp->mutex);
    sp->front = (sp->front + 1) % (sp->n);
    item = sp->buf[sp->front];
    sem_post(&sp->mutex);
    sem_post(&sp->slots);
    return item;
}

检测

1
2
3

make clean
make
./driver.sh

运行结果
最后totalScore: 55/70

3. 缓存Web对象

概述

为所实现的代理添加一个缓存，用于将最近使用的Web对象存储在内存中,缓存需要实现LRU替换策略
由于读操作频率远大于写操作频率，此处使用读者-写者模型(并发模型)：
1. 读者之间不互斥(默认读者不进行写操作)，可以让多个读者对同一个缓存读操作
2. 写者互斥，不仅和其他写者互斥，和读者也互斥
3. 优先安排读操作
需要实现缓存结构以及相关函数

代码实现

创建cache.h,实现相关结构

#ifndef CACHE_H
#define CACHE_H

#include "csapp.h"

#define MAX_CACHE_SIZE 1049000 // 直接复制proxy.c中的宏定义
#define MAX_OBJECT_SIZE 102400   
#define MAX_CACHELINE_NUM 10 // 缓存最多只有10行

typedef struct{
    char uri[MAXLINE];
    char obj[MAX_OBJECT_SIZE]; // 对应的响应缓存

    int reader_count; // 当前读者数
    time_t timestamp; // 最后被使用的时间戳

    sem_t mutex_reader; 
    sem_t mutex_write;
}CacheLine; 

typedef struct{
    CacheLine data[MAX_CACHELINE_NUM];
}Cache;

Cache* init_cache();
int reader(Cache *cache, int fd, char *uri);
void writer(Cache *cache, char *uri, char *buf);

#endif

创建cache.c,实现相关函数

#include "cache.h"
#include <time.h>

static int remove_index(Cache *cache); // 找到应该被替换的缓存行索引

Cache* init_cache(){
    Cache *cache = (Cache*)malloc(sizeof(Cache));

    for(int i = 0; i < MAX_CACHELINE_NUM; i++){
        Sem_init(&cache->data[i].mutex_reader, 0, 1); // 初始化读者互斥信号量
        Sem_init(&cache->data[i].mutex_write, 0, 1);  // 初始化写者互斥信号量
        cache->data[i].reader_count = 0;
        cache->data[i].timestamp = 0;
    }
    return cache;
}

// 缓存读取
int reader(Cache *cache, int fd, char *uri){
    for(int i = 0; i < MAX_CACHELINE_NUM; i++){ 
        if(!strcmp(cache->data[i].uri, uri)){ // 找到匹配的URI
            P(&cache->data[i].mutex_reader); // 获取读者锁
            cache->data[i].reader_count++;
            cache->data[i].timestamp = time(NULL); // 更新时间戳

            if(cache->data[i].reader_count==1){ // 如果是第一个读者
                P(&cache->data[i].mutex_write); // 获取写锁
            }
            V(&cache->data[i].mutex_reader); // 释放读者锁

            Rio_writen(fd, cache->data[i].obj, MAX_OBJECT_SIZE); // 写入数据

            P(&cache->data[i].mutex_reader);
            cache->data[i].reader_count--;

            if(cache->data[i].reader_count==0){ // 如果是最后一个读者
                V(&cache->data[i].mutex_write); // 释放写锁
            }
            V(&cache->data[i].mutex_reader); // 释放读者锁

            return 1; // 找到并返回缓存
        }
    }
    return 0; // 未找到缓存
}

// 在缓存已满时，找出最久未被访问的缓存行进行替换
static int remove_index(Cache *cache){
    time_t min = __LONG_MAX__; // 初始化为最大值
    int result = -1;  // 默认返回-1(未找到)

    for(int i = 0; i < MAX_CACHELINE_NUM; i++){
        if(cache->data[i].timestamp < min){
            min = cache->data[i].timestamp; // 更新最小值
            result = i;
        }
    }
    return result;
}

// 缓存写操作
void writer(Cache *cache, char *uri, char *buf){
    int i=remove_index(cache);

    P(&cache->data[i].mutex_write); // 获取写锁
    strcpy(cache->data[i].uri, uri);
    strcpy(cache->data[i].obj, buf);
    cache->data[i].timestamp = time(NULL); // 更新时间戳
    V(&cache->data[i].mutex_write);
}

添加cache.h (后续函数更改在proxy.c中实现)

1	#include "cache.h"

增删相关定义

// 此处在cache.h定义了，所以注释掉
/* Recommended max cache and object sizes */
// #define MAX_CACHE_SIZE 1049000
// #define MAX_OBJECT_SIZE 102400

// 全局缓存变量
Cache *global_cache;

在doit()中添加相关参数

void doit(int fd, Cache *cache);
void doit(int fd, Cache *cache){
    /*函数代码*/
}

在doit()中添加cache相关代码

char complete_uri[MAXLINE] = ""; // 存储完整的URI
char object_buf[MAX_OBJECT_SIZE] = ""; // 存储从服务器获取的对象内容(缓存数据)
size_t total_size = 0; // 记录接收到的对象总大小

// 生成完整的uri并检查缓存
sprintf(complete_uri, "%s%s", complete_uri, hostname);
sprintf(complete_uri, "%s%s", complete_uri, port);
sprintf(complete_uri, "%s%s", complete_uri, path);

if(reader(cache, fd, complete_uri)){ // 直接命中缓存
    fprintf(stdout, "%s from cache\n", uri);
    fflush(stdout);
    return;
}

// 发送请求报文
Rio_readinitb(&server_rio, serverfd);
Rio_writen(serverfd, newreq, strlen(newreq));

total_size = 0;

// 接收响应并转发给clinet
while((n = Rio_readlineb(&server_rio, buf, MAXLINE)) != 0){ // 当Rio_readlineb()返回0时，表示服务器已关闭连接
    /*...原有代码...*/
    strcpy(object_buf + total_size, buf); // 找到缓冲区中下一个可写入的位置
    total_size += n;
}

在thread()函数中修改doit()中的参数

1	doit(connfd, global_cache);

修改Makefile (增加对cache.c cache.h的编译)

# 增加
cache.o: cache.c cache.h
	$(CC) $(CFLAGS) -c cache.c
# 更改
proxy: proxy.o csapp.o cache.o
	$(CC) $(CFLAGS) proxy.o csapp.o cache.o -o proxy $(LDFLAGS)