KNI与内核交互的代码实现
- 背景
- 环境配置
- 代码实现
- 编译和执行
- 总结
背景
DPDK接管NIC之后,网卡接收到的网络数据都交由DPDK处理,但在开发过程中,我们可能只关注其中某一个协议,而其他协议并不需要我们处理;DPDK提供了KNI模块,用于将数据从DPDK发送到内核和拉取内核的响应数据。
比如我们只想处理UDP的数据,而其他的TCP、ARP、ICMP等协议的数据可以发送到内核让它去处理。这样DPDK就像一个过滤器一样只处理我们关注的协议栈。
要注意,DPDK读取网卡数据时是直接从共享内存中读取的,因为网卡已经通过DMA把数据写入了这块内存;同样的,DPDK读取内核返回的数据,也是直接读取由内核映射出来的共享内存。_rx_burst()和_tx_burst()不存在阻塞与非阻塞之分:阻塞主要存在于IO操作,而它们不涉及IO的读写,是纯内存操作,所以调用总是立即返回(有数据就返回数据,没数据也立即返回)。
环境配置
(1)导出dpdk环境变量。
cd dpdk路径
# 如 dpdk/dpdk-stable-19.08.2/
# 切换root权限
sudo su
export RTE_SDK=dpdk路径
export RTE_TARGET=x86_64-native-linux-gcc
(2)配置dpdk。
./usertools/dpdk-setup.sh
依次执行:
43(加载DPDK UIO 模块,即插入driver)
44(加载VFIO模块,也是一种driver)
45(加载KNI模块,将一些数据写回内核)
46(设置巨页,可以不需要频繁页交换,512)
47(设置巨页,可512)
49(把网卡绑定到DPDK;执行之前需要先把eth0 down掉,即执行sudo ifconfig eth0 down;随后按提示输入eth0对应的PCI地址,如0000:03:00.0)
60(退出)
代码实现
主要目标是实现从网卡获取数据,发送到内核;然后从内核获取返回的数据,发送到网卡。
从网卡中接收数据,判断是不是UDP包;如果不是则写入内核,如果是则自己处理。
读取内核返回的数据,直接发送给网卡。
(dpdk_udp.c)
/*
 * dpdk_udp.c - minimal DPDK + KNI demo.
 *
 * Packets received from the NIC are inspected: UDP datagrams addressed to
 * port 8888 are printed and echoed back by this program; every other frame
 * is handed to the Linux kernel through a KNI device. Frames produced by the
 * kernel on the KNI device are transmitted on the NIC.
 */
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include <rte_kni.h>

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>

#define MBUF_NUMBER 8196    /* number of mbufs in the packet pool */
#define MBUF_SIZE   32      /* max packets per rx burst (array size), not a byte size */

#define ENABLE_SEND        1    /* echo matching UDP datagrams back to the sender */
#define ENABLE_KNI_APP     1    /* exchange all other traffic with the kernel via KNI */
#define ENABLE_PROMISCUOUS 0

/* UDP destination port this program handles itself (debug filter). */
#define UDP_DEBUG_PORT 8888

int gDpdkPortId = 0;

#if ENABLE_KNI_APP
struct rte_kni *global_kni = NULL;
#endif

#if ENABLE_SEND
/* Addressing of the reply packet; filled from the request with src/dst swapped.
 * IP addresses and ports are kept in network byte order. */
static uint8_t gSrcMac[RTE_ETHER_ADDR_LEN];
static uint8_t gDstMac[RTE_ETHER_ADDR_LEN];

static uint32_t gSrcIp;
static uint32_t gDstIp;

static uint16_t gSrcPort;
static uint16_t gDstPort;
#endif

#if ENABLE_KNI_APP
/*
 * KNI callback registered in rte_kni_ops.config_network_if; called with
 * if_up != 0 to bring the interface up and 0 to bring it down.
 *
 * Returns 0 on success, -EINVAL for an invalid port, or the negative error
 * returned by rte_eth_dev_start().
 */
static int g_config_network_if(uint16_t port_id, uint8_t if_up) {
	if (!rte_eth_dev_is_valid_port(port_id)) {
		return -EINVAL;
	}

	int ret = 0;

	/* The port is stopped in both cases; "up" is implemented as a restart. */
	rte_eth_dev_stop(port_id);
	if (if_up) {
		ret = rte_eth_dev_start(port_id);
	}

	if (ret < 0) {
		printf("Failed to start port : %d\n", port_id);
	}

	/* Propagate the failure instead of always reporting success. */
	return ret;
}
#endif

/*
 * Hand one received mbuf to the kernel through KNI. Ownership of the mbuf
 * is always consumed: if KNI is disabled or does not accept the packet it
 * is freed here so it cannot leak.
 */
static void pass_to_kernel(struct rte_mbuf *pkt) {
#if ENABLE_KNI_APP
	if (rte_kni_tx_burst(global_kni, &pkt, 1) == 1) {
		return;
	}
#endif
	rte_pktmbuf_free(pkt);
}

#if ENABLE_SEND
/*
 * Build an Ethernet/IPv4/UDP frame addressed with the gSrc*/gDst* globals.
 *
 * pool   - mempool to allocate the mbuf from
 * data   - UDP payload to copy (length - sizeof(struct rte_udp_hdr) bytes)
 * length - UDP datagram length, i.e. UDP header + payload
 *
 * Returns the new mbuf (caller owns it), or NULL if length is shorter than a
 * UDP header, the frame does not fit in one mbuf, or the pool is empty.
 */
static struct rte_mbuf *alloc_udp_pkt(struct rte_mempool *pool, uint8_t *data, uint16_t length) {
	if (length < sizeof(struct rte_udp_hdr)) {
		/* Would underflow the payload size computed below. */
		return NULL;
	}

	const size_t total_len = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + length;
	if (total_len > UINT16_MAX) {
		return NULL;
	}

	struct rte_mbuf *mbuf = rte_pktmbuf_alloc(pool);
	if (!mbuf) {
		return NULL;
	}

	/* Reserves total_len bytes and updates data_len/pkt_len; fails if the
	 * frame does not fit in the mbuf's tailroom. */
	uint8_t *msg = (uint8_t *)rte_pktmbuf_append(mbuf, (uint16_t)total_len);
	if (!msg) {
		rte_pktmbuf_free(mbuf);
		return NULL;
	}

	/*
	 * Ethernet header on the wire:
	 *    6 bytes    6 bytes   2 bytes
	 * +----------+----------+------+
	 * | dst mac  | src mac  | type |
	 * +----------+----------+------+
	 */
	struct rte_ether_hdr *eth = (struct rte_ether_hdr *)msg;
	rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
	rte_memcpy(eth->d_addr.addr_bytes, gDstMac, RTE_ETHER_ADDR_LEN);
	eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);

	/* IPv4 header, no options (IHL = 5). */
	struct rte_ipv4_hdr *ip = (struct rte_ipv4_hdr *)(msg + sizeof(struct rte_ether_hdr));
	ip->version_ihl = 0x45;
	ip->type_of_service = 0;
	ip->total_length = htons(length + sizeof(struct rte_ipv4_hdr));
	ip->packet_id = 0;
	ip->fragment_offset = 0;
	ip->time_to_live = 64;
	ip->next_proto_id = IPPROTO_UDP;
	ip->src_addr = gSrcIp;
	ip->dst_addr = gDstIp;

	/* Checksum field must be zero while the checksum is computed. */
	ip->hdr_checksum = 0;
	ip->hdr_checksum = rte_ipv4_cksum(ip);

	/* UDP header + payload. Ports are already in network byte order. */
	struct rte_udp_hdr *udp = (struct rte_udp_hdr *)(msg + sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr));
	udp->src_port = gSrcPort;
	udp->dst_port = gDstPort;
	udp->dgram_len = htons(length);

	rte_memcpy((uint8_t *)(udp + 1), data, length - sizeof(struct rte_udp_hdr));

	udp->dgram_cksum = 0;
	udp->dgram_cksum = rte_ipv4_udptcp_cksum(ip, udp);

	return mbuf;
}
#endif

/*
 * Initialise EAL, the mbuf pool, port gDpdkPortId (one RX queue, one TX
 * queue when anything transmits) and the KNI device, then poll forever.
 */
int main(int argc, char *argv[]) {
	if (rte_eal_init(argc, argv) < 0) {
		rte_exit(EXIT_FAILURE, "Error\n");
	}

	struct rte_mempool *mbuf_pool = rte_pktmbuf_pool_create("mbufpool", MBUF_NUMBER,
		0, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (!mbuf_pool) {
		rte_exit(EXIT_FAILURE, "mbuf Error\n");
	}

#if ENABLE_KNI_APP
	/* Argument is the maximum number of KNI interfaces; one is created below. */
	if (rte_kni_init(1) < 0) {
		rte_exit(EXIT_FAILURE, "kni init failed\n");
	}
#endif

	/* Port setup. */
	uint16_t nb_rx_queues = 1;
#if ENABLE_SEND || ENABLE_KNI_APP
	/* Both the UDP echo and the kernel->NIC path transmit on queue 0. */
	uint16_t nb_tx_queues = 1;
#else
	uint16_t nb_tx_queues = 0;
#endif

	const struct rte_eth_conf port_conf_default = {
		.rxmode = { .max_rx_pkt_len = RTE_ETHER_MAX_LEN }
	};

	if (rte_eth_dev_configure(gDpdkPortId, nb_rx_queues, nb_tx_queues, &port_conf_default) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_dev_configure Error\n");
	}

	if (rte_eth_rx_queue_setup(gDpdkPortId, 0, 128,
			rte_eth_dev_socket_id(gDpdkPortId), NULL, mbuf_pool) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup Error\n");
	}

#if ENABLE_SEND || ENABLE_KNI_APP
	if (rte_eth_tx_queue_setup(gDpdkPortId, 0, 1024,
			rte_eth_dev_socket_id(gDpdkPortId), NULL) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup Error\n");
	}
#endif

	if (rte_eth_dev_start(gDpdkPortId) < 0) {
		rte_exit(EXIT_FAILURE, "rte_eth_dev_start Error\n");
	}

#if ENABLE_PROMISCUOUS
	rte_eth_promiscuous_enable(gDpdkPortId);
#endif

#if ENABLE_KNI_APP
	/* Create the kernel-visible interface "vEth<port>". */
	struct rte_kni_conf conf;
	memset(&conf, 0, sizeof(conf));

	snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth%d", gDpdkPortId);
	conf.group_id = gDpdkPortId;
	conf.mbuf_size = RTE_MBUF_DEFAULT_BUF_SIZE;

	/* Give the KNI device the same MAC address and MTU as the physical port. */
	rte_eth_macaddr_get(gDpdkPortId, (struct rte_ether_addr *)conf.mac_addr);
	rte_eth_dev_get_mtu(gDpdkPortId, &conf.mtu);

	struct rte_kni_ops ops;
	memset(&ops, 0, sizeof(ops));

	ops.port_id = gDpdkPortId;
	ops.config_network_if = g_config_network_if;

	global_kni = rte_kni_alloc(mbuf_pool, &conf, &ops);
	if (!global_kni) {
		rte_exit(EXIT_FAILURE, "rte_kni_alloc failed\n");
	}
#endif

	while (1) {
		unsigned num_recvd = 0;
		unsigned i = 0;

#if ENABLE_KNI_APP
		/* Kernel -> NIC: forward whatever the kernel wrote to the KNI device. */
		struct rte_mbuf *kni_burst[MBUF_SIZE];
		num_recvd = rte_kni_rx_burst(global_kni, kni_burst, MBUF_SIZE);
		if (num_recvd > MBUF_SIZE) {
			rte_exit(EXIT_FAILURE, "rte_kni_rx_burst Error\n");
		}

		unsigned nb_tx = rte_eth_tx_burst(gDpdkPortId, 0, kni_burst, num_recvd);
		/* Packets the NIC did not accept could be retried; they are dropped here. */
		for (i = nb_tx; i < num_recvd; i++) {
			rte_pktmbuf_free(kni_burst[i]);
			kni_burst[i] = NULL;
		}
#endif

		/* NIC -> this program / kernel. */
		struct rte_mbuf *mbufs[MBUF_SIZE];
		num_recvd = rte_eth_rx_burst(gDpdkPortId, 0, mbufs, MBUF_SIZE);
		if (num_recvd > MBUF_SIZE) {
			rte_exit(EXIT_FAILURE, "rte_eth_rx_burst Error\n");
		}

		for (i = 0; i < num_recvd; i++) {
			struct rte_mbuf *pkt = mbufs[i];
			const size_t frame_len = rte_pktmbuf_data_len(pkt);
			const size_t udp_off = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr);

			if (frame_len < sizeof(struct rte_ether_hdr)) {
				rte_pktmbuf_free(pkt);
				continue;
			}

			struct rte_ether_hdr *ehdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
			if (ehdr->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) {
				/* Not IPv4 (e.g. ARP): the kernel handles it. */
				pass_to_kernel(pkt);
				continue;
			}

			if (frame_len < udp_off) {
				/* Too short to hold an IPv4 header; not ours to parse. */
				pass_to_kernel(pkt);
				continue;
			}

			struct rte_ipv4_hdr *iphdr = rte_pktmbuf_mtod_offset(pkt, struct rte_ipv4_hdr *,
				sizeof(struct rte_ether_hdr));
			if (iphdr->next_proto_id != IPPROTO_UDP) {
				/* TCP, ICMP, ...: the kernel handles it. */
				pass_to_kernel(pkt);
				continue;
			}

			if (frame_len < udp_off + sizeof(struct rte_udp_hdr)) {
				rte_pktmbuf_free(pkt);
				continue;
			}

			/* NOTE: assumes the IPv4 header carries no options (IHL = 5). */
			struct rte_udp_hdr *udphdr = (struct rte_udp_hdr *)(iphdr + 1);

			/* Debug filter: only datagrams for UDP_DEBUG_PORT are processed. */
			if (ntohs(udphdr->dst_port) != UDP_DEBUG_PORT) {
				rte_pktmbuf_free(pkt);
				continue;
			}

			/* dgram_len comes off the wire: reject values that are shorter than
			 * the UDP header or that point past the received data. */
			uint16_t length = ntohs(udphdr->dgram_len);
			if (length < sizeof(struct rte_udp_hdr) || udp_off + length > frame_len) {
				rte_pktmbuf_free(pkt);
				continue;
			}
			const int payload_len = (int)(length - sizeof(struct rte_udp_hdr));

			struct in_addr addr;
			addr.s_addr = iphdr->src_addr;
			printf("src: %s:%d, ", inet_ntoa(addr), ntohs(udphdr->src_port));

			/* Bounded print instead of writing a NUL terminator into the packet. */
			addr.s_addr = iphdr->dst_addr;
			printf("dst: %s:%d, %.*s\n", inet_ntoa(addr), ntohs(udphdr->dst_port),
				payload_len, (char *)(udphdr + 1));

#if ENABLE_SEND
			/* Reply addressing = request addressing with source/destination swapped. */
			rte_memcpy(gSrcMac, ehdr->d_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
			rte_memcpy(gDstMac, ehdr->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);

			rte_memcpy(&gSrcIp, &iphdr->dst_addr, sizeof(uint32_t));
			rte_memcpy(&gDstIp, &iphdr->src_addr, sizeof(uint32_t));

			rte_memcpy(&gSrcPort, &udphdr->dst_port, sizeof(uint16_t));
			rte_memcpy(&gDstPort, &udphdr->src_port, sizeof(uint16_t));

			struct rte_mbuf *reply = alloc_udp_pkt(mbuf_pool, (uint8_t *)(udphdr + 1), length);
			if (reply && rte_eth_tx_burst(gDpdkPortId, 0, &reply, 1) != 1) {
				/* TX queue full: the mbuf is still ours, so release it. */
				rte_pktmbuf_free(reply);
			}
#endif

			/* The payload has been copied into the reply; release the request. */
			rte_pktmbuf_free(pkt);
		}

#if ENABLE_KNI_APP
		/* Service pending kernel requests (e.g. interface up/down) once per iteration. */
		rte_kni_handle_request(global_kni);
#endif
	}
}
Makefile:
# binary name
APP = dpdk_udp

# all source are stored in SRCS-y
SRCS-y := dpdk_udp.c

# Build using pkg-config variables if possible
ifeq ($(shell pkg-config --exists libdpdk && echo 0),0)

all: shared
.PHONY: shared static
shared: build/$(APP)-shared
	ln -sf $(APP)-shared build/$(APP)
static: build/$(APP)-static
	ln -sf $(APP)-static build/$(APP)

PKGCONF=pkg-config --define-prefix

PC_FILE := $(shell $(PKGCONF) --path libdpdk)
CFLAGS += -O3 $(shell $(PKGCONF) --cflags libdpdk)
LDFLAGS_SHARED = $(shell $(PKGCONF) --libs libdpdk)
LDFLAGS_STATIC = -Wl,-Bstatic $(shell $(PKGCONF) --static --libs libdpdk)

build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)

build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)

build:
	@mkdir -p $@

.PHONY: clean
clean:
	rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
	test -d build && rmdir -p build || true

else

# No pkg-config entry for libdpdk: fall back to the legacy RTE_SDK make system.
ifeq ($(RTE_SDK),)
$(error "Please define RTE_SDK environment variable")
endif

# Default target, detect a build directory, by looking for a path with a .config
RTE_TARGET ?= $(notdir $(abspath $(dir $(firstword $(wildcard $(RTE_SDK)/*/.config)))))

include $(RTE_SDK)/mk/rte.vars.mk

CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS)

include $(RTE_SDK)/mk/rte.extapp.mk

endif
编译和执行
编译:
make
运行:
./build/dpdk_udp
启动网卡:
ifconfig vEth0 192.168.7.26 up
设置可读写内核:
echo 1 > /sys/devices/virtual/net/vEth0/carrier
(说明:该命令把vEth0的链路carrier状态置为1,即链路接通,内核才会通过该KNI接口收发数据。)
ping测试:
ping 192.168.7.26
注意,IP设置为自己的局域网IP。
总结
- 从内核读取数据包发送出去:调用 rte_kni_rx_burst(…) 从内核中获取返回包,然后调用 rte_eth_tx_burst(…) 将数据包发送到网卡。
- 将接收的数据包写入内核:调用 rte_eth_rx_burst(…) 从网卡获取数据包,然后调用 rte_kni_tx_burst(…) 将数据包写入内核。
本文链接:https://www.ngui.cc/article/show-862304.html