繼 記一次傳遞文件句柄引發的血案 之後,這個 demo 又引發了一次血案,現錄如下。 這次我是在 linux 上測試文件句柄的傳遞,linux 上並沒有 STREAMS 系統, 因此是採用 unix domain socket 的 sendmsg/recvmsg 中控制消息部分來傳遞句柄的。 代碼的 ...
繼 記一次傳遞文件句柄引發的血案 之後,這個 demo 又引發了一次血案,現錄如下。
這次我是在 linux 上測試文件句柄的傳遞,linux 上並沒有 STREAMS 系統,
因此是採用 unix domain socket 的 sendmsg/recvmsg 中控制消息部分來傳遞句柄的。
代碼的主要修改部分集中於發送 fd 與接收 fd 處,一開始代碼是這樣的,運行良好。
/*
 * Listing: control-message size macros and send_fd(), first version.
 * (The leading numbers are the article's own listing line numbers.)
 *
 * send_fd(fd, fd_to_send) sends a 2-byte data message {0, status} on fd
 * with sendmsg():
 *   - fd_to_send < 0: no control data is attached; status is -fd_to_send
 *     stored in a char, forced to 1 if that byte comes out as 0.
 *   - otherwise: status is 0 and one SOL_SOCKET/SCM_RIGHTS control message
 *     holding fd_to_send is attached in a malloc'd buffer of CONTROLLEN bytes.
 * Returns 0 when sendmsg() reports exactly 2 bytes, -1 on malloc failure or
 * any other sendmsg() result. The control buffer is freed on both paths
 * (cmptr stays NULL when no control data was attached).
 *
 * NOTE(review): cmsg_len is set to CONTROLLEN (RIGHTSLEN+CREDSLEN) although
 * only a single int is stored -- confirm this is what the original demo used.
 * NOTE(review): struct CREDSTRUCT is not defined anywhere in this listing;
 * presumably it comes from the macros shown in the later version.
 */
1 #define MAXLINE 128 2 #define RIGHTSLEN CMSG_LEN(sizeof(int)) 3 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT)) 4 #define CONTROLLEN (RIGHTSLEN+CREDSLEN) 5 6 int send_fd (int fd, int fd_to_send) 7 { 8 struct iovec iov[1]; 9 struct msghdr msg; 10 struct cmsghdr *cmptr = NULL; 11 char buf[2]; 12 13 iov[0].iov_base = buf; 14 iov[0].iov_len = 2; 15 16 msg.msg_iov = iov; 17 msg.msg_iovlen = 1; 18 msg.msg_name = NULL; 19 msg.msg_namelen = 0; 20 msg.msg_flags = 0; 21 22 if (fd_to_send < 0) { 23 msg.msg_control = NULL; 24 msg.msg_controllen = 0; 25 buf[1] = -fd_to_send; 26 if (buf[1] == 0) 27 buf[1] = 1; 28 } else { 29 if ((cmptr = malloc(CONTROLLEN)) == NULL) { 30 fprintf (stderr, "malloc memory failed\n"); 31 return -1; 32 } 33 34 msg.msg_control = cmptr; 35 msg.msg_controllen = CONTROLLEN; 36 37 cmptr->cmsg_level = SOL_SOCKET; 38 cmptr->cmsg_type = SCM_RIGHTS; 39 cmptr->cmsg_len = CONTROLLEN; 40 41 *(int *) CMSG_DATA(cmptr) = fd_to_send; 42 buf[1] = 0; 43 } 44 45 buf[0] = 0; 46 if (sendmsg(fd, &msg, 0) != 2) { 47 free (cmptr); 48 return -1; 49 } 50 51 free (cmptr); 52 return 0; 53 }
以上是發送句柄部分,重點位於 37-39 行,設置了控制消息的類型與句柄的值。
sendmsg 中的數據消息部分,用來相容出錯的場景(出錯時可以提供一個-1~-255的錯誤碼,及一段描述信息),關鍵信息位於控制部分。
下面來看消息的接收:
/*
 * Listing: recv_fd(), first version.
 * (The leading numbers are the article's own listing line numbers.)
 *
 * Loops on recvmsg(fd), reading up to MAXLINE data bytes into buf together
 * with a freshly malloc'd control buffer of CONTROLLEN bytes per iteration.
 * The data is scanned for a 0 byte; that byte must be the second-to-last
 * byte received (otherwise "message format error" and -1), and the byte
 * after it is the status:
 *   - status == 0: msg_controllen must equal CONTROLLEN, and the new
 *     descriptor is read from CMSG_DATA of the first control header;
 *   - status != 0: the result is -status.
 * After the 2 trailing bytes are subtracted from nr, any remaining nr bytes
 * at the start of buf are passed to userfunc(STDERR_FILENO, buf, nr); a
 * return value different from nr makes recv_fd return -1.
 * Returns newfd once a status has been seen; returns -1 on malloc failure,
 * recvmsg error, or end of stream (nr == 0).
 * uidptr is not used in this version.
 */
1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t)) 2 { 3 struct cmsghdr *cmptr = NULL; 4 int newfd, nr, status; 5 char *ptr; 6 char buf[MAXLINE]; 7 struct iovec iov[1]; 8 struct msghdr msg; 9 10 status = -1; 11 newfd = -1; 12 13 for (;;) { 14 iov[0].iov_base = buf; 15 iov[0].iov_len = sizeof (buf); 16 17 msg.msg_iov = iov; 18 msg.msg_iovlen = 1; 19 msg.msg_name = NULL; 20 msg.msg_namelen = 0; 21 22 if ((cmptr = malloc (CONTROLLEN)) == NULL) { 23 fprintf (stderr, "malloc error\n"); 24 return -1; 25 } 26 27 msg.msg_control = cmptr; 28 msg.msg_controllen = CONTROLLEN; 29 30 if ((nr = recvmsg (fd, &msg, 0)) < 0) { 31 fprintf (stderr, "recvmsg error\n"); 32 free (cmptr); 33 return -1; 34 } else if (nr == 0) { 35 fprintf (stderr, "connection closed by server\n"); 36 free (cmptr); 37 return -1; 38 } 39 40 for (ptr = buf; ptr < &buf[nr]; ) { 41 if (*ptr ++ == 0) { 42 if (ptr != &buf[nr-1]) { 43 fprintf (stderr, "message format error"); 44 free (cmptr); 45 return -1; 46 } 47 48 status = *ptr & 0xff; 49 if (status == 0) { 50 if (msg.msg_controllen != CONTROLLEN) { 51 fprintf (stderr, "status = 0 but no fd\n"); 52 free (cmptr); 53 return -1; 54 } 55 56 newfd = *(int *) CMSG_DATA(cmptr); 57 } else { 58 newfd = -status; 59 } 60 61 nr -= 2; 62 } 63 } 64 65 free(cmptr); 66 if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr) 67 return -1; 68 69 if (status >= 0) 70 return newfd; 71 } 72 73 return -1; 74 }
接收部分的重點位於 56 行,這裡取得了對方傳遞過來的文件句柄(注意不是簡單的值傳遞!參考上篇文章)
其它一些代碼則用來處理出錯信息,當出現錯誤時,調用 userfunc 列印錯誤信息 (用戶一般傳遞 write) 。
另外介面中 uidptr 參數並沒有用,這個是為將來擴展預留的。
使用之前的 demo (spipe_server.c / spipe_client.c)編譯、運行,輸出結果如下:
./spipe_server ./spipe_client create pipe 3.4 3 7 create temp file /tmp/outliqA3i with fd 4 seek to head send fd 4 to peer recv fd 3, position 0 create temp file /tmp/inaLr30i with fd 4 source: 3 7 seek to head send fd 4 recv fd 5 from peer, position 0 10
可以看到通過新的方式傳遞的文件句柄值也發生了變化(從 4 變為 3),且也需要對文件偏移進行重置,否則還會掉到之前文章說的那個坑裡。
問題出現在增加一些代碼來傳遞發送進程憑證(如uid)時,此時發送方需要傳遞兩個控制子消息(分別表示句柄與憑證),接收方也需要處理兩個子消息。
新的發送代碼如下:
/*
 * Listing: platform macros and send_fd(), credential-passing version.
 * (The leading numbers are the article's own listing line numbers.)
 *
 * The #if block picks the credential struct, uid member, socket option and
 * control-message type depending on whether SCM_CREDS or SCM_CREDENTIALS is
 * defined; compilation stops with #error if neither is.
 * CONTROLLEN is the sum of the two CMSG_LEN values (rights + credentials).
 *
 * send_fd(fd, fd_to_send) still sends the 2-byte data message {0, status}.
 * When fd_to_send >= 0 it now builds two control messages in one malloc'd
 * buffer: SOL_SOCKET/SCM_RIGHTS carrying fd_to_send, then
 * SOL_SOCKET/SCM_CREDTYPE; under SCM_CREDENTIALS the uid/gid/pid members are
 * filled from getuid()/getegid()/getpid().
 * Returns 0 when sendmsg() reports exactly 2 bytes, -1 otherwise.
 *
 * NOTE(review): the buffer returned by malloc() is not initialised before
 * CMSG_NXTHDR() is called, and the returned pointer is dereferenced without
 * a NULL check. The rest of the article investigates CMSG_NXTHDR returning
 * NULL at exactly this point.
 * NOTE(review): uid comes from getuid() but gid from getegid() -- confirm
 * the real/effective mix is intended.
 */
1 #define MAXLINE 128 2 #if defined(SCM_CREDS) // on BSD 3 #define CREDSTRUCT cmsgcred 4 #define CR_UID cmcred_uid 5 #define CREDOPT LOCAL_PEERCRED 6 #define SCM_CREDTYPE SCM_CREDS 7 #elif defined(SCM_CREDENTIALS) // on linux 8 #define CREDSTRUCT ucred 9 #define CR_UID uid 10 #define CREDOPT SO_PASSCRED 11 #define SCM_CREDTYPE SCM_CREDENTIALS 12 #else 13 #error passing credentials is unsupported! 14 #endif 15 16 #define RIGHTSLEN CMSG_LEN(sizeof(int)) 17 #define CREDSLEN CMSG_LEN(sizeof(struct CREDSTRUCT)) 18 #define CONTROLLEN (RIGHTSLEN+CREDSLEN) 19 20 21 int send_fd (int fd, int fd_to_send) 22 { 23 struct iovec iov[1]; 24 struct msghdr msg; 25 struct cmsghdr *cmptr = NULL; 26 char buf[2]; 27 struct CREDSTRUCT *credp; 28 struct cmsghdr *cmp; 29 30 iov[0].iov_base = buf; 31 iov[0].iov_len = 2; 32 33 msg.msg_iov = iov; 34 msg.msg_iovlen = 1; 35 msg.msg_name = NULL; 36 msg.msg_namelen = 0; 37 msg.msg_flags = 0; 38 39 if (fd_to_send < 0) { 40 msg.msg_control = NULL; 41 msg.msg_controllen = 0; 42 buf[1] = -fd_to_send; 43 if (buf[1] == 0) 44 buf[1] = 1; 45 } else { 46 if ((cmptr = malloc(CONTROLLEN)) == NULL) { 47 fprintf (stderr, "malloc memory failed\n"); 48 return -1; 49 } 50 51 msg.msg_control = cmptr; 52 msg.msg_controllen = CONTROLLEN; 53 54 cmp = cmptr; 55 cmp->cmsg_level = SOL_SOCKET; 56 cmp->cmsg_type = SCM_RIGHTS; 57 cmp->cmsg_len = RIGHTSLEN; 58 *(int *) CMSG_DATA(cmp) = fd_to_send; 59 60 cmp = CMSG_NXTHDR(&msg, cmp); 61 cmp->cmsg_level = SOL_SOCKET; 62 cmp->cmsg_type = SCM_CREDTYPE; 63 cmp->cmsg_len = CREDSLEN; 64 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 65 66 # if defined(SCM_CREDENTIALS) 67 // only linux need to set members of this struct ! 68 credp->uid = getuid (); 69 credp->gid = getegid (); 70 credp->pid = getpid (); 71 # endif 72 buf[1] = 0; 73 } 74 75 buf[0] = 0; 76 if (sendmsg(fd, &msg, 0) != 2) { 77 free (cmptr); 78 return -1; 79 } 80 81 free (cmptr); 82 return 0; 83 }
最開始的一些巨集定義,是用來區分 linux 與 bsd 上一些細節,重點在 55-64 行,這兩段代碼分別設置了句柄與憑證。
然後控制消息的大小 CONTROLLEN 由兩部分消息的長度(RIGHTSLEN 與 CREDSLEN)累加得到,分配的記憶體也是這麼大。
再來看接收部分:
/*
 * Listing: recv_fd(), credential-passing version.
 * (The leading numbers are the article's own listing line numbers.)
 *
 * Before receiving, calls setsockopt(fd, SOL_SOCKET, CREDOPT, ...) with the
 * int option value on (= -1); failure is logged and returns -1.
 * The receive loop and the data-byte protocol are the same as in the first
 * version: a 0 byte that must be second-to-last, followed by the status.
 * When status == 0 (and msg_controllen == CONTROLLEN) the control messages
 * are walked with CMSG_FIRSTHDR/CMSG_NXTHDR:
 *   - headers whose level is not SOL_SOCKET are logged and skipped;
 *   - SCM_RIGHTS: the new descriptor is read into newfd;
 *   - SCM_CREDTYPE: the sender's CR_UID member is stored through uidptr;
 *   - any other type is logged and ignored.
 * When status != 0 the result is -status.
 * Returns newfd once a status has been seen; returns -1 on setsockopt,
 * malloc or recvmsg failure, on end of stream, on a format error, or when
 * userfunc does not consume all leftover bytes.
 *
 * NOTE(review): uidptr is dereferenced without a NULL check.
 */
1 int recv_fd (int fd, uid_t *uidptr, ssize_t (*userfunc) (int, const void*, size_t)) 2 { 3 struct cmsghdr *cmptr = NULL; 4 5 int newfd, nr, status; 6 char *ptr; 7 char buf[MAXLINE]; 8 struct iovec iov[1]; 9 struct msghdr msg; 10 11 status = -1; 12 newfd = -1; 13 14 const int on = -1; 15 struct cmsghdr *cmp; 16 struct CREDSTRUCT *credp; 17 if (setsockopt (fd, SOL_SOCKET, CREDOPT, &on, sizeof(int)) < 0) { 18 fprintf (stderr, "setsockopt for %d failed\n", CREDOPT); 19 return -1; 20 } 21 22 for (;;) { 23 iov[0].iov_base = buf; 24 iov[0].iov_len = sizeof (buf); 25 26 msg.msg_iov = iov; 27 msg.msg_iovlen = 1; 28 msg.msg_name = NULL; 29 msg.msg_namelen = 0; 30 31 if ((cmptr = malloc (CONTROLLEN)) == NULL) { 32 fprintf (stderr, "malloc error\n"); 33 return -1; 34 } 35 36 msg.msg_control = cmptr; 37 msg.msg_controllen = CONTROLLEN; 38 39 if ((nr = recvmsg (fd, &msg, 0)) < 0) { 40 fprintf (stderr, "recvmsg error\n"); 41 free (cmptr); 42 return -1; 43 } else if (nr == 0) { 44 fprintf (stderr, "connection closed by server\n"); 45 free (cmptr); 46 return -1; 47 } 48 49 for (ptr = buf; ptr < &buf[nr]; ) { 50 if (*ptr ++ == 0) { 51 if (ptr != &buf[nr-1]) { 52 fprintf (stderr, "message format error"); 53 free (cmptr); 54 return -1; 55 } 56 57 status = *ptr & 0xff; 58 if (status == 0) { 59 if (msg.msg_controllen != CONTROLLEN) { 60 fprintf (stderr, "status = 0 but no fd\n"); 61 free (cmptr); 62 return -1; 63 } 64 65 for (cmp = CMSG_FIRSTHDR(&msg); cmp != NULL; cmp = CMSG_NXTHDR(&msg, cmp)) { 66 if (cmp->cmsg_level != SOL_SOCKET) { 67 fprintf (stderr, "ignore unknown socket level %d\n", cmp->cmsg_level); 68 continue; 69 } 70 71 switch (cmp->cmsg_type) { 72 case SCM_RIGHTS: 73 newfd = *(int *) CMSG_DATA(cmp); 74 break; 75 case SCM_CREDTYPE: 76 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 77 *uidptr = credp->CR_UID; 78 break; 79 default: 80 fprintf (stderr, "ignore unknown msg type %d\n", cmp->cmsg_type); 81 break; 82 } 83 } 84 } else { 85 newfd = -status; 86 } 87 88 nr -= 2; 89 } 
90 } 91 92 free(cmptr); 93 if (nr > 0 && (*userfunc)(STDERR_FILENO, buf, nr) != nr) 94 return -1; 95 96 if (status >= 0) 97 return newfd; 98 } 99 100 return -1; 101 }
重點分為兩個部分:
14-20 行,設置 unix domain socket 可以接收憑證信息;
65-83 行,分別讀取控制消息中的句柄與憑證信息,這裡我們取了發送進程的 uid 信息作為憑證返回給上層調用者;
與發送消息類似,這裡使用系統提供的 CMSG_FIRSTHDR、CMSG_NXTHDR 在控制消息中遍歷各個子部分。
重新編譯、運行 demo,卻發現出錯了:
./spipe_server ./spipe_client create pipe 3.4 3 7 create temp file /tmp/outgQY1Y4 with fd 4 seek to head send fd 4 to peer recv fd 3, uid 500, position 0 create temp file /tmp/invVgKW4 with fd 4 source: 3 7 seek to head connection closed by server recv fd from peer failed, error -1
從輸出日誌看,第一次從 server 發往 client 的句柄及憑證是可以的(line 7),再之後 client 處理完消息回傳時,就出錯了。
首先定位出錯代碼位置,在 client 回傳這裡 (send_fd),加入一些日誌:
1 if ((cmptr = malloc(CONTROLLEN)) == NULL) { 2 fprintf (stderr, "malloc memory failed\n"); 3 return -1; 4 } 5 6 msg.msg_control = cmptr; 7 msg.msg_controllen = CONTROLLEN; 8 9 cmp = cmptr; 10 cmp->cmsg_level = SOL_SOCKET; 11 cmp->cmsg_type = SCM_RIGHTS; 12 cmp->cmsg_len = RIGHTSLEN; 13 *(int *) CMSG_DATA(cmp) = fd_to_send; 14 fprintf (stderr, "add fd with len %d\n", RIGHTSLEN); 15 16 cmp = CMSG_NXTHDR(&msg, cmp); 17 cmp->cmsg_level = SOL_SOCKET; 18 cmp->cmsg_type = SCM_CREDTYPE; 19 cmp->cmsg_len = CREDSLEN; 20 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 21 fprintf (stderr, "add credential with len %d\n", CREDSLEN); 22 23 # if defined(SCM_CREDENTIALS) 24 // only linux need to set members of this struct ! 25 credp->uid = getuid (); 26 credp->gid = getegid (); 27 credp->pid = getpid (); 28 fprintf (stderr, "set uid %d, gid %d, pid %d\n", credp->uid, credp->gid, credp->pid); 29 # endif 30 buf[1] = 0;
新加入的輸出日誌位於上面代碼的第 14、21、28 行(原文以黃色標示),再次編譯運行:
./spipe_server ./spipe_client create pipe 3.4 3 7 create temp file /tmp/outivt2Og with fd 4 seek to head add fd with len 16 add credential with len 24 set uid 500, gid 500, pid 12071 send fd 4 to peer recv fd 3, uid 500, position 0 create temp file /tmp/inHqRwMg with fd 4 source: 3 7 seek to head add fd with len 16 connection closed by server recv fd from peer failed, error -1
可以看到,第一次傳遞時,這三條日誌全都正確輸出了,而回傳時,只輸出了第一條日誌。
所以明顯是在第一條日誌與第二條日誌之間的代碼出了問題。左看右看,看不出這塊有什麼問題,難道系統提供的 CMSG_NXTHDR 會出錯?
這邊再加兩條日誌:
1 if ((cmptr = malloc(CONTROLLEN)) == NULL) { 2 fprintf (stderr, "malloc memory failed\n"); 3 return -1; 4 } 5 6 msg.msg_control = cmptr; 7 msg.msg_controllen = CONTROLLEN; 8 9 cmp = cmptr; 10 cmp->cmsg_level = SOL_SOCKET; 11 cmp->cmsg_type = SCM_RIGHTS; 12 cmp->cmsg_len = RIGHTSLEN; 13 *(int *) CMSG_DATA(cmp) = fd_to_send; 14 fprintf (stderr, "add fd with len %d\n", RIGHTSLEN); 15 fprintf (stderr, "cmsghdr = %d, cmsglen = %d, after align = %d, control len = %d\n", sizeof(struct cmsghdr), CREDSLEN, CMSG_ALIGN(CREDSLEN), CONTROLLEN); 16 17 cmp = CMSG_NXTHDR(&msg, cmp); 18 fprintf (stderr, "cmp = %p\n", cmp); 19 cmp->cmsg_level = SOL_SOCKET; 20 cmp->cmsg_type = SCM_CREDTYPE; 21 cmp->cmsg_len = CREDSLEN; 22 credp = (struct CREDSTRUCT *) CMSG_DATA(cmp); 23 fprintf (stderr, "add credential with len %d\n", CREDSLEN); 24 25 # if defined(SCM_CREDENTIALS) 26 // only linux need to set members of this struct ! 27 credp->uid = getuid (); 28 credp->gid = getegid (); 29 credp->pid = getpid (); 30 fprintf (stderr, "set uid %d, gid %d, pid %d\n", credp->uid, credp->gid, credp->pid); 31 # endif 32 buf[1] = 0;
第二條日誌(18 行)是主要懷疑的地方,看指針是否為空;第一條日誌(15 行)則是懷疑塊大小計算有誤,導致分配的記憶體不夠大,指針遞增時出現了範圍錯誤,所以這裡列印各種長度做驗證。
再次運行後,又多了一些輸出:
./spipe_server ./spipe_client create pipe 3.4 3 7 create temp file /tmp/out7UgSYZ with fd 4 seek to head add fd with len 16 cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40 cmp = 0x9ded018 add credential with len 24 set uid 500, gid 500, pid 12100 send fd 4 to peer recv fd 3, uid 500, position 0 create temp file /tmp/inC3nyWZ with fd 4 source: 3 7 seek to head add fd with len 16 cmsghdr = 12, cmsglen = 24, after align = 24, control len = 40 cmp = (nil) connection closed by server recv fd from peer failed, error -1
神奇的地方出現了,同樣的代碼,相同的尺寸,第一次指針正常;第二次就為空了!
崩潰點找到了,但是還是一頭霧水,看起來數據塊都對齊了,計算也沒毛病,難道是這個系統提供的巨集 (CMSG_NXTHDR) 出問題了嗎?
翻看頭文件,找到這一段的定義 (我所在的系統,位於 /usr/include/bits/socket.h (L311)):
/*
 * Listing: __cmsg_nxthdr() as quoted from the system header.
 * (The leading numbers are the article's own listing line numbers.)
 *
 * Given the message header __mhdr and the current control header __cmsg,
 * returns the next control header, or 0 when:
 *   1. the current header's cmsg_len is smaller than sizeof(struct cmsghdr);
 *   2. a full header at the next aligned position would extend past
 *      msg_control + msg_controllen;
 *   3. the next header's own aligned cmsg_len would extend past
 *      msg_control + msg_controllen.
 * Check 3 reads cmsg_len from the NEXT header, i.e. from memory the caller
 * may not have written yet when it is still building the control buffer.
 */
1 __EXTERN_INLINE struct cmsghdr * 2 __NTH (__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg)) 3 { 4 if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr)) 5 /* The kernel header does this so there may be a reason. */ 6 return 0; 7 8 __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg 9 + CMSG_ALIGN (__cmsg->cmsg_len)); 10 if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control 11 + __mhdr->msg_controllen) 12 || ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len) 13 > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen))) 14 /* No more entries. */ 15 return 0; 16 return __cmsg; 17 }
這段 INLINE 函數主要包含三個判斷,
1)子消息長度小於消息頭長度,返回 null;
2)下一個子消息的消息頭超出消息尾部,返回null;
3)下一個子消息的消息體超出消息尾部,返回null;
直接修改系統代碼不方便,將這個函數拷貝到本地並重命名為 my_cmsg_nxthdr,在各個判斷下面添加日誌輸出:
1 struct cmsghdr *my_cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg) 2 { 3 if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr)) { 4 /* The kernel header does this so there may be a reason. */ 5 fprintf (stderr, "in step1\n"); 6 return 0; 7 } 8 9 fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d\n", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); 10 __cmsg = (struct cmsghdr *) ((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len)); 11 if ((unsigned char *) (__cmsg + 1) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen)) { 12 fprintf (stderr, "in step2\n"); 13 return 0; 14 } 15 16 fprintf (stderr, "%p: cmsg_len %u, cmsg_level %d, cmsg_type %d\n", __cmsg, __cmsg->cmsg_len, __cmsg->cmsg_level, __cmsg->cmsg_type); 17 if (((unsigned char *) __cmsg + CMSG_ALIGN (__cmsg->cmsg_len) > ((unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen))) { 18 /* No more entries. */ 19 fprintf (stderr, "in step3\n"); 20 fprintf (stderr, "msg len %d, after align