Glusterfs之nfs模块源码分析（中）之Glusterfs实现NFS服务器

我的新浪微博： http://weibo.com/freshairbrucewoo 。

欢迎大家相互交流，共同提高技术。

五、 Glusterfs 实现 NFS 服务器

第一节、启动过程分析

Glusterfs 的 nfs 服务器启动命令如下：

/usr/local/sbin/glusterfs -f /etc/glusterd/nfs/nfs-server.vol -p /etc/glusterd/nfs/run/nfs.pid -l /usr/local/var/log/glusterfs/nfs.log

说明：所有列出的代码都把错误处理、参数检查和日志输出去掉了！

上面的命令会启动 glusterfsd 程序，下面是入口函数 main 的代码实现：

            
               1
            
            
              int
            
             main (
            
              int
            
             argc, 
            
              char
            
             *
            
              argv[])  


            
            
               2
            
            
              {  


            
            
               3
            
                 glusterfs_ctx_t  *ctx =
            
               NULL;  


            
            
               4
            
            
              int
            
                           ret = -
            
              1
            
            
              ;  


            
            
               5
            
                 ret = glusterfs_globals_init ();
            
              //
            
            
              初始化一些全局变量和参数  
            
            
               6
            
                 ctx =
            
               glusterfs_ctx_get ();  


            
            
               7
            
                 ret = glusterfs_ctx_defaults_init (ctx);
            
              //
            
            
              初始化一些glusterfs的上下文默认信息  
            
            
               8
            
                 ret = parse_cmdline (argc, argv, ctx);
            
              //
            
            
              解析命令行参数  
            
            
               9
            
                 ret = logging_init (ctx);
            
              //
            
            
              初始化日志文件  
            
            
              10
            
                 gf_proc_dump_init();
            
              //
            
            
              初始化代表程序的全局锁  
            
            
              11
            
                 ret = create_fuse_mount (ctx);
            
              //
            
            
              创建fuse的主（根）xlator：mount/fuse，并且初始化相关值  
            
            
              12
            
                 ret = daemonize (ctx);
            
              //
            
            
              设置守护进程运行模式  
            
            
              13
            
                 ret = glusterfs_volumes_init (ctx);
            
              //
            
            
              初始化卷服务，创建相应的xlator并且初始化  
            
            
              14
            
                 ret = event_dispatch (ctx->event_pool);
            
              //
            
            
              事件分发，将相应的事件交给相应的函数处理  
            
            
              15
            
             }

整个 main 做的工作在代码中都有注释了，对于 nfs 启动比较关心的就是两个函数，一个是命令行参数解析函数 parse_cmdline ，它按照上面给出的命令解析出程序启动所需要的卷文件路径、日志文件路径和存放进程 ID 的文件路径。而且程序是以 glusterfs 模式（有三种模式：（ 1 ） glusterfsd ；（ 2 ） glusterfs ；（ 3 ） glusterd ）运行。

另外一个函数就是初始化具体卷服务的函数 glusterfs_volumes_init ，根据我们的启动命令，这里启动的是 nfs 类型的服务，每一个卷服务都会用一个 xlator 表示，代码如下：

            
               1
            
            
              int
            
             glusterfs_volumes_init (glusterfs_ctx_t *
            
              ctx)  


            
            
               2
            
            
              {  


            
            
               3
            
                 FILE               *fp =
            
               NULL;  


            
            
               4
            
                 cmd_args_t         *cmd_args =
            
               NULL;  


            
            
               5
            
            
              int
            
                             ret = 
            
              0
            
            
              ;  


            
            
               6
            
                 cmd_args = &ctx->
            
              cmd_args;  


            
            
               7
            
            
              if
            
             (cmd_args->sock_file) {
            
              //
            
            
              是否设置了sock_file来启动监听服务  
            
            
               8
            
                     ret = glusterfs_listener_init (ctx);
            
              //
            
            
              初始化监听服务  
            
            
               9
            
            
                  }  


            
            
              10
            
                 fp = get_volfp (ctx);
            
              //
            
            
              得到描述卷的文件指针  
            
            
              11
            
                 ret = glusterfs_process_volfp (ctx, fp);
            
              //
            
            
              处理描述卷的文件  
            
            
              12
            
             }

从启动命令可以看出并没有设置 cmd_args->sock_file 和 cmd_args->volfile_server 参数，所以直接进入卷处理函数 glusterfs_process_volfp ，下面继续看这个函数的实现，如下：

      
         1
      
      
         2
      
      
         3
      
      
        int
      
       glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *
      
        fp)  


      
      
         4
      
      
        {  


      
      
         5
      
          glusterfs_graph_t  *graph =
      
         NULL;  


      
      
         6
      
      
        int
      
                       ret = -
      
        1
      
      
        ;  


      
      
         7
      
          xlator_t           *trav =
      
         NULL;  


      
      
         8
      
          graph = glusterfs_graph_construct (fp);
      
        //
      
      
        根据卷描述文件构造一个graph  
      
      
         9
      
      
        for
      
       (trav = graph->first; trav; trav = trav->
      
        next) {  


      
      
        10
      
      
        if
      
       (strcmp (trav->type, 
      
        "
      
      
        mount/fuse
      
      
        "
      
      ) == 
      
        0
      
      ) {
      
        //
      
      
        卷文件中不能有mount/fuse类型的卷  
      
      
        11
      
              gf_log (
      
        "
      
      
        glusterfsd
      
      
        "
      
      , GF_LOG_ERROR, 
      
        "
      
      
        fuse xlator cannot be specified 
      
      
        "
      
      
        "
      
      
        in volume file
      
      
        "
      
      
        );  


      
      
        12
      
      
        goto
      
      
        out
      
      
        ;  


      
      
        13
      
      
               }  


      
      
        14
      
      
           }  


      
      
        15
      
          ret = glusterfs_graph_prepare (graph, ctx);
      
        //
      
      
        准备工作  
      
      
        16
      
         ret = glusterfs_graph_activate (graph, ctx);
      
        //
      
      
        激活这个图结构的卷  
      
      
        17
      
         gf_log_volume_file (fp);
      
        //
      
      
        卷的日志文件  
      
      
        18
      
          ret = 
      
        0
      
      
        ;  


      
      
        19
      
      
        out
      
      
        :  


      
      
        20
      
      
        if
      
      
         (fp)  


      
      
        21
      
      
           fclose (fp);  


      
      
        22
      
      
        if
      
       (ret && !ctx->
      
        active) {  


      
      
        23
      
           cleanup_and_exit (
      
        0
      
      );
      
        //
      
      
        如果没有激活就清理掉并且直接退出整个程序  
      
      
        24
      
      
          }  


      
      
        25
      
      
        return
      
      
         ret;  


      
      
        26
      
       }

继续关注比较重要的，上面代码中最重要的就是激活卷（ graph ）服务的函数 glusterfs_graph_activate 了，所以继续看这个函数的实现，代码如下：

      
         1
      
      
         2
      
      
         3
      
      
        int
      
       glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *
      
        ctx)  


      
      
         4
      
      
        {  


      
      
         5
      
      
        int
      
       ret = 
      
        0
      
      
        ;  


      
      
         6
      
           ret = glusterfs_graph_validate_options (graph);
      
        //
      
      
        验证所有卷包括子卷的配置选项的正确性  
      
      
         7
      
           ret = glusterfs_graph_init (graph);
      
        //
      
      
        初始化由整个配置文件中的各个卷组成的图结构  
      
      
         8
      
           ret = glusterfs_graph_unknown_options (graph);
      
        //
      
      
        再次验证是否有不知道的参数  
      
      
         9
      
           list_add (&graph->list, &ctx->graphs);
      
        //
      
      
        加入到总的链表中进行统一管理  
      
      
        10
      
          ctx->active =
      
         graph;  


      
      
        11
      
      
        if
      
       (ctx->master)
      
        //
      
      
        附加到master（mount/fuse）节点  
      
      
        12
      
             ret = xlator_notify (ctx->
      
        master, GF_EVENT_GRAPH_NEW, graph);  


      
      
        13
      
          ret = glusterfs_graph_parent_up (graph);
      
        //
      
      
        设置父节点  
      
      
        14
      
      
        return
      
      
        0
      
      
        ;  


      
      
        15
      
       }

在 graph 初始化函数中有具体初始化 xlator 的实现，这个就关系到是怎样连接到 nfs 服务器，所以继续看这个函数的实现：

      
         1
      
      
        int
      
       glusterfs_graph_init (glusterfs_graph_t *
      
        graph)  


      
      
         2
      
      
        {  


      
      
         3
      
           xlator_t           *trav =
      
         NULL;  


      
      
         4
      
      
        int
      
                       ret = -
      
        1
      
      
        ;  


      
      
         5
      
          trav = graph->first;
      
        //
      
      
        第一个节点，也就是nfs类型的节点  
      
      
         6
      
      
        while
      
      
         (trav) {  


      
      
         7
      
               ret = xlator_init (trav);
      
        //
      
      
        依次初始化每一个节点（xlator）  
      
      
         8
      
          trav = trav->next;
      
        //
      
      
        指向下一个节点  
      
      
         9
      
      
           }  


      
      
        10
      
      
        return
      
      
        0
      
      
        ;  


      
      
        11
      
       }

继续看初始化节点 xlator 的函数 xlator_init ，代码如下：

        
           1
        
        
          int
        
         xlator_init (xlator_t *
        
          xl)  


        
        
           2
        
        
          {  


        
        
           3
        
             int32_t ret = -
        
          1
        
        
          ;  


        
        
           4
        
             GF_VALIDATE_OR_GOTO (
        
          "
        
        
          xlator
        
        
          "
        
        , xl, 
        
          out
        
        
          );  


        
        
           5
        
        
          if
        
         (xl->
        
          mem_acct_init)  


        
        
           6
        
                 xl->mem_acct_init (xl);
        
          //
        
        
          如果这个函数指针不为空就调用  
        
        
           7
        
        
          if
        
         (!xl->init) {
        
          //
        
        
          init函数指针不能为空  
        
        
           8
        
        
              }  


        
        
           9
        
             ret = __xlator_init (xl);
        
          //
        
        
          继续初始化  
        
        
          10
        
             xl->init_succeeded = 
        
          1
        
        
          ;  


        
        
          11
        
             ret = 
        
          0
        
        
          ;  


        
        
          12
        
        
          out
        
        
          :  


        
        
          13
        
        
          return
        
        
           ret;  


        
        
          14
        
         }

继续看 __xlator_init (xl);

      
         1
      
      
        static
      
      
        int
      
       __xlator_init(xlator_t *
      
        xl)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       xlator_t *old_THIS =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
             ret = 
      
        0
      
      
        ;


      
      
         8
      
      
         9
      
       old_THIS =
      
         THIS;


      
      
        10
      
      
        11
      
         THIS =
      
         xl;


      
      
        12
      
      
        13
      
       ret = xl->init (xl);
      
        //
      
      
        调用具体xlator的init函数完成初始化工作
      
      
        14
      
      
        15
      
       THIS =
      
         old_THIS;


      
      
        16
      
      
        17
      
      
        return
      
      
         ret;


      
      
        18
      
      
        19
      
       }

到此为止就可以看到真正调用 NFS 的 init 函数了，这个时候才真正开始执行与 nfs 服务相关功能的代码。当然在结束服务的时候还会执行 fini 函数。

第二节、 NFS 协议实现的 init 函数

先看看这个函数的代码吧，如下：

      
         1
      
      
        int
      
       init (xlator_t *
      
        this
      
      
        ) {


      
      
         2
      
      
         3
      
      
        struct
      
       nfs_state        *nfs =
      
         NULL;


      
      
         4
      
      
         5
      
      
        int
      
                           ret = -
      
        1
      
      
        ;


      
      
         6
      
      
         7
      
      
        if
      
       (!
      
        this
      
      
        )


      
      
         8
      
      
         9
      
      
        return
      
       -
      
        1
      
      
        ;


      
      
        10
      
      
        11
      
       nfs = nfs_init_state (
      
        this
      
      );
      
        //
      
      
        初始化一些nfs的选项参数
      
      
        12
      
      
        13
      
       ret = nfs_add_all_initiators (nfs);
      
        //
      
      
        添加所有协议的初始化器
      
      
        14
      
      
        15
      
       ret = nfs_init_subvolumes (nfs, 
      
        this
      
      ->children);
      
        //
      
      
        初始化nfs的所有子卷
      
      
        16
      
      
        17
      
       ret = nfs_init_versions (nfs, 
      
        this
      
      );
      
        //
      
      
        初始化所有nfs协议的版本
      
      
        18
      
      
        19
      
      
        return
      
      
         ret;


      
      
        20
      
      
        21
      
       }

上面代码可以看出， init 函数的主要作用就是初始化 nfs 协议的所有版本以及其所有的子卷。下面依次分析各个函数实现的功能。

1.nfs_init_state 函数

这个函数代码比较多，分步骤解析：

第一步：判断 nfs 是否存在子卷，如果不存在就退出，因为必须要有子卷才能正常工作，代码如下：

      
        1
      
      
        if
      
       ((!
      
        this
      
      ->children) || (!
      
        this
      
      ->children->
      
        xlator)) {


      
      
        2
      
      
        3
      
       gf_log (GF_NFS, GF_LOG_ERROR, 
      
        "
      
      
        nfs must have at least one
      
      
        "
      
      
        "
      
      
         child subvolume
      
      
        "
      
      
        );


      
      
        4
      
      
        5
      
      
        return
      
      
         NULL;


      
      
        6
      
      
        7
      
       }

第二步：为 nfs_state 结构体指针分配内存，分配失败就报错并退出程序，分配内存代码如下：

      
        1
      
       nfs = GF_CALLOC (
      
        1
      
      , 
      
        sizeof
      
       (*nfs), gf_nfs_mt_nfs_state);

第三步：启动 rpc 服务： nfs->rpcsvc = nfs_rpcsvc_init (this->ctx, this->options);

第四步：根据参数 nfs.mem-factor 设置内存池的大小，用于提高访问速度。

这一步首先调用函数 xlator_get_volopt_info 得到参数的值，然后转换为无符号整型并赋值给 nfs->memfactor ，表示内存因子，然后计算整个内存池的大小并新建这样大小的一个内存池，代码如下：

      
        1
      
       fopspoolsize = nfs->memfactor *
      
         GF_NFS_CONCURRENT_OPS_MULT;


      
      
        2
      
      
        3
      
       nfs->foppool = mem_pool_new (
      
        struct
      
       nfs_fop_local, fopspoolsize);

第五步：按照与第四步同样的方式解析参数 nfs.dynamic-volumes 、 nfs.enable-ino32 和 nfs.port ，并且赋值给 nfs_state 结构体中相应的字段保存。

第六步：将 nfs_state 保存到 xlator 的私有数据部分并初始化 nfs 协议版本的链表。

      
        1
      
      
        this
      
      ->
      
        private
      
       = (
      
        void
      
       *
      
        )nfs;


      
      
        2
      
      
        3
      
               INIT_LIST_HEAD (&nfs->versions);

经过上面 6 步这个函数就执行完了，如果其中有地方出错都会进行相应的处理，尤其是资源的释放处理尤为重要，保证不会发生内存泄漏。其中第三步是比较重要也比较复杂的，涉及到 rpc 服务的初始化工作，所以需要详细分析，因为后面很多操作都会依赖于 rpc 服务进行通信， nfs_rpcsvc_init 函数定义和实现如下：

      
         1
      
       rpcsvc_t * nfs_rpcsvc_init (glusterfs_ctx_t *ctx, dict_t *
      
        options)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       rpcsvc_t        *svc =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
                   ret = -
      
        1
      
      
        ;


      
      
         8
      
      
         9
      
      
        int
      
                   poolsize = 
      
        0
      
      
        ;


      
      
        10
      
      
        11
      
        svc = GF_CALLOC (
      
        1
      
      , 
      
        sizeof
      
       (*svc), gf_common_mt_rpcsvc_t);
      
        //
      
      
        分配内存资源
      
      
        12
      
      
        13
      
       pthread_mutex_init (&svc->rpclock, NULL);
      
        //
      
      
        初始化锁
      
      
        14
      
      
        15
      
          INIT_LIST_HEAD (&svc->stages);
      
        //
      
      
        初始化rpc服务的阶段处理链表
      
      
        16
      
      
        17
      
          INIT_LIST_HEAD (&svc->authschemes);
      
        //
      
      
        初始化可用的身份验证方案链表
      
      
        18
      
      
        19
      
          INIT_LIST_HEAD (&svc->allprograms);
      
        //
      
      
        初始化存放所有程序的链表
      
      
        20
      
      
        21
      
         ret = nfs_rpcsvc_init_options (svc, options);
      
        //
      
      
        初始化一些选项信息
      
      
        22
      
      
        23
      
         ret = nfs_rpcsvc_auth_init (svc, options);
      
        //
      
      
        初始化权限信息
      
      
        24
      
      
        25
      
         poolsize = RPCSVC_POOLCOUNT_MULT * RPCSVC_DEFAULT_MEMFACTOR;
      
        //
      
      
        计算内存池大小
      
      
        26
      
      
        27
      
          svc->connpool = mem_pool_new (rpcsvc_conn_t, poolsize);
      
        //
      
      
        为连接对象分配内存池空间
      
      
        28
      
      
        29
      
          svc->defaultstage = nfs_rpcsvc_stage_init (svc);
      
        //
      
      
        初始化默认阶段执行服务
      
      
        30
      
      
        31
      
        svc->options = options;
      
        //
      
      
        赋值选项信息
      
      
        32
      
      
        33
      
          svc->ctx = ctx;
      
        //
      
      
        设置属于哪一个glusterfs的上下文
      
      
        34
      
      
        35
      
      
        return
      
      
         svc;


      
      
        36
      
      
        37
      
       }

这个函数是 rpc 服务的全局初始化函数，这是 rpc 服务的开始阶段（ rpc 服务分为多个阶段实现），等待 rpc 程序注册的到来。整个函数其实都是在初始化一个结构体的相关内容，就是 rpcsvc_t 结构体，它的作用就是描述 rpc 服务的状态（包括各个阶段的，对 rpc 各个阶段进行统一管理）。下面看看这个结构体的具体定义：

      
         1
      
      
        /*
      
      
         Contains global state required for all the RPC services. 
      
      
        */
      
      
         2
      
      
         3
      
       typedef 
      
        struct
      
      
         rpc_svc_state {


      
      
         4
      
      
         5
      
      
        /*
      
      
         Contains the list of rpcsvc_stage_t list of (program, version) handlers. other options. 
      
      
        */
      
      
         6
      
      
         7
      
      
        /*
      
      
         At this point, lock is not used to protect anything. Later, it'll be used for protecting stages. 
      
      
        */
      
      
         8
      
      
         9
      
      
                pthread_mutex_t         rpclock;


      
      
        10
      
      
        11
      
      
        /*
      
      
         This is the first stage that is inited, so that any RPC based services that do not need multi-threaded 


      
      
        12
      
      
        13
      
      
          * support can just use the service right away. This is not added to the stages list declared later.


      
      
        14
      
      
        15
      
      
                 * This is also the stage over which all service listeners are run. 
      
      
        */
      
      
        16
      
      
        17
      
               rpcsvc_stage_t          *
      
        defaultstage;


      
      
        18
      
      
        19
      
      
        /*
      
      
         When we have multi-threaded RPC support, we'll use this to link to the multiple Stages.
      
      
        */
      
      
        20
      
      
        21
      
      
        struct
      
       list_head        stages;         
      
        /*
      
      
         All stages 
      
      
        */
      
      
        22
      
      
        23
      
               unsigned 
      
        int
      
      
                    memfactor;


      
      
        24
      
      
        25
      
      
        struct
      
       list_head        authschemes;
      
        /*
      
      
         List of the authentication schemes available. 
      
      
        */
      
      
        26
      
      
        27
      
               dict_t                  *options;
      
        /*
      
      
         Reference to the options 
      
      
        */
      
      
        28
      
      
        29
      
      
        int
      
                           allow_insecure;
      
        /*
      
      
         Allow insecure ports. 
      
      
        */
      
      
        30
      
      
        31
      
               glusterfs_ctx_t         *
      
        ctx;


      
      
        32
      
      
        33
      
      
                gf_boolean_t            register_portmap;


      
      
        34
      
      
        35
      
      
        struct
      
      
         list_head        allprograms;


      
      
        36
      
      
        37
      
      
        struct
      
       mem_pool         *connpool;
      
        /*
      
      
         Mempool for incoming connection objects. 
      
      
        */
      
      
        38
      
      
        39
      
       } rpcsvc_t;

在 nfs_rpcsvc_init 函数中，有一个初始化权限信息的函数 nfs_rpcsvc_auth_init 和一个初始化 rpc 执行阶段信息的函数 nfs_rpcsvc_stage_init 需要重点分析。先分析权限信息的初始化函数 nfs_rpcsvc_auth_init ，如下：

（1）nfs_rpcsvc_auth_init 函数

函数定义和实现如下：

      
         1
      
      
        int
      
       nfs_rpcsvc_auth_init (rpcsvc_t *svc, dict_t *
      
        options)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        int
      
                   ret = -
      
        1
      
      
        ;


      
      
         6
      
      
         7
      
         ret = nfs_rpcsvc_auth_add_initers (svc);
      
        //
      
      
        增加auth-null和auth-unix两个关键字代表的初始化函数
      
      
         8
      
      
         9
      
       ret = nfs_rpcsvc_auth_init_auths (svc, options);
      
        //
      
      
        开启权限使能相关选项并且执行初始化函数
      
      
        10
      
      
        11
      
      
        return
      
      
         ret;


      
      
        12
      
      
        13
      
       }

这个函数主要实现增加权限的初始化函数到权限操作链表中，然后通过执行初始化函数得到一个描述相关权限信息的结构体，这个结构体包括一些操作函数指针的结构体地址和一些基本信息（如名称）。执行初始化函数并且得到权限描述信息的实现是在如下代码中（在 nfs_rpcsvc_auth_init_auths 中调用的）：

      
         1
      
      
        int
      
       nfs_rpcsvc_auth_init_auth (rpcsvc_t *svc, dict_t *options, 
      
        struct
      
       rpcsvc_auth_list *
      
        authitem)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        .....


      
      
         6
      
      
         7
      
          authitem->auth = authitem->init (svc, options);
      
        //
      
      
        执行权限的初始化函数
      
      
         8
      
      
         9
      
          authitem->enable = 
      
        1
      
      ;
      
        //
      
      
        权限使能
      
      
        10
      
      
        11
      
      
        ......


      
      
        12
      
      
        13
      
       }

这里执行的初始化函数是在上面初始化的，有两类权限的初始化函数，相关内容定义如下：

1 ） auth-null

      
         1
      
       rpcsvc_auth_ops_t nfs_auth_null_ops = {
      
        //
      
      
        权限操作相关的处理函数
      
      
         2
      
      
         3
      
               .conn_init              =
      
         NULL,


      
      
         4
      
      
         5
      
               .request_init           =
      
         nfs_auth_null_request_init,


      
      
         6
      
      
         7
      
               .authenticate           =
      
         nfs_auth_null_authenticate


      
      
         8
      
      
         9
      
      
        };


      
      
        10
      
      
        11
      
       rpcsvc_auth_t nfs_rpcsvc_auth_null = {
      
        //
      
      
        权限描述的结构体和默认值
      
      
        12
      
      
        13
      
               .authname       = 
      
        "
      
      
        AUTH_NULL
      
      
        "
      
      
        ,


      
      
        14
      
      
        15
      
               .authnum        =
      
         AUTH_NULL,


      
      
        16
      
      
        17
      
               .authops        = &
      
        nfs_auth_null_ops,


      
      
        18
      
      
        19
      
               .authprivate    =
      
         NULL


      
      
        20
      
      
        21
      
      
        };


      
      
        22
      
      
        23
      
       rpcsvc_auth_t * nfs_rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options)
      
        //
      
      
        初始化函数
      
      
        24
      
      
        25
      
      
        {


      
      
        26
      
      
        27
      
      
        return
      
       &nfs_rpcsvc_auth_null;
      
        //
      
      
        返回权限描述信息结构体
      
      
        28
      
      
        29
      
      
        }


      
      
        30
      
      
        31
      
      
        2 ） auth-unix


      
      
        32
      
      
        33
      
       rpcsvc_auth_ops_t nfs_auth_unix_ops = {
      
        //
      
      
        权限操作相关的处理函数
      
      
        34
      
      
        35
      
               .conn_init              =
      
         NULL,


      
      
        36
      
      
        37
      
               .request_init           =
      
         nfs_auth_unix_request_init,


      
      
        38
      
      
        39
      
               .authenticate           =
      
         nfs_auth_unix_authenticate


      
      
        40
      
      
        41
      
      
        };


      
      
        42
      
      
        43
      
       rpcsvc_auth_t nfs_rpcsvc_auth_unix = {
      
        //
      
      
        权限描述的结构体和默认值
      
      
        44
      
      
        45
      
               .authname       = 
      
        "
      
      
        AUTH_UNIX
      
      
        "
      
      
        ,


      
      
        46
      
      
        47
      
               .authnum        =
      
         AUTH_UNIX,


      
      
        48
      
      
        49
      
               .authops        = &
      
        nfs_auth_unix_ops,


      
      
        50
      
      
        51
      
               .authprivate    =
      
         NULL


      
      
        52
      
      
        53
      
      
        };


      
      
        54
      
      
        55
      
       rpcsvc_auth_t * nfs_rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options)
      
        //
      
      
        初始化函数
      
      
        56
      
      
        57
      
      
        {


      
      
        58
      
      
        59
      
      
        return
      
       &nfs_rpcsvc_auth_unix;
      
        //
      
      
        返回权限描述信息结构体
      
      
        60
      
      
        61
      
       }

（2）nfs_rpcsvc_stage_init 函数

首先还是看看这个函数的定义和实现吧：

      
         1
      
       rpcsvc_stage_t * nfs_rpcsvc_stage_init (rpcsvc_t *
      
        svc)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
        rpcsvc_stage_t          *stg =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
                           ret = -
      
        1
      
      
        ;


      
      
         8
      
      
         9
      
         size_t                  stacksize =
      
         RPCSVC_THREAD_STACK_SIZE;


      
      
        10
      
      
        11
      
      
          pthread_attr_t          stgattr;


      
      
        12
      
      
        13
      
         unsigned 
      
        int
      
                  eventpoolsize = 
      
        0
      
      
        ;


      
      
        14
      
      
        15
      
       stg = GF_CALLOC (
      
        1
      
      , 
      
        sizeof
      
      (*stg), gf_common_mt_rpcsvc_stage_t);
      
        //
      
      
        分配内存资源
      
      
        16
      
      
        17
      
       eventpoolsize = svc->memfactor * RPCSVC_EVENTPOOL_SIZE_MULT;
      
        //
      
      
        计算事件内存池大小
      
      
        18
      
      
        19
      
        stg->eventpool = event_pool_new (eventpoolsize);
      
        //
      
      
        分配内存资源
      
      
        20
      
      
        21
      
          pthread_attr_init (&stgattr);
      
        //
      
      
        初始化线程属性值
      
      
        22
      
      
        23
      
        ret = pthread_attr_setstacksize (&stgattr, stacksize);
      
        //
      
      
        设置线程的堆栈大小
      
      
        24
      
      
        25
      
       ret = pthread_create (&stg->tid, &stgattr, nfs_rpcsvc_stage_proc, (
      
        void
      
       *)stg);
      
        //
      
      
        创建线程
      
      
        26
      
      
        27
      
         stg->svc =
      
         svc;


      
      
        28
      
      
        29
      
      
        return
      
      
         stg;


      
      
        30
      
      
        31
      
       }

这个函数主要就是启动一个线程然后开始分发事件。事件分发函数会等待某一个事件的发生，事件发生以后会执行之前已经注册的函数指针，在这里注册的是权限操作相关的函数。具体的事件处理和分发过程就不再详细分析了！

2.nfs_add_all_initiators 函数

这个函数主要是添加各个版本的 nfs 协议的初始化函数。它三次调用函数 nfs_add_initer ，分别为 mnt3 、 mnt1 和 nfs3 版本的 nfs 协议（各个版本的协议内容见附件）注册初始化函数。详细看看 nfs_add_initer 函数的代码，如下：

      
         1
      
      
        int
      
       nfs_add_initer (
      
        struct
      
       list_head *
      
        list, nfs_version_initer_t init)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        struct
      
       nfs_initer_list  *
      
        new
      
       =
      
         NULL;


      
      
         6
      
      
         7
      
      
        new
      
       = GF_CALLOC (
      
        1
      
      , 
      
        sizeof
      
       (*
      
        new
      
      ), gf_nfs_mt_nfs_initer_list);
      
        //
      
      
        分配内存
      
      
         8
      
      
         9
      
      
        new
      
      ->init = init;
      
        //
      
      
        赋值初始化函数指针
      
      
        10
      
      
        11
      
         list_add_tail (&
      
        new
      
      ->list, list);
      
        //
      
      
        添加到协议版本链表的末尾
      
      
        12
      
      
        13
      
      
        return
      
      
        0
      
      
        ;


      
      
        14
      
      
        15
      
       }

每个版本的 nfs 协议都有自己的初始化函数，以便处理那些特殊的协议部分，上面的过程就是将各个版本 nfs 协议初始化保存到链表中，在使用协议的时候以便调用相应的初始化函数初始化相关协议内容。

（1）mnt3 版本协议

mnt3 版本的 nfs 协议的实现函数，代码如下：

      
         1
      
       rpcsvc_program_t * mnt3svc_init (xlator_t *
      
        nfsx)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        struct
      
       mount3_state     *mstate =
      
         NULL;


      
      
         6
      
      
         7
      
         mstate =
      
         mnt3_init_state (nfsx);


      
      
         8
      
      
         9
      
       mnt3prog.
      
        private
      
       =
      
         mstate;


      
      
        10
      
      
        11
      
      
        return
      
       &
      
        mnt3prog;


      
      
        12
      
      
        13
      
       }

这个函数代码很简单，只有两个重点内容需要关注，一个结构体和一个函数。结构体就是 mnt3prog 所属的 rpc_svc_program （代码中以 rpcsvc_program_t 类型使用），它的定义如下：

      
         1
      
      
        /*
      
      
         Describes a program and its version along with the function pointers


      
      
         2
      
      
         3
      
      
         * required to handle the procedures/actors of each program/version.


      
      
         4
      
      
         5
      
      
         * Never changed ever by any thread so no need for a lock. 
      
      
        */
      
      
         6
      
      
         7
      
      
        struct
      
      
         rpc_svc_program {


      
      
         8
      
      
         9
      
      
        struct
      
      
         list_head        proglist;


      
      
        10
      
      
        11
      
      
        char
      
      
                            progname[RPCSVC_NAME_MAX];


      
      
        12
      
      
        13
      
      
        int
      
      
                             prognum;


      
      
        14
      
      
        15
      
      
        int
      
      
                             progver;


      
      
        16
      
      
        17
      
               uint16_t                progport;       
      
        /*
      
      
         Registered with portmap 
      
      
        */
      
      
        18
      
      
        19
      
      
        int
      
                           progaddrfamily; 
      
        /*
      
      
         AF_INET or AF_INET6 
      
      
        */
      
      
        20
      
      
        21
      
      
        char
      
                          *proghost;      
      
        /*
      
      
         Bind host, can be NULL 
      
      
        */
      
      
        22
      
      
        23
      
               rpcsvc_actor_t          *actors;        
      
        /*
      
      
         All procedure handlers 
      
      
        */
      
      
        24
      
      
        25
      
      
        int
      
                           numactors;      
      
        /*
      
      
         Num actors in actor array 
      
      
        */
      
      
        26
      
      
        27
      
      
        int
      
                           proghighvers;   
      
        /*
      
      
         Highest ver for program supported by the system. 
      
      
        */
      
      
        28
      
      
        29
      
      
        int
      
                           proglowvers;    
      
        /*
      
      
         Lowest ver 
      
      
        */
      
      
        30
      
      
        31
      
      
        /*
      
      
         Program specific state handed to actors 
      
      
        */
      
      
        32
      
      
        33
      
      
        void
      
                          *
      
        private
      
      
        ;


      
      
        34
      
      
        35
      
      
        /*
      
      
         An integer that identifies the min auth strength that is required


      
      
        36
      
      
        37
      
      
                 * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.


      
      
        38
      
      
        39
      
      
                 * See RFC 1813, Section 5.2.1. 
      
      
        */
      
      
        40
      
      
        41
      
      
        int
      
      
                             min_auth;


      
      
        42
      
      
        43
      
      
        /*
      
      
         The translator in whose context the actor must execute. This is


      
      
        44
      
      
        45
      
      
                 * needed to setup THIS for memory accounting to work correctly. 
      
      
        */
      
      
        46
      
      
        47
      
               xlator_t                *
      
        actorxl;


      
      
        48
      
      
        49
      
       };

这个结构体的定义中注释已经很清晰，就不具体分析了，再看看程序中使用这个结构体的赋值，如下：

      
         1
      
       rpcsvc_program_t        mnt3prog =
      
         {


      
      
         2
      
      
         3
      
                               .progname       = 
      
        "
      
      
        MOUNT3
      
      
        "
      
      
        ,


      
      
         4
      
      
         5
      
                               .prognum        =
      
         MOUNT_PROGRAM,


      
      
         6
      
      
         7
      
                               .progver        =
      
         MOUNT_V3,


      
      
         8
      
      
         9
      
                               .progport       =
      
         GF_MOUNTV3_PORT,


      
      
        10
      
      
        11
      
                               .progaddrfamily =
      
         AF_INET,


      
      
        12
      
      
        13
      
                               .proghost       =
      
         NULL,


      
      
        14
      
      
        15
      
                               .actors         =
      
         mnt3svc_actors,


      
      
        16
      
      
        17
      
                               .numactors      =
      
         MOUNT3_PROC_COUNT,


      
      
        18
      
      
        19
      
       };

这是这个结构体的静态赋值方式；此外还有动态的赋值方式： private 成员是在 mnt3svc_init 中被赋值为上面提到的函数 mnt3_init_state 的返回值。这个函数也是下面将要分析的，它的实现代码如下：

      
         1
      
      
        struct
      
       mount3_state * mnt3_init_state (xlator_t *
      
        nfsx)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        struct
      
       mount3_state     *ms =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
                           ret = -
      
        1
      
      
        ;


      
      
         8
      
      
         9
      
         ms = GF_CALLOC (
      
        1
      
      , 
      
        sizeof
      
       (*ms), gf_nfs_mt_mount3_state);
      
        //
      
      
        分配结构体对象内存
      
      
        10
      
      
        11
      
          ms->iobpool = nfsx->ctx->iobuf_pool;
      
        //
      
      
        IO缓冲池
      
      
        12
      
      
        13
      
         ms->nfsx = nfsx;
      
        //
      
      
        属于哪一个xlator
      
      
        14
      
      
        15
      
         INIT_LIST_HEAD (&ms->exportlist);
      
        //
      
      
        初始化导出列表
      
      
        16
      
      
        17
      
          ret = mnt3_init_options (ms, nfsx->options);
      
        //
      
      
        初始化选项信息
      
      
        18
      
      
        19
      
         INIT_LIST_HEAD (&ms->mountlist);
      
        //
      
      
        初始化挂载列表
      
      
        20
      
      
        21
      
          LOCK_INIT (&ms->mountlock);
      
        //
      
      
        初始化锁
      
      
        22
      
      
        23
      
      
        return
      
      
         ms;


      
      
        24
      
      
        25
      
       }

上面这个函数最主要的工作还是初始化一些相关的参数和选项，其中主要的内容还是一个结构体和一个函数，结构体就是 mount3_state ，它的定义如下：

      
         1
      
      
        struct
      
      
         mount3_state {


      
      
         2
      
      
         3
      
               xlator_t                *
      
        nfsx;


      
      
         4
      
      
         5
      
      
        /*
      
      
         The buffers for all network IO are got from this pool. 
      
      
        */
      
      
         6
      
      
         7
      
      
        struct
      
       iobuf_pool       *
      
        iobpool;


      
      
         8
      
      
         9
      
      
        /*
      
      
         List of exports, can be volumes or directories in those volumes. 
      
      
        */
      
      
        10
      
      
        11
      
      
        struct
      
      
         list_head        exportlist;


      
      
        12
      
      
        13
      
      
        /*
      
      
         List of current mount points over all the exports from this


      
      
        14
      
      
        15
      
      
                 * server. 
      
      
        */
      
      
        16
      
      
        17
      
      
        struct
      
      
         list_head        mountlist;


      
      
        18
      
      
        19
      
      
        /*
      
      
         Used to protect the mountlist. 
      
      
        */
      
      
        20
      
      
        21
      
      
                gf_lock_t               mountlock;


      
      
        22
      
      
        23
      
      
        /*
      
      
         Set to 0 if exporting full volumes is disabled. On by default. 
      
      
        */
      
      
        24
      
      
        25
      
      
        int
      
      
                             export_volumes;


      
      
        26
      
      
        27
      
      
        int
      
      
                             export_dirs;


      
      
        28
      
      
        29
      
       };

上面这个结构体基本上描述了 mnt3 版本的 nfs 协议的一些状态信息，注释中都有具体的描述了，下面这个函数就是针对这些信息做一些初始化的工作，如下：

      
         1
      
      
        int
      
       mnt3_init_options (
      
        struct
      
       mount3_state *ms, dict_t *
      
        options)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
        xlator_list_t   *volentry =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
                   ret = -
      
        1
      
      
        ;


      
      
         8
      
      
         9
      
       __mnt3_init_volume_export (ms, options);
      
        //
      
      
        根据nfs3.export-volumes配置选项设置导出卷的信息
      
      
        10
      
      
        11
      
          __mnt3_init_dir_export (ms, options);
      
        //
      
      
        根据nfs3.export-dirs配置选项设置导出目录的信息
      
      
        12
      
      
        13
      
         volentry = ms->nfsx->children;
      
        //
      
      
        初始化xlator的链表
      
      
        14
      
      
        15
      
      
        while
      
       (volentry) {
      
        //
      
      
        遍历所有的子xlator
      
      
        16
      
      
        17
      
            gf_log (GF_MNT, GF_LOG_TRACE, 
      
        "
      
      
        Initing options for: %s
      
      
        "
      
      , volentry->xlator->
      
        name);


      
      
        18
      
      
        19
      
              ret = __mnt3_init_volume (ms, options, volentry->xlator);
      
        //
      
      
        初始化xlator的卷信息
      
      
        20
      
      
        21
      
       volentry = volentry->next;
      
        //
      
      
        下一个xlator
      
      
        22
      
      
        23
      
      
          }


      
      
        24
      
      
        25
      
      
        return
      
      
         ret;


      
      
        26
      
      
        27
      
       }

上面的代码主要是初始化所有子 xlator 的卷相关信息，调用函数 __mnt3_init_volume 实现，代码定义如下（把所有错误处理代码、变量定义和参数检查删掉后的代码）：

      
         1
      
      
        int
      
       __mnt3_init_volume (
      
        struct
      
       mount3_state *ms, dict_t *opts, xlator_t *
      
        xlator)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
               uuid_clear (volumeid);
      
        //
      
      
        清除uuid，即设置为0
      
      
         6
      
      
         7
      
      
        if
      
       (gf_nfs_dvm_off (nfs_state (ms->nfsx)))
      
        //
      
      
        如果动态卷功能已关闭，则跳转到 no_dvm
      
      
         8
      
      
         9
      
      
        goto
      
      
         no_dvm;


      
      
        10
      
      
        11
      
               ret = snprintf (searchstr, 
      
        1024
      
      , 
      
        "
      
      
        nfs3.%s.volume-id
      
      
        "
      
      , xlator->name);
      
        //
      
      
        格式化选项key的字符串
      
      
        12
      
      
        13
      
      
        if
      
       (dict_get (opts, searchstr)) {
      
        //
      
      
        根据选项key得到选项的值
      
      
        14
      
      
        15
      
                       ret = dict_get_str (opts, searchstr, &optstr);
      
        //
      
      
        得到字符串形式的值
      
      
        16
      
      
        17
      
      
                } 


      
      
        18
      
      
        19
      
      
        if
      
       (optstr) {
      
        //
      
      
        如果不为null
      
      
        20
      
      
        21
      
                       ret = uuid_parse (optstr, volumeid);
      
        //
      
      
        根据得到的值解析uuid
      
      
        22
      
      
        23
      
      
                }


      
      
        24
      
      
        25
      
      
        no_dvm:


      
      
        26
      
      
        27
      
               ret = snprintf (searchstr, 
      
        1024
      
      , 
      
        "
      
      
        nfs3.%s.export-dir
      
      
        "
      
      , xlator->name);
      
        //
      
      
        export-dir选项key
      
      
        28
      
      
        29
      
      
        if
      
       (dict_get (opts, searchstr)) {
      
        //
      
      
        同上
      
      
        30
      
      
        31
      
                       ret = dict_get_str (opts, searchstr, &
      
        optstr);


      
      
        32
      
      
        33
      
                       ret = __mnt3_init_volume_direxports (ms, xlator, optstr, volumeid);
      
        //
      
      
        初始化卷导出目录
      
      
        34
      
      
        35
      
      
                }


      
      
        36
      
      
        37
      
      
        if
      
       (ms->export_volumes) {
      
        //
      
      
        如果导出卷使能
      
      
        38
      
      
        39
      
                       newexp = mnt3_init_export_ent (ms, xlator, NULL, volumeid);
      
        //
      
      
        初始化导出环境
      
      
        40
      
      
        41
      
                       list_add_tail (&newexp->explist, &ms->exportlist);
      
        //
      
      
        添加导出项到导出列表的末尾
      
      
        42
      
      
        43
      
      
                }


      
      
        44
      
      
        45
      
               ret = 
      
        0
      
      
        ;


      
      
        46
      
      
        47
      
      
        err:


      
      
        48
      
      
        49
      
      
        return
      
      
         ret;


      
      
        50
      
      
        51
      
       }

由上面代码可知：主要在初始化导出目录和导出环境，具体的实现都是调用相应的函数实现。

总结：这个初始化过程主要是在分配一些资源和建立一些关系，真正处理客户端请求的功能是在很多注册的或关联的函数中，客户端的某一个请求可能就需要调用一个专门的函数来处理。

（2）mnt1 版本协议

这个版本的协议实现基本上和 mnt3 的实现一样，很多函数基本上就是直接调用 mnt3 的，不同的只是描述相关协议的结构体内容，例如客户端请求所对应的处理函数等等。所以不再分析此版本协议的初始化。

（3）nfs3 版本协议

此版本的 nfs 协议初始化流程和前面分析的 mnt3 版本协议基本相同，下面只分析不同的部分，具体流程就不再逐一分析了，主要介绍一些重点信息。首先需要介绍的就是 nfs3_state 结构体，定义如下：

      
         1
      
      
        struct
      
      
         nfs3_state {


      
      
         2
      
      
         3
      
      
        /*
      
      
         The NFS xlator pointer. The NFS xlator can be running


      
      
         4
      
      
         5
      
      
                 * multiple versions of the NFS protocol.
      
      
        */
      
      
         6
      
      
         7
      
               xlator_t                *
      
        nfsx;


      
      
         8
      
      
         9
      
      
        /*
      
      
         The iob pool from which memory allocations are made for receiving


      
      
        10
      
      
        11
      
      
                 * and sending network messages. 
      
      
        */
      
      
        12
      
      
        13
      
      
        struct
      
       iobuf_pool       *
      
        iobpool;


      
      
        14
      
      
        15
      
      
        /*
      
      
         List of child subvolumes for the NFSv3 protocol.


      
      
        16
      
      
        17
      
      
                 * Right now, is simply referring to the list of children in nfsx above. 
      
      
        */
      
      
        18
      
      
        19
      
               xlator_list_t           *
      
        exportslist;


      
      
        20
      
      
        21
      
      
        struct
      
      
         list_head        exports;


      
      
        22
      
      
        23
      
      
        /*
      
      
         Mempool for allocations of struct nfs3_local 
      
      
        */
      
      
        24
      
      
        25
      
      
        struct
      
       mem_pool         *
      
        localpool;


      
      
        26
      
      
        27
      
      
        /*
      
      
         Server start-up timestamp, currently used for write verifier. 
      
      
        */
      
      
        28
      
      
        29
      
      
                uint64_t                serverstart;


      
      
        30
      
      
        31
      
      
        /*
      
      
         NFSv3 Protocol configurables 
      
      
        */
      
      
        32
      
      
        33
      
      
                size_t                  readsize;


      
      
        34
      
      
        35
      
      
                size_t                  writesize;


      
      
        36
      
      
        37
      
      
                size_t                  readdirsize;


      
      
        38
      
      
        39
      
      
        /*
      
      
         Size of the iobufs used, depends on the sizes of the three params above. 
      
      
        */
      
      
        40
      
      
        41
      
      
                size_t                  iobsize;


      
      
        42
      
      
        43
      
               unsigned 
      
        int
      
      
                    memfactor;


      
      
        44
      
      
        45
      
      
        struct
      
      
         list_head        fdlru;


      
      
        46
      
      
        47
      
      
                gf_lock_t               fdlrulock;


      
      
        48
      
      
        49
      
      
        int
      
      
                             fdcount;


      
      
        50
      
      
        51
      
       };

上面的结构体主要是记录一些 nfs3 协议运行过程中的状态信息，每一项的意义代码中有详细注释，理解这些信息对后面其他代码的理解是有非常大的好处的。再看看下面这个结构体的初始化默认值：

      
         1
      
       rpcsvc_program_t        nfs3prog =
      
         {


      
      
         2
      
      
         3
      
                               .progname       = 
      
        "
      
      
        NFS3
      
      
        "
      
      
        ,


      
      
         4
      
      
         5
      
                               .prognum        =
      
         NFS_PROGRAM,


      
      
         6
      
      
         7
      
                               .progver        =
      
         NFS_V3,


      
      
         8
      
      
         9
      
                               .progport       =
      
         GF_NFS3_PORT,


      
      
        10
      
      
        11
      
                               .progaddrfamily =
      
         AF_INET,


      
      
        12
      
      
        13
      
                               .proghost       =
      
         NULL,


      
      
        14
      
      
        15
      
                               .actors         =
      
         nfs3svc_actors,


      
      
        16
      
      
        17
      
                               .numactors      =
      
         NFS3_PROC_COUNT,


      
      
        18
      
      
        19
      
      
        /*
      
      
         Requests like FSINFO are sent before an auth scheme


      
      
        20
      
      
        21
      
      
                                 * is inited by client. See RFC 2623, Section 2.3.2. 
      
      
        */
      
      
        22
      
      
        23
      
                               .min_auth       =
      
         AUTH_NULL,


      
      
        24
      
      
        25
      
       };

再看看里面的 nfs3svc_actors 这个结构体数组的值，如下：

      
         1
      
       rpcsvc_actor_t          nfs3svc_actors[NFS3_PROC_COUNT] =
      
         {


      
      
         2
      
      
         3
      
               {
      
        "
      
      
        NULL
      
      
        "
      
      
        ,        NFS3_NULL,      nfs3svc_null,   NULL,   NULL},


      
      
         4
      
      
         5
      
               {
      
        "
      
      
        GETATTR
      
      
        "
      
      
        ,     NFS3_GETATTR,   nfs3svc_getattr,NULL,   NULL},


      
      
         6
      
      
         7
      
               {
      
        "
      
      
        SETATTR
      
      
        "
      
      
        ,     NFS3_SETATTR,   nfs3svc_setattr,NULL,   NULL},


      
      
         8
      
      
         9
      
               {
      
        "
      
      
        LOOKUP
      
      
        "
      
      
        ,      NFS3_LOOKUP,    nfs3svc_lookup, NULL,   NULL},


      
      
        10
      
      
        11
      
               {
      
        "
      
      
        ACCESS
      
      
        "
      
      
        ,      NFS3_ACCESS,    nfs3svc_access, NULL,   NULL},


      
      
        12
      
      
        13
      
               {
      
        "
      
      
        READLINK
      
      
        "
      
      
        ,    NFS3_READLINK,  nfs3svc_readlink,NULL,  NULL},


      
      
        14
      
      
        15
      
               {
      
        "
      
      
        READ
      
      
        "
      
      
        ,        NFS3_READ,      nfs3svc_read,   NULL,   NULL},


      
      
        16
      
      
        17
      
               {
      
        "
      
      
        WRITE
      
      
        "
      
      
        , NFS3_WRITE, nfs3svc_write, nfs3svc_write_vec, nfs3svc_write_vecsizer},


      
      
        18
      
      
        19
      
               {
      
        "
      
      
        CREATE
      
      
        "
      
      
        ,      NFS3_CREATE,    nfs3svc_create, NULL,   NULL},


      
      
        20
      
      
        21
      
               {
      
        "
      
      
        MKDIR
      
      
        "
      
      
        ,       NFS3_MKDIR,     nfs3svc_mkdir,  NULL,   NULL},


      
      
        22
      
      
        23
      
               {
      
        "
      
      
        SYMLINK
      
      
        "
      
      
        ,     NFS3_SYMLINK,   nfs3svc_symlink,NULL,   NULL},


      
      
        24
      
      
        25
      
               {
      
        "
      
      
        MKNOD
      
      
        "
      
      
        ,       NFS3_MKNOD,     nfs3svc_mknod,  NULL,   NULL},


      
      
        26
      
      
        27
      
               {
      
        "
      
      
        REMOVE
      
      
        "
      
      
        ,      NFS3_REMOVE,    nfs3svc_remove, NULL,   NULL},


      
      
        28
      
      
        29
      
               {
      
        "
      
      
        RMDIR
      
      
        "
      
      
        ,       NFS3_RMDIR,     nfs3svc_rmdir,  NULL,   NULL},


      
      
        30
      
      
        31
      
               {
      
        "
      
      
        RENAME
      
      
        "
      
      
        ,      NFS3_RENAME,    nfs3svc_rename, NULL,   NULL},


      
      
        32
      
      
        33
      
               {
      
        "
      
      
        LINK
      
      
        "
      
      
        ,        NFS3_LINK,      nfs3svc_link,   NULL,   NULL},


      
      
        34
      
      
        35
      
               {
      
        "
      
      
        READDIR
      
      
        "
      
      
        ,     NFS3_READDIR,   nfs3svc_readdir,NULL,   NULL},


      
      
        36
      
      
        37
      
               {
      
        "
      
      
        READDIRPLUS
      
      
        "
      
      
        , NFS3_READDIRP,  nfs3svc_readdirp,NULL,  NULL},


      
      
        38
      
      
        39
      
               {
      
        "
      
      
        FSSTAT
      
      
        "
      
      
        ,      NFS3_FSSTAT,    nfs3svc_fsstat, NULL,   NULL},


      
      
        40
      
      
        41
      
               {
      
        "
      
      
        FSINFO
      
      
        "
      
      
        ,      NFS3_FSINFO,    nfs3svc_fsinfo, NULL,   NULL},


      
      
        42
      
      
        43
      
               {
      
        "
      
      
        PATHCONF
      
      
        "
      
      
        ,    NFS3_PATHCONF,  nfs3svc_pathconf,NULL,  NULL},


      
      
        44
      
      
        45
      
               {
      
        "
      
      
        COMMIT
      
      
        "
      
      
        ,      NFS3_COMMIT,    nfs3svc_commit, NULL,   NULL}


      
      
        46
      
      
        47
      
       };

由上面两个结构体的值可以看出，一个具体版本的 nfs 协议都有一个对应的结构体描述其基本信息，还有一个结构体数组存储了消息与函数的对应关系，当接收到什么消息就执行对应的函数。明白了这一点，其实对于各个版本的协议分析都大同小异了，关键就在于各个函数具体的实现。而开头就介绍的那个结构体存放的都是各个版本不同的信息部分，所以会保存在 rpc_svc_program 结构体的 private 成员中（ void * 类型可以保存任何数据类型，也表示是各个版本的 nfs 协议的私有部分数据）。

3.nfs_init_subvolumes 函数

这个函数完成初始化所有子卷的任务，它首先计算需要分配给 inode 表使用的缓存大小，然后遍历存放子卷的链表，然后依次调用 nfs_init_subvolume 函数分别初始化每一个子卷，这个函数定义和实现如下：

      
         1
      
      
        int
      
       nfs_init_subvolume (
      
        struct
      
       nfs_state *nfs, xlator_t *
      
        xl)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       unsigned 
      
        int
      
          lrusize = 
      
        0
      
      
        ;


      
      
         6
      
      
         7
      
      
        int
      
                   ret = -
      
        1
      
      
        ;


      
      
         8
      
      
         9
      
       lrusize = nfs->memfactor * GF_NFS_INODE_LRU_MULT;
      
        //
      
      
        计算在lru链表中inodes的数量
      
      
        10
      
      
        11
      
          xl->itable = inode_table_new (lrusize, xl);
      
        //
      
      
        新建一个inode的表
      
      
        12
      
      
        13
      
        ret = 
      
        0
      
      
        ;


      
      
        14
      
      
        15
      
      
        err:


      
      
        16
      
      
        17
      
      
        return
      
      
         ret;


      
      
        18
      
      
        19
      
       }

这里最重要的是 inode_table_t 结构体和 inode_table_new 函数， inode_table_t 定义如下：

      
         1
      
      
        struct
      
      
         _inode_table {


      
      
         2
      
      
         3
      
               pthread_mutex_t    
      
        lock
      
      
        ;


      
      
         4
      
      
         5
      
               size_t             hashsize;    
      
        /*
      
      
         bucket size of inode hash and dentry hash 
      
      
        */
      
      
         6
      
      
         7
      
      
        char
      
                    *name;        
      
        /*
      
      
         name of the inode table, just for gf_log() 
      
      
        */
      
      
         8
      
      
         9
      
               inode_t           *root;        
      
        /*
      
      
         root directory inode, with number 1 
      
      
        */
      
      
        10
      
      
        11
      
               xlator_t          *xl;          
      
        /*
      
      
         xlator to be called to do purge 
      
      
        */
      
      
        12
      
      
        13
      
               uint32_t           lru_limit;   
      
        /*
      
      
         maximum LRU cache size 
      
      
        */
      
      
        14
      
      
        15
      
      
        struct
      
       list_head  *inode_hash;  
      
        /*
      
      
         buckets for inode hash table 
      
      
        */
      
      
        16
      
      
        17
      
      
        struct
      
       list_head  *name_hash;   
      
        /*
      
      
         buckets for dentry hash table 
      
      
        */
      
      
        18
      
      
        19
      
      
        struct
      
       list_head   active;      
      
        /*
      
      
         list of inodes currently active (in an fop) 
      
      
        */
      
      
        20
      
      
        21
      
               uint32_t           active_size; 
      
        /*
      
      
         count of inodes in active list 
      
      
        */
      
      
        22
      
      
        23
      
      
        struct
      
       list_head   lru;         
      
        /*
      
      
         list of inodes recently used.


      
      
        24
      
      
        25
      
      
                                                   lru.next most recent 
      
      
        */
      
      
        26
      
      
        27
      
               uint32_t           lru_size;    
      
        /*
      
      
         count of inodes in lru list  
      
      
        */
      
      
        28
      
      
        29
      
      
        struct
      
       list_head   purge;       
      
        /*
      
      
         list of inodes to be purged soon 
      
      
        */
      
      
        30
      
      
        31
      
               uint32_t           purge_size;  
      
        /*
      
      
         count of inodes in purge list 
      
      
        */
      
      
        32
      
      
        33
      
      
        struct
      
       mem_pool   *inode_pool;  
      
        /*
      
      
         memory pool for inodes 
      
      
        */
      
      
        34
      
      
        35
      
      
        struct
      
       mem_pool   *dentry_pool; 
      
        /*
      
      
         memory pool for dentrys 
      
      
        */
      
      
        36
      
      
        37
      
      
        struct
      
       mem_pool   *fd_mem_pool; 
      
        /*
      
      
         memory pool for fd_t 
      
      
        */
      
      
        38
      
      
        39
      
       };

结构体中每一项都有详细的注释了，就不多解析了，下面继续分析 inode_table_new 函数，由于这个函数的代码还是有点多，所以采取分步骤的方式来解析，如下：

第一步：定义一个 inode_table_t 结构体并且分配内存：

      
        1
      
               inode_table_t *
      
        new
      
       =
      
         NULL;


      
      
        2
      
      
        3
      
      
        new
      
       = (
      
        void
      
       *)GF_CALLOC(
      
        1
      
      , 
      
        sizeof
      
       (*
      
        new
      
      ), gf_common_mt_inode_table_t);

第二步：初始化各个参数；

第三步：初始化各个链表，如下：

      
        1
      
               INIT_LIST_HEAD (&
      
        new
      
      ->active);
      
        //
      
      
        初始化激活链表
      
      
        2
      
      
        3
      
               INIT_LIST_HEAD (&
      
        new
      
      ->lru);
      
        //
      
      
        最近使用链表
      
      
        4
      
      
        5
      
               INIT_LIST_HEAD (&
      
        new
      
      ->purge);
      
        //
      
      
        待清除的链表

第四步：为 inode 表设置 root 的 inode 节点信息：

      
        1
      
       __inode_table_init_root (
      
        new
      
      );

第五步：为 inode 表初始化锁。

上面的第四步是操作 inode 节点相关的信息，在 ext2/3 文件系统中也有 inode 节点，所以具体看看 inode 节点信息的管理和操作，就从初始化一个 inode 表的根节点开始，定义如下：

      
         1
      
      
        static
      
      
        void
      
       __inode_table_init_root (inode_table_t *
      
        table)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       inode_t     *root = NULL;
      
        //
      
      
        定义inode表的根节点
      
      
         6
      
      
         7
      
      
        struct
      
       iatt  iatt = {
      
        0
      
      , };
      
        //
      
      
        inode节点的属性信息
      
      
         8
      
      
         9
      
       root = __inode_create (table);
      
        //
      
      
        创建一个inode表的根节点
      
      
        10
      
      
        11
      
       iatt.ia_gfid[
      
        15
      
      ] = 
      
        1
      
      ;
      
        //
      
      
        inode节点的属性赋值
      
      
        12
      
      
        13
      
          iatt.ia_ino = 
      
        1
      
      
        ;


      
      
        14
      
      
        15
      
         iatt.ia_type =
      
         IA_IFDIR;


      
      
        16
      
      
        17
      
         table->root = root;
      
        //
      
      
        赋值inode表的根节点
      
      
        18
      
      
        19
      
          __inode_link (root, NULL, NULL, &iatt);
      
        //
      
      
        inode节点和属性连接起来
      
      
        20
      
      
        21
      
       }

整个 inode 表的初始化完成根节点创建和属性的赋值，下面主要看看两个结构体定义和两个函数的实现，先看看 inode 和 iatt 两个结构体的定义，他们的定义中包含很多重要的信息，他们的定义如下：

      
         1
      
      
        struct
      
      
         _inode {


      
      
         2
      
      
         3
      
               inode_table_t       *table;         
      
        /*
      
      
         the table this inode belongs to 
      
      
        */
      
      
         4
      
      
         5
      
      
                uuid_t               gfid;


      
      
         6
      
      
         7
      
               gf_lock_t            
      
        lock
      
      
        ;


      
      
         8
      
      
         9
      
      
                uint64_t             nlookup;


      
      
        10
      
      
        11
      
               uint32_t             
      
        ref
      
      ;           
      
        /*
      
      
         reference count on this inode 
      
      
        */
      
      
        12
      
      
        13
      
               ino_t                ino;           
      
        /*
      
      
         inode number in the storage (persistent) 
      
      
        */
      
      
        14
      
      
        15
      
               ia_type_t            ia_type;       
      
        /*
      
      
         what kind of file 
      
      
        */
      
      
        16
      
      
        17
      
      
        struct
      
       list_head     fd_list;       
      
        /*
      
      
         list of open files on this inode 
      
      
        */
      
      
        18
      
      
        19
      
      
        struct
      
       list_head     dentry_list;   
      
        /*
      
      
         list of directory entries for this inode 
      
      
        */
      
      
        20
      
      
        21
      
      
        struct
      
       list_head     hash;          
      
        /*
      
      
         hash table pointers 
      
      
        */
      
      
        22
      
      
        23
      
      
        struct
      
       list_head     list;          
      
        /*
      
      
         active/lru/purge 
      
      
        */
      
      
        24
      
      
        25
      
      
        struct
      
       _inode_ctx   *_ctx;    
      
        /*
      
      
         replacement for dict_t *(inode->ctx) 
      
      
        */
      
      
        26
      
      
        27
      
      
        };


      
      
        28
      
      
        29
      
      
        struct
      
      
         iatt {


      
      
        30
      
      
        31
      
               uint64_t     ia_ino;        
      
        /*
      
      
         inode number 
      
      
        */
      
      
        32
      
      
        33
      
      
                uuid_t       ia_gfid;


      
      
        34
      
      
        35
      
               uint64_t     ia_dev;        
      
        /*
      
      
         backing device ID 
      
      
        */
      
      
        36
      
      
        37
      
               ia_type_t    ia_type;       
      
        /*
      
      
         type of file 
      
      
        */
      
      
        38
      
      
        39
      
               ia_prot_t    ia_prot;       
      
        /*
      
      
         protection 
      
      
        */
      
      
        40
      
      
        41
      
               uint32_t     ia_nlink;      
      
        /*
      
      
         Link count 
      
      
        */
      
      
        42
      
      
        43
      
               uint32_t     ia_uid;        
      
        /*
      
      
         user ID of owner 
      
      
        */
      
      
        44
      
      
        45
      
               uint32_t     ia_gid;        
      
        /*
      
      
         group ID of owner 
      
      
        */
      
      
        46
      
      
        47
      
               uint64_t     ia_rdev;       
      
        /*
      
      
         device ID (if special file) 
      
      
        */
      
      
        48
      
      
        49
      
               uint64_t     ia_size;       
      
        /*
      
      
         file size in bytes 
      
      
        */
      
      
        50
      
      
        51
      
               uint32_t     ia_blksize;    
      
        /*
      
      
         blocksize for filesystem I/O 
      
      
        */
      
      
        52
      
      
        53
      
               uint64_t     ia_blocks;     
      
        /*
      
      
         number of 512B blocks allocated 
      
      
        */
      
      
        54
      
      
        55
      
               uint32_t     ia_atime;      
      
        /*
      
      
         last access time 
      
      
        */
      
      
        56
      
      
        57
      
      
                uint32_t     ia_atime_nsec;


      
      
        58
      
      
        59
      
               uint32_t     ia_mtime;      
      
        /*
      
      
         last modification time 
      
      
        */
      
      
        60
      
      
        61
      
      
                uint32_t     ia_mtime_nsec;


      
      
        62
      
      
        63
      
               uint32_t     ia_ctime;      
      
        /*
      
      
         last status change time 
      
      
        */
      
      
        64
      
      
        65
      
      
                uint32_t     ia_ctime_nsec;


      
      
        66
      
      
        67
      
       };

上面的定义代码中都有很详细的注释了，下面继续看 inode 节点的创建函数，定义如下：

      
         1
      
      
        static
      
       inode_t * __inode_create (inode_table_t *
      
        table)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       inode_t  *newi =
      
         NULL;


      
      
         6
      
      
         7
      
          newi = mem_get0 (table->inode_pool);
      
        //
      
      
        从inode表中的inode内存池中得到一个inode内存
      
      
         8
      
      
         9
      
       newi->table = table;
      
        //
      
      
        想创建的inode属于哪一个inode表
      
      
        10
      
      
        11
      
       LOCK_INIT (&newi->
      
        lock
      
      );
      
        //
      
      
        操作inode节点以前初始化锁
      
      
        12
      
      
        13
      
       INIT_LIST_HEAD (&newi->fd_list);
      
        //
      
      
        初始化各个链表
      
      
        14
      
      
        15
      
          INIT_LIST_HEAD (&newi->
      
        list);


      
      
        16
      
      
        17
      
          INIT_LIST_HEAD (&newi->
      
        hash);


      
      
        18
      
      
        19
      
          INIT_LIST_HEAD (&newi->
      
        dentry_list);


      
      
        20
      
      
        21
      
       newi->_ctx = GF_CALLOC (
      
        1
      
      , (
      
        sizeof
      
       (
      
        struct
      
       _inode_ctx) *table->xl->graph->
      
        xl_count),


      
      
        22
      
      
        23
      
                                       gf_common_mt_inode_ctx);
      
        //
      
      
        为多键值对结构体分配内存
      
      
        24
      
      
        25
      
      
        if
      
       (newi->_ctx ==
      
         NULL) {


      
      
        26
      
      
        27
      
           LOCK_DESTROY (&newi->
      
        lock
      
      );
      
        //
      
      
        释放锁
      
      
        28
      
      
        29
      
               mem_put (table->inode_pool, newi);
      
        //
      
      
        把inode节点放回内存池
      
      
        30
      
      
        31
      
            newi =
      
         NULL;


      
      
        32
      
      
        33
      
      
        goto
      
      
        out
      
      
        ;


      
      
        34
      
      
        35
      
      
          }


      
      
        36
      
      
        37
      
          list_add (&newi->list, &table->lru);
      
        //
      
      
        增加链表到最近使用链表
      
      
        38
      
      
        39
      
          table->lru_size++;
      
        //
      
      
        最近使用链表的数量加1
      
      
        40
      
      
        41
      
      
        out
      
      
        :


      
      
        42
      
      
        43
      
      
        return
      
      
         newi;


      
      
        44
      
      
        45
      
       }

这里面最难懂也最重要的是 mem_get0 函数，它的主要作用就是从 inode 节点的内存池中获取一个 inode 节点对象所需要的内存空间，具体的内存池的管理和分配使用到了 slab 分配器相关的知识。 Slab 分配器的思想就是把以前已经分配过的对象内存缓存起来，下一次同类的对象需要分配时就直接从缓存中取得，这样省去分配和初始化的时间（因为是同样的内存对象）。除了 mem_get0 函数，其余代码做一些初始化的相关工作，后面还需要为多键值对结构体分配内存，如果分配失败就销毁锁并把 inode 节点归还内存池。这里可以再学习一点知识，就是多键值对的结构，定义如下：

      
         1
      
      
        struct
      
      
         _inode_ctx {


      
      
         2
      
      
         3
      
      
                union {


      
      
         4
      
      
         5
      
                       uint64_t    key; xlator_t   *
      
        xl_key;


      
      
         6
      
      
         7
      
      
                };


      
      
         8
      
      
         9
      
      
                union {


      
      
        10
      
      
        11
      
                       uint64_t    value1; 
      
        void
      
             *
      
        ptr1;


      
      
        12
      
      
        13
      
      
                };


      
      
        14
      
      
        15
      
      
                union {


      
      
        16
      
      
        17
      
                       uint64_t    value2; 
      
        void
      
             *
      
        ptr2;


      
      
        18
      
      
        19
      
      
                };


      
      
        20
      
      
        21
      
       };

这个结构体的作用是可以有两种类型的键，也可以有两种类型的值，其中一种可以是任意数据结构，而且这是一种一个键对应两个值的结构，特殊情况特殊的处理，从这里可以学习到，如果以后有一个键关联三个值的时候也可以采取这种方式。虽然这个结构体在这里具体是什么作用还不是很明朗，但是可以肯定的是用处大大的，后面可能会用到。

继续看 __inode_link 函数的定义和实现，代码如下：

      
         1
      
      
        static
      
       inode_t * __inode_link (inode_t *inode, inode_t *parent, 
      
        const
      
      
        char
      
       *name, 
      
        struct
      
       iatt *
      
        iatt)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       dentry_t      *dentry = NULL;
      
        //
      
      
        目录项和inode相关的变量定义
      
      
         6
      
      
         7
      
          dentry_t      *old_dentry =
      
         NULL;


      
      
         8
      
      
         9
      
         inode_t       *old_inode =
      
         NULL;


      
      
        10
      
      
        11
      
         inode_table_t *table =
      
         NULL;


      
      
        12
      
      
        13
      
          inode_t       *link_inode =
      
         NULL;


      
      
        14
      
      
        15
      
          table = inode->
      
        table;


      
      
        16
      
      
        17
      
      
        if
      
      
         (parent) {


      
      
        18
      
      
        19
      
      
        if
      
       (inode->table != parent->table) {
      
        //
      
      
        防止不同的inode表连接起来（成为父子关系）
      
      
        20
      
      
        21
      
              GF_ASSERT (!
      
        "
      
      
        link attempted b/w inodes of diff table
      
      
        "
      
      
        );


      
      
        22
      
      
        23
      
      
            }


      
      
        24
      
      
        25
      
      
        }


      
      
        26
      
      
        27
      
       link_inode =
      
         inode;


      
      
        28
      
      
        29
      
      
        if
      
       (!__is_inode_hashed (inode)) {
      
        //
      
      
        此inode是否有hash链表
      
      
        30
      
      
        31
      
      
        if
      
       (!iatt)
      
        //
      
      
        属性值不能为null
      
      
        32
      
      
        33
      
      
        return
      
      
         NULL;


      
      
        34
      
      
        35
      
      
        if
      
       (uuid_is_null (iatt->ia_gfid))
      
        //
      
      
        uuid不能为null
      
      
        36
      
      
        37
      
      
        return
      
      
         NULL;


      
      
        38
      
      
        39
      
             uuid_copy (inode->gfid, iatt->ia_gfid);
      
        //
      
      
        复制uuid到inode节点
      
      
        40
      
      
        41
      
              inode->ino        = iatt->ia_ino;
      
        //
      
      
        赋值inode节点编号
      
      
        42
      
      
        43
      
              inode->ia_type    = iatt->ia_type;
      
        //
      
      
        inode节点的类型
      
      
        44
      
      
        45
      
             old_inode = __inode_find (table, inode->gfid);
      
        //
      
      
        在inode表里面查找是否存在此inode节点
      
      
        46
      
      
        47
      
      
        if
      
      
         (old_inode) {


      
      
        48
      
      
        49
      
                link_inode = old_inode;
      
        //
      
      
        存在
      
      
        50
      
      
        51
      
              } 
      
        else
      
      
         {


      
      
        52
      
      
        53
      
                 __inode_hash (inode);
      
        //
      
      
        不存在进行hash并进入hash链表
      
      
        54
      
      
        55
      
      
            }


      
      
        56
      
      
        57
      
      
        }


      
      
        58
      
      
        59
      
      
        if
      
       (parent) {
      
        //
      
      
        父节点不为null
      
      
        60
      
      
        61
      
           old_dentry = __dentry_grep (table, parent, name);
      
        //
      
      
        搜索目录项
      
      
        62
      
      
        63
      
      
        if
      
       (!old_dentry || old_dentry->inode != link_inode) {
      
        //
      
      
        没有找到目录项或目录项对应的inode不是当前要链接的inode
      
      
        64
      
      
        65
      
               dentry = __dentry_create (link_inode, parent, name);
      
        //
      
      
        创建一个目录项
      
      
        66
      
      
        67
      
      
        if
      
       (old_inode && __is_dentry_cyclic (dentry)) {
      
        //
      
      
        如果inode已经存在并且目录项是循环的
      
      
        68
      
      
        69
      
                   __dentry_unset (dentry);
      
        //
      
      
        取消设置目录项
      
      
        70
      
      
        71
      
      
        return
      
      
         NULL;


      
      
        72
      
      
        73
      
      
                  }


      
      
        74
      
      
        75
      
           __dentry_hash (dentry);
      
        //
      
      
        hash此目录项
      
      
        76
      
      
        77
      
      
        if
      
      
         (old_dentry)


      
      
        78
      
      
        79
      
             __dentry_unset (old_dentry);
      
        //
      
      
        取消设置老的目录项
      
      
        80
      
      
        81
      
      
          }


      
      
        82
      
      
        83
      
      
         }


      
      
        84
      
      
        85
      
      
        return
      
      
         link_inode;


      
      
        86
      
      
        87
      
       }

这个函数比较复杂，主要涉及到目录项的操作，目录项本身有 inode 节点，也有父节点，还包括很多属于此目录项的 inode 节点，这里使用链表进行管理，还有可能维护一个 hash 链表。对于目录项的各种具体操作就不再详细分析了。毕竟这次的主要任务是分析 nfs 协议的实现，所以 init 函数分析到此结束。

4.nfs_init_versions 函数

前面主要完成了 nfs 协议相关信息的静态内容初始化，这个函数会根据前面的初始化信息执行各个 nfs 协议版本的初始化函数 init ，然后会注册监听事件来监听客户端的请求。这个函数的实现如下：

      
         1
      
      
        int
      
       nfs_init_versions (
      
        struct
      
       nfs_state *nfs, xlator_t *
      
        this
      
      
        )


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        struct
      
       nfs_initer_list          *version = NULL;
      
        //
      
      
        nfs各个版本协议初始化函数列表
      
      
         6
      
      
         7
      
      
        struct
      
       nfs_initer_list          *tmp =
      
         NULL;


      
      
         8
      
      
         9
      
          rpcsvc_program_t                *prog = NULL;
      
        //
      
      
        定义个描述rpc服务程序的结构体
      
      
        10
      
      
        11
      
      
        int
      
                                   ret = -
      
        1
      
      
        ;


      
      
        12
      
      
        13
      
      
        struct
      
       list_head                *versions =
      
         NULL;


      
      
        14
      
      
        15
      
         versions = &nfs->versions;
      
        //
      
      
        需要遍历的协议链表
      
      
        16
      
      
        17
      
          list_for_each_entry_safe (version, tmp, versions, list) {
      
        //
      
      
        遍历所有的nfs协议版本
      
      
        18
      
      
        19
      
           prog = version->init (
      
        this
      
      );
      
        //
      
      
        调用协议版本的初始化函数（前面已经分析了具体的初始化过程）
      
      
        20
      
      
        21
      
             prog->actorxl = 
      
        this
      
      ;
      
        //
      
      
        指定属于哪一个xlator
      
      
        22
      
      
        23
      
              version->program = prog;
      
        //
      
      
        保存初始化函数返回描述协议的rpc服务程序结构体
      
      
        24
      
      
        25
      
      
        if
      
       (nfs->override_portnum)
      
        //
      
      
        是否覆盖端口
      
      
        26
      
      
        27
      
               prog->progport = nfs->override_portnum;
      
        //
      
      
        覆盖端口
      
      
        28
      
      
        29
      
             ret = nfs_rpcsvc_program_register (nfs->rpcsvc, *prog);
      
        //
      
      
        注册rpc服务监听端口
      
      
        30
      
      
        31
      
      
          }


      
      
        32
      
      
        33
      
      
        return
      
      
         ret;


      
      
        34
      
      
        35
      
       }

这个函数的作用主要是初始化与 rpc 服务相关的内容，某个 nfs 版本的协议初始化在前面已经分析了，所以这个函数中重点需要分析的内容就是注册 rpc 服务的函数了，先看看实现，如下：

      
         1
      
      
        int
      
       nfs_rpcsvc_program_register (rpcsvc_t *
      
        svc, rpcsvc_program_t program)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       rpcsvc_program_t        *newprog =
      
         NULL;


      
      
         6
      
      
         7
      
         rpcsvc_stage_t          *selectedstage =
      
         NULL;


      
      
         8
      
      
         9
      
      
        int
      
                           ret = -
      
        1
      
      
        ;


      
      
        10
      
      
        11
      
       newprog = GF_CALLOC (
      
        1
      
      , 
      
        sizeof
      
      (*newprog),gf_common_mt_rpcsvc_program_t);
      
        //
      
      
        分配资源
      
      
        12
      
      
        13
      
          memcpy (newprog, &program, 
      
        sizeof
      
       (program));
      
        //
      
      
        拷贝
      
      
        14
      
      
        15
      
          INIT_LIST_HEAD (&newprog->proglist);
      
        //
      
      
        初始化程序链表
      
      
        16
      
      
        17
      
          list_add_tail (&newprog->proglist, &svc->allprograms);
      
        //
      
      
        添加到所有程序链表的末尾
      
      
        18
      
      
        19
      
        selectedstage = nfs_rpcsvc_select_stage (svc);
      
        //
      
      
        选择rpc服务阶段程序
      
      
        20
      
      
        21
      
        ret = nfs_rpcsvc_stage_program_register (selectedstage, newprog);
      
        //
      
      
        执行rpc阶段程序的注册
      
      
        22
      
      
        23
      
          ret = nfs_rpcsvc_program_register_portmap (svc, newprog);
      
        //
      
      
        注册本地端口映射服务
      
      
        24
      
      
        25
      
      
        return
      
      
         ret;


      
      
        26
      
      
        27
      
       }

真正实现监听功能的是在函数 nfs_rpcsvc_stage_program_register 中，所以下面继续看这个函数的实现：

      
         1
      
      
        int
      
       nfs_rpcsvc_stage_program_register (rpcsvc_stage_t *stg, rpcsvc_program_t *
      
        newprog)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
         rpcsvc_conn_t           *newconn =
      
         NULL;


      
      
         6
      
      
         7
      
          rpcsvc_t                *svc =
      
         NULL;


      
      
         8
      
      
         9
      
          svc = nfs_rpcsvc_stage_service (stg);
      
        //
      
      
        获得阶段服务程序
      
      
        10
      
      
        11
      
          newconn = nfs_rpcsvc_conn_listen_init (svc, newprog);
      
        //
      
      
        创建监听的socket


      
      
        12
      
      
        13
      
      
        //
      
      
        注册监听事件发生执行的函数
      
      
        14
      
      
        15
      
      
        if
      
       ((nfs_rpcsvc_stage_conn_associate (stg, newconn, nfs_rpcsvc_conn_listening_handler, newconn)) == -
      
        1
      
      
        ) {


      
      
        16
      
      
        17
      
      
          }


      
      
        18
      
      
        19
      
      
        return
      
      
        0
      
      
        ;


      
      
        20
      
      
        21
      
       }

这个函数调用 nfs_rpcsvc_conn_listen_init 函数创建监听使用的 socket 并且绑定，开始监听客户端的请求，并且初始化一些链接相关的状态信息。具体实现如下：

      
         1
      
       rpcsvc_conn_t * nfs_rpcsvc_conn_listen_init (rpcsvc_t *svc, rpcsvc_program_t *
      
        newprog)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       rpcsvc_conn_t  *conn =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
                   sock = -
      
        1
      
      
        ;


      
      
         8
      
      
         9
      
      
        //
      
      
        创建监听socket对象并且设置相应参数和绑定到对应端口，例如地址重用、设置为非阻塞等
      
      
        10
      
      
        11
      
         sock = nfs_rpcsvc_socket_listen (newprog->progaddrfamily, newprog->proghost, newprog->
      
        progport);


      
      
        12
      
      
        13
      
       conn = nfs_rpcsvc_conn_init (svc, sock);
      
        //
      
      
        初始化链接的核心，例如分配链接池等资源
      
      
        14
      
      
        15
      
       nfs_rpcsvc_conn_state_init (conn);
      
        //
      
      
        初始化rpc为已连接状态
      
      
        16
      
      
        17
      
      
        return
      
      
         conn;


      
      
        18
      
      
        19
      
       }

在 nfs_rpcsvc_stage_program_register 中还有一个很重要的函数是 nfs_rpcsvc_stage_conn_associate ，它关联一些当有链接请求来的时候执行的函数，这里主要是指客户端链接来的时候服务器响应事件时执行的函数。看看是怎么注册和关联的，如下：

      
        1
      
       conn->stage =
      
         stg;


      
      
        2
      
      
        3
      
          conn->eventidx = event_register (stg->eventpool, conn->sockfd, handler, data, 
      
        1
      
      , 
      
        0
      
      );

终于到达事件处理的核心函数之一了： event_register 事件注册函数并且返回注册后 id 值。这个函数中就一句重点代码：

      
        1
      
       event_pool->ops->event_register (event_pool, fd, handler, data, poll_in, poll_out);

由前面初始化过程可知，这里的 event_pool->ops 的值如下：

      
         1
      
      
        static
      
      
        struct
      
       event_ops event_ops_epoll =
      
         {


      
      
         2
      
      
         3
      
               .
      
        new
      
                    =
      
         event_pool_new_epoll,


      
      
         4
      
      
         5
      
               .event_register   =
      
         event_register_epoll,


      
      
         6
      
      
         7
      
               .event_select_on  =
      
         event_select_on_epoll,


      
      
         8
      
      
         9
      
               .event_unregister =
      
         event_unregister_epoll,


      
      
        10
      
      
        11
      
               .event_dispatch   =
      
         event_dispatch_epoll


      
      
        12
      
      
        13
      
       };

所以这里就是执行 event_register_epoll 函数，这个函数会在 socket 描述符上注册一些事件，然后广播一个条件信号，正在阻塞的线程就会开始执行并开始调用 epoll_wait 等待具体的 IO 事件，当注册的 IO 事件响应以后会调用相应的函数处理，上面是注册了 socket 读取事件，也就是如果有客户端的链接请求到来时会执行这里注册的函数，注册的函数定义如下：

      
         1
      
      
        int
      
       nfs_rpcsvc_conn_listening_handler (
      
        int
      
       fd, 
      
        int
      
       idx, 
      
        void
      
       *data, 
      
        int
      
       poll_in, 
      
        int
      
       poll_out, 
      
        int
      
      
         poll_err)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
       rpcsvc_conn_t           *newconn =
      
         NULL;


      
      
         6
      
      
         7
      
         rpcsvc_stage_t          *selectedstage =
      
         NULL;


      
      
         8
      
      
         9
      
      
        int
      
                           ret = -
      
        1
      
      
        ;


      
      
        10
      
      
        11
      
         rpcsvc_conn_t           *conn =
      
         NULL;


      
      
        12
      
      
        13
      
          rpcsvc_t                *svc =
      
         NULL;


      
      
        14
      
      
        15
      
      
        if
      
       (!poll_in)
      
        //
      
      
        只处理读取的IO，这里是指客户端发出的连接请求
      
      
        16
      
      
        17
      
      
        return
      
      
        0
      
      
        ;


      
      
        18
      
      
        19
      
       conn = (rpcsvc_conn_t *)data;
      
        //
      
      
        得到传输过来的数据
      
      
        20
      
      
        21
      
         svc = nfs_rpcsvc_conn_rpcsvc (conn);
      
        //
      
      
        得到该连接所属的rpc服务（rpcsvc_t）
      
      
        22
      
      
        23
      
         newconn = nfs_rpcsvc_conn_accept_init (svc, fd);
      
        //
      
      
        接收链接请求并且返回一个新的套接字用于通信
      
      
        24
      
      
        25
      
       selectedstage = nfs_rpcsvc_select_stage (svc);
      
        //
      
      
        选择一个rpc阶段处理程序（链接阶段）


      
      
        26
      
      
        27
      
      
        //
      
      
        已经接受连接，需要关联下一个阶段的事件处理程序：指的应该就是数据传输相关，如读写等
      
      
        28
      
      
        29
      
        ret =
      
         nfs_rpcsvc_stage_conn_associate (selectedstage, newconn, nfs_rpcsvc_conn_data_handler, newconn);


      
      
        30
      
      
        31
      
      
        return
      
      
         ret;


      
      
        32
      
      
        33
      
       }

这个函数的功能就是接受客户端的连接并建立新的套接字，用于以后单独与客户端通信（传输数据）。当然，这个新的套接字也需要注册相应的读写等 epoll 事件，注册流程和监听事件完全一样，只是参数（ socket 和事件类型等）不同而已。这些事件的处理函数就是这里传递的函数指针 nfs_rpcsvc_conn_data_handler ，当有数据传输时就会执行这个函数中的代码，看看它是怎么处理的：

      
         1
      
      
        int
      
       nfs_rpcsvc_conn_data_handler (
      
        int
      
       fd, 
      
        int
      
       idx, 
      
        void
      
       *data, 
      
        int
      
       poll_in, 
      
        int
      
       poll_out, 
      
        int
      
      
         poll_err)


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
        rpcsvc_conn_t   *conn =
      
         NULL;


      
      
         6
      
      
         7
      
      
        int
      
                   ret = 
      
        0
      
      
        ;


      
      
         8
      
      
         9
      
          conn = (rpcsvc_conn_t *
      
        )data;


      
      
        10
      
      
        11
      
      
        if
      
      
         (poll_out)


      
      
        12
      
      
        13
      
           ret = nfs_rpcsvc_conn_data_poll_out (conn);
      
        //
      
      
        处理可写事件（套接字可写）
      
      
        14
      
      
        15
      
      
        if
      
      
         (poll_err) {


      
      
        16
      
      
        17
      
              ret = nfs_rpcsvc_conn_data_poll_err (conn);
      
        //
      
      
        处理套接字出错事件
      
      
        18
      
      
        19
      
      
        return
      
      
        0
      
      
        ;


      
      
        20
      
      
        21
      
      
           }


      
      
        22
      
      
        23
      
      
        if
      
       ((ret != -
      
        1
      
      ) && poll_in) {
      
        //
      
      
        如果处理可写事件失败以后就不处理可读事件了
      
      
        24
      
      
        25
      
            ret = 
      
        0
      
      
        ;


      
      
        26
      
      
        27
      
             ret = nfs_rpcsvc_conn_data_poll_in (conn);
      
        //
      
      
        处理可读事件
      
      
        28
      
      
        29
      
      
          }


      
      
        30
      
      
        31
      
      
        if
      
       (ret == -
      
        1
      
      
        )


      
      
        32
      
      
        33
      
            nfs_rpcsvc_conn_data_poll_err (conn);
      
        //
      
      
        出错处理
      
      
        34
      
      
        35
      
      
        return
      
      
        0
      
      
        ;


      
      
        36
      
      
        37
      
       }

这个函数基本上就是处理客户端与服务器建立连接以后的各种可读、可写事件，具体的处理在各个函数中，这里就不再详细分析了，相信到达这里以后就不再有其他的难点了。

到此为止这个 nfs 协议初始化部分分析完毕！

第三节、 fini 函数

这个函数和 init 函数做的基本上是完全相反的工作，主要工作就是卸载掉 nfs 的各个版本的协议并且释放各种资源，实现如下：

      
         1
      
      
        int
      
       fini (xlator_t *
      
        this
      
      
        )


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        struct
      
       nfs_state        *nfs =
      
         NULL;


      
      
         6
      
      
         7
      
         nfs = (
      
        struct
      
       nfs_state *)
      
        this
      
      ->
      
        private
      
      ;
      
        //
      
      
        从xlator获得私有数据转换为struct nfs_state结构体
      
      
         8
      
      
         9
      
           nfs_deinit_versions (&nfs->versions, 
      
        this
      
      );
      
        //
      
      
        卸载协议
      
      
        10
      
      
        11
      
      
        return
      
      
        0
      
      
        ;


      
      
        12
      
      
        13
      
       }

这个函数代码简单：首先从 xlator 得到 struct nfs_state 结构体数据（这是在初始化的时候设置的），然后调用函数 nfs_deinit_versions 来完成协议的具体卸载。卸载函数定义如下：

      
         1
      
      
        int
      
       nfs_deinit_versions (
      
        struct
      
       list_head *versions, xlator_t *
      
        this
      
      
        )


      
      
         2
      
      
         3
      
      
        {


      
      
         4
      
      
         5
      
      
        struct
      
       nfs_initer_list          *version =
      
         NULL;


      
      
         6
      
      
         7
      
      
        struct
      
       nfs_initer_list          *tmp =
      
         NULL;


      
      
         8
      
      
         9
      
      
        struct
      
       nfs_state                *nfs =
      
         NULL;


      
      
        10
      
      
        11
      
         nfs = (
      
        struct
      
       nfs_state *)
      
        this
      
      ->
      
        private
      
      
        ;


      
      
        12
      
      
        13
      
         list_for_each_entry_safe (version, tmp, versions, list) {
      
        //
      
      
        遍历所有版本的协议
      
      
        14
      
      
        15
      
      
        if
      
       (version->
      
        program)


      
      
        16
      
      
        17
      
               nfs_rpcsvc_program_unregister (nfs->rpcsvc, *(version->program));
      
        //
      
      
        注销rpc服务过程
      
      
        18
      
      
        19
      
       list_del (&version->list);
      
        //
      
      
        从版本链表中依次删除
      
      
        20
      
      
        21
      
       GF_FREE (version);
      
        //
      
      
        释放内存资源
      
      
        22
      
      
        23
      
      
          }


      
      
        24
      
      
        25
      
      
        return
      
      
        0
      
      
        ;


      
      
        26
      
      
        27
      
       }

整个过程都比较简单，就不再详细分析卸载过程了。

Glusterfs之nfs模块源码分析（中）之Glusterfs实现NFS服务器

更多文章、技术交流、商务合作、联系博主

微信扫码或搜索：z360901061

微信扫一扫加我为好友

QQ号联系： 360901061

您的支持是博主写作最大的动力，如果您喜欢我的文章，感觉我的文章对您有帮助，请用微信扫描下面二维码支持博主2元、5元、10元、20元等您想捐的金额吧，狠狠点击下面给点支持吧，站长非常感激您！手机微信长按不能支付解决办法：请将微信支付二维码保存到相册，切换到微信，然后点击微信右上角扫一扫功能，选择支付二维码完成支付。

【本文对您有帮助就好】元

2元

5元

10元

20元

自定义