• ADADADADAD

    MYSQL INNODB 组合索引分支节点数据解析[ mysql数据库 ]

    mysql数据库 时间:2024-12-03 12:11:43

    作者:文/会员上传

    简介:

    1、本文证明组合索引的所有键值在分支节点(非叶子结点也进行了存储)。
    2、本文给出B+ 索引如何进行验证其B+树结构

    关于B树结构(不是B+树)可以参考:
    http://blog.itpub.net/

    以下为本文的正文内容,内容仅供参考!本站为公益性网站,复制本文以及下载DOC文档全部免费。

    1、本文证明组合索引的所有键值在分支节点(非叶子结点也进行了存储)。
    2、本文给出B+ 索引如何进行验证其B+树结构

    关于B树结构(不是B+树)可以参考:
    http://blog.itpub.net/7728585/viewspace-2126929/

    脚本:
    mysql> create table testzh(id int primary key auto_increment ,id2 int,id3 int,name varchar(20), key(id2,id3));
    Query OK, 0 rows affected (0.07 sec)


    delimiter //
    create procedure ins()
    begin
    declare i int;
    set i=0;
    while i<100000 do
    insert into testzh(id2,id3,name) values(FLOOR((RAND()*100000)),FLOOR((RAND()*100000)),'gaopeng');
    set i=i+1;
    end while;
    end;
    //
    delimiter ;


    写一个程序 主要读取下面几位每个块的:
    64-66字节:B+层次,0是叶子结点,向上分别是1,2.... 如果是三层结构的则根结点为层次为2,分支为1,叶子结点为0
    66-74字节:索引ID,对应INNODB_SYS_INDEXES 的INDEX_ID字段
    24-26字节:块类型,我们主要查看0X45BF的类型是数据类型

    程序附在最后,下面是跑出来的结果我的:
    key(id2,id3)的INDEX_ID是
    mysql> select * from information_schema.INNODB_SYS_INDEXES where TABLE_ID=132;
    +----------+---------+----------+------+----------+---------+-------+-----------------+
    | INDEX_ID | NAME| TABLE_ID | TYPE | N_FIELDS | PAGE_NO | SPACE | MERGE_THRESHOLD |
    +----------+---------+----------+------+----------+---------+-------+-----------------+
    | 160 | PRIMARY | 132 |3 |1 |3 |207 | 50 |
    | 161 | id2 | 132 |0 |2 |4 |207 | 50 |
    +----------+---------+----------+------+----------+---------+-------+-----------------+




    注意程序计数块从0开始,为贴近C/C++数组规定
    [root@testmy test]# ./b_level testzh.ibd|grep 161
    Block id is 4:Index page no is 161 : B+ Tree Level is 1
    Block id is 8:Index page no is 161 : B+ Tree Level is 0
    Block id is 9:Index page no is 161 : B+ Tree Level is 0
    Block id is 12:Index page no is 161 : B+ Tree Level is 0
    Block id is 14:Index page no is 161 : B+ Tree Level is 0
    Block id is 19:Index page no is 161 : B+ Tree Level is 0
    Block id is 20:Index page no is 161 : B+ Tree Level is 0
    Block id is 21:Index page no is 161 : B+ Tree Level is 0
    Block id is 22:Index page no is 161 : B+ Tree Level is 0
    Block id is 31:Index page no is 161 : B+ Tree Level is 0
    Block id is 32:Index page no is 161 : B+ Tree Level is 0
    Block id is 34:Index page no is 161 : B+ Tree Level is 0
    Block id is 35:Index page no is 161 : B+ Tree Level is 0
    Block id is 36:Index page no is 161 : B+ Tree Level is 0
    Block id is 37:Index page no is 161 : B+ Tree Level is 0
    Block id is 38:Index page no is 161 : B+ Tree Level is 0
    Block id is 41:Index page no is 161 : B+ Tree Level is 0
    Block id is 53:Index page no is 161 : B+ Tree Level is 0
    Block id is 54:Index page no is 161 : B+ Tree Level is 0
    Block id is 55:Index page no is 161 : B+ Tree Level is 0
    ...........
    Block id is 348:Index page no is 161 : B+ Tree Level is 0
    Block id is 349:Index page no is 161 : B+ Tree Level is 0
    Block id is 350:Index page no is 161 : B+ Tree Level is 0
    Block id is 351:Index page no is 161 : B+ Tree Level is 0
    Block id is 352:Index page no is 161 : B+ Tree Level is 0
    Block id is 353:Index page no is 161 : B+ Tree Level is 0
    Block id is 354:Index page no is 161 : B+ Tree Level is 0
    Block id is 355:Index page no is 161 : B+ Tree Level is 0
    Block id is 356:Index page no is 161 : B+ Tree Level is 0
    Block id is 357:Index page no is 161 : B+ Tree Level is 0
    Block id is 358:Index page no is 161 : B+ Tree Level is 0
    Block id is 359:Index page no is 161 : B+ Tree Level is 0
    Block id is 360:Index page no is 161 : B+ Tree Level is 0
    Block id is 361:Index page no is 161 : B+ Tree Level is 0
    Block id is 362:Index page no is 161 : B+ Tree Level is 0
    Block id is 363:Index page no is 161 : B+ Tree Level is 0
    Block id is 364:Index page no is 161 : B+ Tree Level is 0
    这就是我161索引key(id2,id3)全部块,
    这里我们看到我们的这个组合索引是 2层的 B+树结构
    Block id is 4:Index page no is 161 : B+ Tree Level is 1
    这个块就是根结点,那么我们需要读取它需要用到我写过的另外一个小程序
    ,用于读取其数据,但是这个程序写死了读取2个INT类型数据如下,按照顺序给出
    并且给出偏移量,并且给出指向的块(注意这个指向的块是我猜测的,随后验证):

    那我们就读取BLOCK 4(dataview 程序放到最后)
    [root@testmy test]# ./dataview testzh.ibd 4
    Index_no is:161
    find first one record!
    B:23,A:59613,offset:126,leaf block:8
    B:736,A:31951,offset:2106,leaf block:249
    B:1591,A:58218,offset:1072,leaf block:203
    B:2390,A:34725,offset:2326,leaf block:324
    B:3231,A:16448,offset:500,leaf block:54
    B:3647,A:95182,offset:3118,leaf block:360
    B:4050,A:42271,offset:1776,leaf block:235
    B:4751,A:62810,offset:1028,leaf block:201
    B:5614,A:53731,offset:1886,leaf block:240
    B:6482,A:29372,offset:346,leaf block:34
    B:7216,A:26606,offset:2524,leaf block:333
    B:8028,A:78263,offset:1138,leaf block:206
    B:8777,A:61401,offset:2238,leaf block:255
    B:9562,A:34379,offset:698,leaf block:63
    B:10353,A:93823,offset:2150,leaf block:251
    B:11114,A:50825,offset:1358,leaf block:216
    B:11859,A:57029,offset:2634,leaf block:338
    B:12611,A:67208,offset:214,leaf block:19
    B:13378,A:43201,offset:2062,leaf block:247
    B:14179,A:72734,offset:940,leaf block:197
    B:14972,A:39942,offset:2458,leaf block:330
    B:15743,A:9319,offset:632,leaf block:60
    B:16488,A:33875,offset:2392,leaf block:327
    B:17320,A:32881,offset:1204,leaf block:209
    B:18117,A:6361,offset:2084,leaf block:248
    B:18966,A:70177,offset:368,leaf block:35
    B:19722,A:16497,offset:1710,leaf block:232
    B:20563,A:60667,offset:896,leaf block:195
    B:21357,A:38077,offset:2040,leaf block:246
    B:22151,A:15974,offset:522,leaf block:55
    B:22612,A:89791,offset:3008,leaf block:355
    B:23070,A:74154,offset:1534,leaf block:224
    B:23935,A:42535,offset:852,leaf block:193
    B:24814,A:51733,offset:1644,leaf block:229
    B:25714,A:232,offset:170,leaf block:12
    B:26468,A:119,offset:2678,leaf block:340
    B:27177,A:3573,offset:1402,leaf block:218
    B:27870,A:22983,offset:2700,leaf block:341
    B:28679,A:71426,offset:676,leaf block:62
    B:29439,A:77570,offset:2216,leaf block:254
    B:30160,A:66775,offset:1424,leaf block:219
    B:30865,A:17274,offset:2722,leaf block:342
    B:31611,A:25970,offset:390,leaf block:36
    B:32377,A:56419,offset:1930,leaf block:242
    B:33117,A:8818,offset:1292,leaf block:213
    B:33838,A:19538,offset:2898,leaf block:350
    B:34489,A:92156,offset:742,leaf block:129
    B:35286,A:6546,offset:2194,leaf block:253
    B:36076,A:40723,offset:1314,leaf block:214
    B:36763,A:20769,offset:2876,leaf block:349
    B:37450,A:46558,offset:236,leaf block:20
    B:38214,A:7960,offset:2568,leaf block:335
    B:38945,A:498,offset:984,leaf block:199
    B:39726,A:40552,offset:2260,leaf block:321
    B:40462,A:1439,offset:588,leaf block:58
    B:41208,A:19781,offset:2612,leaf block:337
    B:41939,A:99119,offset:1248,leaf block:211
    B:42637,A:26247,offset:2436,leaf block:329
    B:43441,A:1592,offset:434,leaf block:38
    B:44198,A:74037,offset:2480,leaf block:331
    B:44889,A:50243,offset:1226,leaf block:210
    B:45599,A:7689,offset:2788,leaf block:345
    B:46378,A:44374,offset:786,leaf block:131
    B:47200,A:3361,offset:2370,leaf block:326
    B:47932,A:52288,offset:1336,leaf block:215
    B:48736,A:82841,offset:2128,leaf block:250
    B:49492,A:68781,offset:148,leaf block:9
    B:50257,A:38011,offset:2348,leaf block:325
    B:51074,A:96951,offset:1446,leaf block:220
    B:51847,A:62231,offset:2744,leaf block:343
    B:52618,A:31192,offset:654,leaf block:61
    B:53362,A:72348,offset:2590,leaf block:336
    B:54029,A:82275,offset:1380,leaf block:217
    B:54788,A:28840,offset:2546,leaf block:334
    B:55538,A:98163,offset:324,leaf block:32
    B:56313,A:20704,offset:2282,leaf block:322
    B:57065,A:90078,offset:1116,leaf block:205
    B:57525,A:17975,offset:3096,leaf block:359
    B:57973,A:52757,offset:1798,leaf block:236
    B:58427,A:8888,offset:3140,leaf block:361
    B:58890,A:65370,offset:764,leaf block:130
    B:59627,A:28852,offset:1996,leaf block:320
    B:60454,A:26779,offset:1160,leaf block:207
    B:61261,A:35533,offset:2304,leaf block:323
    B:62009,A:21826,offset:280,leaf block:22
    B:62517,A:26392,offset:2986,leaf block:354
    B:62950,A:3417,offset:1556,leaf block:225
    B:63882,A:92888,offset:874,leaf block:194
    B:64651,A:5358,offset:1732,leaf block:233
    B:65511,A:32339,offset:544,leaf block:56
    B:66290,A:26183,offset:1952,leaf block:243
    B:67063,A:60386,offset:1182,leaf block:208
    B:67767,A:48347,offset:2502,leaf block:332
    B:68527,A:49114,offset:412,leaf block:37
    B:69290,A:16760,offset:1864,leaf block:239
    B:70112,A:32543,offset:1050,leaf block:202
    B:70941,A:92527,offset:1908,leaf block:241
    B:71758,A:23571,offset:610,leaf block:59
    B:72210,A:32172,offset:3030,leaf block:356
    B:72689,A:9869,offset:1666,leaf block:230
    B:73155,A:27492,offset:3162,leaf block:362
    B:73580,A:84669,offset:918,leaf block:196
    B:74380,A:61469,offset:1754,leaf block:234
    B:75228,A:21920,offset:192,leaf block:14
    B:75625,A:16519,offset:3052,leaf block:357
    B:76082,A:45066,offset:1600,leaf block:227
    B:76901,A:3530,offset:962,leaf block:198
    B:77370,A:45042,offset:3184,leaf block:363
    B:77786,A:63188,offset:1688,leaf block:231
    B:78604,A:5492,offset:566,leaf block:57
    B:79500,A:6597,offset:1842,leaf block:238
    B:80265,A:85811,offset:1094,leaf block:204
    B:81035,A:6126,offset:2018,leaf block:245
    B:81911,A:21386,offset:302,leaf block:31
    B:82717,A:10268,offset:1622,leaf block:228
    B:83172,A:46978,offset:3206,leaf block:364
    B:83602,A:89835,offset:830,leaf block:192
    B:84063,A:98697,offset:2964,leaf block:353
    B:84571,A:9764,offset:1578,leaf block:226
    B:85033,A:59776,offset:3074,leaf block:358
    B:85471,A:83888,offset:478,leaf block:53
    B:86224,A:39031,offset:2172,leaf block:252
    B:86980,A:61579,offset:1006,leaf block:244
    B:87142,A:42336,offset:1974,leaf block:200
    B:87905,A:39935,offset:2656,leaf block:339
    B:88630,A:74063,offset:258,leaf block:21
    B:89335,A:42509,offset:2810,leaf block:346
    B:90013,A:10693,offset:1490,leaf block:222
    B:90671,A:21582,offset:2920,leaf block:351
    B:91364,A:38857,offset:720,leaf block:128
    B:92062,A:98705,offset:2414,leaf block:328
    B:92852,A:82534,offset:1270,leaf block:212
    B:93665,A:57106,offset:1820,leaf block:237
    B:94450,A:36182,offset:456,leaf block:41
    B:95109,A:82179,offset:2854,leaf block:348
    B:95803,A:39017,offset:1468,leaf block:221
    B:96562,A:53131,offset:2832,leaf block:347
    B:97236,A:69746,offset:808,leaf block:132
    B:97963,A:33843,offset:2766,leaf block:344
    B:98709,A:78567,offset:1512,leaf block:223
    B:99367,A:91536,offset:2942,leaf block:352


    这里B代表是ID1的值,A代表是ID2的值,那么这里我们
    证明了在B+数的分支节点存储了组合索引的全部键值,
    这里存储的是在叶子结点中物理位置的开始值
    随便查询一下:
    mysql> select * from testzh where id2=23;
    +-------+------+-------+---------+
    | id| id2 | id3| name|
    +-------+------+-------+---------+
    | 99428 |23 | 9079 | gaopeng |
    |378 |23 | 59613 | gaopeng |
    | 90301 |23 | 93864 | gaopeng |
    +-------+------+-------+---------+
    3 rows in set (0.00 sec)


    mysql> select * from testzh where id2=736;
    +-------+------+-------+---------+
    | id| id2 | id3| name|
    +-------+------+-------+---------+
    | 2716 | 736 | 31951 | gaopeng |
    | 95328 | 736 | 53286 | gaopeng |
    | 75440 | 736 | 85983 | gaopeng |
    +-------+------+-------+---------+
    3 rows in set (0.00 sec)


    mysql> select * from testzh where id2=1591;
    +-------+------+-------+---------+
    | id| id2 | id3| name|
    +-------+------+-------+---------+
    | 10056 | 1591 | 58218 | gaopeng |
    +-------+------+-------+---------+


    可以看到这些值都是有的。


    然后我们更进一步,为了算出每一行占用的空间,我需要取出根结点实际物理空间排序使用:
    ./dataview testzh.ibd 4 |awk -F ',' '{print $3}'|awk -F ':' '{print $2}'|sort -n
    得出
    126
    148
    170
    192
    214
    236
    258
    ........
    3074
    3096
    3118
    3140
    3162
    3184
    3206


    因为字段字节固定那么算出根结点中,一行数据占用的空间为22字节,6字节的行头开销,8字节的数据开销,剩下的8字节
    为一个指针(我只是猜测最后2个字节为块号其他6个字节未知),那么我们可以验证查看指针指向块的数据是否包含了我们根
    结点的指向数据,我们知道B+树中,所有叶子结点包含了分支节点的全部数据,一般来讲只要我们验证分支节点的指向数据
    在叶子结点存在,则证明指向的块正确。查询的话,一旦定位到了页块那么剩下的工作就是二分法进行查找数据了。
    因为我并没搞懂指针所有位数的含义,但是最后几个字节看起来像块号,所以这样验证。
    (当然懂得源码的人来看我的方法太LOW了,我自己现在确实没有能力看懂源码)。


    我们来验证一下指向的块中是否包含了指向的数据,这里leaf block没有意义因为是叶子结点
    写一个脚本:
    test.sh
    ./dataview testzh.ibd 8 |grep B:23,A:59613
    ./dataview testzh.ibd 249|grep B:736,A:31951
    ./dataview testzh.ibd 203|grep B:1591,A:58218
    ./dataview testzh.ibd 324|grep B:2390,A:34725
    ./dataview testzh.ibd 54|grep B:3231,A:16448
    ./dataview testzh.ibd 360|grep B:3647,A:95182
    ./dataview testzh.ibd 235|grep B:4050,A:42271
    ./dataview testzh.ibd 201|grep B:4751,A:62810
    ./dataview testzh.ibd 240|grep B:5614,A:53731
    ./dataview testzh.ibd 34|grep B:6482,A:29372
    ./dataview testzh.ibd 333|grep B:7216,A:26606
    ......
    一共验证141行


    [root@testmy test]# sh test.sh
    B:23,A:59613,offset:126,leaf block:24
    B:736,A:31951,offset:126,leaf block:24
    B:1591,A:58218,offset:126,leaf block:24
    B:2390,A:34725,offset:126,leaf block:24
    B:3231,A:16448,offset:126,leaf block:24
    B:3647,A:95182,offset:126,leaf block:24
    B:4050,A:42271,offset:126,leaf block:24
    B:4751,A:62810,offset:126,leaf block:24
    B:5614,A:53731,offset:126,leaf block:24
    B:6482,A:29372,offset:126,leaf block:24
    B:7216,A:26606,offset:126,leaf block:24
    B:8028,A:78263,offset:126,leaf block:24
    B:8777,A:61401,offset:126,leaf block:24
    B:9562,A:34379,offset:126,leaf block:24
    B:10353,A:93823,offset:126,leaf block:24
    B:11114,A:50825,offset:126,leaf block:24
    ........
    输出141
    如下:
    [root@testmy test]# sh test.sh |wc -l
    141
    [root@testmy test]# cat test.sh|wc -l
    141


    那我们可以发现所有的根结点的记录在叶子结点都找到了,而且我们可以看到偏移量都是物理偏移量的
    126,及物理上的第一个记录(因为我这里没有delete过数据)则我们也验证了分支节点存储是物理位置
    上块中的第一个值,及这里的偏移量126的值。

    最后我们取5个值画一个图,也许大家就明白了 上面是根结点,下面是叶子结点:
    (注意叶子节点都是取得开始值,千万不要以为叶子节点就一个值,并且实际上同一层B+树是一个双向链表
    关于双向链表参考:http://blog.itpub.net/7728585/viewspace-2125114/



    水平有限 如果有误请告知


    ./b_level 工具源码:

    点击(此处)折叠或打开

      #include<stdio.h>
      #include<stdlib.h>
      #include<string.h>


      void* reverse(void* p,int length) //Little_endianreverse
      {
      int i;
      char* s= (char*)(p);
      char* temp = (char*)calloc(1,length);
      memcpy(temp,s,length);


      for(i=0;i<length;i++)
      {
      s[i] = temp[length-1-i];
      }
      free(temp);
      temp=NULL;
      return p;
      }


      int main(int argc, char* argv[])
      {
      FILE* fd;
      short n_lev;
      longn_idx_id;
      unsigned short b_type;
      int i=0;
      long ed;

      if(argc != 2 )
      {
      printf("USEAGE ERROR useage:./tool dbf \n");
      exit(2);
      }

      if(!(fd = fopen(argv[1],"r")))
      {
      perror("error:");
      exit(1);
      }
      fseek(fd,0,SEEK_END);
      ed=ftell(fd);
      fseek(fd,0,0);
      printf("file size is %ld\n",ed);
      while(ftell(fd)<ed)
      {
      fseek(fd,24,1);
      //printf("%d\n",ftell(fd));
      fread(&b_type,2,1,fd);
      fseek(fd,38,1);
      //printf("%d\n",ftell(fd));
      fread(&n_lev,2,1,fd);
      fread(&n_idx_id,8,1,fd);
      i++;
      fseek(fd,16384*i,0);
      reverse(&n_idx_id,8);
      reverse(&n_lev,2);
      if(b_type == (unsigned short)0Xbf45)
      {
      printf("Block id is %d:Index page no is%ld : B+ Tree Level is%d\n",i-1,n_idx_id,n_lev);
      }
      }

      }

    ./dataview 源码,此工具只对2列init数据类型的组合索引读取分支节点有效,因为写死了

    点击(此处)折叠或打开

      #include<stdio.h>
      #include<stdlib.h>
      #include<string.h>

      void* reverse(void* p,int length) //Little_endianreverse
      {
      int i;
      char* s= (char*)(p);
      char* temp = (char*)calloc(1,length);
      memcpy(temp,s,length);


      for(i=0;i<length;i++)
      {
      s[i] = temp[length-1-i];
      }
      free(temp);
      temp=NULL;
      return p;
      }



      int main(int argc,char *argv[])
      {
      FILE* fd;
      long blofset;
      short level;
      long int index_no;
      short initof;
      int B;
      int A;
      unsigned short C;
      int reofset;


      if(argc != 3 )
      {
      printf("USEAGE ERROR useage:./tool dbf pageno\n");
      exit(3);
      }

      if(!(fd = fopen(argv[1],"r")))
      {
      perror("error:");
      exit(1);
      }

      sscanf(argv[2],"%ld",&blofset);
      fseek(fd,blofset*16*1024,SEEK_SET);
      fseek(fd,64,SEEK_CUR);
      fread(&level,2,1,fd);
      fread(&index_no,8,1,fd);
      reverse(&level,2);
      reverse(&index_no,8);
      fseek(fd,23,SEEK_CUR);
      fread(&initof,2,1,fd);
      reverse(&initof,2);
      printf("Index_no is:%ld\n",index_no);
      if(initof != 0 )
      {
      printf("find first one record!\n");
      while(1)
      {
      fseek(fd,initof-2,SEEK_CUR);
      fread(&initof,2,1,fd);
      reverse(&initof,2);
      if(initof == 0)
      {
      break;
      }
      else
      {
      fread(&B,4,1,fd);
      fread(&A,4,1,fd);
      fseek(fd,6,SEEK_CUR);
      fread(&C,2,1,fd);
      fseek(fd,-16,SEEK_CUR);
      reverse(&B,4);
      reverse(&A,4);
      reverse(&C,2);
      A=A^0X80000000;
      B=B^0X80000000;
      printf("B:%d,A:%d,offset:%ld,leaf block:%ld\n",B,A,ftell(fd)-blofset*16*1024,C);
      }

      }
      }
      else
      {
      printf("no record find!\n");
      exit(2);
      }
      }


    MYSQL INNODB 组合索引分支节点数据解析.docx

    将本文的Word文档下载到电脑

    推荐度:

    下载
    热门标签: innodbmysql解析