一些数据表会存储执行过的SQL语句及其digest,比如表events_statements_summary_by_digest中的第二列和第三列。digest其实是一个MD5 hash值,接下来简单介绍下digest的生成过程。

digest是基于一串字节文本做MD5计算出来的hash值,这个字节文本在parser解析SQL时根据识别出来的token和identifier构造。下面以MySQL 5.7的代码为例,简单介绍digest的生成过程。

当一个token被识别时,调用store_token()或store_token_identifier()构造字节文本,

  1. File sql/sql_digest.cc
  2. 71 /**
  3. 72 Store a single token in token array.
  4. 73 */
  5. 74 inline void store_token(sql_digest_storage* digest_storage, uint token)
  6. 75 {
  7. 76 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  8. 77
  9. 78 if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
  10. 79 {
  11. 80 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
  12. 81 dest[0]= token & 0xff;
  13. 82 dest[1]= (token >> 8) & 0xff;
  14. 83 digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
  15. 84 }
  16. 85 else
  17. 86 {
  18. 87 digest_storage->m_full= true;
  19. 88 }
  20. 89 }
  21. 90
  1. File sql/sql_digest.cc
  2. 135 inline void store_token_identifier(sql_digest_storage* digest_storage,
  3. 136 uint token,
  4. 137 size_t id_length, const char *id_name)
  5. 138 {
  6. 139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  7. 140
  8. 141 size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
  9. 142 if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
  10. 143 {
  11. 144 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
  12. 145 /* Write the token */
  13. 146 dest[0]= token & 0xff;
  14. 147 dest[1]= (token >> 8) & 0xff;
  15. 148 /* Write the string length */
  16. 149 dest[2]= id_length & 0xff;
  17. 150 dest[3]= (id_length >> 8) & 0xff;
  18. 151 /* Write the string data */
  19. 152 if (id_length > 0)
  20. 153 memcpy((char *)(dest + 4), id_name, id_length);
  21. 154 digest_storage->m_byte_count+= bytes_needed;
  22. 155 }
  23. 156 else
  24. 157 {
  25. 158 digest_storage->m_full= true;
  26. 159 }
  27. 160 }

可以看到,

  • 前两个字节,函数中的dest[0] / dest[1] ,根据token值计算而来;
  • 第三和第四个字节,dest[2] / dest[3],根据id_length计算而来;
  • 之后的地址存放id_name对应的文本。

store_token()和store_token_identifier()可以被调用多次,从而把不断识别出的token和identifier拼接成一个最终的字节文本,存放在digest_storage->m_token_array中。

相关的函数调用路径如下,

  1. Breakpoint 1, store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=18, id_name=0x7ff428006118 "performance_schema")
  2. at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
  3. 139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  4. (gdb) bt
  5. #0 store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=18, id_name=0x7ff428006118 "performance_schema")
  6. at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
  7. #1 0x000000000166049f in digest_add_token (state=0x7ff428002840, token=945, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:590
  8. #2 0x0000000001675311 in Lex_input_stream::add_digest_token (this=0x7ff4c0283568, token=488, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:382
  9. #3 0x00000000016777ab in MYSQLlex (yylval=0x7ff4c0281a60, yylloc=0x7ff4c0281a40, thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:1362
  10. #4 0x00000000017fd83b in MYSQLparse (YYTHD=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_yacc.cc:20171
  11. #5 0x00000000016b8801 in parse_sql (thd=0x7ff428000950, parser_state=0x7ff4c0283560, creation_ctx=0x0) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:7578
  12. #6 0x00000000016b5300 in mysql_parse (thd=0x7ff428000950, parser_state=0x7ff4c0283560) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:5924
  13. #7 0x00000000016a9e3e in dispatch_command (thd=0x7ff428000950, com_data=0x7ff4c0283dd0, command=COM_QUERY) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1550
  14. #8 0x00000000016a8a5b in do_command (thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1011
  15. #9 0x00000000017eeb6e in handle_connection (arg=0x5cec640) at /disk6/lefeng/porting/polardb571/sql/conn_handler/connection_handler_per_thread.cc:303
  16. #10 0x0000000001a9f465 in pfs_spawn_thread (arg=0x5d87a50) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:2188
  17. #11 0x00007ff4c9795e25 in start_thread () from /lib64/libpthread.so.0
  18. #12 0x00007ff4c865cbad in clone () from /lib64/libc.so.6
  1. File storage/perfschema/pfs_digest.cc
  2. 188 PFS_statement_stat*
  3. 189 find_or_create_digest(PFS_thread *thread,
  4. 190 const sql_digest_storage *digest_storage,
  5. 191 const char *schema_name,
  6. 192 uint schema_name_length)
  7. 193 {
  8. ...
  9. 203 if (unlikely(pins == NULL))
  10. 204 return NULL;
  11. 205
  12. 206 /*
  13. 207 Note: the LF_HASH key is a block of memory,
  14. 208 make sure to clean unused bytes,
  15. 209 so that memcmp() can compare keys.
  16. 210 */
  17. 211 PFS_digest_key hash_key;
  18. 212 memset(& hash_key, 0, sizeof(hash_key));
  19. 213 /* Compute MD5 Hash of the tokens received. */
  20. 214 compute_digest_md5(digest_storage, hash_key.m_md5);
  21. 215 memcpy((void*)& digest_storage->m_md5, &hash_key.m_md5, MD5_HASH_SIZE);
  22. 216 /* Add the current schema to the key */
  23. 217 hash_key.m_schema_name_length= schema_name_length;
  24. 218 if (schema_name_length > 0)
  25. 219 memcpy(hash_key.m_schema_name, schema_name, schema_name_length);
  26. 220
  27. 221 ...

在storage/perfschema/pfs_digest.cc第214行,find_or_create_digest()会调用compute_digest_md5()。compute_digest_md5()会从digest_storage->m_token_array读取构造好的字节文本,完成hash计算。

接下来,我们以SQL语句 “TRUNCATE TABLE performance_schema.events_statements_summary_by_digest” 为例介绍 digest计算过程。

  1. 首先识别出的token是859,对应的token定义如下,其token值用于填充字节文本的前2个字节,
  1. File sql/sql_yacc.h
  2. #define TRUNCATE_SYM 859

函数调用栈如下,

  1. Breakpoint 2, store_token (digest_storage=0x7ff428002848, token=859) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:76
  2. 76 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  3. (gdb) n
  4. 78 if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
  5. (gdb)
  6. 80 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
  7. (gdb)
  8. 81 dest[0]= token & 0xff;
  9. (gdb)
  10. 82 dest[1]= (token >> 8) & 0xff;
  11. (gdb)
  12. 83 digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
  13. (gdb)
  14. 89 }
  15. (gdb) p digest_storage->m_byte_count
  16. (gdb) 2
  17. (gdb) bt
  18. #0 store_token (digest_storage=0x7ff428002848, token=859) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:76
  19. #1 0x00000000016604c2 in digest_add_token (state=0x7ff428002840, token=859, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:599
  20. #2 0x0000000001675311 in Lex_input_stream::add_digest_token (this=0x7ff4c0283568, token=859, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:382
  21. #3 0x00000000016777ab in MYSQLlex (yylval=0x7ff4c0281a60, yylloc=0x7ff4c0281a40, thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:1362
  22. #4 0x00000000017fd83b in MYSQLparse (YYTHD=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_yacc.cc:20171
  23. #5 0x00000000016b8801 in parse_sql (thd=0x7ff428000950, parser_state=0x7ff4c0283560, creation_ctx=0x0) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:7578
  24. #6 0x00000000016b5300 in mysql_parse (thd=0x7ff428000950, parser_state=0x7ff4c0283560) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:5924
  25. #7 0x00000000016a9e3e in dispatch_command (thd=0x7ff428000950, com_data=0x7ff4c0283dd0, command=COM_QUERY) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1550
  26. #8 0x00000000016a8a5b in do_command (thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1011
  27. #9 0x00000000017eeb6e in handle_connection (arg=0x5cec640) at /disk6/lefeng/porting/polardb571/sql/conn_handler/connection_handler_per_thread.cc:303
  28. #10 0x0000000001a9f465 in pfs_spawn_thread (arg=0x5d87a50) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:2188
  29. #11 0x00007ff4c9795e25 in start_thread () from /lib64/libpthread.so.0
  30. #12 0x00007ff4c865cbad in clone () from /lib64/libc.so.6
  1. 其次识别出的token是835,其对应的token定义如下,其token值用于填充接下来的2字节,
  1. File sql/sql_yacc.h
  2. #define TABLE_SYM 835
  1. 然后识别出来的token是488(写入字节文本之前会被转换为945,见下面断点处的参数token=945),对应的token定义是,
  1. #define IDENT_QUOTED 488

根据token值和id_name的长度构造4字节文本数据,之后把“performance_schema”追加到其后。

  1. Breakpoint 1, store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=18, id_name=0x7ff428006118 "performance_schema")
  2. at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
  3. 139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  4. (gdb) p digest_storage->m_byte_count
  5. $3 = 4
  6. (gdb) n
  7. 141 size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
  8. (gdb)
  9. 142 if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
  10. (gdb)
  11. 144 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
  12. (gdb)
  13. 146 dest[0]= token & 0xff;
  14. (gdb)
  15. 147 dest[1]= (token >> 8) & 0xff;
  16. (gdb)
  17. 149 dest[2]= id_length & 0xff;
  18. (gdb)
  19. 150 dest[3]= (id_length >> 8) & 0xff;
  20. 152 if (id_length > 0)
  21. (gdb)
  22. 153 memcpy((char *)(dest + 4), id_name, id_length);
  23. 154 digest_storage->m_byte_count+= bytes_needed;
  24. (gdb)
  25. 160 }
  26. (gdb) p digest_storage->m_byte_count
  27. $4 = 26
  1. 接下来识别出的token是46,即库名与表名之间的分隔符'.'(其ASCII码为46),
  1. Breakpoint 2, store_token (digest_storage=0x7ff428002848, token=46) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:76
  2. 76 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  3. 78 if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
  4. (gdb)
  5. 80 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
  6. (gdb)
  7. 81 dest[0]= token & 0xff;
  8. (gdb)
  9. 82 dest[1]= (token >> 8) & 0xff;
  10. (gdb)
  11. 83 digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
  12. (gdb)
  13. 89 }
  14. (gdb) p digest_storage->m_byte_count
  15. $11 = 28
  1. 最后识别出来且用于构造字节文本的token是945 (同上,488转换而来),“events_statements_summary_by_digest”会被追加到文本末尾。
  1. Breakpoint 1, store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=35, id_name=0x7ff428006130 "events_statements_summary_by_digest")
  2. at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
  3. 139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
  4. (gdb) n
  5. 141 size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
  6. (gdb)
  7. 142 if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
  8. (gdb)
  9. 144 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
  10. (gdb)
  11. 146 dest[0]= token & 0xff;
  12. (gdb)
  13. 147 dest[1]= (token >> 8) & 0xff;
  14. (gdb)
  15. 149 dest[2]= id_length & 0xff;
  16. (gdb)
  17. 150 dest[3]= (id_length >> 8) & 0xff;
  18. (gdb)
  19. 152 if (id_length > 0)
  20. (gdb)
  21. 153 memcpy((char *)(dest + 4), id_name, id_length);
  22. (gdb)
  23. 154 digest_storage->m_byte_count+= bytes_needed;
  24. (gdb)
  25. 160 }
  26. (gdb) p digest_storage->m_byte_count
  27. $12 = 67
  1. 至此,字节文本构造完毕,接下来计算MD5 hash,
  1. Breakpoint 3, find_or_create_digest (thread=0x7ff4c7cc2c00, digest_storage=0x7ff428002848, schema_name=0x7ff428002930 "", schema_name_length=0)
  2. at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs_digest.cc:194
  3. 194 DBUG_ASSERT(digest_storage != NULL);
  4. (gdb) n
  5. 196 if (statements_digest_stat_array == NULL)
  6. (gdb)
  7. 199 if (digest_storage->m_byte_count <= 0)
  8. (gdb)
  9. 202 LF_PINS *pins= get_digest_hash_pins(thread);
  10. (gdb)
  11. 203 if (unlikely(pins == NULL))
  12. (gdb)
  13. 212 memset(& hash_key, 0, sizeof(hash_key));
  14. (gdb)
  15. 214 compute_digest_md5(digest_storage, hash_key.m_md5);
  16. (gdb)
  17. 215 memcpy((void*)& digest_storage->m_md5, &hash_key.m_md5, MD5_HASH_SIZE);
  18. (gdb)
  19. 217 hash_key.m_schema_name_length= schema_name_length;
  20. (gdb) p /x hash_key.m_md5
  21. $13 = {0xf8, 0x37, 0x3f, 0x7b, 0xed, 0x47, 0x77, 0x3d, 0x4c, 0xd1, 0xd5, 0xc0, 0xab, 0xb7, 0x88, 0xc9}
  22. (gdb) bt
  23. #0 find_or_create_digest (thread=0x7ff4c7cc2c00, digest_storage=0x7ff428002848, schema_name=0x7ff428002930 "", schema_name_length=0)
  24. at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs_digest.cc:217
  25. #1 0x0000000001aa648e in pfs_end_statement_v1 (locker=0x7ff428002888, stmt_da=0x7ff428003890) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:5405
  26. #2 0x00000000016a5c46 in inline_mysql_end_statement (locker=0x7ff428002888, stmt_da=0x7ff428003890) at /disk6/lefeng/porting/polardb571/include/mysql/psi/mysql_statement.h:228
  27. #3 0x00000000016ab574 in dispatch_command (thd=0x7ff428000950, com_data=0x7ff4c0283dd0, command=COM_QUERY) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:2023
  28. #4 0x00000000016a8a5b in do_command (thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1011
  29. #5 0x00000000017eeb6e in handle_connection (arg=0x5cec640) at /disk6/lefeng/porting/polardb571/sql/conn_handler/connection_handler_per_thread.cc:303
  30. #6 0x0000000001a9f465 in pfs_spawn_thread (arg=0x5d87a50) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:2188
  31. #8 0x00007ff4c865cbad in clone () from /lib64/libc.so.6
  1. 最后,查询performance_schema.events_statements_summary_by_digest,显示计算出的MD5 hash值,它应与上面打印的hash_key.m_md5一致,即f8373f7bed47773d4cd1d5c0abb788c9。