一些数据表会存储执行过的SQL语句和其digest,比如表events_statements_summary_by_digest中的第二个和第三个列,digest其实是一个MD5 hash值,接下来简单介绍下digest的生成过程。
digest是基于一串字节文本做MD5计算出来的hash值,这个字节文本在parser解析SQL时根据识别出来的token和identifier构造。下面以MySQL 5.7的代码为例,简单介绍digest的生成过程。
当一个token被识别时,调用构造字节文本,
File sql/sql_digest.cc
71 /**
72 Store a single token in token array.
73 */
74 inline void store_token(sql_digest_storage* digest_storage, uint token)
75 {
76 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
77
78 if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
79 {
80 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
81 dest[0]= token & 0xff;
82 dest[1]= (token >> 8) & 0xff;
83 digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
84 }
85 else
86 {
87 digest_storage->m_full= true;
88 }
89 }
90
File sql/sql_digest.cc
135 inline void store_token_identifier(sql_digest_storage* digest_storage,
136 uint token,
137 size_t id_length, const char *id_name)
138 {
139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
140
141 size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
142 if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
143 {
144 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
145 /* Write the token */
146 dest[0]= token & 0xff;
147 dest[1]= (token >> 8) & 0xff;
148 /* Write the string length */
149 dest[2]= id_length & 0xff;
150 dest[3]= (id_length >> 8) & 0xff;
151 /* Write the string data */
152 if (id_length > 0)
153 memcpy((char *)(dest + 4), id_name, id_length);
154 digest_storage->m_byte_count+= bytes_needed;
155 }
156 else
157 {
158 digest_storage->m_full= true;
159 }
160 }
可以看到,
- 前两个字节,函数中的dest[0] / dest[1] ,根据token值计算而来;
- 第三和第四个字节,dest[2] / dest[3],根据id_length计算而来;
- 之后的地址存放id_name对应的文本。
store_token()
和store_token_identifier()
可以被调用多次,从而把不断识别出的token和identifier拼接成一个最终的字节文本,存放在digest_storage->m_token_array中。
相关的函数调用路径如下,
Breakpoint 1, store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=18, id_name=0x7ff428006118 "performance_schema")
at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
(gdb) bt
#0 store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=18, id_name=0x7ff428006118 "performance_schema")
at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
#1 0x000000000166049f in digest_add_token (state=0x7ff428002840, token=945, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:590
#2 0x0000000001675311 in Lex_input_stream::add_digest_token (this=0x7ff4c0283568, token=488, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:382
#3 0x00000000016777ab in MYSQLlex (yylval=0x7ff4c0281a60, yylloc=0x7ff4c0281a40, thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:1362
#4 0x00000000017fd83b in MYSQLparse (YYTHD=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_yacc.cc:20171
#5 0x00000000016b8801 in parse_sql (thd=0x7ff428000950, parser_state=0x7ff4c0283560, creation_ctx=0x0) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:7578
#6 0x00000000016b5300 in mysql_parse (thd=0x7ff428000950, parser_state=0x7ff4c0283560) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:5924
#7 0x00000000016a9e3e in dispatch_command (thd=0x7ff428000950, com_data=0x7ff4c0283dd0, command=COM_QUERY) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1550
#8 0x00000000016a8a5b in do_command (thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1011
#9 0x00000000017eeb6e in handle_connection (arg=0x5cec640) at /disk6/lefeng/porting/polardb571/sql/conn_handler/connection_handler_per_thread.cc:303
#10 0x0000000001a9f465 in pfs_spawn_thread (arg=0x5d87a50) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:2188
#11 0x00007ff4c9795e25 in start_thread () from /lib64/libpthread.so.0
#12 0x00007ff4c865cbad in clone () from /lib64/libc.so.6
File storage/perfschema/pfs_digest.cc
188 PFS_statement_stat*
189 find_or_create_digest(PFS_thread *thread,
190 const sql_digest_storage *digest_storage,
191 const char *schema_name,
192 uint schema_name_length)
193 {
...
203 if (unlikely(pins == NULL))
204 return NULL;
205
206 /*
207 Note: the LF_HASH key is a block of memory,
208 make sure to clean unused bytes,
209 so that memcmp() can compare keys.
210 */
211 PFS_digest_key hash_key;
212 memset(& hash_key, 0, sizeof(hash_key));
213 /* Compute MD5 Hash of the tokens received. */
214 compute_digest_md5(digest_storage, hash_key.m_md5);
215 memcpy((void*)& digest_storage->m_md5, &hash_key.m_md5, MD5_HASH_SIZE);
216 /* Add the current schema to the key */
217 hash_key.m_schema_name_length= schema_name_length;
218 if (schema_name_length > 0)
219 memcpy(hash_key.m_schema_name, schema_name, schema_name_length);
220
221 ...
在storage/perfschema/pfs_digest.cc第214行,find_or_create_digest()
会调用compute_digest_md5()
,compute_digest_md5()
会从digest_storage->m_token_array读取构造好的字节文本,完成hash计算。
接下来,我们以SQL语句 “TRUNCATE TABLE performance_schema.events_statements_summary_by_digest” 为例介绍 digest计算过程。
- 首先识别出的token是859,对应的token定义如下,其token值用于填充字节文本的前2个字节,
File sql/sql_yacc.h
#define TRUNCATE_SYM 859
函数调用栈如下,
Breakpoint 2, store_token (digest_storage=0x7ff428002848, token=859) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:76
76 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
(gdb) n
78 if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
(gdb)
80 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
(gdb)
81 dest[0]= token & 0xff;
(gdb)
82 dest[1]= (token >> 8) & 0xff;
(gdb)
83 digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
(gdb)
89 }
(gdb) p digest_storage->m_byte_count
(gdb) 2
(gdb) bt
#0 store_token (digest_storage=0x7ff428002848, token=859) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:76
#1 0x00000000016604c2 in digest_add_token (state=0x7ff428002840, token=859, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:599
#2 0x0000000001675311 in Lex_input_stream::add_digest_token (this=0x7ff4c0283568, token=859, yylval=0x7ff4c0281a60) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:382
#3 0x00000000016777ab in MYSQLlex (yylval=0x7ff4c0281a60, yylloc=0x7ff4c0281a40, thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_lex.cc:1362
#4 0x00000000017fd83b in MYSQLparse (YYTHD=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_yacc.cc:20171
#5 0x00000000016b8801 in parse_sql (thd=0x7ff428000950, parser_state=0x7ff4c0283560, creation_ctx=0x0) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:7578
#6 0x00000000016b5300 in mysql_parse (thd=0x7ff428000950, parser_state=0x7ff4c0283560) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:5924
#7 0x00000000016a9e3e in dispatch_command (thd=0x7ff428000950, com_data=0x7ff4c0283dd0, command=COM_QUERY) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1550
#8 0x00000000016a8a5b in do_command (thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1011
#9 0x00000000017eeb6e in handle_connection (arg=0x5cec640) at /disk6/lefeng/porting/polardb571/sql/conn_handler/connection_handler_per_thread.cc:303
#10 0x0000000001a9f465 in pfs_spawn_thread (arg=0x5d87a50) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:2188
#11 0x00007ff4c9795e25 in start_thread () from /lib64/libpthread.so.0
#12 0x00007ff4c865cbad in clone () from /lib64/libc.so.6
- 其次识别出的token是835,其对应的token定义如下,其token值用于填充接下来的2字节,
File sql/sql_yacc.h
#define TABLE_SYM 835
- 然后识别出来并用于构造字节文本的token是488,对应的token定义是,
#define IDENT_QUOTED 488
根据token值和id_name的长度构造4字节文本数据,之后把”performance_schema”追加到其后。
Breakpoint 1, store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=18, id_name=0x7ff428006118 "performance_schema")
at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
(gdb) p digest_storage->m_byte_count
$3 = 4
(gdb) n
141 size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
(gdb)
142 if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
(gdb)
144 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
(gdb)
146 dest[0]= token & 0xff;
(gdb)
147 dest[1]= (token >> 8) & 0xff;
(gdb)
149 dest[2]= id_length & 0xff;
(gdb)
150 dest[3]= (id_length >> 8) & 0xff;
152 if (id_length > 0)
(gdb)
153 memcpy((char *)(dest + 4), id_name, id_length);
154 digest_storage->m_byte_count+= bytes_needed;
(gdb)
160 }
(gdb) p digest_storage->m_byte_count
$4 = 26
- 接下来识别出的token是46
Breakpoint 2, store_token (digest_storage=0x7ff428002848, token=46) at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:76
76 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
78 if (digest_storage->m_byte_count + SIZE_OF_A_TOKEN <= digest_storage->m_token_array_length)
(gdb)
80 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
(gdb)
81 dest[0]= token & 0xff;
(gdb)
82 dest[1]= (token >> 8) & 0xff;
(gdb)
83 digest_storage->m_byte_count+= SIZE_OF_A_TOKEN;
(gdb)
89 }
(gdb) p digest_storage->m_byte_count
$11 = 28
- 最后识别出来且用于构造字节文本的token是945 (同上,488转换而来),”events_statements_summary_by_digest”会被追加到文本末尾。
Breakpoint 1, store_token_identifier (digest_storage=0x7ff428002848, token=945, id_length=35, id_name=0x7ff428006130 "events_statements_summary_by_digest")
at /disk6/lefeng/porting/polardb571/sql/sql_digest.cc:139
139 DBUG_ASSERT(digest_storage->m_byte_count <= digest_storage->m_token_array_length);
(gdb) n
141 size_t bytes_needed= 2 * SIZE_OF_A_TOKEN + id_length;
(gdb)
142 if (digest_storage->m_byte_count + bytes_needed <= (unsigned int)digest_storage->m_token_array_length)
(gdb)
144 unsigned char* dest= & digest_storage->m_token_array[digest_storage->m_byte_count];
(gdb)
146 dest[0]= token & 0xff;
(gdb)
147 dest[1]= (token >> 8) & 0xff;
(gdb)
149 dest[2]= id_length & 0xff;
(gdb)
150 dest[3]= (id_length >> 8) & 0xff;
(gdb)
152 if (id_length > 0)
(gdb)
153 memcpy((char *)(dest + 4), id_name, id_length);
(gdb)
154 digest_storage->m_byte_count+= bytes_needed;
(gdb)
160 }
(gdb) p digest_storage->m_byte_count
$12 = 67
- 至此,字节文本构造完毕,接下来计算MD5 hash,
Breakpoint 3, find_or_create_digest (thread=0x7ff4c7cc2c00, digest_storage=0x7ff428002848, schema_name=0x7ff428002930 "", schema_name_length=0)
at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs_digest.cc:194
194 DBUG_ASSERT(digest_storage != NULL);
(gdb) n
196 if (statements_digest_stat_array == NULL)
(gdb)
199 if (digest_storage->m_byte_count <= 0)
(gdb)
202 LF_PINS *pins= get_digest_hash_pins(thread);
(gdb)
203 if (unlikely(pins == NULL))
(gdb)
212 memset(& hash_key, 0, sizeof(hash_key));
(gdb)
214 compute_digest_md5(digest_storage, hash_key.m_md5);
(gdb)
215 memcpy((void*)& digest_storage->m_md5, &hash_key.m_md5, MD5_HASH_SIZE);
(gdb)
217 hash_key.m_schema_name_length= schema_name_length;
(gdb) p /x hash_key.m_md5
$13 = {0xf8, 0x37, 0x3f, 0x7b, 0xed, 0x47, 0x77, 0x3d, 0x4c, 0xd1, 0xd5, 0xc0, 0xab, 0xb7, 0x88, 0xc9}
(gdb) bt
#0 find_or_create_digest (thread=0x7ff4c7cc2c00, digest_storage=0x7ff428002848, schema_name=0x7ff428002930 "", schema_name_length=0)
at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs_digest.cc:217
#1 0x0000000001aa648e in pfs_end_statement_v1 (locker=0x7ff428002888, stmt_da=0x7ff428003890) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:5405
#2 0x00000000016a5c46 in inline_mysql_end_statement (locker=0x7ff428002888, stmt_da=0x7ff428003890) at /disk6/lefeng/porting/polardb571/include/mysql/psi/mysql_statement.h:228
#3 0x00000000016ab574 in dispatch_command (thd=0x7ff428000950, com_data=0x7ff4c0283dd0, command=COM_QUERY) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:2023
#4 0x00000000016a8a5b in do_command (thd=0x7ff428000950) at /disk6/lefeng/porting/polardb571/sql/sql_parse.cc:1011
#5 0x00000000017eeb6e in handle_connection (arg=0x5cec640) at /disk6/lefeng/porting/polardb571/sql/conn_handler/connection_handler_per_thread.cc:303
#6 0x0000000001a9f465 in pfs_spawn_thread (arg=0x5d87a50) at /disk6/lefeng/porting/polardb571/storage/perfschema/pfs.cc:2188
#8 0x00007ff4c865cbad in clone () from /lib64/libc.so.6
- 最后,查询performance_schema.events_statements_summary_by_digest,显示计算出的MD5 hash值。