|
在客户现场的一次问题报告中发现某个带有CTE语句进行查询的时候,数据量少的时候没问题,但是数据量大的时候会导致core。注意:这个问题只在 MySQL 8.0.32 版本才会复现,最新的8.4.4版本没有问题。
看下面例子:
1、准备表
CREATE TABLE t1 (
ORG_ID decimal(12,0) NOT NULL,
ORG_NAME varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin NOT NULL,
ORG_CODE varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin NOT NULL,
ORG_TYPE varchar(4) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT (_utf8mb4'01'),
ORG_LEVEL varchar(4) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
PORG_ID decimal(12,0) DEFAULT NULL,
ORG_STATE varchar(4) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin NOT NULL,
COMMENTS varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
MANAGER_FLAG varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
SOET_WEEKLY varchar(12) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
CONTACT_NAME varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
CONTACT_TELEPHONE decimal(32,0) DEFAULT NULL,
OTHER_TELEPHONE decimal(32,0) DEFAULT NULL,
OFFICE_ADDR varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
PROVINCE varchar(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
CITY varchar(4) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
COUNTRY varchar(6) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
ORG_CLASS varchar(2) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
UPDATE_TIME datetime DEFAULT NULL,
OPERATE_TYPE varchar(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
CREATE_TIME datetime DEFAULT NULL,
KEY IDX_t1 (ORG_ID,PORG_ID),
KEY INX_ESOP_ORG_OID (ORG_ID),
KEY IDX_PROVINCE_CITY (PROVINCE,CITY)
);
INSERT INTO t1 VALUES(29100709,'数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识',29100709,01,5,291007,1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,290,9150,915009,02,'2018-02-09 04:19:00',1,'2017-11-15 00:00:00');
INSERT INTO t1 VALUES(29100708,'数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识',29100709,01,5,291007,1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,290,9150,915009,02,'2018-02-09 04:19:00',1,'2017-11-15 00:00:00');
INSERT INTO t1 VALUES(29100707,'数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识数据库数据知识',29100709,01,5,291007,1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,290,9150,915009,02,'2018-02-09 04:19:00',1,'2017-11-15 00:00:00');
INSERT INTO t1 SELECT * FROM t1;
INSERT INTO t1 SELECT * FROM t1;
CREATE TABLE t2 (
USER_ID decimal(14,0) NOT NULL,
USER_NAME varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin NOT NULL,
NICK_NAME varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
USER_CODE varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin NOT NULL,
PASS_WORD varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
ORG_ID decimal(12,0) DEFAULT NULL,
POSITION varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
FUNCTIONS varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
MOBILE_NO varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
EXTENSION_NO varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
EMAIL varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
STATE varchar(4) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
COMMENTS varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
SCORE decimal(10,0) DEFAULT NULL,
IDCARD varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
AUDIT_STATE decimal(1,0) DEFAULT NULL,
UPDATE_TIME datetime DEFAULT NULL,
OPERATE_TYPE varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
MAPPINGCODE varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_bin DEFAULT NULL,
CREATE_TIME datetime DEFAULT NULL,
KEY IDX_t2_ID (USER_ID,USER_CODE),
KEY INX_EUS_MCOD (MAPPINGCODE),
KEY INX_EUERS_UNM (USER_NAME),
KEY IDX_t2_MOBILE_NO (MOBILE_NO),
KEY INX_ESOP_USR_ID (USER_ID),
KEY IDX_t2_USER_CODE (USER_CODE),
KEY IDX_ORG_ID (ORG_ID)
);
INSERT INTO t2(USER_ID, user_name, user_code, org_id) VALUES(1001, 'daizhong', 'daizhong', 1);
INSERT INTO t2(USER_ID, user_name, user_code, org_id) VALUES(1002, 'daizhong1', 'daizhong1', 29100709);
INSERT INTO t2(USER_ID, user_name, user_code, org_id) VALUES(1003, 'daizhong1', 'daizhong1', 29100708);
INSERT INTO t2(USER_ID, user_name, user_code, org_id) VALUES(1003, 'daizhong1', 'daizhong1', 29100707);
INSERT INTO t2 SELECT * FROM t2;
INSERT INTO t2 SELECT * FROM t2;
2、tmp_table_size为默认值场合
如下所示用默认tmp_table_size
并且进行CTE派生表查询,可以发现结果正常显示一条数据,符合预期。
-- tmp_table_size参数为默认值的情况
greatsql> SHOW variables LIKE '%tmp_table_size%';
+----------------+----------+
| Variable_name | Value |
+----------------+----------+
| tmp_table_size | 16777216 |
+----------------+----------+
greatsql> WITH RECURSIVE cte_tree AS ( SELECT 1 AS LEVEL, t.org_id,t.org_id as porg_id,user_name, nick_name, user_code FROM t2 t UNION ALL SELECT LEVEL + 1 AS LEVEL, pt.org_id,pt.porg_id,pt.ORG_NAME,pt.CONTACT_NAME,pt.ORG_CODE FROM t1 pt JOIN cte_tree ct ON pt.org_id = ct.porg_id ) SELECT a.* FROM cte_tree a limit 1;
+-------+--------+---------+-----------+-----------+-----------+
| LEVEL | org_id | porg_id | user_name | nick_name | user_code |
+-------+--------+---------+-----------+-----------+-----------+
| 1 | 1 | 1 | daizhong | NULL | daizhong |
+-------+--------+---------+-----------+-----------+-----------+
3、tmp_table_size修改小的场合
为了不让之前的临时表影响RAM内存的阈值,重新启动数据库。然后执行以下命令。
-- 设置tmp_table_size参数值比临时表数据大的情况
greatsql> SET tmp_table_size=1342;
-- 以下同样的命令导致core
greatsql> WITH RECURSIVE cte_tree AS ( SELECT 1 AS LEVEL, t.org_id,t.org_id as porg_id,user_name, nick_name, user_code FROM t2 t UNION ALL SELECT LEVEL + 1 AS LEVEL, pt.org_id,pt.porg_id,pt.ORG_NAME,pt.CONTACT_NAME,pt.ORG_CODE FROM t1 pt JOIN cte_tree ct ON pt.org_id = ct.porg_id ) SELECT a.* FROM cte_tree a limit 1;
core堆栈如下:
Thread 56 "mysqld" received signal SIGABRT, Aborted.
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
50 ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1 0x00007ffff7508859 in __GI_abort () at abort.c:79
#2 0x00007ffff7508729 in __assert_fail_base (
fmt=0x7ffff769e588 "%s%s%s:%u: %s%sAssertion `%s' failed.\n%n",
assertion=0x55555dd91569 "m_opened_table != nullptr",
file=0x55555dd91510 "storage/temptable/include/temptable/handler.h", line=563, function=<optimized out>) at assert.c:94
#3 0x00007ffff7519fd6 in __GI___assert_fail (
assertion=0x55555dd91569 "m_opened_table != nullptr",
file=0x55555dd91510 "storage/temptable/include/temptable/handler.h", line=563,
function=0x55555dd914d8 "void temptable::Handler::opened_table_validate()")
at assert.c:103
#4 0x000055555bb98e96 in temptable::Handler::opened_table_validate (this=0x7fff2020f7e8)
at storage/temptable/include/temptable/handler.h:563
#5 0x000055555bb95354 in temptable::Handler::info (this=0x7fff2020f7e8)
at storage/temptable/src/handler.cc:751
#6 0x000055555bb93482 in temptable::Handler::open (this=0x7fff2020f7e8,
table_name=0x7fff2020b170 "greatdb/confper/tmp/#sqlf1fea_8_2")
at storage/temptable/src/handler.cc:218
#7 0x0000555558fd1804 in handler::ha_open (this=0x7fff2020f7e8, table_arg=0x7fff2020edf0,
name=0x7fff2020b170 "greatdb/confper/tmp/#sqlf1fea_8_2", mode=2,
test_if_locked=516, table_def=0x0)
at sql/handler.cc:2927
#8 0x00005555597c223e in open_tmp_table (table=0x7fff2020edf0)
at sql/sql_tmp_table.cc:2422
#9 0x000055555923a798 in FollowTailIterator::Init (this=0x7fff2021e9c0)
at sql/iterators/basic_row_iterators.cc:345
#10 0x0000555559255c13 in FilterIterator::Init (this=0x7fff2021ea20)
at sql/iterators/composite_iterators.h:85
#11 0x0000555559b8fb69 in NestedLoopIterator::Init (this=0x7fff2021ea90)
at sql/iterators/composite_iterators.cc:523
#12 0x0000555559b9805b in MaterializeIterator<DummyIteratorProfiler>::MaterializeQueryBlock (
this=0x7fff2021ec08, query_block=..., stored_rows=0x7fffc83eedd8)
at sql/iterators/composite_iterators.cc:1221
#13 0x0000555559b979f1 in MaterializeIterator<DummyIteratorProfiler>::MaterializeRecursive (
this=0x7fff2021ec08)
at sql/iterators/composite_iterators.cc:1129
#14 0x0000555559b960b9 in MaterializeIterator<DummyIteratorProfiler>::Init (
this=0x7fff2021ec08)
at sql/iterators/composite_iterators.cc:971
#15 0x0000555559b8e859 in LimitOffsetIterator::Init (this=0x7fff2021eca8)
at sql/iterators/composite_iterators.cc:100
#16 0x00005555597dacd5 in Query_expression::ExecuteIteratorQuery (this=0x7fff201c20d0,
thd=0x7fff20001050) at sql/sql_union.cc:1814
#17 0x00005555597db113 in Query_expression::execute (this=0x7fff201c20d0, thd=0x7fff20001050)
at sql/sql_union.cc:1877
#18 0x00005555597001c0 in Sql_cmd_dml::execute_inner (this=0x7fff20200a80,
thd=0x7fff20001050) at sql/sql_select.cc:872
#19 0x00005555596ff38e in Sql_cmd_dml::execute (this=0x7fff20200a80, thd=0x7fff20001050)
at sql/sql_select.cc:612
#20 0x000055555966207c in mysql_execute_command (thd=0x7fff20001050, first_level=true)
at sql/sql_parse.cc:5227
打印m_opened_table变量,发现结果为空,所以导致core
#4 0x000055555bb98e96 in temptable::Handler::opened_table_validate (this=0x7fff2c20ed88)
at storage/temptable/include/temptable/handler.h:563
563 assert(m_opened_table != nullptr);
(gdb) p m_opened_table
$21 = (temptable::Table *) 0x0
查询带有CTE派生表的时候内部会创建临时表用于保存临时数据,因此先看一下上面2个场合的临时表情况:
tmp_table_size值 | 查询开始时候临时表情况 | 查询过程中临时表情况 | 说明 |
---|---|---|---|
16777216 | 内存表 | 内存表 | 没问题 |
1342 | 内存表 | 转为落盘表 | CORE |
1、正常执行的代码流程调查
先看一下tmp_table_size=16777216时候没问题的代码流程,以下代码通过Handler::create()
函数向kv_store这个map插入了一条新的table信息。
首先创建内部物化表,代码调用流程如下:
Table_ref::create_materialized_table -> instantiate_tmp_table -> create_tmp_table_with_fallback
-> temptable::Handler::create -> temptable::Allocator
-> temptable::Prefer_RAM_over_MMAP_policy_obeying_per_table_limit::block_source
bool instantiate_tmp_table(THD *thd, TABLE *table) {
// 这里创建临时表,包括引擎,调用Handler::create,注意这里handler=temptable::Handler
if (create_tmp_table_with_fallback(thd, table)) return true;
// 这里打开表,调用Handler::open打开临时表引擎
open_tmp_table(table);
}
int Handler::create(const char *table_name, TABLE *mysql_table,
HA_CREATE_INFO *, dd::Table *) {
try {
size_t per_table_limit = thd_get_tmp_table_size(ha_thd());
auto &kv_store = kv_store_shard[thd_thread_id(ha_thd())];
// 这里创建表引擎的时候申请新的内存用于保存table信息,kv_store.emplace调用下面的block_source()函数申请新的内存。
const auto insert_result = kv_store.emplace(
std::piecewise_construct, std::forward_as_tuple(table_name),
std::forward_as_tuple(mysql_table, m_shared_block,
all_columns_are_fixed_size, per_table_limit));
ret = insert_result.second ? Result::OK : Result::TABLE_EXIST;
} catch (Result ex) {
ret = ex;
} catch (...) {
ret = Result::OUT_OF_MEM;
}
}
// 申请新的内存
struct Prefer_RAM_over_MMAP_policy_obeying_per_table_limit {
static Source block_source(uint32_t block_size,
TableResourceMonitor *table_resource_monitor) {
assert(table_resource_monitor);
assert(table_resource_monitor->consumption() <=
table_resource_monitor->threshold());
/* 以下各参数值:
table_resource_monitor->consumption() = 0 这个值为已经申请到的内存值
block_size = 1048576 这个值为当前新申请的内存值为1MB内存,为新申请的块大小
table_resource_monitor->threshold() = 16777216 这个值为当前tmp_table_size值
这个比较结果可以看出没有超出tmp_table_size值,因此正常新增1048576内存。
*/
if (table_resource_monitor->consumption() + block_size >
table_resource_monitor->threshold())
throw Result::RECORD_FILE_FULL;
return Prefer_RAM_over_MMAP_policy::block_source(block_size);
}
};
接着FollowTailIterator
迭代器通过open_tmp_table
函数用temptable::Handler::open
函数在kv_store这个map寻找刚才插入的那条记录成功。
int Handler::open(const char *table_name, int, uint, const dd::Table *) {
try {
auto &kv_store = kv_store_shard[thd_thread_id(ha_thd())];
m_opened_table = kv_store.find(table_name);
if (m_opened_table) {
ret = Result::OK;
opened_table_validate();
} else {
ret = Result::NO_SUCH_TABLE;
}
} catch (std::bad_alloc &) {
ret = Result::OUT_OF_MEM;
}
info(HA_STATUS_VARIABLE);
DBUG_RET(ret);
}
2、导致core的代码流程调查
设置完tmp_table_size=1342
,这里代码流程跟上面是一样的,但是有一些参数值有变化。
bool instantiate_tmp_table(THD *thd, TABLE *table) {
// 这里创建临时表,包括引擎,调用Handler::create,因为创建临时表内存超过阈值,因此这里引擎改为落盘表,hangler=ha_innobase
if (create_tmp_table_with_fallback(thd, table)) return true;
// 这里打开落盘表,调用Handler::open
open_tmp_table(table);
}
static bool create_tmp_table_with_fallback(THD *thd, TABLE *table) {
// 一开始临时表handler=temptable::Handler,但是申请内存超过阈值
int error =
table->file->create(share->table_name.str, table, &create_info, nullptr);
if (error == HA_ERR_RECORD_FILE_FULL &&
table->s->db_type() == temptable_hton) {
// 这里修改临时表handler=ha_innobase
table->file = get_new_handler(
table->s, false, share->alloc_for_tmp_file_handler, innodb_hton);
error = table->file->create(share->table_name.str, table, &create_info,
nullptr);
}
}
int Handler::create(const char *table_name, TABLE *mysql_table,
HA_CREATE_INFO *, dd::Table *) {
try {
size_t per_table_limit = thd_get_tmp_table_size(ha_thd());
auto &kv_store = kv_store_shard[thd_thread_id(ha_thd())];
// 这里创建表引擎的时候申请新的内存用于保存table信息,kv_store.emplace调用下面的block_source()函数申请新的内存。
// 这里因为新申请的内存大于tmp_table_size=1342这个阈值,因此报错RECORD_FILE_FULL,这回kv_store这个map因为报错因此没有插入table信息。
const auto insert_result = kv_store.emplace(
std::piecewise_construct, std::forward_as_tuple(table_name),
std::forward_as_tuple(mysql_table, m_shared_block,
all_columns_are_fixed_size, per_table_limit));
ret = insert_result.second ? Result::OK : Result::TABLE_EXIST;
} catch (Result ex) {
ret = ex;
} catch (...) {
ret = Result::OUT_OF_MEM;
}
}
// 申请新的内存
struct Prefer_RAM_over_MMAP_policy_obeying_per_table_limit {
static Source block_source(uint32_t block_size,
TableResourceMonitor *table_resource_monitor) {
assert(table_resource_monitor);
assert(table_resource_monitor->consumption() <=
table_resource_monitor->threshold());
/* 以下各参数值:
table_resource_monitor->consumption() = 0 这个值为已经申请到的内存值
block_size = 1048576 这个值为当前新申请的内存值
table_resource_monitor->threshold() = 1342 这个值为当前tmp_table_size值
这个比较结果可以看出这次新申请的内存超出tmp_table_size值,因此这次返回Result::RECORD_FILE_FULL的错误。
*/
if (table_resource_monitor->consumption() + block_size >
table_resource_monitor->threshold())
throw Result::RECORD_FILE_FULL;
return Prefer_RAM_over_MMAP_policy::block_source(block_size);
}
};
接着继续执行sql,这次在FollowTailIterator
迭代器打开临时表,注意,这里临时表引擎还是temptable::Handler
而不是ha_innobase
,那是因为FollowTailIterator
迭代器用到的临时表跟上面MaterializeIterator
迭代器用到的临时表是在prepare阶段,通过Common_table_expr::clone_tmp_table
函数拷贝出来的,所以他们的handler虽然类型相同但是地址不同,只有table->share
相同。
打开这张临时表,在kv_store寻找临时表信息的时候找不到对应临时表信息,导致m_opened_table
为空,接着调用info()函数的时候core了。
实际上这时候FollowTailIterator
迭代器用到的临时表的引擎应该跟上面的临时表一样改为innodb引擎,因为这里引擎没有跟着改动,加上kv_store没有存放相关临时表信息,因此导致这里core了。
打开临时表代码流程:
MaterializeIterator<DummyIteratorProfiler>::Init -> aterializeIterator<DummyIteratorProfiler>::MaterializeRecursive
-> MaterializeIterator<DummyIteratorProfiler>::MaterializeQueryBlock
-> NestedLoopIterator::Init
-> FilterIterator::Init
-> FollowTailIterator::Init
-> open_tmp_table
-> handler::ha_open -> temptable::Handler::open
int Handler::open(const char *table_name, int, uint, const dd::Table *) {
try {
auto &kv_store = kv_store_shard[thd_thread_id(ha_thd())];
// 因为上面Handler::create创建的时候,kv_store没有插入临时表信息,因此这里找不到这张临时表的信息,导致这里m_opened_table为空。
m_opened_table = kv_store.find(table_name);
if (m_opened_table) {
ret = Result::OK;
opened_table_validate();
} else {
ret = Result::NO_SUCH_TABLE;
}
} catch (std::bad_alloc &) {
ret = Result::OUT_OF_MEM;
}
info(HA_STATUS_VARIABLE);
DBUG_RET(ret);
}
3、总结问题
根据上面分析可以知道,第二次修改tmp_table_size
值为较小数值之后core,原因在于申请内存的时候tmp_table_size
值超过阈值,因此在kv_store插入表信息失败,后面open临时表的时候,通过kv_store寻找这张表信息没有找到,因此最后m_opened_table
为空指针。
从上面判断阈值公式其实可以看出这里判断方法有问题,因为实际申请的是96大小,但是这里却用块大小1MB来进行阈值比较。
if (table_resource_monitor->consumption() + block_size >
table_resource_monitor->threshold())
throw Result::RECORD_FILE_FULL;
结合上面分析,我们发现问题原因在于内存申请额时候用的判断阈值的公式有问题,导致提前报错使kv_store没有插入相关临时表信息,同时CTE的其中一个相关临时表引擎改变了,但是其它层的临时表引擎没改,这两个原因导致最后的core。
我们用较新的8.4.4版本代码尝试运行该sql发现没有问题,因此可见最新代码已经修复该bug。查看代码,可以发现有一个如下修复patch:
Change-Id: I90d7374971bdfc1fc178801d0c793382c6ab8a49Bug #33814188 - Assertion consumption > threshold
MySQL 8.4.4版本代码作如下修复,就可以解决这个问题了。
修复storage/temptable/include/temptable/allocator.h文件的如下代码:
删除Prefer_RAM_over_MMAP_policy_obeying_per_table_limit::block_source函数,
申请内存的时候判断阈值从判断新增块大小改为判断实际新增内存大小,
这样第一次申请内存的时候就不会超过tmp_table_size,
在temptable::Handler::create可以正常向kv_store插入临时表信息。
同时在allocate()函数申请完内存之后增加如下阈值判断的操作。这样第二次申请内存的时候就会正常报错。
template <class T, class AllocationScheme>
inline T *Allocator<T, AllocationScheme>::allocate(size_t n_elements) {
// 增加申请内存之后执行判断阈值操作的代码。这里从原来的block_size改为n_bytes_requested来判断,
// 也就是从原来判断新增块大小改为判断实际新增内存大小,这样用实际新增大小来判断阈值。
if (m_table_resource_monitor.consumption() + n_bytes_requested(※注意这个改动) >
m_table_resource_monitor.threshold()) {
throw Result::RECORD_FILE_FULL;
}
}
修改之后的代码调用流程如下:
1、一开始创建正常handler=temptable::Handler临时表,并且向kv_store插入表信息成功。
2、通过handler::ha_write_row在向临时表写数据的时候,因为新申请的内存64kBytes超过了阈值
因此修改handler=ha_innobase,接着通过create_ondisk_from_heap函数创建落盘表。
3、create_ondisk_from_heap函数创建落盘表的时候会把之前CTE拷贝出来的相关表的引擎都一起改为
innodb表,因此接着FollowTailIterator迭代器的表执行open_tmp_table的时候打开的就跟
MaterializeIterator迭代器一样都是innodb表了,就不会有查找临时表handler的kv_store为空的情况了。
接着执行上面的查询,发现可以查出结果了。
greatsql> SET tmp_table_size=1342;
Query OK, 0 rows affected (0.00 sec)
greatsql> WITH RECURSIVE cte_tree AS ( SELECT 1 AS LEVEL, t.org_id,t.org_id as porg_id,user_name, nick_name, user_code FROM t2 t UNION ALL SELECT LEVEL + 1 AS LEVEL, pt.org_id,pt.porg_id,pt.ORG_NAME,pt.CONTACT_NAME,pt.ORG_CODE FROM t1 pt JOIN cte_tree ct ON pt.org_id = ct.porg_id ) SELECT a.* FROM cte_tree a limit 1;
+-------+--------+---------+-----------+-----------+-----------+
| LEVEL | org_id | porg_id | user_name | nick_name | user_code |
+-------+--------+---------+-----------+-----------+-----------+
| 1 | 1 | 1 | daizhong | NULL | daizhong |
+-------+--------+---------+-----------+-----------+-----------+
1 row in set (1 min 1.63 sec)
通过以上分析我们可以发现,带有CTE派生表的查询会在内部创建临时表用于储存中间数据,根据tmp_table_size
值设置会影响临时表存放地方,如果tmp_table_size
设置小了那么一开始就会从内存表改为创建落盘表,但是CTE内部涉及好几层迭代器,这时候每一层临时表的引擎都需要改,而导致core的代码只改了其中一层的临时表,没有把别的引擎一起改了,最后导致core。
通过分析可以发现,问题代码判断阈值的公式也有问题,因此才导致一开始其中一层临时表申请内存超过阈值判断错误,修改了引擎类型。
这个问题涉及了2个原因,一环套一环,分析类似问题的时候,需要持续深挖本质原因,同时结合查询计划的执行和临时表创建情况分析,还需要对内存申请过程熟悉才能更好解决问题。
合作电话:010-64087828
社区邮箱:greatsql@greatdb.com