[英]Why doesn't the MySQL optimizer use all columns of the index?
Percona MySQL 5.7
表結構(schema):
-- Rate: BID/ASK values keyed by an auto-increment ID; IDX_UK additionally
-- allows at most one row per (BASE, QUOTE, TIME) combination.
CREATE TABLE Developer.Rate (
    ID    bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
    TIME  datetime            NOT NULL,
    BASE  varchar(3)          NOT NULL,  -- e.g. 'EUR' in the queries below
    QUOTE varchar(3)          NOT NULL,  -- e.g. 'USD' in the queries below
    BID   double              NOT NULL,
    ASK   double              NOT NULL,
    PRIMARY KEY (ID),
    -- Single-column secondary index on TIME alone.
    INDEX IDX_TIME (TIME),
    -- Composite unique secondary index; column order is BASE, QUOTE, TIME.
    UNIQUE INDEX IDX_UK (BASE, QUOTE, TIME)
)
    ENGINE = INNODB
    ROW_FORMAT = COMPRESSED;
我嘗試查詢選定時間點之前的最新一筆數據。優化器(optimizer)沒有完整使用唯一鍵,只用了3列中的前2列。
如果我以一般的方式進行查詢:
-- Plan for: newest EUR/USD row whose TIME is at or before "one month ago".
-- No index hint is given, so the optimizer picks the access path itself.
EXPLAIN FORMAT=JSON
SELECT BID
FROM Rate
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY `TIME` DESC
LIMIT 1
;
EXPLAIN 的輸出顯示只使用了索引的前2列:BASE、QUOTE
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "10231052.40"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate",
"access_type": "ref",
"possible_keys": [
"IDX_UK",
"IDX_TIME"
],
"key": "IDX_UK",
"used_key_parts": [
"BASE",
"QUOTE"
],
"key_length": "22",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 45966462,
"rows_produced_per_join": 22983231,
"filtered": "50.00",
"cost_info": {
"read_cost": "1037760.00",
"eval_cost": "4596646.20",
"prefix_cost": "10231052.40",
"data_read_per_join": "1G"
},
"used_columns": [
"ID",
"TIME",
"BASE",
"QUOTE",
"BID"
],
"attached_condition": "((`Developer`.`Rate`.`BASE` <=> 'EUR') and (`Developer`.`Rate`.`QUOTE` <=> 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))"
}
}
}
}
但是如果你強制優化器使用IDX_UK,MySQL會使用請求中的所有3列:
-- Same query as the unhinted version, but the table reference carries
-- FORCE INDEX(IDX_UK) so only IDX_UK is offered to the optimizer.
EXPLAIN FORMAT=JSON
SELECT BID
FROM Rate FORCE INDEX(IDX_UK)
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY `TIME` DESC
LIMIT 1
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "10231052.40"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate",
"access_type": "range",
"possible_keys": [
"IDX_UK"
],
"key": "IDX_UK",
"used_key_parts": [
"BASE",
"QUOTE",
"TIME"
],
"key_length": "27",
"rows_examined_per_scan": 45966462,
"rows_produced_per_join": 15320621,
"filtered": "100.00",
"index_condition": "((`Developer`.`Rate`.`BASE` = 'EUR') and (`Developer`.`Rate`.`QUOTE` = 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))",
"cost_info": {
"read_cost": "1037760.00",
"eval_cost": "3064124.31",
"prefix_cost": "10231052.40",
"data_read_per_join": "818M"
},
"used_columns": [
"ID",
"TIME",
"BASE",
"QUOTE",
"BID"
]
}
}
}
}
為什么優化器在沒有明確聲明索引的情況下不使用所有3列?
添加:
我的理解正確嗎?我應該使用這樣的查詢嗎?
查詢示例:
-- Variant that lists every IDX_UK column in ORDER BY, all descending.
-- BASE and QUOTE are fixed by equality in WHERE, so only TIME affects order.
EXPLAIN FORMAT=JSON
SELECT BID
FROM Rate
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY BASE DESC,
         QUOTE DESC,
         TIME DESC
LIMIT 1
如果我理解正確的話,EXPLAIN 的輸出並沒有改善:仍然只使用了2列,TIME 沒有被使用。
EXPLAIN 輸出:
{ "query_block": { "select_id": 1, "cost_info": { "query_cost": "10384642.20" }, "ordering_operation": { "using_filesort": false, "table": { "table_name": "Rate", "access_type": "ref", "possible_keys": [ "IDX_UK", "IDX_TIME" ], "key": "IDX_UK", "used_key_parts": [ "BASE", "QUOTE" ], "key_length": "22", "ref": [ "const", "const" ], "rows_examined_per_scan": 46734411, "rows_produced_per_join": 23367205, "filtered": "50.00", "index_condition": "((`Developer`.`Rate`.`BASE` <=> 'EUR') and (`Developer`.`Rate`.`QUOTE` <=> 'USD') and (`Developer`.`Rate`.`TIME` <= <cache>((now() - interval 1 month))))", "cost_info": { "read_cost": "1037760.00", "eval_cost": "4673441.10", "prefix_cost": "10384642.20", "data_read_per_join": "1G" }, "used_columns": [ "ID", "TIME", "BASE", "QUOTE", "BID" ] } } } }
新增2:
我做了這4個請求:
- 1 -
<code>FLUSH STATUS;
-- Request 1: no index hint and no ORDER BY, so which matching row
-- LIMIT 1 returns is not defined by the query.
SELECT BID
FROM Rate
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';</code>
- 2 -
<code>FLUSH STATUS;
-- Request 2: FORCE INDEX (IDX_UK) and no ORDER BY, so which matching row
-- LIMIT 1 returns is not defined by the query.
SELECT BID
FROM Rate FORCE INDEX (IDX_UK)
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';
</code>
- 3 -
<code>FLUSH STATUS;
-- Request 3: no index hint; ORDER BY `TIME` DESC makes LIMIT 1 return the
-- newest row at or before the one-month cutoff.
SELECT BID
FROM Rate
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY `TIME` DESC
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';</code>
- 4 -
<code>
FLUSH STATUS;
-- Request 4: FORCE INDEX (IDX_UK) combined with ORDER BY `TIME` DESC;
-- LIMIT 1 returns the newest row at or before the one-month cutoff.
SELECT BID
FROM Rate FORCE INDEX (IDX_UK)
WHERE BASE = 'EUR'
  AND QUOTE = 'USD'
  AND `TIME` <= (NOW() - INTERVAL 1 MONTH)
ORDER BY `TIME` DESC
LIMIT 1;
SHOW SESSION STATUS LIKE 'Handler%';</code>
除請求3之外,所有請求的 SHOW SESSION STATUS 輸出都相同。在請求3的輸出中:Handler_read_prev = 486474;在所有其他請求的輸出中:Handler_read_prev = 0;
補充3:
我制作了表的副本,刪除了Id字段,將UNIQUE鍵提升為PRIMARY。
方案:
-- Rate2: same TIME/BASE/QUOTE/BID/ASK columns as Rate but with no ID column;
-- (BASE, QUOTE, TIME) is the primary key instead of a secondary unique index.
CREATE TABLE Developer.Rate2 (
    TIME  datetime   NOT NULL,
    BASE  varchar(3) NOT NULL,
    QUOTE varchar(3) NOT NULL,
    BID   double     NOT NULL,
    ASK   double     NOT NULL,
    PRIMARY KEY (BASE, QUOTE, TIME),
    -- Secondary index over the two price columns.
    INDEX IDX_BID_ASK (BID, ASK)
)
    ENGINE = INNODB
    AVG_ROW_LENGTH = 26
    CHARACTER SET utf8
    COLLATE utf8_general_ci
    ROW_FORMAT = COMPRESSED;
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "9673452.20"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "Rate2",
"access_type": "range",
"possible_keys": [
"PRIMARY"
],
"key": "PRIMARY",
"used_key_parts": [
"BASE",
"QUOTE",
"TIME"
],
"key_length": "27",
"rows_examined_per_scan": 48023345,
"rows_produced_per_join": 16006180,
"filtered": "100.00",
"cost_info": {
"read_cost": "68783.20",
"eval_cost": "3201236.12",
"prefix_cost": "9673452.20",
"data_read_per_join": "732M"
},
"used_columns": [
"TIME",
"BASE",
"QUOTE",
"BID"
],
"attached_condition": "((`Developer`.`Rate2`.`BASE` = 'EUR') and (`Developer`.`Rate2`.`QUOTE` = 'USD') and (`Developer`.`Rate2`.`TIME` <= <cache>((now() - interval 1 month))))"
}
}
}
}
現在請求確實有效,Explain顯示所有3列都被使用。 這種變體有效。
去掉 ID,它沒有用處。將您的 UNIQUE 鍵提升為 PRIMARY 鍵。這樣一來,查詢會神奇地變快,而您提出的問題也就不再重要了。(您可能還需要洛林建議的 DESC 技巧。)
這是另一種比較性能的技術:
-- Measurement template: compare how much handler-level work a query causes.
-- Reset the session status counters before running the query under test.
FLUSH STATUS;
-- Placeholder for the query being measured.
SELECT ...;
-- Show the Handler% counters for this session after the query has run.
SHOW SESSION STATUS LIKE 'Handler%';
我想看看使用與不使用 DESC 技巧時 SHOW 的輸出,以及使用與不使用您提到的 FORCE INDEX 時的輸出。
為什么更快?您的查詢使用的是二級索引,但它需要 bid,而該索引並未“覆蓋”這一列。要取得 bid,就必須再透過 PRIMARY KEY 向下鑽取到“數據”中。改為直接使用主鍵(PK)之後,就可以避免這次額外的向下鑽取。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.