1. 创建分区表,和同样结构的非分区表;
2. 用存储过程模拟800万条数据分别存入两张表;
3. 通过比较同样的查询语句耗时来验证两者的性能区别;
-----创建非分区表
DROP TABLE IF EXISTS `test_non`;
CREATE TABLE `test_non` (
`dt_location_time` datetime NOT NULL,
`vc_plate` varchar(10) NOT NULL,
`dec_gpsspeed` decimal(6,3) DEFAULT NULL,
`dec_gpsmileage` decimal(10,3) DEFAULT NULL,
`dec_altitude` decimal(10,2) DEFAULT NULL,
`int_direction` int(11) DEFAULT NULL,
`dec_latitude` decimal(18,15) DEFAULT NULL,
`dec_longitude` decimal(18,15) DEFAULT NULL,
`dec_offset_latitude` decimal(18,15) DEFAULT NULL,
`dec_offset_longitude` decimal(18,15) DEFAULT NULL,
`dec_total_engine_worktime` decimal(10,1) DEFAULT NULL,
`dec_total_engine_revolution` decimal(10,1) DEFAULT NULL,
`dec_oil_temperature` decimal(5,2) DEFAULT NULL,
`dec_water_temperature` decimal(5,2) DEFAULT NULL,
`dec_instant_fuel_economy` decimal(6,3) DEFAULT NULL,
`dec_instant_fuel_consume` decimal(6,3) DEFAULT NULL,
`dec_speed` decimal(6,3) DEFAULT NULL,
`dec_engine_revolution` decimal(10,3) DEFAULT NULL,
`dec_oil_pressure` decimal(6,3) DEFAULT NULL,
`dec_battery_voltage` decimal(6,3) DEFAULT NULL,
`dec_total_fuel_consume` decimal(10,3) DEFAULT NULL,
PRIMARY KEY (`dt_location_time`,`vc_plate`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
-----插入800万条数据(阿里云RDS耗时2.5小时)
drop procedure if exists pro_load_test_non;
create procedure pro_load_test_non()
begin
declare v int default 0;
while(v < 8000000)
do
insert into test_non
(
dt_location_time,
vc_plate,
dec_latitude
)values(
DATE_ADD('2015-10-01',INTERVAL v SECOND),
CONCAT('A_',v),
RAND()
);
set v = v + 1;
end while;
end
-----创建分区表
DROP TABLE IF EXISTS `test_par`;
CREATE TABLE `test_par` (
`dt_location_time` datetime NOT NULL,
`vc_plate` varchar(10) NOT NULL,
`dec_gpsspeed` decimal(6,3) DEFAULT NULL,
`dec_gpsmileage` decimal(10,3) DEFAULT NULL,
`dec_altitude` decimal(10,2) DEFAULT NULL,
`int_direction` int(11) DEFAULT NULL,
`dec_latitude` decimal(18,15) DEFAULT NULL,
`dec_longitude` decimal(18,15) DEFAULT NULL,
`dec_offset_latitude` decimal(18,15) DEFAULT NULL,
`dec_offset_longitude` decimal(18,15) DEFAULT NULL,
`dec_total_engine_worktime` decimal(10,1) DEFAULT NULL,
`dec_total_engine_revolution` decimal(10,1) DEFAULT NULL,
`dec_oil_temperature` decimal(5,2) DEFAULT NULL,
`dec_water_temperature` decimal(5,2) DEFAULT NULL,
`dec_instant_fuel_economy` decimal(6,3) DEFAULT NULL,
`dec_instant_fuel_consume` decimal(6,3) DEFAULT NULL,
`dec_speed` decimal(6,3) DEFAULT NULL,
`dec_engine_revolution` decimal(10,3) DEFAULT NULL,
`dec_oil_pressure` decimal(6,3) DEFAULT NULL,
`dec_battery_voltage` decimal(6,3) DEFAULT NULL,
`dec_total_fuel_consume` decimal(10,3) DEFAULT NULL,
PRIMARY KEY (`dt_location_time`,`vc_plate`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
PARTITION BY RANGE (to_days(dt_location_time))
(
PARTITION par_201510 VALUES LESS THAN (to_days('2015-11-01')) ENGINE = InnoDB,
PARTITION par_201511 VALUES LESS THAN (to_days('2015-12-01')) ENGINE = InnoDB,
PARTITION par_201512 VALUES LESS THAN (to_days('2016-01-01')) ENGINE = InnoDB,
PARTITION par_201601 VALUES LESS THAN (to_days('2016-02-01')) ENGINE = InnoDB,
PARTITION par_201602 VALUES LESS THAN (to_days('2016-03-01')) ENGINE = InnoDB,
PARTITION par_201603 VALUES LESS THAN (to_days('2016-04-01')) ENGINE = InnoDB,
PARTITION par_catch_all VALUES LESS THAN MAXVALUE ENGINE = InnoDB
);
注意,partition by range 中仅可以使用以下函数:
ABS()
CEILING() (see CEILING() and FLOOR(), immediately following this list)
DAY()
DAYOFMONTH()
DAYOFWEEK()
DAYOFYEAR()
DATEDIFF()
EXTRACT()
FLOOR() (see CEILING() and FLOOR(), immediately following this list)
HOUR()
MICROSECOND()
MINUTE()
MOD()
MONTH()
QUARTER()
SECOND()
TIME_TO_SEC()
TO_DAYS()
WEEKDAY()
YEAR()
YEARWEEK()
-----插入800万条数据
drop procedure if exists pro_load_test_par;
create procedure pro_load_test_par()
begin
declare v int default 0;
while(v < 8000000)
do
insert into test_par
(
dt_location_time,
vc_plate,
dec_latitude
)values(
DATE_ADD('2015-10-01',INTERVAL v SECOND),
CONCAT('A_',v),
RAND()
);
set v = v + 1;
end while;
end
如何验证表分区是否有效呢?
SELECT
partition_name part,
partition_expression expr,
partition_description descr,
table_rows
FROM
INFORMATION_SCHEMA.partitions
WHERE
TABLE_SCHEMA = schema()
AND TABLE_NAME='test_par';
执行上面的语句,可以看到表分区的相关情况。
性能验证:
查询一个时间段的数据用时:
SELECT COUNT(*) from test_non; //800万用时1.7s,1.74s,1.74s
SELECT COUNT(*) from test_par; //800万用时1.85s,1.79s,1.76s
SELECT COUNT(*) from test_non where DATE(dt_location_time) > '2015-10-01' and DATE(dt_location_time) < '2015-10-11';
// 查出777600条,3.49s,3.05s
SELECT COUNT(*) from test_par where DATE(dt_location_time) > '2015-10-01' and DATE(dt_location_time) < '2015-10-11';
// 查出777600条,3.10s,3.19s
SELECT COUNT(*) from test_non where DATE(dt_location_time) > '2015-10-01' and DATE(dt_location_time) < '2015-12-11';
//查出6048000条,3.183s,3.245s,3.151s
SELECT COUNT(*) from test_par where DATE(dt_location_time) > '2015-10-01' and DATE(dt_location_time) < '2015-12-11';
//查出6048000条,3.26s,3.448s,3.291s
查询一条数据用时:
SELECT * from test_non where vc_plate = 'A_9'; //7.254S, 7.317S,7.176S
SELECT * from test_par where vc_plate = 'A_9'; //7.254S, 7.441S,7.348S
SELECT * from test_non where DATE(dt_location_time) = '2015-10-01';//查出86400条,用时10.218s, 9.048s
SELECT * from test_par where DATE(dt_location_time) = '2015-10-01'; //查出86400条,用时9.5s, 9.656s
SELECT * from test_non where DATE(dt_location_time) = '2015-10-01' and vc_plate = 'A_9'; //7.551s, 7.519s
SELECT * from test_par where DATE(dt_location_time) = '2015-10-01' and vc_plate = 'A_9'; // 7.737s, 7.753s
结论:
使用表分区后,性能并没有提高;