MyBatis+Spring SQL效率测试报告

2023-08-25,

1. 数据库结构

2. insert 测试

insert 的测试包括

1) 批量拼接values()插入

2) 有事务for循环插入

3) 无事务for循环插入

测试 SQL:

 <!-- 普通 insert -->
<insert id="insert"
parameterType="com.qunar.mybatistask.bean.Post"
keyProperty="id">
<![CDATA[ insert into post (
title,
content,
author,
status,
created
) values (
#{title},
#{content},
#{author},
#{status},
#{created}
) ]]>
</insert> <!-- 拼接values insert -->
<insert id="batchInsert"
parameterType="java.util.List">
<![CDATA[
insert into post (
title,
content,
author,
status,
created
) values
]]>
<foreach collection="list" item="post" separator=",">
(
#{post.title},
#{post.content},
#{post.author},
#{post.status},
#{post.created}
)
</foreach>
</insert>

测试代码:

service

    /**
* 批量拼接VALUES() insert
*
* @param postList
* @return
*/
@Override
@Transactional(propagation = Propagation.REQUIRED)
public int batchInsert(List<Post> postList) {
int singleNum = 1000;
int affectedRows = 0;
for (int i = 0; i < Math.ceil((double)(postList.size() / singleNum)); i++) {
affectedRows += sqlSession.insert("post.batchInsert", postList.subList(i * singleNum, (i + 1) * singleNum));
}
return affectedRows;
} /**
* 事务内循环insert
*
* @param postList
* @return
*/
@Override
@Transactional(propagation = Propagation.REQUIRED)
public int createList(List<Post> postList) {
int affectedRows = 0;
for (Post post : postList) {
affectedRows += sqlSession.insert("post.insert", post);
}
return affectedRows;
}

test case:

    /**
* 批量插入效率测试
*
* Method: batchInsert(List<Post> postList)
*
*/
@Test
public void testBatchInsert() throws Exception {
List<Post> postList = Lists.newArrayList();
for (int i = 0; i < 10000; i++) {
Post post = new Post();
post.setAuthor("test");
post.setContent("test");
post.setCreated(new Date());
post.setTitle("test");
post.setStatus(PostStatus.NORMAL);
postList.add(post);
} // 批量拼接SQL插入
long start = System.nanoTime();
int affectedRows = postService.batchInsert(postList);
double duration = System.nanoTime() - start;
System.out.format("batch: %.2f\n", duration / 1.0e9);
System.out.println("affected rows: " + affectedRows); // 事务内循环插入
start = System.nanoTime();
affectedRows = postService.createList(postList);
duration = System.nanoTime() - start;
System.out.format("transaction: %.2f\n", duration / 1.0e9);
System.out.println("affected rows: " + affectedRows); // 无事务直接循环插入
start = System.nanoTime();
affectedRows = 0;
for (Post post : postList)
affectedRows += postService.create(post);
duration = System.nanoTime() - start;
System.out.format("simple: %.2f\n", duration / 1.0e9);
System.out.println("affected rows: " + affectedRows);
}

结果

batch: 1.44
affected rows: 10000
transaction: 2.87
affected rows: 10000
simple: 77.57
affected rows: 10000

总结:

排行

1) 使用拼接的手段,这种插入其实就是batch,只不过这是手动batch

2) 使用事务循环插入,相对于无事务快很多的原因大概是数据库连接和事务开启的次数

3) 无事务循环插入, 我想应该没人这么写

2. 单表循环查询与拼接in查询测试

SQL

<select id="selectById" parameterType="int" resultType="com.qunar.mybatistask.bean.Post">
<![CDATA[ select
id,
title,
content,
author,
status,
created
from
post
where
id = #{id} ]]>
</select> <!-- 拼接where in条件查询 -->
<select id="selectIn" parameterType="java.util.List" resultType="com.qunar.mybatistask.bean.Post">
<![CDATA[
select
id,
title,
content,
author,
status,
created
from
post
where
id in
]]>
<foreach collection="list" item="id" open="(" close=")" separator=",">
#{id}
</foreach>
</select>

Service

    @Override
public Post selectById(int id) {
return sqlSession.selectOne("post.selectById", id);
} @Override
public List<Post> selectByIds(List<Integer> ids) {
List<Post> postList = Lists.newArrayList();
int singleNum = 1000;
int start;
int end;
for (int i = 0; i < Math.ceil(((double)ids.size() / (double)singleNum)); i++) {
start = i * singleNum;
end = (i + 1) * singleNum;
end = end > ids.size() ? ids.size() : end;
List<Post> result = sqlSession.selectList("post.selectIn", ids.subList(start, end));
postList.addAll(result);
}
return postList;
}

test case

    /**
* 使用IN查询效率测试
*
* @throws Exception
*/
@Test
public void testInSelect() throws Exception {
List<Integer> ids = Lists.newArrayList();
for (int i = 1; i < 10000; i++) {
ids.add(i);
} // in 查询
long start = System.nanoTime();
List<Post> list = postService.selectByIds(ids);
double duration = System.nanoTime() - start;
System.out.format("in select: %.2f\n", duration / 1.0e9);
System.out.println("list size: " + list.size()); // 循环查询
list = Lists.newArrayList();
start = System.nanoTime();
for (int id : ids)
list.add(postService.selectById(id));
duration = System.nanoTime() - start;
System.out.format("simple select: %.2f\n", duration / 1.0e9);
System.out.println("list size: " + list.size());
}

结果

in select: 0.55
list size: 9999
simple select: 6.24
list size: 9999

总结:

我想应该没人会用for循环去做查询吧

3. 多表联结查询, join, form 2个table, in, exists 比较

SQL

    <!-- 用于循环查询 -->
<select id="selectAll" resultType="com.qunar.mybatistask.bean.Comment">
<![CDATA[
select
cmt.id as id,
cmt.post_id as postId,
cmt.content as content
from
cmt
]]>
</select> <!-- join 查询 -->
<select id="selectJoin" resultType="com.qunar.mybatistask.bean.Comment">
<![CDATA[
select
cmt.id as id,
cmt.post_id as postId,
cmt.content as content
from
cmt
join
post
on
post.id = cmt.post_id
]]>
</select> <!-- from 2个table -->
<select id="selectTowTable" resultType="com.qunar.mybatistask.bean.Comment">
<![CDATA[
select
cmt.id as id,
cmt.post_id as postId,
cmt.content as content
from
cmt, post
where cmt.post_id = post.id
]]>
</select> <!-- in 联表查询 -->
<select id="selectIn" resultType="com.qunar.mybatistask.bean.Comment">
<![CDATA[
select
cmt.id as id,
cmt.post_id as postId,
cmt.content as content
from
cmt
where
cmt.post_id
in
(
select
post.id
from
post
)
]]>
</select> <!-- exists 联表查询 -->
<select id="selectExists" resultType="com.qunar.mybatistask.bean.Comment">
<![CDATA[
select
cmt.id as id,
cmt.post_id as postId,
cmt.content as content
from
cmt
where
exists
(
select
post.id
from
post
where
post.id = cmt.id
)
]]>
</select>

service

    @Override
public List<Comment> selectTwoTable() {
return sqlSession.selectList("comment.selectTowTable");
} @Override
public List<Comment> selectJoin() {
return sqlSession.selectList("comment.selectJoin");
} @Override
public List<Comment> selectIn() {
return sqlSession.selectList("comment.selectIn");
} @Override
public List<Comment> selectExists() {
return sqlSession.selectList("comment.selectExists");
} @Override
public List<Comment> selectAll() {
return sqlSession.selectList("comment.selectAll");
}

test case

    /**
* 测试JOIN查询效率
*
*/
@Test
public void testJoinSelect() {
// join 查询
long start = System.nanoTime();
List<Comment> list = commentService.selectJoin();
double duration = System.nanoTime() - start;
System.out.format("join select: %.2f\n", duration / 1.0e9);
System.out.println("list size: " + list.size()); // From 两个表查询
start = System.nanoTime();
list = commentService.selectTwoTable();
duration = System.nanoTime() - start;
System.out.format("2 table select: %.2f\n", duration / 1.0e9);
System.out.println("list size: " + list.size()); // in多表查询
start = System.nanoTime();
list = commentService.selectIn();
duration = System.nanoTime() - start;
System.out.format("in multi table select: %.2f\n", duration / 1.0e9);
System.out.println("list size: " + list.size()); // exists多表查询
start = System.nanoTime();
list = commentService.selectExists();
duration = System.nanoTime() - start;
System.out.format("exists multi table select: %.2f\n", duration / 1.0e9);
System.out.println("list size: " + list.size()); // 分次查询, 太慢了, 忽略这种方法的测试吧
// start = System.nanoTime();
// list = commentService.selectAll();
// for (Comment comment : list) {
// postService.selectById(comment.getPostId());
// }
// duration = System.nanoTime() - start;
// System.out.format("separate select: %.2f\n", duration / 1.0e9);
// System.out.println("list size: " + list.size());
}

结果

join select: 2.44
list size: 210000
2 table select: 2.26
list size: 210000
in multi table select: 2.03
list size: 210000
exists multi table select: 2.35
list size: 210000

总结:

21W条数据下效率都差不多,而且我们一般会使用limit去限制查询的条数,所以应该他们的效率差距应该很小,我通过观察explain发现实际上join和from 2个table的方式的查询的执行计划是一模一样的,而in和exists的执行计划也是一模一样的

这里的表结构相对简单,也基本能用上索引 post_id 和 post.id 这些primary, 具体更加复杂的情况也许会影响这几种查询方式的执行计划, 才会体现出他们直接的差距, 当然我也相信他们执行的效率很大程度上是决定于mysql的优化器的优化策略,而这个优化策略很难人为的去判断,所以也不好说

MyBatis+Spring SQL效率测试报告的相关教程结束。

《MyBatis+Spring SQL效率测试报告.doc》

下载本文的Word格式文档,以方便收藏与打印。