easyexcel大数据多线程读取后导出
easyexcel
快速、简洁、解决大文件内存溢出的java处理Excel工具
项目地址:https://gitcode.com/gh_mirrors/ea/easyexcel
免费下载资源
·
最近在研究大数据的导入导出,由此想到了在管理系统中,excel导入导出都是常见操作,故我的操作由此开始。
首先是选择适合的工具类,alibaba开源的easyexcel比较好,处理大数据效率较高,故选用此工具,这是easyexcel官网
1.导入maven依赖
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.2.0</version>
</dependency>
2.配置好相应的实体类
package com.example.admin.domain;
import com.alibaba.excel.annotation.ExcelIgnore;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.annotation.format.DateTimeFormat;
import com.alibaba.excel.annotation.write.style.ColumnWidth;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import com.example.admin.execl.TimestampStringConverter;
import lombok.Data;
import java.io.Serializable;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Timestamp;
@Data
@TableName("example")//@TableName中的值对应着表名
public class Example implements Serializable {
private static final long serialVersionUID = 1L;
/**
* 主键
*/
@ExcelIgnore
@TableId(type = IdType.AUTO)
private Long id;
/**
* 姓名
*/
@ExcelProperty(value = "姓名", index = 0)
@ColumnWidth(16)
private String name;
/**
* 状态
*/
@ExcelIgnore
@TableLogic
private String status;
/**
* 创建时间
*/
@ExcelProperty(value = "创建时间", index = 1, converter = TimestampStringConverter.class)
@ColumnWidth(28)
private Timestamp createTime;
/**
* 更新时间
*/
@ExcelProperty(value = "更新时间", index = 2, converter = TimestampStringConverter.class)
@ColumnWidth(28)
private Timestamp updateTime;
/**
* 金额
*/
@ExcelProperty(value = "金额", index = 3)
@ColumnWidth(16)
private BigDecimal money;
/**
* 数量
*/
@ExcelProperty(value = "数量", index = 4)
@ColumnWidth(16)
private BigInteger num;
}
package com.example.admin.execl;
import com.alibaba.excel.converters.Converter;
import com.alibaba.excel.enums.CellDataTypeEnum;
import com.alibaba.excel.metadata.GlobalConfiguration;
import com.alibaba.excel.metadata.data.WriteCellData;
import com.alibaba.excel.metadata.property.ExcelContentProperty;
import com.alibaba.excel.util.DateUtils;
import java.sql.Timestamp;
/**
* @author yss
* @date 2023/3/3
*/
public class TimestampStringConverter implements Converter<Timestamp> {
@Override
public Class<?> supportJavaTypeKey() {
return Timestamp.class;
}
@Override
public CellDataTypeEnum supportExcelTypeKey() {
return CellDataTypeEnum.STRING;
}
@Override
public WriteCellData<?> convertToExcelData(Timestamp value, ExcelContentProperty contentProperty,
GlobalConfiguration globalConfiguration) {
WriteCellData cellData = new WriteCellData();
String cellValue;
if (contentProperty == null || contentProperty.getDateTimeFormatProperty() == null) {
cellValue = DateUtils.format(value.toLocalDateTime(), null, globalConfiguration.getLocale());
} else {
cellValue = DateUtils.format(value.toLocalDateTime(), contentProperty.getDateTimeFormatProperty().getFormat(),
globalConfiguration.getLocale());
}
cellData.setType(CellDataTypeEnum.STRING);
cellData.setStringValue(cellValue);
cellData.setData(cellValue);
return cellData;
}
}
3.多线程读取数据库数据,存放到一个map集合中,然后遍历集合写入到excel;注意 easyexcel不支持多线程写入。
@Autowired
@Qualifier("excelThreadPool")
private ThreadPoolTaskExecutor threadPoolTaskExecutor;
@Test
public void contextTest(){
// 每页多少条数据
int pageSize = 50000;
// 导出文件路径
String fileName = "d:\\test.xlsx";
// 查询条件
QueryWrapper<Example> queryWrapper = new QueryWrapper<>();
Long start = System.currentTimeMillis();
Long count = exampleMapper.selectCount(queryWrapper);
Long sheetNum = count % pageSize == 0 ? count / pageSize:count / pageSize + 1;
// 多线程去读
// 1.初始化map容量 防止扩容带来的效率损耗
Map<Integer, Page<Example>> pageMap = new ConcurrentHashMap<>(Math.toIntExact(sheetNum));
CountDownLatch countDownLatch = new CountDownLatch(Math.toIntExact(sheetNum));
// 注意 easyexcel 暂时不支持多线程并发写入!!! 详情请看github上issues
for (int i = 0 ;i< sheetNum;i++){
int finali = i;
threadPoolTaskExecutor.submit(()->{
Page<Example> page = new Page<>();
page.setCurrent(finali + 1);
page.setSize(pageSize);
// 获取数据存放到map中
Page<Example> selectPage = exampleMapper.selectPage(page,queryWrapper);
pageMap.put(finali,selectPage);
// 消耗掉一个
countDownLatch.countDown();
});
}
try {
countDownLatch.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
// 写入
try (ExcelWriter excelWriter = EasyExcel.write(fileName, Example.class).build()) {
pageMap.forEach((k,v)->{
log.info("正在写入{}条数据",pageSize);
WriteSheet writeSheet = EasyExcel.writerSheet(k, "第"+(k+1)+"批数据").build();
excelWriter.write(v.getRecords(), writeSheet);
// 写完当前数据立刻删除 不删除会产生内存泄漏即无法回收Map中巨大的空间 导致oom
pageMap.remove(k);
});
excelWriter.finish();
}
Long end = System.currentTimeMillis();
log.info("耗时"+(end-start)+"ms");
}
线程池操作类
package com.example.admin.utils;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.util.concurrent.ThreadPoolExecutor;
/**
* @author yss
* @date 2023/3/3
*/
@Configuration
public class ConcurrentThreadGlobalConfig {
@Bean("excelThreadPool")
public ThreadPoolTaskExecutor defaultThreadPool() {
ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
//核心线程数目
executor.setCorePoolSize(Runtime.getRuntime().availableProcessors()+1);
//指定最大线程数
executor.setMaxPoolSize(Runtime.getRuntime().availableProcessors()+1);
//队列中最大的数目
executor.setQueueCapacity(650);
//线程名称前缀
executor.setThreadNamePrefix("DefaultThreadPool_");
//rejection-policy:当pool已经达到max size的时候,如何处理新任务
//CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行
//对拒绝task的处理策略
executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
//线程空闲后的最大存活时间
executor.setKeepAliveSeconds(60);
//加载
executor.initialize();
return executor;
}
}
导出1000w条数据,用时5分钟左右,这个效率还是可以的
这是导出的操作,明天继续研究大数据导入操作!!!
GitHub 加速计划 / ea / easyexcel
14
5
下载
快速、简洁、解决大文件内存溢出的java处理Excel工具
最近提交(Master分支:4 个月前 )
c42183df
Bugfix 4 个月前
efa7dff6 * 重新加回 `commons-io`
4 个月前
更多推荐
已为社区贡献6条内容
所有评论(0)