最近在研究大数据的导入导出,由此想到了在管理系统中,excel导入导出都是常见操作,故我的操作由此开始。
首先是选择适合的工具类,alibaba开源的easyexcel比较好,处理大数据效率较高,故选用此工具,这是easyexcel官网
1.导入maven依赖

<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.2.0</version>
</dependency>

2.配置好相应的实体类

package com.example.admin.domain;


import com.alibaba.excel.annotation.ExcelIgnore;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.annotation.format.DateTimeFormat;
import com.alibaba.excel.annotation.write.style.ColumnWidth;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import com.example.admin.execl.TimestampStringConverter;
import lombok.Data;

import java.io.Serializable;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Timestamp;

@Data
@TableName("example")//@TableName中的值对应着表名
public class Example implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * 主键
     */
    @ExcelIgnore
    @TableId(type = IdType.AUTO)
    private Long id;

    /**
     * 姓名
     */
    @ExcelProperty(value = "姓名", index = 0)
    @ColumnWidth(16)
    private String name;

    /**
     * 状态
     */
    @ExcelIgnore
    @TableLogic
    private String status;

    /**
     * 创建时间
     */
    @ExcelProperty(value = "创建时间", index = 1, converter = TimestampStringConverter.class)
    @ColumnWidth(28)
    private Timestamp createTime;

    /**
     * 更新时间
     */
    @ExcelProperty(value = "更新时间", index = 2, converter = TimestampStringConverter.class)
    @ColumnWidth(28)
    private Timestamp updateTime;

    /**
     * 金额
     */
    @ExcelProperty(value = "金额", index = 3)
    @ColumnWidth(16)
    private BigDecimal money;

    /**
     * 数量
     */
    @ExcelProperty(value = "数量", index = 4)
    @ColumnWidth(16)
    private BigInteger num;

}

Timestamp需要类型转化,具体参考大神博客大神博客

package com.example.admin.execl;

import com.alibaba.excel.converters.Converter;
import com.alibaba.excel.enums.CellDataTypeEnum;
import com.alibaba.excel.metadata.GlobalConfiguration;
import com.alibaba.excel.metadata.data.WriteCellData;
import com.alibaba.excel.metadata.property.ExcelContentProperty;
import com.alibaba.excel.util.DateUtils;

import java.sql.Timestamp;

/**
 * @author yss
 * @date 2023/3/3
 */
public class TimestampStringConverter implements Converter<Timestamp> {

    @Override
    public Class<?> supportJavaTypeKey() {
        return Timestamp.class;
    }

    @Override
    public CellDataTypeEnum supportExcelTypeKey() {
        return CellDataTypeEnum.STRING;
    }

    @Override
    public WriteCellData<?> convertToExcelData(Timestamp value, ExcelContentProperty contentProperty,
                                               GlobalConfiguration globalConfiguration) {
        WriteCellData cellData = new WriteCellData();
        String cellValue;
        if (contentProperty == null || contentProperty.getDateTimeFormatProperty() == null) {
            cellValue = DateUtils.format(value.toLocalDateTime(), null, globalConfiguration.getLocale());
        } else {
            cellValue = DateUtils.format(value.toLocalDateTime(), contentProperty.getDateTimeFormatProperty().getFormat(),
                    globalConfiguration.getLocale());
        }
        cellData.setType(CellDataTypeEnum.STRING);
        cellData.setStringValue(cellValue);
        cellData.setData(cellValue);
        return cellData;
    }
}

3.多线程读取数据库数据,存放到一个map集合中,然后遍历集合写入到excel;注意 easyexcel不支持多线程写入。


    @Autowired
    @Qualifier("excelThreadPool")
    private ThreadPoolTaskExecutor threadPoolTaskExecutor;

    @Test
    public void contextTest(){
        // 每页多少条数据
        int pageSize = 50000;
        // 导出文件路径
        String fileName = "d:\\test.xlsx";
        // 查询条件
        QueryWrapper<Example> queryWrapper = new QueryWrapper<>();
        Long start = System.currentTimeMillis();
        Long count = exampleMapper.selectCount(queryWrapper);
        Long sheetNum = count % pageSize == 0 ? count / pageSize:count / pageSize + 1;
        // 多线程去读
        // 1.初始化map容量 防止扩容带来的效率损耗
        Map<Integer, Page<Example>> pageMap = new ConcurrentHashMap<>(Math.toIntExact(sheetNum));
        CountDownLatch countDownLatch = new CountDownLatch(Math.toIntExact(sheetNum));
        // 注意 easyexcel 暂时不支持多线程并发写入!!! 详情请看github上issues
        for (int i = 0 ;i< sheetNum;i++){
            int finali = i;
            threadPoolTaskExecutor.submit(()->{
                Page<Example> page = new Page<>();
                page.setCurrent(finali + 1);
                page.setSize(pageSize);
                // 获取数据存放到map中
                Page<Example> selectPage = exampleMapper.selectPage(page,queryWrapper);
                pageMap.put(finali,selectPage);
                // 消耗掉一个
                countDownLatch.countDown();
            });
        }
        try {
            countDownLatch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        // 写入
        try (ExcelWriter excelWriter = EasyExcel.write(fileName, Example.class).build()) {
            pageMap.forEach((k,v)->{
                log.info("正在写入{}条数据",pageSize);
                WriteSheet writeSheet = EasyExcel.writerSheet(k, "第"+(k+1)+"批数据").build();
                excelWriter.write(v.getRecords(), writeSheet);
                // 写完当前数据立刻删除  不删除会产生内存泄漏即无法回收Map中巨大的空间 导致oom
                pageMap.remove(k);
            });
            excelWriter.finish();
        }
        Long end = System.currentTimeMillis();
        log.info("耗时"+(end-start)+"ms");

    }

线程池操作类

package com.example.admin.utils;

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.ThreadPoolExecutor;

/**
 * @author yss
 * @date 2023/3/3
 */
@Configuration
public class ConcurrentThreadGlobalConfig {
    @Bean("excelThreadPool")
    public ThreadPoolTaskExecutor defaultThreadPool() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        //核心线程数目
        executor.setCorePoolSize(Runtime.getRuntime().availableProcessors()+1);
        //指定最大线程数
        executor.setMaxPoolSize(Runtime.getRuntime().availableProcessors()+1);
        //队列中最大的数目
        executor.setQueueCapacity(650);
        //线程名称前缀
        executor.setThreadNamePrefix("DefaultThreadPool_");
        //rejection-policy:当pool已经达到max size的时候,如何处理新任务
        //CALLER_RUNS:不在新线程中执行任务,而是由调用者所在的线程来执行
        //对拒绝task的处理策略
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        //线程空闲后的最大存活时间
        executor.setKeepAliveSeconds(60);
        //加载
        executor.initialize();

        return executor;
    }
}

导出1000w条数据,用时5分钟左右,这个效率还是可以的
在这里插入图片描述
在这里插入图片描述
这是导出的操作,明天继续研究大数据导入操作!!!

GitHub 加速计划 / ea / easyexcel
14
5
下载
快速、简洁、解决大文件内存溢出的java处理Excel工具
最近提交(Master分支:4 个月前 )
c42183df Bugfix 4 个月前
efa7dff6 * 重新加回 `commons-io` 4 个月前
Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐