package io.dataease.commons.constants;

/**
 * String constants identifying how a dataset's data is served.
 *
 * The values are stored as strings (not an enum) because they are persisted
 * in the dataset table's {@code mode} column and compared as raw strings
 * elsewhere in the codebase — keep them as-is for compatibility.
 */
public final class DatasetMode {

    /** Data is extracted (synchronized) into local storage and queried there. */
    public static final String EXTRACT = "1";

    /** Data is queried directly against the originating datasource. */
    public static final String DIRECT = "0";

    private DatasetMode() {
        // constants holder — not instantiable
    }
}

package io.dataease.config;

import com.fit2cloud.autoconfigure.QuartzAutoConfiguration;
import org.springframework.boot.autoconfigure.AutoConfigureBefore;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.env.Environment;

import javax.annotation.Resource;

/**
 * Builds the HBase client configuration bean from application properties.
 *
 * Ordered before {@link QuartzAutoConfiguration} so the HBase configuration
 * is available to scheduled extraction jobs when they are wired up.
 */
@Configuration
@AutoConfigureBefore(QuartzAutoConfiguration.class)
public class HbaseConfig {

    @Resource
    private Environment env; // holds the application's configuration properties

    /**
     * Creates the Hadoop/HBase {@code Configuration} from the
     * {@code hbase.*} application properties. Registered only when no other
     * {@code org.apache.hadoop.conf.Configuration} bean is present.
     *
     * NOTE(review): {@code hbase.zookeeper.quorum} and
     * {@code hbase.zookeeper.property.clientPort} have no defaults here —
     * if they are absent from the environment, {@code getProperty} returns
     * null and the {@code set} call will fail; presumably they are mandatory
     * properties — confirm against the deployment configuration.
     */
    @Bean
    @ConditionalOnMissingBean
    public org.apache.hadoop.conf.Configuration configuration() {
        org.apache.hadoop.conf.Configuration configuration = new org.apache.hadoop.conf.Configuration();
        configuration.set("hbase.zookeeper.quorum", env.getProperty("hbase.zookeeper.quorum"));
        configuration.set("hbase.zookeeper.property.clientPort", env.getProperty("hbase.zookeeper.property.clientPort"));
        // default to a single retry so a misconfigured cluster fails fast
        configuration.set("hbase.client.retries.number", env.getProperty("hbase.client.retries.number", "1"));
        return configuration;
    }
}
a/backend/src/main/java/io/dataease/job/sechedule/ExtractDataJob.java b/backend/src/main/java/io/dataease/job/sechedule/ExtractDataJob.java new file mode 100644 index 0000000000..2704f33a02 --- /dev/null +++ b/backend/src/main/java/io/dataease/job/sechedule/ExtractDataJob.java @@ -0,0 +1,22 @@ +package io.dataease.job.sechedule; + +import io.dataease.commons.utils.CommonBeanFactory; +import io.dataease.service.dataset.ExtractDataService; +import org.quartz.JobExecutionContext; +import org.springframework.stereotype.Component; + +@Component +public class ExtractDataJob extends DeScheduleJob{ + private ExtractDataService extractDataService; + + public ExtractDataJob() { + extractDataService = (ExtractDataService) CommonBeanFactory.getBean(ExtractDataService.class); + } + + + @Override + void businessExecute(JobExecutionContext context) { + extractDataService.extractData(datasetTableId, taskId); + } + +} diff --git a/backend/src/main/java/io/dataease/service/dataset/ExtractDataService.java b/backend/src/main/java/io/dataease/service/dataset/ExtractDataService.java new file mode 100644 index 0000000000..037fe397d5 --- /dev/null +++ b/backend/src/main/java/io/dataease/service/dataset/ExtractDataService.java @@ -0,0 +1,93 @@ +package io.dataease.service.dataset; + +import com.google.gson.Gson; +import io.dataease.base.domain.DatasetTable; +import io.dataease.base.domain.DatasetTableField; +import io.dataease.base.domain.DatasetTableTaskLog; +import io.dataease.commons.constants.JobStatus; +import io.dataease.commons.utils.CommonBeanFactory; +import io.dataease.dto.dataset.DataTableInfoDTO; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.*; +import org.springframework.stereotype.Service; + +import javax.annotation.Resource; +import java.util.List; +import java.util.UUID; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +@Service +public class 
ExtractDataService { + + @Resource + private DataSetTableService dataSetTableService; + @Resource + private DataSetTableFieldsService dataSetTableFieldsService; + @Resource + private DataSetTableTaskLogService dataSetTableTaskLogService; + private Long pageSize = 10000l; + private static ExecutorService pool = Executors.newScheduledThreadPool(50); //设置连接池 + private Connection connection; + + public void extractData(String datasetTableId, String taskId) { + DatasetTableTaskLog datasetTableTaskLog = new DatasetTableTaskLog(); + try { + datasetTableTaskLog.setTableId(datasetTableId); + datasetTableTaskLog.setTaskId(taskId); + datasetTableTaskLog.setStatus(JobStatus.Underway.name()); + datasetTableTaskLog.setStartTime(System.currentTimeMillis()); + dataSetTableTaskLogService.save(datasetTableTaskLog); + Admin admin = getConnection().getAdmin(); + DatasetTable datasetTable = dataSetTableService.get(datasetTableId); + List datasetTableFields = dataSetTableFieldsService.list(DatasetTableField.builder().tableId(datasetTable.getId()).build()); + String table = new Gson().fromJson(datasetTable.getInfo(), DataTableInfoDTO.class).getTable(); + TableName tableName = TableName.valueOf(table + "-" + datasetTable.getDataSourceId()); + if(!admin.tableExists(tableName)){ + TableDescriptorBuilder descBuilder = TableDescriptorBuilder.newBuilder(tableName); + ColumnFamilyDescriptor hcd = ColumnFamilyDescriptorBuilder.of("cf"); + descBuilder.setColumnFamily(hcd); + TableDescriptor desc = descBuilder.build(); + admin.createTable(desc); + } + admin.disableTable(tableName); + admin.truncateTable(tableName, true); + + Table tab = getConnection().getTable(tableName); + Long total = dataSetTableService.getDataSetTotalData(datasetTable.getDataSourceId(), table); + Long pageCount = total % pageSize == 0 ? 
total / pageSize : (total / pageSize) + 1; + + for (Long pageIndex = 1l; pageIndex <= pageCount; pageIndex++) { + List data = dataSetTableService.getDataSetPageData(datasetTable.getDataSourceId(), table, datasetTableFields, pageIndex, pageSize); + for (String[] d : data) { + for(int i=0;i org.springframework.boot spring-boot-starter-parent - 2.4.3 - + dataease backend + frontend