Optimize this code:

override def load(dataFrame: DataFrame, delta: Boolean): DataFrame = {
  val dataFrameCache = dataFrame.repartition(col("TRANSACTION_TYPE_CODE"))
  logger.info(s"Method Load execution start and data count is " + dataFrame.count())
  val rbkFactTableLists: Seq[RbkFactTableEntity] = rbkConfigUtil.getRbkFactTableList(rbkConfig)
  logger.info(s"Method rbkFactTableLists is " + rbkFactTableLists.size)
  val rbkMainFactTable = rbkConfigUtil.getRbkMainFactTable(rbkConfig)
  logger.info(s"Method rbkMainFactTable is " + rbkMainFactTable)
  val rbkUnionFactTableList = rbkConfigUtil.getRbkUnionFactTableList(rbkConfig)
  logger.info(s"Method rbkUnionFactTableList is " + rbkUnionFactTableList.size)
  loadRbkMainData(dataFrameCache, false, rbkMainFactTable)
  for (rbkUnionTable <- rbkUnionFactTableList) {
    loadRbkUnionData(dataFrameCache, false, rbkUnionTable)
  }
  for (rbkFactTable <- rbkFactTableLists) {
    loadRbkData(dataFrameCache, false, rbkFactTable)
  }
  logger.info(s"Method Load execution end and data count is " + dataFrame.count())
  dataFrameCache
}
override def load(dataFrame: DataFrame, delta: Boolean): DataFrame = {
  val dataFrameCache = dataFrame.repartition(col("TRANSACTION_TYPE_CODE"))
  val rbkFactTableLists: Seq[RbkFactTableEntity] = rbkConfigUtil.getRbkFactTableList(rbkConfig)
  val rbkMainFactTable = rbkConfigUtil.getRbkMainFactTable(rbkConfig)
  val rbkUnionFactTableList = rbkConfigUtil.getRbkUnionFactTableList(rbkConfig)
  loadRbkMainData(dataFrameCache, false, rbkMainFactTable)
  for (rbkUnionTable <- rbkUnionFactTableList) {
    loadRbkUnionData(dataFrameCache, false, rbkUnionTable)
  }
  for (rbkFactTable <- rbkFactTableLists) {
    loadRbkData(dataFrameCache, false, rbkFactTable)
  }
  dataFrameCache
}
Optimization suggestions:
1. Remove unnecessary logging.
2. Use the val keyword wherever possible.
3. Use more expressive, readable names.
4. Split complex functions into smaller ones (a sketch follows this list).
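As a rough illustration of suggestions 1, 3 and 4: each dataFrame.count() in the original triggers a full Spark action, so dropping those log lines already avoids two extra jobs, and the remaining work can be delegated to small, well-named helpers. The sketch below is only an outline under assumptions: it presumes the enclosing class still provides rbkConfigUtil, rbkConfig and the loadRbkMainData / loadRbkUnionData / loadRbkData methods from the original snippet, and the helper names (loadMainFactTable, loadUnionFactTables, loadFactTables) are invented for illustration.

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.col

// Sketch only: rbkConfigUtil, rbkConfig and the loadRbk*Data methods are assumed to be
// members of the enclosing class, exactly as in the original snippet.
override def load(dataFrame: DataFrame, delta: Boolean): DataFrame = {
  // Repartition once; every downstream load reuses the same partitioned DataFrame.
  val partitionedData = dataFrame.repartition(col("TRANSACTION_TYPE_CODE"))
  loadMainFactTable(partitionedData)
  loadUnionFactTables(partitionedData)
  loadFactTables(partitionedData)
  partitionedData
}

// Each helper owns one kind of target table, keeping load() short and readable.
private def loadMainFactTable(data: DataFrame): Unit =
  loadRbkMainData(data, false, rbkConfigUtil.getRbkMainFactTable(rbkConfig))

private def loadUnionFactTables(data: DataFrame): Unit =
  rbkConfigUtil.getRbkUnionFactTableList(rbkConfig)
    .foreach(table => loadRbkUnionData(data, false, table))

private def loadFactTables(data: DataFrame): Unit =
  rbkConfigUtil.getRbkFactTableList(rbkConfig)
    .foreach(table => loadRbkData(data, false, table))

Since the repartitioned DataFrame is consumed by several loads, it may also be worth persisting it before the helper calls and unpersisting it afterwards, depending on how expensive the upstream plan is.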