// Lookup frames built from the distinct `partition` / `subpartition` values of idDF.
// `indexer` is defined elsewhere; presumably it attaches a numeric id column with the
// given name ("partitionId" / "subpartitionId") to each distinct value — TODO confirm.
val partitionDF = indexer(idDF.select("partition").distinct, "partitionId")
val subpartitionDF = indexer(idDF.select("subpartition").distinct, "subpartitionId")
然后把 dim 表中对应的字段替换成字段编号：
// Dim table with the `partition` and `subpartition` columns removed after joining in
// the two lookup frames (which presumably carry the replacement id columns — confirm
// against `indexer`). A join on a Seq of column names is Spark's inner equi-join, so
// dim rows without a match in either lookup frame are not kept.
val transformDimTableDF = {
  val withPartitionLookup = dimTableDF.join(partitionDF, Seq("partition"))
  val withBothLookups = withPartitionLookup.join(subpartitionDF, Seq("subpartition"))
  withBothLookups.drop("partition", "subpartition")
}
4.3 M    13.0 M   hdfs://bigdata01:8020/user/hive/warehouse/bilibili_dw.db/bilibili_data
49.1 K   147.4 K  hdfs://bigdata01:8020/user/hive/warehouse/bilibili_dw.db/dim
511      1.5 K    hdfs://bigdata01:8020/user/hive/warehouse/bilibili_dw.db/partition_id
1.3 K    4.0 K    hdfs://bigdata01:8020/user/hive/warehouse/bilibili_dw.db/subpartition_id
198.7 K  596.2 K  hdfs://bigdata01:8020/user/hive/warehouse/bilibili_dw.db/video_data
3.7 M    11.1 M   hdfs://bigdata01:8020/user/hive/warehouse/bilibili_dw.db/video_info