二百四十四、Hive——Hive中解析复杂JSON,既有对象还有数组,而且数组中包含数组
一、目的
由于协议修改,修改后的原始数据JSON结构更加复杂(对象中嵌套数组,数组中又嵌套数组),在Hive中解析的难度也更大。折腾了一整天,最后参考同事分享的一篇知乎文章才得以解决,实在不容易。
二、数据协议案例
{
"deviceNo": "39",
"sourceDeviceType": null,
"sn": null,
"model": null,
"createTime": "2024-07-16 07:30:00",
"data": {
"cycle": 300,
"sectionList": [{
"sectionNo": 1,
"coilList": [{
"laneNo": 1,
"laneType": null,
"coilNo": 1,
"volumeSum": 2,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 2,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 29.65,
"timeOccupancy": 0.63,
"averageHeadway": 154.79,
"averageGap": 153.49,
"speed85": 40.0
},
{
"laneNo": 2,
"laneType": null,
"coilNo": 2,
"volumeSum": 5,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 5,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 23.35,
"timeOccupancy": 2.99,
"averageHeadway": 123.27,
"averageGap": 121.08,
"speed85": 34.0
},
{
"laneNo": 3,
"laneType": null,
"coilNo": 3,
"volumeSum": 9,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 9,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 26.22,
"timeOccupancy": 4.52,
"averageHeadway": 36.98,
"averageGap": 35.49,
"speed85": 36.0
},
{
"laneNo": 4,
"laneType": null,
"coilNo": 4,
"volumeSum": 10,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 10,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 39.47,
"timeOccupancy": 2.69,
"averageHeadway": 34.73,
"averageGap": 33.78,
"speed85": 56.0
}]
},
{
"sectionNo": 2,
"coilList": [{
"laneNo": 5,
"laneType": null,
"coilNo": 5,
"volumeSum": 1,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 1,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 32.74,
"timeOccupancy": 0.57,
"averageHeadway": 618.59,
"averageGap": 617.59,
"speed85": 32.74
},
{
"laneNo": 6,
"laneType": null,
"coilNo": 6,
"volumeSum": 3,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 3,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 39.27,
"timeOccupancy": 0.37,
"averageHeadway": 125.1,
"averageGap": 124.26,
"speed85": 49.0
},
{
"laneNo": 7,
"laneType": null,
"coilNo": 7,
"volumeSum": 4,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 4,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 49.15,
"timeOccupancy": 0.96,
"averageHeadway": 91.65,
"averageGap": 91.05,
"speed85": 54.0
},
{
"laneNo": 8,
"laneType": null,
"coilNo": 8,
"volumeSum": 1,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 1,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 60.2,
"timeOccupancy": 0.17,
"averageHeadway": 50.3,
"averageGap": 49.7,
"speed85": 60.2
}]
}]
}
}
三、参考知乎文章链接
https://zhuanlan.zhihu.com/p/461838868
四、HiveSQL
1.首先,解析出第一层、第二层、第三层JSON
-- Step 1: emit one row per element of $.data.sectionList, carrying the
-- top-level fields and $.data.cycle along with each section.
--
-- How the split works (pure string manipulation, no JSON-array UDF):
--   1. Every '[' and ']' is removed from the sectionList string, including
--      the brackets of each nested coilList array.
--   2. Each '},{"sectionNo"' boundary is rewritten to '}|{"sectionNo"', and
--      the string is split on '|', giving one fragment per section.
--   3. To read sectionNo, the fragment is made parseable again by turning
--      ':{' back into ':[{' and '}}' back into '}]}'.
-- This only works when no value contains '[', ']' or '|', when "sectionNo"
-- is the first key of every section object, and when the coil objects are
-- flat (no nested objects).
select
    get_json_object(statistics_json, '$.deviceNo')         as device_no,
    get_json_object(statistics_json, '$.sourceDeviceType') as source_device_type,
    get_json_object(statistics_json, '$.sn')               as sn,
    get_json_object(statistics_json, '$.model')            as model,
    get_json_object(statistics_json, '$.createTime')       as create_time,
    get_json_object(statistics_json, '$.data.cycle')       as cycle,
    get_json_object(
        replace(replace(section_list, ':{', ':[{'), '}}', '}]}'),
        '$.sectionNo'
    )                                                      as section_no,
    -- Raw fragment with the array brackets still stripped.
    section_list
from hurys_dc_ods.ods_statistics
lateral view explode(
    split(
        replace(
            replace(
                replace(get_json_object(statistics_json, '$.data.sectionList'), '[', ''),
                ']', ''
            ),
            '},{"sectionNo"', '}|{"sectionNo"'
        ),
        "\\|"
    )
) tf as section_list
where day = '2024-07-16'
2.然后,将section_list中的coilList数组展开为coil_list,并解析出其中的第四层JSON
-- Step 2: emit one row per coil (lane) record, i.e. one row per element of
-- every $.data.sectionList[*].coilList array, together with the device-level
-- fields, the cycle and the owning section number.
--
-- All extracted columns come from get_json_object and are therefore strings.
--
-- The splitting is pure string manipulation and only works when no value
-- contains '[', ']' or '|', when "sectionNo" is the first key of every
-- section object, and when the coil objects are flat (no nested objects).
with section_rows as (
    -- One row per section. The sectionList string has every '[' and ']'
    -- removed, section boundaries are marked with '|', and the string is
    -- split on that marker.
    select
        get_json_object(statistics_json, '$.deviceNo')         as device_no,
        get_json_object(statistics_json, '$.sourceDeviceType') as source_device_type,
        get_json_object(statistics_json, '$.sn')               as sn,
        get_json_object(statistics_json, '$.model')            as model,
        get_json_object(statistics_json, '$.createTime')       as create_time,
        get_json_object(statistics_json, '$.data.cycle')       as cycle,
        -- Put the coilList brackets back (':{' -> ':[{', '}}' -> '}]}') so the
        -- fragment can be parsed again; computed once and reused below.
        replace(replace(section_list, ':{', ':[{'), '}}', '}]}') as section_json
    from hurys_dc_ods.ods_statistics
    lateral view explode(
        split(
            replace(
                replace(
                    replace(get_json_object(statistics_json, '$.data.sectionList'), '[', ''),
                    ']', ''
                ),
                '},{"sectionNo"', '}|{"sectionNo"'
            ),
            "\\|"
        )
    ) tf as section_list
    where day = '2024-07-16'
)
select
    device_no,
    source_device_type,
    sn,
    model,
    create_time,
    cycle,
    get_json_object(coil_list, '$.laneNo')          as lane_no,
    get_json_object(coil_list, '$.laneType')        as lane_type,
    get_json_object(section_json, '$.sectionNo')    as section_no,
    get_json_object(coil_list, '$.coilNo')          as coil_no,
    get_json_object(coil_list, '$.volumeSum')       as volume_sum,
    get_json_object(coil_list, '$.volumePerson')    as volume_person,
    get_json_object(coil_list, '$.volumeCarNon')    as volume_car_non,
    get_json_object(coil_list, '$.volumeCarSmall')  as volume_car_small,
    get_json_object(coil_list, '$.volumeCarMiddle') as volume_car_middle,
    get_json_object(coil_list, '$.volumeCarBig')    as volume_car_big,
    get_json_object(coil_list, '$.speedAvg')        as speed_avg,
    get_json_object(coil_list, '$.speed85')         as speed_85,
    get_json_object(coil_list, '$.timeOccupancy')   as time_occupancy,
    get_json_object(coil_list, '$.averageHeadway')  as average_headway,
    get_json_object(coil_list, '$.averageGap')      as average_gap,
    -- First 10 characters of createTime, i.e. the 'yyyy-MM-dd' part.
    substr(create_time, 1, 10)                      as day
from section_rows
-- Same trick one level down: strip the coilList brackets, mark each '},'
-- boundary between coil objects with '|', and split on it.
lateral view explode(
    split(
        replace(
            replace(
                replace(get_json_object(section_json, '$.coilList'), '[', ''),
                ']', ''
            ),
            '},', '}|'
        ),
        "\\|"
    )
) tf1 as coil_list;
3.运行SQL,验证一下
终于解决了,终于解决了!!!
更多推荐
所有评论(0)