Serde - JsonSerde
μλ°(SerDe, Serializer/Deserialaizer)λ νμ΄λΈκ° λ°μ΄ν°λ₯Ό ν΄μνλ λ°©λ²μ μ 곡νλ€. μλ₯Ό λ€μ΄ νλ‘μ Json νμΌμ λ°λ‘ μ μ²λ¦¬νμ§ μκ³ μλ°λ₯Ό μ΄μ©νμ¬ λ°λ‘ μ§μ΄ λ£μΌλ©΄ νμ΄λΈμμ Row ν¬λ§·μΌλ‘ λ³ννμ¬ μ½μ μ μκ² λλ€.
νμ΄λΈλ μλ°μ νμΌ ν¬λ§·μ μ΄μ©νμ¬ λ°μ΄ν°λ₯Ό μ½κ³ , μΈ μ μλ€.
μλλ νμ΄λΈκ° μλ°λ₯Ό μ΄μ©νμ¬ νμΌμ μ½μ λμ μΈ λμ μ²λ¦¬λ°©λ²μ΄λ€.
- νμΌ μ½μ λ HDFS files -> InputFileFormat -> [key, value] -> Deserializer -> Row object
- νμΌ μΈ λ Row object -> Serializer -> [key, value] -> OutputFileFormat -> HDFS files
νμ΄λΈ κΈ°λ³Έ μλ°
νμ΄λΈμμ μ 곡νλ κΈ°λ³Έ μλ°λ 7κ°μ§(Avro, ORC, RegEx, Thrift, Parquet, CSV, JsonSerDe)κ° μλ€. κ° μλ°λ STORED AS μ μ§μ νλ νμΌμ ν¬λ§·μ λ°λΌ μλμΌλ‘ μ νλλ€.
κ°μ₯ λ§μ΄ μ°λ λ°μ ν λ°μ΄ν° νμμΈ Jsonμ νλ‘μ λ£κ³ μλ°λ₯Ό μ΄μ©νμ¬ νμ΄λΈλ‘ μ‘°νν΄λ³΄μ.
λ£μ λ°μ΄ν°λ μλμ κ°λ€.
# localdata.json
{
"result": {
"header": {
"columns": [
{
"rowNum": "λ²νΈ",
"opnSfTeamCode": "κ°λ°©μμΉλ¨μ²΄μ½λ",
"mgtNo": "κ΄λ¦¬λ²νΈ",
"opnSvcId": "κ°λ°©μλΉμ€ID",
"updateGbn": "λ°μ΄ν°κ°±μ ꡬλΆ",
"updateDt": "λ°μ΄ν°κ°±μ μΌμ",
"opnSvcNm": "κ°λ°©μλΉμ€λͺ
",
"bplcNm": "μ¬μ
μ₯λͺ
",
"sitePostNo": "μ§λ²μ°νΈλ²νΈ",
"siteWhlAddr": "μ§λ²μ£Όμ",
"rdnPostNo": "λλ‘λͺ
μ°νΈλ²νΈ",
"rdnWhlAddr": "λλ‘λͺ
μ£Όμ",
"siteArea": "μμ¬μ§λ©΄μ ",
"apvPermYmd": "μΈνκ°μΌμ",
"apvCancelYmd": "μΈνκ°μ·¨μμΌμ",
"dcbYmd": "νμ
μΌμ",
"clgStdt": "ν΄μ
μμμΌμ",
"clgEnddt": "ν΄μ
μ’
λ£μΌμ",
"ropnYmd": "μ¬κ°μ
μΌμ",
"trdStateGbn": "μμ
μνμ½λ",
"trdStateNm": "μμ
μνλͺ
",
"dtlStateGbn": "μμΈμμ
μνμ½λ",
"dtlStateNm": "μμΈμμ
μνλͺ
",
"x": "μ’νμ 보(X)",
"y": "μ’νμ 보(Y)",
"lastModTs": "μ΅μ’
μμ μΌμ",
"uptaeNm": "μ
νꡬλΆλͺ
",
"siteTel": "μ νλ²νΈ"
}
],
"paging": {
"pageIndex": 1,
"totalCount": 7360,
"pageSize": 50
},
"process": {
"code": "00",
"message": "μ μ μ²λ¦¬λμμ΅λλ€."
}
},
"body": {
"rows": [
{
"row": [
{
"rowNum": 1,
"opnSfTeamCode": 3740000,
"mgtNo": "PHMD120213740032084000004",
"opnSvcId": "01_01_06_P",
"updateGbn": "I",
"updateDt": "2021-07-04 00:22:53.0",
"opnSvcNm": "μ½κ΅",
"bplcNm": "νλμ¨λ리μ½κ΅",
"sitePostNo": "",
"siteWhlAddr": "κ²½κΈ°λ μμμ κΆμ ꡬ μΈλ₯λ 1155-9 μΈλ₯λΉμ¦νμ",
"rdnPostNo": 16660,
"rdnWhlAddr": "κ²½κΈ°λ μμμ κΆμ ꡬ κ²½μλλ‘ 193, μΈλ₯λΉμ¦νμ 1μΈ΅ 105νΈ (μΈλ₯λ)",
"siteArea": "",
"apvPermYmd": 20210712,
"apvCancelYmd": "",
"dcbYmd": "",
"clgStdt": "",
"clgEnddt": "",
"ropnYmd": "",
"trdStateGbn": "01",
"trdStateNm": "μμ
/μ μ",
"dtlStateGbn": 13,
"dtlStateNm": "μμ
μ€",
"x": "201454.508790129 ",
"y": "415838.18218489 ",
"lastModTs": 20210702162433,
"uptaeNm": "",
"siteTel": ""
},
{
"rowNum": 2,
"opnSfTeamCode": 4180000,
"mgtNo": 4180000202189,
"opnSvcId": "11_50_03_P",
"updateGbn": "I",
"updateDt": "2021-07-04 00:22:53.0",
"opnSvcNm": "νμ μ¬μ
",
"bplcNm": "μ λν νμ μ¬ μ¬λ¬΄μ",
"sitePostNo": "",
"siteWhlAddr": "κ°μλ μΆμ²μ ν΄κ³λ ****",
"rdnPostNo": 24380,
"rdnWhlAddr": "κ°μλ μΆμ²μ λ¨μΆμ²μκΈΈ *(ν΄κ³λ)",
"siteArea": "",
"apvPermYmd": 20210708,
"apvCancelYmd": "",
"dcbYmd": "",
"clgStdt": "",
"clgEnddt": "",
"ropnYmd": "",
"trdStateGbn": "01",
"trdStateNm": "μμ
/μ μ",
"dtlStateGbn": "01",
"dtlStateNm": "μμ
μ€",
"x": "264546.296706142 ",
"y": "484486.499826113 ",
"lastModTs": 20210702151631,
"uptaeNm": "",
"siteTel": "033-264-1238"
},
...
]
}
]
}
}
}
μλ json λ°μ΄ν°λ₯Ό μ λ ¬μμΌ λμμ§λ§, hadoopμ μ€μ λ€μ΄κ°λ jsonμ μ λ ¬νμ§ λ§κ³ νμ€λ‘ λ€μ΄κ°μΌ νλ€.
json serdeλ₯Ό μ΄μ©ν createλ¬Έμ μλμ κ°λ€.
create table localdata (
result struct<
header: struct<
columns: array<
struct<
rowNum: string,
opnSfTeamCode: string,
mgtNo: string,
opnSvcId: string,
updateGbn: string,
updateDt: string,
opnSvcNm: string,
bplcNm: string,
sitePostNo: string,
siteWhlAddr: string,
rdnPostNo: string,
rdnWhlAddr: string,
siteArea: string,
apvPermYmd: string,
apvCancelYmd: string,
dcbYmd: string,
clgStdt: string,
clgEnddt: string,
ropnYmd: string,
trdStateGbn: string,
trdStateNm: string,
dtlStateGbn: string,
dtlStateNm: string,
x: string,
y: string,
lastModTs: string,
uptaeNm: string,
siteTel: string
>
>,
paging: struct<
pageIndex: string,
totalCount: string,
pageSize: string
>,
process: struct<
code: string,
message: string
>
>,
body: struct<
`rows`: array<
struct<
`row`: array<
struct<
rowNum: string,
opnSfTeamCode: string,
mgtNo: string,
opnSvcId: string,
updateGbn: string,
updateDt: string,
opnSvcNm: string,
bplcNm: string,
sitePostNo: string,
siteWhlAddr: string,
rdnPostNo: string,
rdnWhlAddr: string,
siteArea: string,
apvPermYmd: string,
apvCancelYmd: string,
dcbYmd: string,
clgStdt: string,
clgEnddt: string,
ropnYmd: string,
trdStateGbn: string,
trdStateNm: string,
dtlStateGbn: string,
dtlStateNm: string,
x: string,
y: string,
lastModTs: string,
uptaeNm: string,
siteTel: string
>
>
>
>
>
>
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION
'/user/hive/st/localdata'
;
ν μ΄λΈμ μμ±νμΌλ©΄ hadoopμ λ°μ΄ν°λ₯Ό λ£μ.
$ hadoop fs -put localdata.json /user/hive/st/localdata
λ°μ΄ν°μμ bodyλ§ μ‘°νλ₯Ό ν΄λ³΄μ.
select
msgarr.rowNum,
msgarr.opnSfTeamCode,
msgarr.mgtNo,
msgarr.opnSvcId,
msgarr.updateGbn,
msgarr.updateDt,
msgarr.opnSvcNm,
msgarr.bplcNm,
msgarr.sitePostNo,
msgarr.siteWhlAddr,
msgarr.rdnPostNo,
msgarr.rdnWhlAddr,
msgarr.siteArea,
msgarr.apvPermYmd,
msgarr.apvCancelYmd,
msgarr.dcbYmd,
msgarr.clgStdt,
msgarr.clgEnddt,
msgarr.ropnYmd,
msgarr.trdStateGbn,
msgarr.trdStateNm,
msgarr.dtlStateGbn,
msgarr.dtlStateNm,
msgarr.x,
msgarr.y,
msgarr.lastModTs,
msgarr.uptaeNm,
msgarr.siteTel
from localdata ld lateral view explode(result.body.`rows`.`row`) msgtable as msgarr
;
μ΄λ μλμ κ°μ΄ κ²°κ³Όκ° λμ¬ κ²μ΄λ€.
50κ°μ λ°μ΄ν°κ° κ° μ»¬λΌμ λ°°μ΄μ λ΄κ²¨ ν rowλ‘ λμ¨λ€.
κ°κ°μ rowλ‘ λ°μ΄ν°λ₯Ό λ³΄κ³ μΆμλ μλμ κ°μ΄ νλ©΄ λλ€.
select
msgarr2.rowNum,
msgarr2.opnSfTeamCode,
msgarr2.mgtNo,
msgarr2.opnSvcId,
msgarr2.updateGbn,
msgarr2.updateDt,
msgarr2.opnSvcNm,
msgarr2.bplcNm,
msgarr2.sitePostNo,
msgarr2.siteWhlAddr,
msgarr2.rdnPostNo,
msgarr2.rdnWhlAddr,
msgarr2.siteArea,
msgarr2.apvPermYmd,
msgarr2.apvCancelYmd,
msgarr2.dcbYmd,
msgarr2.clgStdt,
msgarr2.clgEnddt,
msgarr2.ropnYmd,
msgarr2.trdStateGbn,
msgarr2.trdStateNm,
msgarr2.dtlStateGbn,
msgarr2.dtlStateNm,
msgarr2.x,
msgarr2.y,
msgarr2.lastModTs,
msgarr2.uptaeNm,
msgarr2.siteTel
from localdata ld lateral view explode(result.body.`rows`.`row`) msgtable as msgarr
LATERAL VIEW explode(msgarr) msgtable2 AS msgarr2
;
κ²°κ³Όλ μλμ κ°λ€.
λ°μ΄ν° μ΄ κ°μλ₯Ό ꡬνλ 쿼리문μ μλμ κ°λ€.
select
size(msgarr)
from localdata ld lateral view explode(result.body.`rows`.`row`) msgtable as msgarr
;
Leave a comment