Get table size and statistics from Hive metadata MySQL database
This is detailed query collecting data at partition/storage granular level. One row = one partition. The result set of this query can be rolled up to
database - > table -> partition
summary. Use this query for snapshot view or per periodical database snapshot size collection.
SELECT
D.DB_ID as D_DB_ID
, D.DESC as D_DESC
, D.DB_LOCATION_URI as D_DB_LOCATION_URI
, D.NAME as D_NAME
, D.OWNER_NAME as D_OWNER_NAME
, D.OWNER_TYPE as D_OWNER_TYPE
, T.TBL_ID as T_TBL_ID
, from_unixtime(T.CREATE_TIME,'%Y-%m-%d %T') as T_CREATE_TIME
, from_unixtime(T.LAST_ACCESS_TIME,'%Y-%m-%d %T') as T_LAST_ACCESS_TIME
, T.OWNER as T_OWNER
, T.RETENTION as T_RETENTION
, T.SD_ID as T_SD_ID
, T.TBL_NAME as T_TBL_NAME
, T.TBL_TYPE as T_TBL_TYPE
, T.VIEW_EXPANDED_TEXT as T_VIEW_EXPANDED_TEXT
, T.VIEW_ORIGINAL_TEXT as T_VIEW_ORIGINAL_TEXT
, P.PART_ID as T_PART_ID
, from_unixtime(P.CREATE_TIME ,'%Y-%m-%d %T') as P_CREATE_TIME
, from_unixtime(P.LAST_ACCESS_TIME,'%Y-%m-%d %T') as P_LAST_ACCESS_TIME
, P.PART_NAME as P_PART_NAME
,max(if(R.PARAM_KEY='COLUMN_STATS_ACCURATE' , R.PARAM_VALUE,null)) AS P_COLUMN_STATS_ACCURATE
,max(if(R.PARAM_KEY='STATS_GENERATED_VIA_STATS_TASK', R.PARAM_VALUE,null)) AS P_STATS_GENERATED_VIA_STATS_TASK
,nullif(sum(if(R.PARAM_KEY='numFiles' ,ifnull(R.PARAM_VALUE,0),0)),0) AS P_NUMFILES
,nullif(sum(if(R.PARAM_KEY='numRows' ,ifnull(R.PARAM_VALUE,0),0)),0) AS P_NUMROWS
,nullif(sum(if(R.PARAM_KEY='rawDataSize' ,ifnull(R.PARAM_VALUE,0),0)),0) AS P_RAWDATASIZE
,nullif(sum(if(R.PARAM_KEY='totalSize' ,ifnull(R.PARAM_VALUE,0),0)),0) AS P_TOTALSIZE
,from_unixtime(sum(if(R.PARAM_KEY='transient_lastDdlTime' ,ifnull(R.PARAM_VALUE,0),0)),'%Y-%m-%d %T') AS P_LASTDDLTIME
, S.SD_ID as S_SD_ID
, S.CD_ID as S_CD_ID
, S.INPUT_FORMAT as S_INPUT_FORMAT
, S.IS_COMPRESSED as S_IS_COMPRESSED
, S.IS_STOREDASSUBDIRECTORIES as S_IS_STOREDASSUBDIRECTORIES
, S.LOCATION as S_LOCATION
, S.NUM_BUCKETS as S_NUM_BUCKETS
, S.OUTPUT_FORMAT as S_OUTPUT_FORMAT
, S.SERDE_ID as S_SERDE_ID
FROM hive.PARTITION_PARAMS R
left outer join hive.PARTITIONS P USING (PART_ID)
left outer join hive.SDS S USING (SD_ID)
left outer join hive.TBLS T USING (TBL_ID)
left outer join hive.DBS D USING (DB_ID)
where 1=1
and D.NAME = 'ingest'
group by
D.DB_ID, D.DESC, D.DB_LOCATION_URI, D.NAME, D.OWNER_NAME, D.OWNER_TYPE
,T.TBL_ID, T.CREATE_TIME, T.DB_ID, T.LAST_ACCESS_TIME, T.OWNER, T.RETENTION, T.SD_ID, T.TBL_NAME, T.TBL_TYPE, T.VIEW_EXPANDED_TEXT, T.VIEW_ORIGINAL_TEXT
,S.SD_ID,S.CD_ID,S.INPUT_FORMAT,S.IS_COMPRESSED,S.IS_STOREDASSUBDIRECTORIES,S.LOCATION,S.NUM_BUCKETS,S.OUTPUT_FORMAT,S.SERDE_ID
,P.PART_ID, P.CREATE_TIME, P.LAST_ACCESS_TIME, P.PART_NAME ;
No comments:
Post a Comment