[英]Percentile in Case Statement In Athena SQL
嘗試在 Athena SQL 中執行此操作。我將“75”作為默認的 tax_type,我需要將其替換為“76”以獲得 20% 的 tax_amt
當前 output 原樣
geocode tax_type tax_amt
32 75 10
32 75 15
32 75 20
32 75 18
32 75 20
32 75 17
我需要 output 作為
geocode tax_type tax_amt
32 75 10
32 75 15
32 75 20
32 75 18
32 76 20
32 75 17
我不斷收到以下錯誤:
“GROUP BY 子句不能包含聚合或 window 函數:["count"("tax_amt")、"count"( )、"count"("tax_amt")、"count"( )]"
如果我從group by
子句中刪除,那么它會說
“必須是聚合表達式或出現在 GROUP BY 子句中”
任何幫助將非常感激
SQL
WITH a AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_fact"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_fact")
)
, b AS
(SELECT g.geocode_id
, g.geocode
, coalesce(sq_actual_names.actual_name, g.jurisdiction_name) AS JUR_NAME
FROM "dl_wireless_boost"."ngpp_geocode_dim" g
LEFT JOIN
(SELECT REPLACE(geocode,'-','') AS stripped_geocode
, MAX(jurisdiction_name) AS actual_name
FROM "dl_wireless_boost"."ngpp_geocode_dim"
WHERE 1=1 AND dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
AND jurisdiction_name <> 'U'
GROUP BY 1) sq_actual_names
ON g.geocode = sq_actual_names.stripped_geocode
WHERE 1=1
AND g.dt = (SELECT MAX(dt) FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
)
, d AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_code_dim"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_code_dim")
)
, agg AS
(SELECT
CASE
WHEN d.tax_auth = '1' THEN SUBSTRING(b.geocode,1,2) || '0000000'
WHEN d.tax_auth = '2' THEN SUBSTRING(b.geocode,1,5) || '0000'
ELSE b.geocode
END AS GEOCODE
, b.JUR_NAME AS JURIS_NAME
, CAST(COUNT(a.tax_amt)/COUNT(*)*100 AS DECIMAL(30,2)) AS PERCENTILE
, d.tax_type
, CAST (d.tax_rate AS DECIMAL (30,5)) AS TAX_RATE
, SUM(CAST(a.tax_amt AS DECIMAL(30,2))) AS TAX
, SUBSTRING(b.geocode,1,2) AS STATE_GEO
, 'PPD Boost Usage' AS ID
, d.tax_auth AS TAX_AUTH
, f.auth_name AS AUTH_NAME
, CASE
WHEN SUBSTRING(b.geocode,1,2) = '72'
THEN '417'
ELSE '317'
END AS BU
, c.tax_descrp AS TAX_DESCRP
, CASE
WHEN b.geocode LIKE '%-%-%'
THEN b.geocode
ELSE SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4)
END AS GEOMASK
, 'NO' AS COST_RECOVERY_IDENTIFIER
, CONCAT(SUBSTRING(b.geocode,1,2),d.tax_auth,d.tax_type) AS GL_CROSS_REF
, SUBSTRING(load_id,1,6) AS YYYYMM
, c.gl_account AS GL_ACCOUNT
, d.tax_srv_type
FROM a
LEFT JOIN b
ON a.geocode_id = b.geocode_id
LEFT JOIN d
ON a.tax_code_id = d.tax_code_id
LEFT JOIN "sbx_accntnganalytics"."tax_auth_name" f
ON d.tax_auth = f.tax_auth
LEFT JOIN "sbx_accntnganalytics"."tax_gl_desc" c
ON d.tax_type = c.tax_type
AND SUBSTRING(b.geocode,1,2) = c.state_geo
GROUP BY 1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18
)
SELECT GEOCODE
, JURIS_NAME
, PERCENTILE
, CASE
when STATE_GEO = '32' AND tax_type = '75' AND CAST(COUNT(tax)/COUNT(*)*100 AS DECIMAL(30,2)) = 0.20
THEN REPLACE (tax_type,'75','76') else tax_type end as tax_type
, TAX_RATE
, TAX
, STATE_GEO
, TAX_AUTH
, AUTH_NAME
, BU
, TAX_DESCRP
, GEOMASK
, COST_RECOVERY_IDENTIFIER
, GL_CROSS_REF
, YYYYMM
, GL_ACCOUNT
, tax_srv_type
FROM agg
我懷疑你想要:
select geocode, tax_type,
case when ntile(5) over(order by id) = 1 then 76 else tax_amt end tax_amt
from mytable
這假定列id
可用於對記錄進行排序。 該查詢使用 window function ntil ntil()
標識前 20% 的記錄,並將值76
分配給列tax_amt
。
因為您在同一表達式中組合了一個分組列和聚合,所以您遇到了GROUP BY
錯誤。 考慮先聚合然后運行您的CASE
表達式,這可以通過使用WITH
子句的公用表表達式 (CTE) 來促進。 您甚至可以使用多個WHEN
邏輯表達式來展平您的CASE
。
WITH a AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_fact"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_fact")
)
, b AS
(SELECT g.geocode_id
, g.geocode
, coalesce(sq_actual_names.actual_name, g.jurisdiction_name) AS JUR_NAME
FROM "dl_wireless_boost"."ngpp_geocode_dim" g `enter code here`
LEFT JOIN
(SELECT REPLACE(geocode,'-','') AS stripped_geocode
, MAX(jurisdiction_name) AS actual_name
FROM "dl_wireless_boost"."ngpp_geocode_dim"
WHERE 1=1 AND dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
AND jurisdiction_name <> 'U'
GROUP BY 1) sq_actual_names
ON g.geocode = sq_actual_names.stripped_geocode
WHERE 1=1
AND g.dt = (SELECT MAX(dt) FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
)
, d AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_code_dim"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_code_dim")
)
, agg AS
(SELECT
CASE
WHEN d.tax_auth = '1' THEN SUBSTRING(b.geocode,1,2) || '0000000'
WHEN d.tax_auth = '2' THEN SUBSTRING(b.geocode,1,5) || '0000'
ELSE b.geocode
END AS GEOCODE
, b.JUR_NAME AS JURIS_NAME
, CAST(COUNT(a.tax_amt)/COUNT(*)*100 AS DECIMAL(30,2)) AS PERCENTILE,
, d.tax_type
, CAST (d.tax_rate AS DECIMAL (30,5)) AS TAX_RATE
, SUM(CAST(a.tax_amt AS DECIMAL(30,2))) AS TAX
, SUBSTRING(b.geocode,1,2) AS STATE_GEO
, 'PPD Boost Usage' AS ID
, d.tax_auth AS TAX_AUTH
, f.auth_name AS AUTH_NAME
, CASE
WHEN SUBSTRING(b.geocode,1,2) = '72'
THEN '417'
ELSE '317'
END AS BU
, c.tax_descrp AS TAX_DESCRP
, CASE
WHEN b.geocode LIKE '%-%-%'
THEN b.geocode
ELSE SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4)
END AS GEOMASK
, 'NO' AS COST_RECOVERY_IDENTIFIER
, CONCAT(SUBSTRING(b.geocode,1,2),d.tax_auth,d.tax_type) AS GL_CROSS_REF
, SUBSTRING(load_id,1,6) AS YYYYMM
, c.gl_account AS GL_ACCOUNT
, d.tax_srv_type
FROM a
LEFT JOIN b
ON a.geocode_id = b.geocode_id
LEFT JOIN d
ON a.tax_code_id = d.tax_code_id
LEFT JOIN "sbx_accntnganalytics"."tax_auth_name" f
ON d.tax_auth = f.tax_auth
LEFT JOIN "sbx_accntnganalytics"."tax_gl_desc" c
ON d.tax_type = c.tax_type
AND SUBSTRING(b.geocode,1,2) = c.state_geo
GROUP BY 1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18
)
SELECT GEOCODE
, JURIS_NAME
, PERCENTILE
, CASE
WHEN STATE_GEO = '32' AND d.tax_type = '75' AND PERCENTILE = 0.20
THEN REPLACE (d.tax_type,'75','76')
WHEN (STATE_GEO <> '32' OR d.tax_type <> '75') AND PERCENTILE = 0.80
THEN d.tax_type
ELSE NULL
END AS TAX_TYPE
, TAX_RATE
, TAX
, STATE_GEO
, ID
, TAX_AUTH
, AUTH_NAME
, BU
, TAX_DESCRP
, GEOMASK
, COST_RECOVERY_IDENTIFIER
, GL_CROSS_REF
, YYYYMM
, GL_ACCOUNT
, tax_srv_type
FROM agg
除此之外 - 一定要注意改掉的壞習慣:使用表別名,例如 (a, b, c) 或 (t1, t2, t3)以及提供更多信息的別名。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.