ITPub博客

首页 > 应用开发 > IT综合 > DB2 SQL改写

DB2 SQL改写

翻译 IT综合 作者:zchbaby2000 时间:2018-10-25 18:01:17 0 删除 编辑

DB2的查询重新是很厉害的,但是有时候不够完美,如果我们发现高消耗的SQL需要重新,那么下面的一些例子可能是有帮助的

-> 把outer join放在inner join的后面
SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5,C50
FROM T1 LEFT OUTER JOIN T2 ON T1.C2=T2.C2
LEFT OUTER JOIN T3 ON T1.C3=T3.C3
INNER JOIN T4 ON T1.C4=T4.C4
INNER JOIN T5 ON T1.C5=T5.C5
WHERE T4.C0=2

=改成=>

SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5,C50
FROM T1 INNER JOIN T4 ON T1.C4=T4.C4
INNER JOIN T5 ON T1.C5=T5.C5
LEFT OUTER JOIN T2 ON T1.C2=T2.C2
LEFT OUTER JOIN T3 ON T1.C3=T3.C3
WHERE T4.C0=2


-> DB2 DPF环境, T2是大表
SELECT T1.C1,T1.CX,T2.C2,T2.C3 ......, T2.C20
FROM T1 LEFT OUTER JOIN T2
ON T1.C2=T2.C2
WHERE T1.C0=2

=改成=>

SELECT T1.C1,T1.CX,T2.C2,T2.C3 ......, T2.C20
FROM T1 LEFT OUTER JOIN
(
    SELECT T2.* FROM T2,(SELECT DISTINCT C2
    FROM T1 WHERE T1.C0=2) T1
    WHERE T1.C2=T2.C2
)
T2
ON T1.C2=T2.C2
WHERE T1.C0=2

-> 减少需要Aggregation的行数
SELECT T1.C2,T1.C10,T1.C20,T1.C30,TX.S
FROM T1 INNER JOIN
(
    SELECT T2.C2,SUM(T2.SALES) AS S
    FROM T2 INNER JOIN T3
    ON T2.C3=T3.C3
    GROUP BY T2.C2
)
TX
ON T1.C2=T2.C2
WHERE T1.C0=5

=改成=>

SELECT T1.C2,T1.C10,T1.C20,T1.C30,TX.S
FROM T1 INNER JOIN
(
    SELECT T2.C2,SUM(T2.SALES) AS S
    FROM T2 INNER JOIN T3
    ON T2.C3=T3.C3
    INNER JOIN
    (
        SELECT DISTINCT T1.C2
        FROM T1
        WHERE T1.C0=5        
    ) T1 ON T1.C2=T2.C2
    GROUP BY T2.C2
)
TX
ON T1.C2=T2.C2
WHERE T1.C0=5

--> 提前做一部分Aggregation

SELECT T2.C20,T3.C30,T4.C40,T4.C41,T4.C42,T5.C50,
SUM(T1.SALES)
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
INNER JOIN T4 ON T1.C4=T4.C4
INNER JOIN T5 ON T1.C5=T5.C5
WHERE T2.C0=2 AND
T3.C0=MONTH('03.10.2017')
GROUP BY T2.C20,T3.C30,T4.C40,T4.C41,T4.C42,T5.C50

=改成=>

SELECT T2.C20,T3.C30,T4.C40,T4.C41,T4.C42,T5.C50,
SUM(TX.S)
FROM
(
SELECT T2.C20,T3.C30,T1.C4,T1.C5,SUM(T1.SALES) AS S
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
WHERE T2.C0=2 AND
T3.C0=MONTH('03.10.2017')
GROUP BY T2.C20,T3.C30,T1.C4,T1.C5
) TX
INNER JOIN T4 ON TX.C4=T4.C4
INNER JOIN T5 ON TX.C5=T5.C5
GROUP BY TX.C20,TX.C30,T4.C40,T4.C41,T4.C42,T5.C50

--> 去重要趁早
SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5.C50
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
INNER JOIN (SELECT DISTINCT C4,CX FROM T4) T4 ON T1.C4=T4.C4
INNER JOIN (SELECT DISTINCT C5,CY FROM T5) T5 ON T1.C5=T5.C5
WHERE ...

=改成=>

SELECT T1.C10,T2.C20,T3.C30,T4.C40,T5.C50
FROM T1 INNER JOIN T2 ON T1.C2=T2.C2
INNER JOIN T3 ON T1.C3=T3.C3
INNER JOIN (SELECT C4,CX FROM T4 GROUP BY C4,CX) T4 ON T1.C4=T4.C4
INNER JOIN (SELECT C5,CY FROM T5 GROUP BY C5,CY) T5 ON T1.C5=T5.C5
WHERE ...

DISTINCT和GROUP BY都可以去重,但是GROUP BY不会像DISTINCT那样,可能做了JOIN之后再去重


--> DPF 高频列的DISTINCT Aggregation, 减少内存消耗

SELECT COUNT(DISTINCT C1) - High number of distinct values,很吃内存的操作,所有不同的值都需要放在内存中,无法做部分的group by
FROM T1
GROUP BY C0 --> Low number of distinct values or no group by

=改成=>

select COUNT(*)   -- 可以做部分的group by
FROM
(
SELECT C0,C1 FROM T1 GROUP BY C0,C1
) AS T
GROUP BY C0

SELECT C0,
COUNT(DISTINCT C1) AS CDC1,
COUNT(DISTINCT C2) AS CDC2
FROM T1
GROUP BY C0

=改成=>

SELECT C0,TX.CDC1,TY.CDC2
FROM
(
    SELECT C0,COUNT(*) AS CDC1
    FROM
    (
        SELECT C0,C1
        FROM T1
        GROUP BY C0,C1
    ) AS T
    GROUP BY C0
) TX,
(
    SELECT C0,COUNT(*) AS CDC2
    FROM
    (
        SELECT C0,C2
        FROM T1
        GROUP BY C0,C2
    ) AS T
    GROUP BY C0
)
TY
WHERE TX.C0=TY.C0


--> 关联子查询变成表连接
SELECT *
FROM VERSIONED_TBL T1
WHERE T1.SALARY >= 100000 AND
 T1.DATE_COL >=
 (SELECT MIN(DATE_COL) FROM VERSIONED_TBL T2
 WHERE T1.EMP_ID=T2.EMP_ID)
 
=改成=>

SELECT *
FROM VERSIONED_TBL T1,
(SELECT T1.EMP_ID,MIN(DATE_COL) AS M
FROM VERSIONED_TBL T2
GROUP BY T2.EMP_ID) TX
WHERE T1.SALARY>= 100000 AND
T1.DATE_COL >= TX.M AND
T1.EMP_ID=TX.EMP_ID

SELECT C1,
(SELECT MAX(C2) FROM T2,T3
WHERE T1.C1=T2.C1 AND T2.C3=T3.C3) AS C2,
C3
FROM T1
WHERE T1.C0 > 10

=改成=>

SELECT T1.C1,
TX.MAXC2 AS C2,
T1.C3
FROM T1 LEFT OUTER JOIN
(
SELECT C1,MAX(C2) AS MAXC2
FROM T2,T3
WHERE T2.C3=T3.C3
GROUP BY C1
)
TX
ON (T1.C1=TX.C1)
WHERE T1.C0>10


--> OR join谓词改写成UNION
OR Join predicates cannot be processed by a Hash Join
Nested Loop Joins work well with row tables and indexes
Column oriented tables perform better with a UNION
DISTINCT must be present to transfrom

SELECT DISTINCT T1,C1,T1,C2,T1.C3,T2.CX
FROM T1,T2
WHERE ( (T1.C6=T2.C2 AND T2.C3 IN ('1','2')) OR
(T1.C7=T2.C2 AND T2.C3 NOT IN ('1','2')))

=改成=>

SELECT T1.C2,T1.C2,T1.C3,T2.CX
FROM
(
SELECT T1.C2,T1.C2,T1.C3,T2.CX
FROM T1,T2
WHERE T1.C6=T2.C2 AND T2.C3 IN ('1','2')
UNION
SELECT T1.C2,T1.C2,T1.C3,T2.CX
FROM T1,T2
WHERE T1.C7=T2.C2 AND T2.C3 NOT IN ('1','2')
)


--> 改写NOT EXISTS
If no complexity, db2 tries to use left outer joins for better performance
The following example is with Not exists AND NOT EXISTS
IS NOT NULL is used for the exists or exists transfromation
SELECT T1.C2,T1.C2,T1.C3
FROM T1
WHERE NOT EXISTS (SELECT 1 FROM T2 WHERE T1.C2=T2.C2)
AND
NOT EXISTS (SELECT 1 FROM T3 WHERE T1.C3=T2.C3)

=改成=>

SELECT T1.C2,T1.C2,T1.C3
FROM T1 LEFT OUTER JOIN T2
ON T1.C2=T2.C2
LEFT OUTER JOIN T3
ON T1.C3=T3.C3
WHERE T2.C2 IS NULL AND T2.C3 IS NULL

--> 改写OR EXISTS

SELECT DISTINCT T1.C2,T1.C2,T1.C3
FROM T1
WHERE T1.C0=0 OR
EXISTS (
SELECT 1 FROM T2 WHERE T1.C2=T2.C2
)

=改成=>

SELECT T1.C1,T1.C2,T1.C3
FROM
(
SELECT T1.C2,T1.C2,T1.C3
FROM T1 WHERE T1.C0=0
UNION
SELECT T1.C2,T1.C2,T1.C3
FROM T1 WHERE EXISTS
(
SELECT 1 FROM T2 WHERE T1.C2=T2.C2
)
)

--Not in 和Not exist在存在NULL的情况下是不同的,不要以为可以用NOT EXISTS可以代替NOT EXISTS

来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/725820/viewspace-2217568/,如需转载,请注明出处,否则将追究法律责任。

上一篇: 一次DB2 DR演练
下一篇: db2 reorg,runstats
请登录后发表评论 登录
全部评论
喜欢各种数据库相关的开发,架构设计,运维和性能优化
  • 博文量
    242
  • 访问量
    91185