[20180928]exists与cardinality.txt
--//优化遇到的问题,做一个例子演示出来.
1.环境:
SCOTT@test01p> @ ver1
PORT_STRING VERSION BANNER CON_ID
------------------------------ -------------- -------------------------------------------------------------------------------- ----------
IBMPC/WIN_NT64-9.1.0 12.1.0.1.0 Oracle Database 12c Enterprise Edition Release 12.1.0.1.0 - 64bit Production 0
2.测试建立:
create table t1 as select rownum id ,lpad('a',100,'a') vc from dual connect by level<=1000;
create table t2 as select rownum idx,mod(rownum,1000)+1 id ,lpad('b',20,'b') vc from dual connect by level<=40000;
create unique index pk_t1 on t1(id);
alter table t1 add constraint pk_t1 primary key (id);
create unique index pk_t2 on t2(idx);
alter table t2 add constraint pk_t2 primary key (idx);
create index i_t2_id on t2(id);
3.测试1:
SCOTT@test01p> alter session set statistics_level = all;
Session altered.
SCOTT@test01p> select * from t1 where exists (select 1 from t2 where t2.id=t1.id) and t1.id=32;
ID VC
---------- ----------------------------------------------------------------------------------------------------
32 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
--//执行计划如下:
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID ajnkhp6968v8r, child number 1
-------------------------------------
select * from t1 where exists (select 1 from t2 where t2.id=t1.id) and
t1.id=32
Plan hash value: 1277462125
-------------------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers | Reads |
-------------------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 3 (100)| | 1 |00:00:00.02 | 5 | 1 |
| 1 | NESTED LOOPS SEMI | | 1 | 1 | 109 | 3 (0)| 00:00:01 | 1 |00:00:00.02 | 5 | 1 |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 105 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 3 | 0 |
|* 3 | INDEX UNIQUE SCAN | PK_T1 | 1 | 1 | | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 | 0 |
|* 4 | INDEX RANGE SCAN | I_T2_ID | 1 | 40 | 160 | 1 (0)| 00:00:01 | 1 |00:00:00.02 | 2 | 1 |
-------------------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$5DA710D3
2 - SEL$5DA710D3 / T1@SEL$1
3 - SEL$5DA710D3 / T1@SEL$1
4 - SEL$5DA710D3 / T2@SEL$2
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("T1"."ID"=32)
4 - access("T2"."ID"=32)
filter("T2"."ID"="T1"."ID")
--//实际上开始让我困惑的是id=4,E_rows=40,实际上exists只要1条满足条件就ok了.不需要继续判断,有点短路的作用.
--//这里非常容易误判,我们生产系统E_rows更高,差点给误导了.
4.测试2:
--//测试not exists的情况如下:
SCOTT@test01p> select * from t1 where not exists (select 1 from t2 where t2.id=t1.id) and t1.id=32;
no rows selected
SCOTT@test01p> @ dpc '' ''
PLAN_TABLE_OUTPUT
-------------------------------------
SQL_ID 21f5mqdya13v8, child number 1
-------------------------------------
select * from t1 where not exists (select 1 from t2 where t2.id=t1.id)
and t1.id=32
Plan hash value: 1740670345
----------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows |E-Bytes| Cost (%CPU)| E-Time | A-Rows | A-Time | Buffers |
----------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | | 3 (100)| | 0 |00:00:00.01 | 5 |
| 1 | NESTED LOOPS ANTI | | 1 | 1 | 109 | 3 (0)| 00:00:01 | 0 |00:00:00.01 | 5 |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 105 | 2 (0)| 00:00:01 | 1 |00:00:00.01 | 3 |
|* 3 | INDEX UNIQUE SCAN | PK_T1 | 1 | 1 | | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
|* 4 | INDEX RANGE SCAN | I_T2_ID | 1 | 40 | 160 | 1 (0)| 00:00:01 | 1 |00:00:00.01 | 2 |
----------------------------------------------------------------------------------------------------------------------------------
Query Block Name / Object Alias (identified by operation id):
-------------------------------------------------------------
1 - SEL$5DA710D3
2 - SEL$5DA710D3 / T1@SEL$1
3 - SEL$5DA710D3 / T1@SEL$1
4 - SEL$5DA710D3 / T2@SEL$2
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("T1"."ID"=32)
4 - access("T2"."ID"=32)
31 rows selected.
--//实际上我遇到的优化问题就是一个项目表有2千多条记录,查询业务表有那些项目已经开展的.每次进入程序界面就
--//以列表的形式显示,调用如下:
select * from 项目表 where exists (select 1 from 业务表 where 业务表.项目_id=项目表.id);
--//业务表巨大无比,看到以上执行计划的E_rows达到上万,习惯思维,差点被误导.
--//我自己查看我工作笔记,这个在业务表上"项目_id"字段索引还是我去年建立的.该索引重复值太多,而且这个索引除了这样的查询一点用都没有.
--//有时候想开发为什么写这样的sql语句,写前考虑没有.代价太大了.
--//真心希望开发写sql语句想一想.....
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/267265/viewspace-2215298/,如需转载,请注明出处,否则将追究法律责任。