关于left join...on...and...以及left join...on...where的区别,网上很多的说法是对于left join...on...and...,and所限定的条件只对关联字段起作用,比如select a.* from tmp_table_a a left join tmp_table_b b on a.col1=b.col1 and b.col2=xx,很多人认为条件b.col2=xx是不起作用的。
对于这种说法,我个人是不认同的,至少来讲,这是一种不负责任的说法。以下是我的一些测试例子,从这里大家就可以理解这两个用法的区别。
select count(*) from tb_bo_valusr_new where month=201010 and brand=3;--141858
select count(*) from tb_bo_valusr_new where month=201010;---2281189 select count(distinct usr_nbr) from tb_bo_valusr_new where brand=3;--152110 select count(distinct usr_nbr) from tb_bo_valusr_new where month=201010;--2281189 select count(*) from tb_bo_valusr_new;--4602747 select count(*) from tmp_msy_bj_001;--986843 select count(*) from tmp_msy_bj_001 where if_wlg='是';--272623--例1
create table tmp_msy_bj_007 as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr; select count(*) from tmp_msy_bj_007;--1957872 select count(distinct usr_nbr) from tmp_msy_bj_007;---986843 select count(*) from (select distinct * from tmp_msy_bj_007);--1024792--例2
create table tmp_msy_bj_008
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr and b.month=201010; select count(*) from tmp_msy_bj_008;--986843 select count(distinct usr_nbr) from tmp_msy_bj_008;--986843 select count(*) from (select distinct * from tmp_msy_bj_008);--986843--例3
create table tmp_msy_bj_006
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr and b.month=201010 where a.if_wlg='是'; select count(*) from tmp_msy_bj_006;--272623 select count(distinct usr_nbr) from tmp_msy_bj_006;--272623 select count(*) from (select distinct * from tmp_msy_bj_006);--272623--例4
create table tmp_msy_bj_005
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr and b.month=201010 and b.brand=3; select count(*) from tmp_msy_bj_005;--986843 select count(distinct usr_nbr) from tmp_msy_bj_005;--986843 select count(*) from(select distinct * from tmp_msy_bj_005);--986843--例5
create table tmp_msy_bj_003
as select a.usr_nbr,b.cmcc_branch_cd,b.brand from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr and b.brand=3; select count(*) from tmp_msy_bj_003;--1062507 select count(distinct usr_nbr) from tmp_msy_bj_003;--986843 select count(*) from (select distinct * from tmp_msy_bj_003);--991560 select count(*) from tmp_msy_bj_003 where brand=3;--154124 select count(distinct usr_nbr) from tmp_msy_bj_003 where brand=3;--78460 select count(*) from tmp_msy_bj_003 where brand is null;--908383--例6
create table tmp_msy_bj_002
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr where b.brand=3; select count(*) from tmp_msy_bj_002;---154124 select count(distinct usr_nbr) from tmp_msy_bj_002;--78460 select count(*) from(select distinct * from tmp_msy_bj_002);--83177--例7
create table tmp_msy_bj_011
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a,tb_bo_valusr_new b where a.usr_nbr=b.usr_nbr and b.brand=3; select count(*) from tmp_msy_bj_011;---154124 select count(distinct usr_nbr) from tmp_msy_bj_011;--78460 select count(*) from(select distinct * from tmp_msy_bj_011);--83177--例8
create table tmp_msy_bj_009
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr where b.brand=3 and b.month=201010; select count(*) from tmp_msy_bj_009;---78421 select count(distinct usr_nbr) from tmp_msy_bj_009;--78421 select count(*) from(select distinct * from tmp_msy_bj_009);--78421 --以上各个表的创建时间都在20秒内完成--例9
create table tmp_msy_bj_004
as select a.usr_nbr,b.cmcc_branch_cd from tmp_msy_bj_001 a left join tb_bo_valusr_new b on a.usr_nbr=b.usr_nbr and a.if_wlg='是'; --运行25分钟都还没有结果在以上本例子中,b表tb_bo_valusr_new表中每个月都包含当月全量号码,且单月号码是没重复的,而a 表tmp_msy_bj_001没有重复记录。在这样的例子中,如果在left join 之后用 where 指定 b的非连接字段的限制条件,已相当于将A与B等值连接了,很明显,这时的left join已失去意义,left join的本义是保留前面表的所有记录,因此说将后一表中非连接字段的限制条件放在where之后是一种愚蠢的做法。而从上面的例5还可以看出,在left join 后指定and b.column=....起的效果是:将a中符合b条件的记录连接,并保留a中其它不符合的记录一次。因此可以这么理解left join ...on...and...:可以保证保留前面表的全部记录,并可取到后面表中符合指定条件的记录。而出现后面表的and...条件不起作用的情况是:后一表即b表的记录集是a表的一个子集。
再从例9中看出,绝对不要把a的限制条件放在left join on...and...指定,那是找死。