假设有一张表oldyang_bayern,取其中的10条数据作为参考:
现在需要合并此张表中姓名重复的数据,将符合条件的字段值,合并到重名数据中ID最高的那一条。
要合并的字段分为三种情况:
1. 常规字段(DEPARTMENT/JOBTITLE):取重名数据中ID最高的非空值,例如“施魏因施泰格”的DEPARTMENT字段合并后,应取值“组织部”;
2. 依据数值大小取值的字段(SALARY):取重名数据中的最大值;
3. 非常规字段(CITY/DITRICT/ADDRESS/ADDRESSRANK):这些字段都与地址相关,以ADDRESSRANK最高的那条数据为准(ADDRESSRANK由高到低分别为S|A|B|C|D),然后取此条数据中的CITY/DITRICT/ADDRESS值;如果ADDRESSRANK相同,取ID最高的数据。
我所给出的方法:
处理合并的存储过程 prc_oldyang_bayern
create or replace procedure prc_oldyang_bayern(pname varchar2) is cursor cur is select * from oldyang_bayern where name=pname order by id desc; --NAME相同的数据按id降序排列 --申明记录表info_table,用于存放NAME相同的数据 type info_table_type is table of oldyang_bayern%ROWTYPE index by binary_integer; info_table info_table_type; info_dest oldyang_bayern%rowtype; nCount number(2):=0; --计数变量 vi number(2); --在更新地址字段时将会用于放入记录号 begin open cur; --通过循环计数的方式将重复的记录放入记录表 loop nCount:= nCount+1; fetch cur into info_table(nCount); exit when cur%notfound; end loop; close cur; nCount := nCount - 1; --更新常规字段DEPARTMENT/JOBTITLE for i in 1..nCount-1 loop if i = 1 then info_dest.DEPARTMENT:= nvl(info_table(i).DEPARTMENT, info_table(i+1).DEPARTMENT); else info_dest.DEPARTMENT:= nvl(info_dest.DEPARTMENT, info_table(i+1).DEPARTMENT); end if; end loop; for i in 1..nCount-1 loop if i = 1 then info_dest.JOBTITLE:= nvl(info_table(i).JOBTITLE, info_table(i+1).JOBTITLE); else info_dest.JOBTITLE:= nvl(info_dest.JOBTITLE, info_table(i+1).JOBTITLE); end if; end loop; --更新SALARY字段 for i in 1..nCount-1 loop if i = 1 then info_dest.SALARY:= greatest(nvl(info_table(i).SALARY,-1), nvl(info_table(i+1).SALARY,-1)); else info_dest.SALARY:= greatest(nvl(info_dest.SALARY,-1), nvl(info_table(i+1).SALARY,-1)); end if; end loop; info_dest.SALARY:= replace(info_dest.SALARY,-1); --处理当SALARY为空值时的状况 --更新与ADDRESSRANK相关的字段 --此处引用了一个自定义函数fun_get_ranknum --利用这个函数将地址等级S|A|B|C|D转换成对应的地址等级序号1|2|3|4|5(函数代码会在后面列出) --接着拼接地址等级序号+记录号,取拼接后最小值 --此方法用于处理当ADDRESSRANK相同时,取ID最高的那条ADDRESSRANK相关记录 --(注:在游标中记录已按ID降序排列) for i in 1..nCount-1 loop if i = 1 then info_dest.ADDRESSRANK:= least(fun_get_ranknum(info_table(i).ADDRESSRANK)||i, fun_get_ranknum(info_table(i+1).ADDRESSRANK)||i+1); else info_dest.ADDRESSRANK:= least(info_dest.ADDRESSRANK, fun_get_ranknum(info_table(i+1).ADDRESSRANK)||i+1); end if; end loop; vi:= regexp_substr(info_dest.ADDRESSRANK, '\d$'); --使用正则表达式取出拼接值中的记录号 info_dest.ADDRESSRANK:=info_table(vi).ADDRESSRANK; info_dest.CITY:=info_table(vi).CITY; info_dest.DISTRICT:=info_table(vi).DISTRICT; info_dest.ADDRESS:=info_table(vi).ADDRESS; --将筛选后的变量值更新到ID最高的记录当中 update oldyang_bayern set DEPARTMENT=info_dest.DEPARTMENT, JOBTITLE=info_dest.JOBTITLE, SALARY=info_dest.SALARY, CITY=info_dest.CITY, DISTRICT=info_dest.DISTRICT, ADDRESS=info_dest.ADDRESS, ADDRESSRANK=info_dest.ADDRESSRANK where id = info_table(1).id; --删除重复的记录 delete from oldyang_bayern where name=pname and id != info_table(1).id; commit; end ;
存储过程中所用到的自定义函数 fun_get_ranknum
create or replace function fun_get_ranknum(prank in varchar2) return varchar2 is Result varchar(2); vcTemp varchar(2); begin vcTemp:=prank; case when vcTemp = 'S' then vcTemp:= '1'; when vcTemp = 'A' then vcTemp:= '2'; when vcTemp = 'B' then vcTemp:= '3'; when vcTemp = 'C' then vcTemp:= '4'; when vcTemp = 'D' then vcTemp:= '5'; when vcTemp is null then vcTemp:= '9'; end case; Result := vcTemp; return(Result); exception when others then Result := '9'; return result; end ;
利用匿名块传递重复姓名到prc_oldyang_bayern,执行合并过程
declare cursor cur is select name from oldyang_bayern group by name having count(*)>1; rec cur%rowtype; begin open cur; loop fetch cur into rec; exit when cur%notfound; prc_oldyang_bayern(rec.name); end loop; close cur; end;
转自: http://www.oldyang.com/2009/06/23/using_plsql_remove_duplicates/