SQL> create table emp_dup as select * from emp;
Table created.
SQL> insert into emp_dup select * from emp;
14 rows created.
SQL> /
14 rows created.
SQL> insert into emp_dup select * from emp_dup;
42 rows created.
SQL> commit;
Commit complete.
Total no of rows...
SQL> select count(*) from emp_dup;
COUNT(*)
----------
84
These are the unique row
SQL> select * from (SELECT rowid,empno,deptno, ename, hiredate, ROW_NUMBER() OVER (PARTITION BY empno ORDER BY empno) AS emp_id
FROM emp_dup )
where emp_id=1;
ROWID EMPNO DEPTNO ENAME HIREDATE EMP_ID
------------------ ---------- ---------- ---------- --------- ----------
AAAaL6AAEAAABDkAAA 7369 20 SMITH 17-DEC-80 1
AAAaL6AAEAAABDkAAB 7499 30 ALLEN 20-FEB-81 1
AAAaL6AAEAAABDkAAC 7521 30 WARD 22-FEB-81 1
AAAaL6AAEAAABDkAAD 7566 20 JONES 02-APR-81 1
AAAaL6AAEAAABDkAAE 7654 30 MARTIN 28-SEP-81 1
AAAaL6AAEAAABDkAAF 7698 30 BLAKE 01-MAY-81 1
AAAaL6AAEAAABDkAAG 7782 10 CLARK 09-JUN-81 1
AAAaL6AAEAAABDkAAH 7788 20 SCOTT 19-APR-87 1
AAAaL6AAEAAABDkAAI 7839 10 KING 17-NOV-81 1
AAAaL6AAEAAABDkAAJ 7844 30 TURNER 08-SEP-81 1
AAAaL6AAEAAABDkAAK 7876 20 ADAMS 23-MAY-87 1
AAAaL6AAEAAABDkAAL 7900 30 JAMES 03-DEC-81 1
AAAaL6AAEAAABDkAAM 7902 20 FORD 03-DEC-81 1
AAAaL6AAEAAABDkAAN 7934 10 MILLER 23-JAN-82 1
14 rows selected.
SQL> select rowid from (SELECT rowid,empno,deptno, ename, hiredate, ROW_NUMBER() OVER (PARTITION BY empno ORDER BY empno) AS emp_id
FROM emp_dup )
where emp_id=1;
ROWID
------------------
AAAaL6AAEAAABDkAAA
AAAaL6AAEAAABDkAAB
AAAaL6AAEAAABDkAAC
AAAaL6AAEAAABDkAAD
AAAaL6AAEAAABDkAAE
AAAaL6AAEAAABDkAAF
AAAaL6AAEAAABDkAAG
AAAaL6AAEAAABDkAAH
AAAaL6AAEAAABDkAAI
AAAaL6AAEAAABDkAAJ
AAAaL6AAEAAABDkAAK
AAAaL6AAEAAABDkAAL
AAAaL6AAEAAABDkAAM
AAAaL6AAEAAABDkAAN
14 rows selected.
SQL> delete from emp_dup
where rowid in (select rowid from (SELECT rowid,empno,deptno, ename, hiredate, ROW_NUMBER() OVER (PARTITION BY empno ORDER BY empno) AS emp_id
FROM emp_dup )
where emp_id!=1)
/
70 rows deleted.
SQL> select * from emp;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- --------- ---------- ---------- ----------
7369 SMITH CLERK 7902 17-DEC-80 800 20
7499 ALLEN SALESMAN 7698 20-FEB-81 1600 300 30
7521 WARD SALESMAN 7698 22-FEB-81 1250 500 30
7566 JONES MANAGER 7839 02-APR-81 2975 20
7654 MARTIN SALESMAN 7698 28-SEP-81 1250 1400 30
7698 BLAKE MANAGER 7839 01-MAY-81 2850 30
7782 CLARK MANAGER 7839 09-JUN-81 2450 10
7788 SCOTT ANALYST 7566 19-APR-87 3000 20
7839 KING PRESIDENT 17-NOV-81 5000 10
7844 TURNER SALESMAN 7698 08-SEP-81 1500 0 30
7876 ADAMS CLERK 7788 23-MAY-87 1100 20
7900 JAMES CLERK 7698 03-DEC-81 950 30
7902 FORD ANALYST 7566 03-DEC-81 3000 20
7934 MILLER CLERK 7782 23-JAN-82 1300 10
14 rows selected.
SQL> select * from emp_dup;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- --------- ---------- ---------- ----------
7369 SMITH CLERK 7902 17-DEC-80 800 20
7499 ALLEN SALESMAN 7698 20-FEB-81 1600 300 30
7876 ADAMS CLERK 7788 23-MAY-87 1100 20
7934 MILLER CLERK 7782 23-JAN-82 1300 10
7844 TURNER SALESMAN 7698 08-SEP-81 1500 0 30
7902 FORD ANALYST 7566 03-DEC-81 3000 20
7566 JONES MANAGER 7839 02-APR-81 2975 20
7698 BLAKE MANAGER 7839 01-MAY-81 2850 30
7788 SCOTT ANALYST 7566 19-APR-87 3000 20
7521 WARD SALESMAN 7698 22-FEB-81 1250 500 30
7654 MARTIN SALESMAN 7698 28-SEP-81 1250 1400 30
7782 CLARK MANAGER 7839 09-JUN-81 2450 10
7839 KING PRESIDENT 17-NOV-81 5000 10
7900 JAMES CLERK 7698 03-DEC-81 950 30
14 rows selected.
=======================================
>>>>>>>>>>>>>>>>>>>>>>>>>>> Important
=======================================
Duplicate Data Cleanup
The ANUJ.EMP table contained a total of 3,670,016 rows.
select count(*) from ANUJ.EMP ;
COUNT(*)
----------
3670016
Only 14 distinct employee records were present; the remaining 3,670,002 rows were duplicates.
select distinct * from anuj.emp ;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- --------- ---------- ---------- ----------
7934 MILLER CLERK 7782 23-JAN-82 1300 10
7566 JONES MANAGER 7839 02-APR-81 2975 20
7698 BLAKE MANAGER 7839 01-MAY-81 2850 30
7499 ALLEN SALESMAN 7698 20-FEB-81 1600 300 30
7902 FORD ANALYST 7566 03-DEC-81 3000 20
7788 SCOTT ANALYST 7566 19-APR-87 3000 20
7369 SMITH CLERK 7902 17-DEC-80 800 20
7876 ADAMS CLERK 7788 23-MAY-87 1100 20
7521 WARD SALESMAN 7698 22-FEB-81 1250 500 30
7844 TURNER SALESMAN 7698 08-SEP-81 1500 0 30
7654 MARTIN SALESMAN 7698 28-SEP-81 1250 1400 30
7900 JAMES CLERK 7698 03-DEC-81 950 30
7782 CLARK MANAGER 7839 09-JUN-81 2450 10
7839 KING PRESIDENT 17-NOV-81 5000 10
14 rows selected.
Elapsed: 00:00:01.52
Analysis
Duplicate rows were identified based on EMPNO.
A ROW_NUMBER() analytic function was used to retain one unique row per employee.
Actions Performed
Identified distinct rows
Used ROW_NUMBER() OVER (PARTITION BY empno) to isolate unique records.
Confirmed 14 distinct rows.
test the sqlselect * from (SELECT rowid,empno,deptno, ename, hiredate, ROW_NUMBER() OVER (PARTITION BY empno ORDER BY empno) AS emp_id FROM anuj.emp ) where emp_id=1; ROWID EMPNO DEPTNO ENAME HIREDATE EMP_ID ------------------ ---------- ---------- ---------- --------- ---------- AAA20PAAMAAAC34AAf 7369 20 SMITH 17-DEC-80 1 AAA20PAAMAAADmsACA 7499 30 ALLEN 20-FEB-81 1 AAA20PAAMAAAC34AAh 7521 30 WARD 22-FEB-81 1 AAA20PAAMAAADmsAB8 7566 20 JONES 02-APR-81 1 AAA20PAAMAAAC34AAi 7654 30 MARTIN 28-SEP-81 1 AAA20PAAMAAADw7ACp 7698 30 BLAKE 01-MAY-81 1 AAA20PAAMAAAC34AAp 7782 10 CLARK 09-JUN-81 1 AAA20PAAMAAADw8AAA 7788 20 SCOTT 19-APR-87 1 AAA20PAAMAAAC34AAn 7839 10 KING 17-NOV-81 1 AAA20PAAMAAADw7ACk 7844 30 TURNER 08-SEP-81 1 AAA20PAAMAAAC/KAAS 7876 20 ADAMS 23-MAY-87 1 AAA20PAAMAAADw7ACm 7900 30 JAMES 03-DEC-81 1 AAA20PAAMAAAC/KAAM 7902 20 FORD 03-DEC-81 1 AAA20PAAMAAADw7ACn 7934 10 MILLER 23-JAN-82 1 14 rows selected.
Created a staging table with distinct rows
CREATE TABLE anuj.emp_dist AS
SELECT rowid AS uniq,
empno,
deptno,
ename,
hiredate,
emp_id
FROM (
SELECT rowid,
empno,
deptno,
ename,
hiredate,
ROW_NUMBER() OVER (PARTITION BY empno ORDER BY empno) AS emp_id
FROM anuj.emp
)
WHERE emp_id = 1;
select * from anuj.emp_dist ;
UNIQ EMPNO DEPTNO ENAME HIREDATE EMP_ID
------------------ ---------- ---------- ---------- --------- ----------
AAA20PAAMAAAC34AAf 7369 20 SMITH 17-DEC-80 1
AAA20PAAMAAADmsACA 7499 30 ALLEN 20-FEB-81 1
AAA20PAAMAAAC34AAh 7521 30 WARD 22-FEB-81 1
AAA20PAAMAAADmsAB8 7566 20 JONES 02-APR-81 1
AAA20PAAMAAAC34AAi 7654 30 MARTIN 28-SEP-81 1
AAA20PAAMAAADw7ACp 7698 30 BLAKE 01-MAY-81 1
AAA20PAAMAAAC34AAp 7782 10 CLARK 09-JUN-81 1
AAA20PAAMAAADw8AAA 7788 20 SCOTT 19-APR-87 1
AAA20PAAMAAAC34AAn 7839 10 KING 17-NOV-81 1
AAA20PAAMAAADw7ACk 7844 30 TURNER 08-SEP-81 1
AAA20PAAMAAAC/KAAS 7876 20 ADAMS 23-MAY-87 1
AAA20PAAMAAADw7ACm 7900 30 JAMES 03-DEC-81 1
AAA20PAAMAAAC/KAAM 7902 20 FORD 03-DEC-81 1
AAA20PAAMAAADw7ACn 7934 10 MILLER 23-JAN-82 1
14 rows selected.
delete from anuj.emp where rowid not in (select UNIQ from anuj.emp_dist);
3670002 rows deleted.
Elapsed: 00:01:00.86 <<<<<<<<<<<<<<<<<<<<<<<
select * from anuj.emp ;
EMPNO ENAME JOB MGR HIREDATE SAL COMM DEPTNO
---------- ---------- --------- ---------- --------- ---------- ---------- ----------
7369 SMITH CLERK 7902 17-DEC-80 800 20
7521 WARD SALESMAN 7698 22-FEB-81 1250 500 30
7654 MARTIN SALESMAN 7698 28-SEP-81 1250 1400 30
7839 KING PRESIDENT 17-NOV-81 5000 10
7782 CLARK MANAGER 7839 09-JUN-81 2450 10
7902 FORD ANALYST 7566 03-DEC-81 3000 20
7876 ADAMS CLERK 7788 23-MAY-87 1100 20
7566 JONES MANAGER 7839 02-APR-81 2975 20
7499 ALLEN SALESMAN 7698 20-FEB-81 1600 300 30
7844 TURNER SALESMAN 7698 08-SEP-81 1500 0 30
7900 JAMES CLERK 7698 03-DEC-81 950 30
7934 MILLER CLERK 7782 23-JAN-82 1300 10
7698 BLAKE MANAGER 7839 01-MAY-81 2850 30
7788 SCOTT ANALYST 7566 19-APR-87 3000 20
14 rows selected.
Elapsed: 00:00:00.18How to perform long deletes ( commiting every n number of records)
When performing a DELETE from a table, Oracle will generate rollback. If the particular table you are trying to delete from contains a large number of records, it is possible for the transaction to fail because of the rollback segment not being large enough
Solution would be to use the PL/SQL block below.
set serveroutout on
DECLARE
cnt NUMBER := 0;
total NUMBER := 0;
BEGIN
FOR rec IN (select rowid from (SELECT rowid,empno,deptno, ename, hiredate,
ROW_NUMBER() OVER (PARTITION BY empno ORDER BY empno) AS emp_id
FROM emp_dup )
where emp_id!=1)
LOOP
DELETE FROM emp_dup
WHERE rowid = rec.rowid;
total := total + 1;
cnt := cnt + 1;
IF (cnt >= 5000) THEN
COMMIT;
cnt := 0;
END IF;
END LOOP;
COMMIT;
DBMS_OUTPUT.PUT_LINE('Deleted ' || total || ' records from .');
END;

2 comments:
For unique rows
1.
set linesize 300
select max(rowid) from emp group by empno ;
2.
select * from emp a where rowid in ( select max(rowid) from emp b where a.empno=b.empno );
3.
select * from (select rowid rid, row_number() over (partition by empno order by empno ) rn from emp )
where rn =1
Delete row via plsql !!!!!
https://anuj-singh.blogspot.com/2018/11/oracle-delete-row-via-plsql-rowid.html
Post a Comment