How to Delete/Remove Duplicate Records/Rows from Snowflake Table? Snowflake| VCKLY Tech| Data Cloud

About
Share

Published On Sep 25, 2021

In this video, I am going to show you How to Delete/Remove Duplicate Records/Rows from Snowflake Table.

⌚Timestamps
00:00 Introduction
02:06 Case 1
06:03 Case 2
08:48 Case 3
13:18 Case 4
19:14 Case 5
22:48 Outro

--How to Delete Duplicate Records/Rows from Snowflake Table
create or replace TABLE DEPT (
DEPTNO NUMBER(38,0),
DNAME VARCHAR(16777216),
LOC VARCHAR(16777216)
);
select * from dept;
insert into DEPT (DEPTNO, DNAME, LOC)
values(10, 'ACCOUNTING', 'NEW YORK'),
(20, 'RESEARCH', 'DALLAS'),
(30, 'SALES', 'CHICAGO'),
(40, 'OPERATIONS', 'BOSTON');
insert into DEPT (DEPTNO, DNAME, LOC)
values(10, 'ACCOUNTING', 'NEW YORK'),
(20, 'RESEARCH', 'DALLAS'),
(30, 'SALES', 'CHICAGO')
;
select * from dept order by deptno desc;

--Case 1:
--Step 1: Create a new table dept_distinct using DISTINCT Clause
--Step 2: Drop existing table DEPT
--Step 3: Rename new table with Original table dept.
CREATE TABLE DEPT_DISTINCT AS
SELECT DISTINCT * from DEPT;
DROP TABLE dept;
alter table dept_distinct rename to dept;
select * from dept order by deptno desc;
--Case 2:
--Step 1: Create a new table DEPT_DISTINCT using DISTINCT Clause
--Step 2: ALTER TABLE using SWAP WITH method
--Step 3: Drop table created from step 1

-- Prepare data
truncate table dept;
insert into DEPT (DEPTNO, DNAME, LOC)
values(10, 'ACCOUNTING', 'NEW YORK'),
(20, 'RESEARCH', 'DALLAS'),
(30, 'SALES', 'CHICAGO'),
(40, 'OPERATIONS', 'BOSTON');

insert into DEPT (DEPTNO, DNAME, LOC)
values(10, 'ACCOUNTING', 'NEW YORK'),
(20, 'RESEARCH', 'DALLAS'),
(30, 'SALES', 'CHICAGO')
;
CREATE TABLE DEPT_DISTINCT AS
SELECT DISTINCT * from DEPT;
select * from DEPT_DISTINCT;
ALTER TABLE DEPT SWAP WITH DEPT_DISTINCT;
select * from DEPT_DISTINCT;
DROP TABLE DEPT_DISTINCT;
SELECT * FROM DEPT;

--Case 3: Use ROW_NUMBER Analytic function if we have any one column value with unique value.

--Step 1: Use delete statement
alter table dept add column insert_ts timestamp;
truncate table dept;
select * from dept order by deptno, insert_ts desc;
DELETE FROM DEPT WHERE (deptno, insert_ts) not in
(SELECT deptno, insert_ts
FROM
( SELECT *,
ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY insert_ts desc) RNO
FROM DEPT
)
WHERE RNO = 1
);
select * from dept order by deptno, insert_ts desc;
----
--Case 4: Use ROW_NUMBER Analytic function with QUALIFY if we have any one column value with unique value.

--Step 1: Use delete statement

truncate table dept;
insert into DEPT (DEPTNO, DNAME, LOC,INSERT_TS)
values(10, 'ACCOUNTING', 'NEW YORK',current_date() -1),
(20, 'RESEARCH', 'DALLAS',current_date() -1),
(30, 'SALES', 'CHICAGO',current_date() -1),
(30, 'SALES', 'CHICAGO',current_date() -2),
(40, 'OPERATIONS', 'BOSTON',current_date() -1);

insert into DEPT (DEPTNO, DNAME, LOC,INSERT_TS)
values(10, 'ACCOUNTING', 'NEW YORK',current_date()),
(20, 'RESEARCH', 'DALLAS',current_date()),
(30, 'SALES', 'CHICAGO',current_date())
;
select * from dept order by deptno, insert_ts desc;
DELETE FROM DEPT WHERE (deptno, insert_ts) not in
(SELECT deptno, insert_ts
FROM
( SELECT *,
ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY insert_ts desc) RNO
FROM DEPT QUALIFY RNO=1
) );
select * from dept order by deptno, insert_ts desc;
---- here is problem if no column with unique value
truncate table dept;
insert into DEPT (DEPTNO, DNAME, LOC,INSERT_TS)
values(10, 'ACCOUNTING', 'NEW YORK',current_date() -1),
(20, 'RESEARCH', 'DALLAS',current_date() -1),
(30, 'SALES', 'CHICAGO',current_date() -1),
(40, 'OPERATIONS', 'BOSTON',current_date() -1),
(40, 'OPERATIONS', 'BOSTON',current_date() -1);
insert into DEPT (DEPTNO, DNAME, LOC,INSERT_TS)
values(10, 'ACCOUNTING', 'NEW YORK',current_date()),
(20, 'RESEARCH', 'DALLAS',current_date()),
(30, 'SALES', 'CHICAGO',current_date())
;
select * from dept order by deptno, insert_ts desc;
DELETE FROM DEPT WHERE (deptno, insert_ts) not in
(SELECT deptno, insert_ts
FROM
( SELECT *,
ROW_NUMBER() OVER (PARTITION BY deptno ORDER BY insert_ts desc) RNO
FROM DEPT QUALIFY RNO=1
) )
;
select * from dept order by deptno, insert_ts desc;
-- in this case we have to use distinct option
-- Case 5:
---GROUP BY Clause to Remove Duplicate
alter table dept drop column insert_ts;
truncate table dept;
insert into DEPT (DEPTNO, DNAME, LOC)
values(10, 'ACCOUNTING', 'NEW YORK'),
(20, 'RESEARCH', 'DALLAS'),
(30, 'SALES', 'CHICAGO'),
(40, 'OPERATIONS', 'BOSTON')
;
insert into DEPT (DEPTNO, DNAME, LOC)
values(10, 'ACCOUNTING', 'NEW YORK'),
(20, 'RESEARCH', 'DALLAS'),
(30, 'SALES', 'CHICAGO')
;
select * from dept order by deptno desc;
CREATE TABLE dept_group_by as
SELECT deptno, dname, loc
FROM DEPT group by deptno, dname, loc
;
select * from dept_group_by order by deptno desc;
Alter table dept swap with dept_group_by;
select * from dept;
select * from dept_group_by;
drop table dept_group_by;

#snowflake#datacloud#vcklytech#snowflakedatacloud

Published On Sep 25, 2021

Share/Embed

Video Link