Tuesday, October 22, 2013

Mysql:Removing duplicate entries

-- creating table for experiment
CREATE TABLE `dup` (
`id` int(11) NOT NULL ,
`val` varchar(100) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;


-- adding duplicate entries to build desired query
insert into `dup` values (1,'san');
insert into `dup` values(2,'man');
insert into `dup` values(3,'van');
insert into `dup` values(4,'can');
insert into `dup` values(5,'ran');
insert into `dup` values(6,'pan');
insert into `dup` values(7,'van');
insert into `dup` values(8,'man');
insert into `dup` values(9,'ran');
insert into `dup` values(10,'man');


-- all entries at glance
select * from dup order by val

-- no of occurance of each entry
select val,count(val) cnt from dup group by val order by cnt desc

-- duplicate entry with first occurance
select id,val,count(val) cnt from dup group by val having cnt > 1 order by cnt desc

-- entries which are duplicated
select val from dup group by val having count(val) > 1 order by count(val) desc

-- entries which are duplicated
select id,val,count(val) from dup group by val having count(val) > 1 order by count(val) desc

-- id's of duplicate entry
select id from dup group by val having count(val) > 1 order by count(val) desc

-- duplicate entries without first ocurance
select id,val from dup where id not in(select id from dup group by val having count(val) > 1 order by count(val) desc) and
val in (select val from dup group by val having count(val) > 1)

-- excluding first occurance
select distinct b.id,b.val from dup a,dup b where a.val = b.val and a.id < b.id -- excluding last occurance select distinct b.id,b.val from dup a,dup b where a.val = b.val and a.id > b.id

-- duplicating table for back_up
create table dup_del as select * from dup

select * from dup_del

select * from dup

-- deleting duplicate entries except first occurance
delete b from dup a,dup b where a.val = b.val and a.id < b.id -- see which one deleted select * from dup -- restoring to old step insert into `dup` values(7,'van'); insert into `dup` values(8,'man'); insert into `dup` values(9,'ran'); insert into `dup` values(10,'man'); -- deleting duplicate entries except last occurance delete b from dup a,dup b where a.val = b.val and a.id > b.id


-- see which one deleted
select * from dup


-- restoring to old stage
insert into `dup` values(2,'man');
insert into `dup` values(3,'van');
insert into `dup` values(5,'ran');
insert into `dup` values(8,'man'); 

No comments:

Post a Comment