First, create one demo table
Let’s create one demo table first. It will be called Person and will hold some fictive person data.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
CREATE TABLE dbo.Person
(
Id int IDENTITY(1,1) NOT NULL,
Firstname nvarchar(50) NULL,
Lastname nvarchar(50) NULL,
Email varchar(100) NULL,
Created datetime NULL,
CONSTRAINT PK_Person PRIMARY KEY CLUSTERED
(
Id ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON PRIMARY
) ON PRIMARY
GO
SET ANSI_PADDING OFF
GO
Now, fill the data
Insert some fictive person data and do some duplicated records that have the same values for fields firstname, lastname and email.
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('John', 'Belvien', 'john.belvien@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Peter', 'Lumberjack', 'peter.lumberjack@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Peter', 'Lumberjack', 'peter.lumberjack@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Peter', 'Lumberjack', 'peter.lumberjack@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Suzy', 'Leghorn', 'suzy.leghorn@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Suzy', 'Leghorn', 'suzy.leghorn@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Daisy', 'Sommerfield', 'daisy.sommerfield@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Daisy', 'Sommerfield', 'daisy.sommerfield@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Daisy', 'Sommerfield', 'daisy.sommerfield@example.com', GETDATE())
INSERT INTO Person (Firstname, Lastname, Email, Created) VALUES ('Daisy', 'Sommerfield', 'daisy.sommerfield@example.com', GETDATE())
Now the magic
Use this subquery trick to extract duplicate records and then delete them. Notice the use of PARTITTION and WITH Transact-SQL keywords. All records that have RowNumber > 1 are deleted.
WITH CTE AS
(
SELECT Id,
Firstname,
Lastname,
Email,
ROW_NUMBER() OVER (PARTITION BY Firstname, Lastname, Email
ORDER BY Created) AS RowNumber
FROM Person
)
DELETE FROM Person WHERE Id IN (SELECT Id FROM CTE WHERE RowNumber > 1)