Friday, December 11, 2015

Performing Type 1+2 Updates on a Dimension Using Try Catch and MERGE

In a datawarehouse, we often come across dimensions which are combination of type  1 and type 2. We would need to preserve history on certain columns and overwrite history on other columns. In this example we will see how we can perform type 1 and type 2 update on a dimension in one transaction using MERGE and Try Catch available in T-SQL.


/* Dimension Table for Customer */
CREATE TABLE [dbo].[CustomerDim](
       [CustomerKey] [int] IDENTITY(1,1) NOT NULL,
       SourceCustomerID int,
       [FirstName] [nvarchar](50) NULL, -- Type 1
       [MiddleName] [nvarchar](50) NULL, -- Type 1
       [LastName] [nvarchar](50) NULL, -- Type 1
       [BirthDate] [date] NULL, -- Type 1
       [MaritalStatus] [nchar](1) NULL, -- Type 2
       [YearlyIncome] [money] NULL, -- Type 2
       [NumberChildrenAtHome] [tinyint] NULL, -- Type 2
       StartDate Date,
       EndDate Date
)


/* Staging Table to Hold Customer Data */
CREATE TABLE [dbo].[StagingCustomer](
       SourceCustomerID int,
       [FirstName] [nvarchar](50) NULL,
       [MiddleName] [nvarchar](50) NULL,
       [LastName] [nvarchar](50) NULL,
       [BirthDate] [date] NULL,
       [MaritalStatus] [nchar](1) NULL,
       [YearlyIncome] [money] NULL,
       [NumberChildrenAtHome] [tinyint] NULL
)


GO

/* Procedure to Peform Type1, Type2 Updates on CustomerDim */

CREATE PROC usp_CustomerDim_Type1Type2UPdates

AS

BEGIN TRY
       BEGIN TRAN -- Type 1 and Type 2 Updates will be in One Transaction
              -- Type 2 Inserts.
              INSERT INTO [dbo].[CustomerDim]
                     (
                     SourceCustomerID ,
                     [FirstName] ,
                     [MiddleName] ,
                     [LastName] ,
                     [BirthDate] ,
                     [MaritalStatus] ,
                     [YearlyIncome] ,
                     [NumberChildrenAtHome],
                     StartDate,
                     EndDate
                     )
              SELECT -- These columns are returned from OUTPUT Clause.
                     SourceCustomerID ,
                     [FirstName] ,
                     [MiddleName] ,
                     [LastName] ,
                     [BirthDate] ,
                     [MaritalStatus] ,
                     [YearlyIncome] ,
                     [NumberChildrenAtHome],
                     GETDATE() - 1 as StartDate,
                     '20500101' as EndDate
              FROM
                     (

                     -- Perform Type 2 Merge.
                     Merge [dbo].[CustomerDim] cust
                     Using [dbo].[StagingCustomer] stg
                      on cust.SourceCustomerID = stg.SourceCustomerID
                     WHEN NOT MATCHED  THEN     -- New Records
                     INSERT
                     (
                     SourceCustomerID ,
                     [FirstName] ,
                     [MiddleName] ,
                     [LastName] ,
                     [BirthDate] ,
                     [MaritalStatus] ,
                     [YearlyIncome] ,
                     [NumberChildrenAtHome],
                     StartDate,
                     EndDate
                     )
                     VALUES
                     (
                     stg.SourceCustomerID ,
                     stg.[FirstName] ,
                     stg.[MiddleName] ,
                     stg.[LastName] ,
                     stg.[BirthDate] ,
                     stg.[MaritalStatus] ,
                     stg.[YearlyIncome] ,
                     stg.[NumberChildrenAtHome],
                     GETDATE()-1,
                     '20500101'
                     )
                     WHEN MATCHED  -- Type 2 Changes Check
                           AND EndDate = '20500101' -- Current Record.
                           /* Check for Changes to Type 2 Columns */
                           AND EXISTS
                                  (
                                  SELECT cust.[YearlyIncome],
                                                cust.[NumberChildrenAtHome],
                                                cust.[MaritalStatus]

                                  EXCEPT

                                  SELECT stg.[YearlyIncome],
                                                stg.[NumberChildrenAtHome],
                                                stg.[MaritalStatus]
                                  )
                     THEN
                     UPDATE -- Set EndDate for Records having Type 2 Change.
                     Set EndDate = GETDATE()-1

              OUTPUT -- This will be used to insert recrods to CustomerDim Table.
                     $Action as MergeAction,
                     stg.SourceCustomerID ,
                     stg.[FirstName] ,
                     stg.[MiddleName] ,
                     stg.[LastName] ,
                     stg.[BirthDate] ,
                     stg.[MaritalStatus] ,
                     stg.[YearlyIncome] ,
                     stg.[NumberChildrenAtHome]

       ) as Type2Merge
WHERE                            
MergeAction = 'Update';   


-- Type 1 Updates.
;Merge [dbo].[CustomerDim] cust
 Using [dbo].[StagingCustomer] stg
 on cust.SourceCustomerID = stg.SourceCustomerID
WHEN MATCHED
       /* Check for Changes to Type 1 Columns */
       AND EXISTS
       (
       SELECT              
       cust.[FirstName] ,
       cust.[MiddleName] ,
       cust.[LastName] ,
       cust.[BirthDate]

       EXCEPT

       SELECT
       stg.[FirstName] ,
       stg.[MiddleName] ,
       stg.[LastName] ,
       stg.[BirthDate]
       )
THEN UPDATE
Set   
       [FirstName] =  stg.[FirstName] ,
       [MiddleName] = stg.[MiddleName] ,
       [LastName] = stg.[LastName],
       [BirthDate] = stg.[BirthDate]
;

COMMIT -- COMMIT Type 1 and Type2 Changes.

END TRY
BEGIN CATCH

--ROLLBACK
ROLLBACK TRAN; -- Rollback Type 1 and Type2 Changes.
THROW -- THROW Error.


END CATCH

GO – END of PROCEDURE

-- Testing.
-- Insert New Records
truncate table [dbo].[StagingCustomer]
Insert Into [dbo].[StagingCustomer] values
       ( 1,'John', '', 'Doe' , '19750101','M',100000,1),
       ( 2, 'Jack', '', 'Smith' , '19750101','M',100000,1),
       ( 3, 'Mary', '', 'Smith' , '19750101','M',100000,1)

exec usp_CustomerDim_Type1Type2UPdates



 -- Perform TYpe 2 Change
truncate table [dbo].[StagingCustomer]
Insert Into [dbo].[StagingCustomer] values
       ( 1,'John', '', 'Doe' , '19750101','M',200000,1)

exec usp_CustomerDim_Type1Type2UPdates


-- Pefrorm Type 1 Change
truncate table [dbo].[StagingCustomer]
Insert Into [dbo].[StagingCustomer] values
       ( 1,'John2', '', 'Doe' , '19750101','M',200000,1)

exec usp_CustomerDim_Type1Type2UPdates



-- Show the records
select * from [dbo].[CustomerDim]


CustomerKey
SourceCustomerID
FirstName
MiddleName
LastName
BirthDate
MaritalStatus
YearlyIncome
NumberChildrenAtHome
StartDate
EndDate
1
1
John2

Doe
1/1/1975
M
100000
1
12/10/2015
12/10/2015
2
2
Jack

Smith
1/1/1975
M
100000
1
12/10/2015
1/1/2050
3
3
Mary

Smith
1/1/1975
M
100000
1
12/10/2015
1/1/2050
4
1
John2

Doe
1/1/1975
M
200000
1
12/10/2015
1/1/2050


No comments: