Find duplicate, save in a list, delete duplicate using macro

  • Follow


Hello.
I need to find a duplicate from ID col and if found, it needs to look
at the data in the associated row to compare if it is truly a
duplicate. And if it is (same Product and Acct #), the true duplicate
will be the one with later date. Then the duplicate that has the later
date record will store this info in a separate worksheet or area and
then delete the dupicate from the orginal list. Can this be done by
using macro? Thank you for your help in advance.

For Example:
ID	Date	   Product	Acct #
1150	7/24/2009	   102	53
888	12/30/2009    Gas	50
1150	11/4/2009	    102	53
5524	3/27/2009	    Truck	48
888	11/30/2009    Gas	31
5524	4/27/2009	    Truck	90
5524	5/30/2009	    Truck	90

Final outcome:
ID	Date	   Product	Acct #
1150	7/24/2009	   102	53
888	12/30/2009    Gas	50
5524	3/27/2009	    Truck	48
888	11/30/2009    Gas	31
5524	4/27/2009	    Truck	90

Separate List:
ID	Date	   Product	Acct #
1150	11/4/2009	    102	53
5524	5/30/2009	    Truck	90
0
Reply Erica 1/26/2010 7:16:10 PM

Erica,

I tried to comment the code so that you could follow what I was doing.  I 
haven't put a lot of thought into it, so there may be a "better" way; 
however, the code below seems to work.  You can test this by simply putting 
your source data, anchored in A1 (and which is contiguous), into the first 
worksheet; and by having a blank worksheet for the second worksheet.  You can 
then run "TestIt".  

What is assumed is that your "For example" data is the source data and will 
become your "Final outcome" and that your "Separate List" will be placed on 
the second worksheet.  Also, the macro assumes that the "For example" data is 
sorted appropriately by date (see comments in the code for the order of 
execution) and that the "Separate List" output location is prepared 
appropriately (i.e. the copy/paste operation will overwrite any existing 
data).

I hope this helps.

Best,

Matthew Herbert

Sub TestIt()
AlterDuplicates Worksheets(1).Range("A1").CurrentRegion, _
                Array("ID", "Product", "Acct #"), _
                Worksheets(2).Range("A1")
End Sub

Sub AlterDuplicates(rngData As Range, _
                    varArrKey As Variant, _
                    rngOutAnchor As Range)
'rngData        the complete data set to serach for duplicates
'               (including the header row)
'varArrKey      an array of columns to create a key for the
'               duplicate test
'rngOutAnchor   the anchor location for the output of the
'               duplicates

Dim lngCnt As Long
Dim lngCntArr As Long
Dim intCnt As Integer
Dim rngHdr As Range
Dim rngAnchor As Range
Dim rngTemp As Range
Dim rngDup As Range
Dim rngCopy As Range
Dim rngResize As Range
Dim varRes As Variant
Dim varKey As Variant
Dim intArrCol() As Integer
Dim strArrDup() As String
Dim strTemp As String
Dim wksTemp As Worksheet

Application.ScreenUpdating = False

Set rngHdr = rngData.Rows(1)

intCnt = 0
'find the column headers necessary to create a unique key
For Each varKey In varArrKey
    varRes = Application.Match(varKey, rngHdr, 0)
    If Not IsError(varRes) Then
        ReDim Preserve intArrCol(intCnt)
        intArrCol(intCnt) = varRes
        intCnt = intCnt + 1
    End If
Next varKey

'test that intArrCol is loaded
varRes = True
On Error Resume Next
varRes = IsEmpty(intArrCol(0))
On Error GoTo 0
If varRes Then Exit Sub

'test that that intArrCol and varArrKey match in size, i.e.
'   ensure that you have the right columns for the unique key
If UBound(varArrKey) <> UBound(intArrCol) Then Exit Sub

Set rngAnchor = rngData(1)

'don't include the header and leave as zero-based
ReDim strArrDup((rngData.Rows.Count - 1) - 1)

'build the unique key for duplicate test (could just
'   as easily create a formula in a temporary column
'   that concatenates the columns together)
lngCntArr = 0
For lngCnt = 2 To rngData.Rows.Count
    strTemp = ""
    For intCnt = LBound(intArrCol) To UBound(intArrCol)
        With rngData.Parent
            strTemp = strTemp & .Cells(lngCnt, rngAnchor.Offset(0, _
                                intArrCol(intCnt) - 1).Column).Value
        End With
    Next intCnt
    strArrDup(lngCntArr) = strTemp
    lngCntArr = lngCntArr + 1
Next lngCnt

'use a temporary worksheet to run the calculations and
'   leverage the CountIf formula
Set wksTemp = ThisWorkbook.Worksheets.Add

'copy the base data to the temporary worksheet
rngData.Copy wksTemp.Range("A1")

'insert the unique key into the column right of the copied data
Set rngTemp = wksTemp.Range("A1").CurrentRegion

With rngTemp
    Set rngTemp = rngTemp(rngTemp.Count).Offset(0, 1)
    Set rngTemp = Range(rngTemp, rngTemp.End(xlUp).Offset(1, 0))
    rngTemp = Application.Transpose(strArrDup)
End With

'get the duplicates
'ASSUMES the data is date sorted appropriately because the loop
'   works from bottom to top, so duplicates are added in a
'   bottom to top order
For lngCnt = rngTemp.Rows.Count To 1 Step -1

    'size the range for CountIf (don't want to double count
    '   anything)
    Set rngResize = rngTemp.Resize(lngCnt, 1)
    
    'test for duplicates
    If Application.WorksheetFunction.CountIf(rngResize, _
                                             rngTemp(lngCnt)) > 1 Then
        
        'if a duplicate exists, get the data set
        With rngTemp(lngCnt)
            Set rngCopy = Range(.Offset(0, -1), _
                                .Offset(0, -1).End(xlToLeft))
        End With
        
        'add the duplicates into one range
        If rngDup Is Nothing Then
            Set rngDup = rngCopy
        Else
            Set rngDup = Union(rngDup, rngCopy)
        End If
    End If
Next lngCnt

'clear the temp column on the temp worksheet
rngTemp.Clear

'add the header to rngDup for copying to rngOutAnchor
With wksTemp
    Set rngDup = Union(rngDup, .Range(.Range("A1"), _
                               .Range("A1").End(xlToRight)))
End With

'clear the original data set, leaving the header
With rngData
    Set rngData = .Offset(1, 0).Resize(.Rows.Count - 1, _
                                       .Columns.Count)
End With
rngData.Clear

'copy the duplicates to the output range
rngDup.Copy rngOutAnchor

'delete the duplicate rows
rngDup.EntireRow.Delete

'copy the non-duplicate data back to the original worksheet,
'    just below the header
With wksTemp
    .Range("A1").CurrentRegion.Copy rngAnchor.Offset(1, 0)
End With

'delete the temporary worksheet
With Application
    .DisplayAlerts = False
    wksTemp.Delete
    .DisplayAlerts = True
End With

End Sub

"Erica" wrote:

> Hello.
> I need to find a duplicate from ID col and if found, it needs to look
> at the data in the associated row to compare if it is truly a
> duplicate. And if it is (same Product and Acct #), the true duplicate
> will be the one with later date. Then the duplicate that has the later
> date record will store this info in a separate worksheet or area and
> then delete the dupicate from the orginal list. Can this be done by
> using macro? Thank you for your help in advance.
> 
> For Example:
> ID	Date	   Product	Acct #
> 1150	7/24/2009	   102	53
> 888	12/30/2009    Gas	50
> 1150	11/4/2009	    102	53
> 5524	3/27/2009	    Truck	48
> 888	11/30/2009    Gas	31
> 5524	4/27/2009	    Truck	90
> 5524	5/30/2009	    Truck	90
> 
> Final outcome:
> ID	Date	   Product	Acct #
> 1150	7/24/2009	   102	53
> 888	12/30/2009    Gas	50
> 5524	3/27/2009	    Truck	48
> 888	11/30/2009    Gas	31
> 5524	4/27/2009	    Truck	90
> 
> Separate List:
> ID	Date	   Product	Acct #
> 1150	11/4/2009	    102	53
> 5524	5/30/2009	    Truck	90
> .
> 
0
Reply Utf 1/27/2010 6:53:01 PM


1 Replies
482 Views

(page loaded in 1.39 seconds)

Similiar Articles:
















7/19/2012 9:05:05 PM


Reply: