From d78a1b5d9063a3482684572cb8955136214018ea Mon Sep 17 00:00:00 2001
From: Arun Srinivasan <aragorn168b@gmail.com>
Date: Mon, 13 Oct 2014 10:34:43 +0200
Subject: [PATCH] Closes #872. Clearer explanation of duplicated().

---
 README.md         |  1 +
 man/duplicated.Rd | 55 +++++++++++++++++++++++------------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index d2ae8b6d27..504c92aa55 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,7 @@
 
 #### NOTES
 
+  1. Clearer explanation of what `duplicated()` does (borrowed from base). Thanks to @matthieugomez for pointing this out. Closes [#872](https://github.com/Rdatatable/data.table/issues/872).
 
 ### Changes in v1.9.4  (on CRAN 2 Oct 2014)
 
diff --git a/man/duplicated.Rd b/man/duplicated.Rd
index 81bc407dae..2a204af5f6 100644
--- a/man/duplicated.Rd
+++ b/man/duplicated.Rd
@@ -7,10 +7,10 @@
 \alias{anyDuplicated.data.table}
 \title{ Determine Duplicate Rows }
 \description{
-     \code{duplicated} returns a logical vector indicating which rows of a \code{data.table}
-     have duplicate rows (by key).
+     \code{duplicated} returns a logical vector indicating which rows of a \code{data.table} (by
+     key columns or, when there is no key, by all columns) are duplicates of a row with a smaller subscript.
 
-     \code{unique} returns a data table with duplicated rows (by key) removed, or
+     \code{unique} returns a \code{data.table} with duplicated rows (by key) removed, or
      (when no key) duplicated rows by all columns removed.
 
      \code{anyDuplicated} returns the \emph{index} \code{i} of the first duplicated entry if there is one, and 0 otherwise. 
@@ -65,38 +65,37 @@
 }
 \seealso{ \code{\link{data.table}}, \code{\link{duplicated}}, \code{\link{unique}}, \code{\link{all.equal}}}
 \examples{
-    DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B")
-    duplicated(DT)
-    unique(DT)
+DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B")
+duplicated(DT)
+unique(DT)
 
-    duplicated(DT, by="B")
-    unique(DT, by="B")
+duplicated(DT, by="B")
+unique(DT, by="B")
 
-    duplicated(DT, by=c("A", "C"))
-    unique(DT, by=c("A", "C"))
+duplicated(DT, by=c("A", "C"))
+unique(DT, by=c("A", "C"))
 
-    DT = data.table(a=c(2L,1L,2L), b=c(1L,2L,1L))   # no key
-    unique(DT)                   # rows 1 and 2 (row 3 is a duplicate of row 1)
+DT = data.table(a=c(2L,1L,2L), b=c(1L,2L,1L))   # no key
+unique(DT)                   # rows 1 and 2 (row 3 is a duplicate of row 1)
 
-    DT = data.table(a=c(3.142, 4.2, 4.2, 3.142, 1.223, 1.223), b=rep(1,6))
-    unique(DT)                   # rows 1,2 and 5
+DT = data.table(a=c(3.142, 4.2, 4.2, 3.142, 1.223, 1.223), b=rep(1,6))
+unique(DT)                   # rows 1,2 and 5
 
-    DT = data.table(a=tan(pi*(1/4 + 1:10)), b=rep(1,10))   # example from ?all.equal
-    length(unique(DT$a))         # 10 strictly unique floating point values
-    all.equal(DT$a,rep(1,10))    # TRUE, all within tolerance of 1.0
-    DT[,which.min(a)]            # row 10, the strictly smallest floating point value
-    identical(unique(DT),DT[1])  # TRUE, stable within tolerance
-    identical(unique(DT),DT[10]) # FALSE
+DT = data.table(a=tan(pi*(1/4 + 1:10)), b=rep(1,10))   # example from ?all.equal
+length(unique(DT$a))         # 10 strictly unique floating point values
+all.equal(DT$a,rep(1,10))    # TRUE, all within tolerance of 1.0
+DT[,which.min(a)]            # row 10, the strictly smallest floating point value
+identical(unique(DT),DT[1])  # TRUE, stable within tolerance
+identical(unique(DT),DT[10]) # FALSE
 
-    # fromLast=TRUE
-    DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B")
-    duplicated(DT, by="B", fromLast=TRUE)
-    unique(DT, by="B", fromLast=TRUE)
+# fromLast=TRUE
+DT <- data.table(A = rep(1:3, each=4), B = rep(1:4, each=3), C = rep(1:2, 6), key = "A,B")
+duplicated(DT, by="B", fromLast=TRUE)
+unique(DT, by="B", fromLast=TRUE)
 
-    # anyDuplicated
-    anyDuplicated(DT, by=c("A", "B"))    # 3L
-    any(duplicated(DT, by=c("A", "B")))  # TRUE
+# anyDuplicated
+anyDuplicated(DT, by=c("A", "B"))    # 2L (row 2 is the first duplicate, of row 1)
+any(duplicated(DT, by=c("A", "B")))  # TRUE
 }
 \keyword{ data }
 
-