Pattern Recognition for Hosts, Services and Content
'Rapid7' developed a framework dubbed 'Recog' (https://github.com/rapid7/recog) to facilitate fingerprinting hosts, services and content. The original program was written in 'Ruby'. Tools are provided to download and match fingerprints using R.
The following functions are implemented:
download_fingerprints
: Download fingerprints from the Recog repo

load_fingerprints
: Load a directory of fingerprints

read_fingerprints_file
: Ingest Recog XML fingerprints from a file and precompile regular expressions

recog_match
: Find fingerprint matches for a given source

recog_pick
: Find first fingerprint match for a given source

use_builtin_fingerprints
: Use built-in fingerprints
devtools::install_github("hrbrmstr/rrecog")
library(rrecog)
# current version
packageVersion("rrecog")
## [1] '0.1.0'
library(httr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.0     ✔ stringr 1.3.0
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Anything fetched over the network can fail, so wrap httr::GET with
# purrr::safely(): a failed call leaves `$result` NULL instead of raising
# an error.
safe_GET <- safely(httr::GET)

# Sites to probe, each given an explicit http:// scheme.
use_these <- sprintf(
  fmt = "http://%s",
  c(
    "r-project.org", "pypi.org", "www.mvnrepository.com", "spark.apache.org",
    "www.oracle.com", "www.microsoft.com", "www.apple.com", "feedly.com"
  )
)

# One progress-bar tick per site.
pb <- progress_estimated(length(use_these))

# Request every site with a 2-second timeout and keep its `server` response
# header. Sites whose request failed (or that sent no such header) contribute
# NULL, which compact() drops before the list is flattened to a character
# vector.
server_headers <- use_these %>%
  map(function(site) {
    pb$tick()$print()
    response <- safe_GET(site, httr::timeout(2))
    if (is.null(response$result)) {
      NULL
    } else {
      response$result$headers$server
    }
  }) %>%
  compact() %>%
  flatten_chr()

server_headers
## [1] "Apache/2.4.10 (Debian)" "nginx/1.13.9" "nginx/1.10.1" "Apache/2.4.18 (Ubuntu)"
## [5] "Oracle-HTTP-Server" "Apache" "cloudflare"
# Load the fingerprint database that ships with the package.
recog_db <- use_builtin_fingerprints()

# Look up fingerprint matches for each collected server header and row-bind
# the per-header results into one data frame.
# NOTE(review): "http" presumably restricts matching to HTTP fingerprints --
# confirm against the recog_match() documentation.
found <- map_df(
  server_headers,
  function(header) recog_match(recog_db, header, "http")
)

# Print a compact column-wise overview of the matches.
glimpse(found)
## Observations: 6
## Variables: 9
## $ service.vendor <chr> "Apache", "nginx", "nginx", "Apache", "Apache", "Apache"
## $ service.product <chr> "HTTPD", "nginx", "nginx", "HTTPD", "HTTPD", "HTTPD"
## $ service.family <chr> "Apache", "nginx", "nginx", "Apache", "Apache", "Apache"
## $ service.version <chr> "2.4.10", "1.13.9", "1.10.1", "2.4.18", NA, NA
## $ apache.info <chr> "(Debian)", NA, NA, "(Ubuntu)", NA, NA
## $ preference <dbl> 0.9, 0.9, 0.9, 0.9, 0.9, 0.9
## $ description <chr> "Apache", "nginx with version info", "nginx with version info", "Apache", "Apache returning...
## $ pattern <chr> "^Apache(?:-AdvancedExtranetServer)?(?:/([012][\\d.]*)\\s*(.*))?$", "^nginx/(\\S+)", "^ngin...
## $ orig <chr> "Apache/2.4.10 (Debian)", "nginx/1.13.9", "nginx/1.10.1", "Apache/2.4.18 (Ubuntu)", "Apache...
# Show only the original header next to the key fingerprint fields.
found %>%
  select(orig, service.vendor, service.version, apache.info, description)
## # A tibble: 6 x 5
## orig service.vendor service.version apache.info description
## <chr> <chr> <chr> <chr> <chr>
## 1 Apache/2.4.10 (Debian) Apache 2.4.10 (Debian) Apache
## 2 nginx/1.13.9 nginx 1.13.9 <NA> nginx with version info
## 3 nginx/1.10.1 nginx 1.10.1 <NA> nginx with version info
## 4 Apache/2.4.18 (Ubuntu) Apache 2.4.18 (Ubuntu) Apache
## 5 Apache Apache <NA> <NA> Apache returning no version information
## 6 Apache Apache <NA> <NA> Apache