Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace geolix with locus #2362

Merged
merged 11 commits into from
Jan 17, 2023
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ plausible-report.xml

# Geolocation databases
/priv/geodb/*.mmdb
/priv/geodb/*.mmdb.gz

# Auto-generated tracker files
/priv/tracker/js/*.js
1 change: 1 addition & 0 deletions config/.env.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ SELFHOST=false
SITE_LIMIT=3
HCAPTCHA_SITEKEY=test
HCAPTCHA_SECRET=scottiger
IP_GEOLOCATION_DB=test/priv/GeoLite2-City-Test.mmdb
47 changes: 35 additions & 12 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,13 @@ geolite2_country_db =
get_var_from_path_or_env(
config_dir,
"GEOLITE2_COUNTRY_DB",
Application.app_dir(:plausible, "/priv/geodb/dbip-country.mmdb")
Application.app_dir(:plausible, "/priv/geodb/dbip-country.mmdb.gz")
)

ip_geolocation_db = get_var_from_path_or_env(config_dir, "IP_GEOLOCATION_DB", geolite2_country_db)
geonames_source_file = get_var_from_path_or_env(config_dir, "GEONAMES_SOURCE_FILE")
maxmind_license_key = get_var_from_path_or_env(config_dir, "MAXMIND_LICENSE_KEY")
maxmind_edition = get_var_from_path_or_env(config_dir, "MAXMIND_EDITION", "GeoLite2-City")

if System.get_env("DISABLE_AUTH") do
require Logger
Expand Down Expand Up @@ -433,17 +435,38 @@ config :kaffy,
]
]

if config_env() != :test do
config :geolix,
databases: [
%{
id: :geolocation,
adapter: Geolix.Adapter.MMDB2,
source: ip_geolocation_db,
result_as: :raw
}
]
end
# Error text raised when neither a MaxMind license key nor a database path
# is available to configure geolocation.
missing_geo_config_message = """
Missing geolocation database configuration.

Please set the IP_GEOLOCATION_DB environment value to the location of
your IP geolocation .mmdb file:

IP_GEOLOCATION_DB=/etc/plausible/dbip-city.mmdb

Or authenticate with MaxMind by
configuring MAXMIND_LICENSE_KEY and (optionally) MAXMIND_EDITION environment
variables:

MAXMIND_LICENSE_KEY=LNpsJCCKPis6XvBP
MAXMIND_EDITION=GeoLite2-City # this is the default edition

"""

# A MaxMind license key takes priority over a local database file; with
# neither one present the configuration is rejected.
geo_opts =
  if maxmind_license_key do
    [license_key: maxmind_license_key, edition: maxmind_edition, async: true]
  else
    [path: ip_geolocation_db || raise(missing_geo_config_message)]
  end

config :plausible, Plausible.Geo, geo_opts

if geonames_source_file do
config :location, :geonames_source_file, geonames_source_file
Expand Down
39 changes: 0 additions & 39 deletions config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -24,45 +24,6 @@ config :plausible, :google,

config :bamboo, :refute_timeout, 10

geolix_sample_lookup = %{
city: %{geoname_id: 2_988_507, names: %{en: "Paris"}},
continent: %{code: "EU", geoname_id: 6_255_148, names: %{en: "Europe"}},
country: %{
geoname_id: 3_017_382,
is_in_european_union: true,
iso_code: "FR",
names: %{en: "France"}
},
ip_address: {2, 2, 2, 2},
location: %{
latitude: 48.8566,
longitude: 2.35222,
time_zone: "Europe/Paris",
weather_code: "FRXX0076"
},
postal: %{code: "75000"},
subdivisions: [
%{geoname_id: 3_012_874, iso_code: "IDF", names: %{en: "Île-de-France"}},
%{geoname_id: 2_968_815, iso_code: "75", names: %{en: "Paris"}}
]
}

config :geolix,
databases: [
%{
id: :geolocation,
adapter: Geolix.Adapter.Fake,
data: %{
{1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{2, 2, 2, 2} => geolix_sample_lookup,
{1, 1, 1, 1, 1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{0, 0, 0, 0} => %{country: %{iso_code: "ZZ"}, city: %{geoname_id: 123_123}},
{0, 0, 0, 1} => %{country: %{iso_code: "XX"}, subdivisions: [%{iso_code: "IDF"}]},
{0, 0, 0, 2} => %{country: %{iso_code: "T1"}, subdivisions: [%{}, %{iso_code: "IDF"}]}
}
}
]

config :plausible,
session_timeout: 0,
http_impl: Plausible.HTTPClient.Mock,
Expand Down
2 changes: 1 addition & 1 deletion lib/mix/tasks/download_country_database.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ defmodule Mix.Tasks.DownloadCountryDatabase do

if res.status_code == 200 do
File.mkdir("priv/geodb")
File.write!("priv/geodb/dbip-country.mmdb", res.body)
File.write!("priv/geodb/dbip-country.mmdb.gz", res.body)
Copy link
Contributor Author

@ruslandoga ruslandoga Oct 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

locus trusts the file extension.
locus didn't try to gunzip dbip-country.mmdb because it didn't have a .gz suffix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For reference, the error that locus logged was

[error] [locus] [geolocation] database failed to load (:filesystem): {:unpack_database_from, :mmdb_blob,
 {:bad_metadata,
  {:marker_not_found,
   <<171, 205, 239, 77, 97, 120, 77, 105, 110, 100, 46, 99, 111, 109>>}}}

Logger.info("Downloaded and saved the database successfully")
else
Logger.error("Unable to download and save the database. Response: #{inspect(res)}")
Expand Down
10 changes: 10 additions & 0 deletions lib/plausible/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,14 @@ defmodule Plausible.Application do
]

opts = [strategy: :one_for_one, name: Plausible.Supervisor]

setup_sentry()
setup_opentelemetry()

setup_geolocation()
Location.load_all()
Plausible.Geo.await_loader()

Supervisor.start_link(children, opts)
end

Expand Down Expand Up @@ -119,4 +124,9 @@ defmodule Plausible.Application do
OpentelemetryEcto.setup([:plausible, :clickhouse_repo])
OpentelemetryOban.setup()
end

defp setup_geolocation do
opts = Application.fetch_env!(:plausible, Plausible.Geo)
:ok = Plausible.Geo.load_db(opts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this seems to be async by default, I think it would also be a good idea to add this loading step to the /api/health endpoint like we do with the sites cache. This way the load balancer will not route traffic to a newly restarted node until the geo database is loaded.

Side question: how long does it take to download and be ready to run geolocation lookups with the Maxmind license key?

Side note: not a concern for this PR but I think we're getting to a point where we should really separate liveness vs readiness probes as suggested by @cnkk. Loading the geolocation DB is required for the app to be 'ready' but it's independent from it being 'live'.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's sync by default in this implementation.

Copy link
Contributor Author

@ruslandoga ruslandoga Jan 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side question: how long does it take to download and be ready to run geolocation lookups with the Maxmind license key?

On my laptop it takes six seconds. I don't know how much bigger the paid version is though. I think most of the time is spent on IO, decoding (or rather, verification) is fast.

Mix.install [:locus]

:timer.tc fn ->
  Application.put_env(:locus, :license_key, "XXdDMc5OhchOTazu") # I'll delete it in a few days
  :ok = :locus.start_loader(:city, {:maxmind, "GeoLite2-City"}, [:no_cache])
  {:ok, _} = :locus.await_loader(:city)
end

#=> {6204296, {:ok, {{2023, 1, 10}, {15, 33, 48}}}}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. Seeing as this is sync and called before Location.load, I assume this will slow down the whole startup process. If this loading was async, it could run in parallel and probably finish before Location.load is done if I understand right. That seems desirable so the whole startup process is only limited by the slowest piece which would be Location.load. What do you think @ruslandoga @vinibrsl ?

Copy link
Contributor Author

@ruslandoga ruslandoga Jan 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, maybe we can fire it off before Location.load, and wait for it with something like Plausible.Geo.await_load() after Location.load (just in case)? await_load would call :locus.await_loader(:city)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's a good idea

Copy link
Contributor

@vinibrsl vinibrsl Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried using :locus.await_loader but I couldn't tell when the task was completed. The alternative I considered is to use Task.await_many/2 to start Location and Geolix in parallel. This ensures the app will never be in an invalid state where Geolix or Location haven't yet finished booting up. Changed in 35393b2. Let me know what you think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried using :locus.await_loader but I couldn't tell when the task was completed.

What was the problem?

Copy link
Contributor

@vinibrsl vinibrsl Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem was that I was using it wrong 😅. Fixed in c775070. Let me know what you think :)

end
end
189 changes: 189 additions & 0 deletions lib/plausible/geo.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
defmodule Plausible.Geo do
  @moduledoc """
  IP geolocation API backed by a database that is loaded through `:locus`.
  """

  require Logger

  @db :geolocation

  @doc """
  Kicks off loading of the geolocation database.

  The database is taken either from a local file or downloaded from MaxMind
  with a license key. When both `:license_key` and `:path` are given, the
  license key wins. Raises when neither of them is provided.

  Unless `:async` is set, the call also waits on `:locus.await_loader/1` and
  requires it to succeed before returning `:ok`.

  ## Options

    * `:path` - path to a local .mmdb database file. When present,
      `:license_key` and `:edition` are not required.

    * `:license_key` - the [license key](https://support.maxmind.com/hc/en-us/articles/4407111582235-Generate-a-License-Key)
      used to authenticate requests to MaxMind.

    * `:edition` - name of the MaxMind database to download from MaxMind
      servers. Defaults to `GeoLite2-City`.

    * `:async` - when set, the database is loaded asynchronously and this
      function returns without waiting for the loader.

  ## Examples

  Loading from a local file:

      iex> load_db(path: "/etc/plausible/dbip-city.mmdb")
      :ok

  Downloading a MaxMind DB (this license key is no longer active):

      iex> load_db(license_key: "LNpsJCCKPis6XvBP", edition: "GeoLite2-City", async: true)
      :ok

  """
  def load_db(opts) do
    license_key = opts[:license_key]

    if license_key do
      edition = opts[:edition] || "GeoLite2-City"
      :ok = :locus.start_loader(@db, {:maxmind, edition}, license_key: license_key)
    else
      path =
        opts[:path] ||
          raise "failed to load geolocation db: need :path or :license_key to be provided"

      :ok = :locus.start_loader(@db, path)
    end

    # Synchronous mode: insist on a successfully loaded database before returning.
    if !opts[:async] do
      {:ok, _loaded_version} = :locus.await_loader(@db)
    end

    :ok
  end

  @doc """
  Waits on the loader of the geolocation database.

  Meant to be called after `load_db/1` was invoked with the `:async` option.
  Returns the result of `:locus.await_loader/1` unchanged.
  """
  def await_loader do
    :locus.await_loader(@db)
  end

  @doc """
  Returns the type of the loaded geodatabase, or `nil` when the metadata
  cannot be read.

  Used for deciding whether to show the DB-IP disclaimer or not.

  ## Examples

  In the case of a DB-IP database:

      iex> database_type()
      "DBIP-City-Lite"

  In the case of a MaxMind database:

      iex> database_type()
      "GeoLite2-City"

  """
  def database_type do
    with {:ok, %{database_type: type}} <- :locus.get_info(@db, :metadata) do
      type
    else
      _other -> nil
    end
  end

  @doc """
  Looks up geo info about an IP address.

  Returns the database entry for the address, or `nil` when the address is
  not found or the lookup fails (failures are logged as errors).

  ## Examples

      iex> lookup("8.7.6.5")
      %{
        "city" => %{
          "geoname_id" => 5349755,
          "names" => %{
            "de" => "Fontana",
            "en" => "Fontana",
            "ja" => "フォンタナ",
            "ru" => "Фонтана"
          }
        },
        "continent" => %{
          "code" => "NA",
          "geoname_id" => 6255149,
          "names" => %{
            "de" => "Nordamerika",
            "en" => "North America",
            "es" => "Norteamérica",
            "fr" => "Amérique du Nord",
            "ja" => "北アメリカ",
            "pt-BR" => "América do Norte",
            "ru" => "Северная Америка",
            "zh-CN" => "北美洲"
          }
        },
        "country" => %{
          "geoname_id" => 6252001,
          "iso_code" => "US",
          "names" => %{
            "de" => "Vereinigte Staaten",
            "en" => "United States",
            "es" => "Estados Unidos",
            "fr" => "États Unis",
            "ja" => "アメリカ",
            "pt-BR" => "EUA",
            "ru" => "США",
            "zh-CN" => "美国"
          }
        },
        "location" => %{
          "accuracy_radius" => 50,
          "latitude" => 34.1211,
          "longitude" => -117.4362,
          "metro_code" => 803,
          "time_zone" => "America/Los_Angeles"
        },
        "postal" => %{"code" => "92336"},
        "registered_country" => %{
          "geoname_id" => 6252001,
          "iso_code" => "US",
          "names" => %{
            "de" => "Vereinigte Staaten",
            "en" => "United States",
            "es" => "Estados Unidos",
            "fr" => "États Unis",
            "ja" => "アメリカ",
            "pt-BR" => "EUA",
            "ru" => "США",
            "zh-CN" => "美国"
          }
        },
        "subdivisions" => [
          %{
            "geoname_id" => 5332921,
            "iso_code" => "CA",
            "names" => %{
              "de" => "Kalifornien",
              "en" => "California",
              "es" => "California",
              "fr" => "Californie",
              "ja" => "カリフォルニア州",
              "pt-BR" => "Califórnia",
              "ru" => "Калифорния",
              "zh-CN" => "加州"
            }
          }
        ]
      }

  """
  def lookup(ip_address) do
    lookup_result = :locus.lookup(@db, ip_address)

    case lookup_result do
      :not_found ->
        nil

      {:ok, geo_entry} ->
        geo_entry

      {:error, reason} ->
        Logger.error("failed to lookup ip address: #{inspect(reason)}")
        nil
    end
  end
end
2 changes: 1 addition & 1 deletion lib/plausible/ingestion/event.ex
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ defmodule Plausible.Ingestion.Event do
end

# Looks up geolocation data for the request's remote IP and applies it to the
# event through update_attrs/2. A nil/false lookup result is replaced with an
# empty map before it is passed on.
# NOTE(review): the first `result` binding is immediately rebound by the next
# line — this looks like the before/after pair of a diff hunk; confirm only
# the `|| %{}` variant is meant to remain.
defp put_geolocation(%__MODULE__{} = event) do
result = Plausible.Ingestion.Geolocation.lookup(event.request.remote_ip)
result = Plausible.Ingestion.Geolocation.lookup(event.request.remote_ip) || %{}

update_attrs(event, result)
end
Expand Down
Loading