Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace geolix with locus #2362

Merged
merged 11 commits into from
Jan 17, 2023
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ plausible-report.xml

# Geolocation databases
/priv/geodb/*.mmdb
/priv/geodb/*.mmdb.gz

# Auto-generated tracker files
/priv/tracker/js/*.js
1 change: 1 addition & 0 deletions config/.env.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ SELFHOST=false
SITE_LIMIT=3
HCAPTCHA_SITEKEY=test
HCAPTCHA_SECRET=scottiger
IP_GEOLOCATION_DB=test/priv/GeoLite2-City-Test.mmdb
46 changes: 34 additions & 12 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,13 @@ geolite2_country_db =
get_var_from_path_or_env(
config_dir,
"GEOLITE2_COUNTRY_DB",
Application.app_dir(:plausible, "/priv/geodb/dbip-country.mmdb")
Application.app_dir(:plausible, "/priv/geodb/dbip-country.mmdb.gz")
)

ip_geolocation_db = get_var_from_path_or_env(config_dir, "IP_GEOLOCATION_DB", geolite2_country_db)
geonames_source_file = get_var_from_path_or_env(config_dir, "GEONAMES_SOURCE_FILE")
maxmind_license_key = get_var_from_path_or_env(config_dir, "MAXMIND_LICENSE_KEY")
maxmind_edition = get_var_from_path_or_env(config_dir, "MAXMIND_EDITION", "GeoLite2-City")

if System.get_env("DISABLE_AUTH") do
require Logger
Expand Down Expand Up @@ -433,17 +435,37 @@ config :kaffy,
]
]

if config_env() != :test do
config :geolix,
databases: [
%{
id: :geolocation,
adapter: Geolix.Adapter.MMDB2,
source: ip_geolocation_db,
result_as: :raw
}
]
end
# Error text raised when neither a MaxMind license key nor a database path
# is available for the geolocation loader.
missing_geo_config_message = """
Missing geolocation database configuration.

Please set the IP_GEOLOCATION_DB environment value to the location of
your IP geolocation .mmdb file:

IP_GEOLOCATION_DB=/etc/plausible/dbip-city.mmdb

Or authenticate with MaxMind by
configuring MAXMIND_LICENSE_KEY and (optionally) MAXMIND_EDITION environment
variables:

MAXMIND_LICENSE_KEY=LNpsJCCKPis6XvBP
MAXMIND_EDITION=GeoLite2-City # this is the default edition

"""

# Options handed to Plausible.Geo. A MaxMind license key takes precedence
# over a local database path; with neither, boot is aborted.
geo_opts =
  cond do
    maxmind_license_key ->
      [license_key: maxmind_license_key, edition: maxmind_edition]

    ip_geolocation_db ->
      [path: ip_geolocation_db]

    true ->
      raise missing_geo_config_message
  end

config :plausible, Plausible.Geo, geo_opts

if geonames_source_file do
config :location, :geonames_source_file, geonames_source_file
Expand Down
39 changes: 0 additions & 39 deletions config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -24,45 +24,6 @@ config :plausible, :google,

config :bamboo, :refute_timeout, 10

geolix_sample_lookup = %{
city: %{geoname_id: 2_988_507, names: %{en: "Paris"}},
continent: %{code: "EU", geoname_id: 6_255_148, names: %{en: "Europe"}},
country: %{
geoname_id: 3_017_382,
is_in_european_union: true,
iso_code: "FR",
names: %{en: "France"}
},
ip_address: {2, 2, 2, 2},
location: %{
latitude: 48.8566,
longitude: 2.35222,
time_zone: "Europe/Paris",
weather_code: "FRXX0076"
},
postal: %{code: "75000"},
subdivisions: [
%{geoname_id: 3_012_874, iso_code: "IDF", names: %{en: "Île-de-France"}},
%{geoname_id: 2_968_815, iso_code: "75", names: %{en: "Paris"}}
]
}

config :geolix,
databases: [
%{
id: :geolocation,
adapter: Geolix.Adapter.Fake,
data: %{
{1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{2, 2, 2, 2} => geolix_sample_lookup,
{1, 1, 1, 1, 1, 1, 1, 1} => %{country: %{iso_code: "US"}},
{0, 0, 0, 0} => %{country: %{iso_code: "ZZ"}, city: %{geoname_id: 123_123}},
{0, 0, 0, 1} => %{country: %{iso_code: "XX"}, subdivisions: [%{iso_code: "IDF"}]},
{0, 0, 0, 2} => %{country: %{iso_code: "T1"}, subdivisions: [%{}, %{iso_code: "IDF"}]}
}
}
]

config :plausible,
session_timeout: 0,
http_impl: Plausible.HTTPClient.Mock,
Expand Down
2 changes: 1 addition & 1 deletion lib/mix/tasks/download_country_database.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ defmodule Mix.Tasks.DownloadCountryDatabase do

if res.status_code == 200 do
File.mkdir("priv/geodb")
File.write!("priv/geodb/dbip-country.mmdb", res.body)
File.write!("priv/geodb/dbip-country.mmdb.gz", res.body)
Copy link
Contributor Author

@ruslandoga ruslandoga Oct 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

locus trusts the file extension.
locus didn't try to gunzip dbip-country.mmdb because it didn't have a .gz suffix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For reference, the error that locus logged was

[error] [locus] [geolocation] database failed to load (:filesystem): {:unpack_database_from, :mmdb_blob,
 {:bad_metadata,
  {:marker_not_found,
   <<171, 205, 239, 77, 97, 120, 77, 105, 110, 100, 46, 99, 111, 109>>}}}

Logger.info("Downloaded and saved the database successfully")
else
Logger.error("Unable to download and save the database. Response: #{inspect(res)}")
Expand Down
6 changes: 6 additions & 0 deletions lib/plausible/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ defmodule Plausible.Application do
opts = [strategy: :one_for_one, name: Plausible.Supervisor]
setup_sentry()
setup_opentelemetry()
setup_geolocation()
Location.load_all()
Supervisor.start_link(children, opts)
end
Expand Down Expand Up @@ -119,4 +120,9 @@ defmodule Plausible.Application do
OpentelemetryEcto.setup([:plausible, :clickhouse_repo])
OpentelemetryOban.setup()
end

defp setup_geolocation do
opts = Application.fetch_env!(:plausible, Plausible.Geo)
:ok = Plausible.Geo.load_db(opts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this seems to be async by default, I think it would also be a good idea to add this loading step to the /api/health endpoint like we do with the sites cache. This way the load balancer will not route traffic to a newly restarted node until the geo database is loaded.

Side question: how long does it take to download and be ready to run geolocation lookups with the Maxmind license key?

Side note: not a concern for this PR but I think we're getting to a point where we should really separate liveness vs readiness probes as suggested by @cnkk. Loading the geolocation DB is required for the app to be 'ready' but it's independent from it being 'live'.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's sync by default in this implementation.

Copy link
Contributor Author

@ruslandoga ruslandoga Jan 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Side question: how long does it take to download and be ready to run geolocation lookups with the Maxmind license key?

On my laptop it takes six seconds. I don't know how much bigger the paid version is though. I think most of the time is spent on IO, decoding (or rather, verification) is fast.

Mix.install [:locus]

:timer.tc fn ->
  Application.put_env(:locus, :license_key, "XXdDMc5OhchOTazu") # I'll delete it in a few days
  :ok = :locus.start_loader(:city, {:maxmind, "GeoLite2-City"}, [:no_cache])
  {:ok, _} = :locus.await_loader(:city)
end

#=> {6204296, {:ok, {{2023, 1, 10}, {15, 33, 48}}}}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. Seeing as this is sync and called before Location.load, I assume this will slow down the whole startup process. If this loading was async, it could run in parallel and probably finish before Location.load is done if I understand right. That seems desirable so the whole startup process is only limited by the slowest piece which would be Location.load. What do you think @ruslandoga @vinibrsl ?

Copy link
Contributor Author

@ruslandoga ruslandoga Jan 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, maybe we can fire it off before Location.load, and wait for it with something like Plausible.Geo.await_load() after Location.load (just in case)? await_load would call :locus.await_loader(:city)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's a good idea

Copy link
Contributor

@vinibrsl vinibrsl Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried using :locus.await_loader but I couldn't tell when the task was completed. The alternative I considered is to use Task.await_many/2 to start Location and Geolix in parallel. This ensures the app will never be in an invalid state where Geolix or Location haven't yet finished booting up. Changed in 35393b2. Let me know what you think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried using :locus.await_loader but I couldn't tell when the task was completed.

What was the problem?

Copy link
Contributor

@vinibrsl vinibrsl Jan 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem was that I was using it wrong 😅. Fixed in c775070. Let me know what you think :)

end
end
164 changes: 164 additions & 0 deletions lib/plausible/geo.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
defmodule Plausible.Geo do
  @moduledoc """
  This module provides an API for fetching IP geolocation.
  """

  require Logger

  @db :geolocation

  @doc """
  Starts loading the geolocation database through `:locus`.

  The source is picked from `opts`:

    * `:license_key` - load a MaxMind database; `:edition` selects the
      edition and falls back to `"GeoLite2-City"` when not given
    * `:path` - load a database from a local file

  When both are present, `:license_key` wins. When neither is present, the
  function raises.

  Unless `:async` is truthy, the call waits on `:locus.await_loader/1` and
  expects it to return `{:ok, version}` before returning `:ok`.

  Loading a local file:

      iex> load_db(path: "/etc/plausible/dbip-city.mmdb")
      :ok

  Loading a maxmind db:

      # this license key is no longer active
      iex> load_db(license_key: "LNpsJCCKPis6XvBP", edition: "GeoLite2-City", async: true)
      :ok

  """
  def load_db(opts) do
    :ok = start_loader(opts)

    if opts[:async] do
      :ok
    else
      {:ok, _version} = :locus.await_loader(@db)
      :ok
    end
  end

  # Kicks off the locus loader for whichever source `opts` describes and
  # returns whatever `:locus.start_loader` returns.
  defp start_loader(opts) do
    if maxmind_key = opts[:license_key] do
      maxmind_edition = opts[:edition] || "GeoLite2-City"
      :locus.start_loader(@db, {:maxmind, maxmind_edition}, license_key: maxmind_key)
    else
      if file_path = opts[:path] do
        :locus.start_loader(@db, file_path)
      else
        raise "failed to load geolocation db: need :path or :license_key to be provided"
      end
    end
  end

  @doc """
  Returns the type of the loaded geodatabase, or `nil` when its metadata
  cannot be read. Used for deciding whether to show the DBIP disclaimer.

  Example:

      # in the case of a dbip db
      iex> database_type()
      "DBIP-City-Lite"

      # in the case of a maxmind db
      iex> database_type()
      "GeoLite2-City"

  """
  def database_type do
    with {:ok, %{database_type: db_type}} <- :locus.get_info(@db, :metadata) do
      db_type
    else
      _other -> nil
    end
  end

  @doc """
  Looks up geo info about an ip address.

  Returns the database entry when one is found and `nil` when the address is
  not in the database. Lookup errors are logged and also yield `nil`.

  Example:

      iex> lookup("8.7.6.5")
      %{
        "city" => %{
          "geoname_id" => 5349755,
          "names" => %{
            "de" => "Fontana",
            "en" => "Fontana",
            "ja" => "フォンタナ",
            "ru" => "Фонтана"
          }
        },
        "continent" => %{
          "code" => "NA",
          "geoname_id" => 6255149,
          "names" => %{
            "de" => "Nordamerika",
            "en" => "North America",
            "es" => "Norteamérica",
            "fr" => "Amérique du Nord",
            "ja" => "北アメリカ",
            "pt-BR" => "América do Norte",
            "ru" => "Северная Америка",
            "zh-CN" => "北美洲"
          }
        },
        "country" => %{
          "geoname_id" => 6252001,
          "iso_code" => "US",
          "names" => %{
            "de" => "Vereinigte Staaten",
            "en" => "United States",
            "es" => "Estados Unidos",
            "fr" => "États Unis",
            "ja" => "アメリカ",
            "pt-BR" => "EUA",
            "ru" => "США",
            "zh-CN" => "美国"
          }
        },
        "location" => %{
          "accuracy_radius" => 50,
          "latitude" => 34.1211,
          "longitude" => -117.4362,
          "metro_code" => 803,
          "time_zone" => "America/Los_Angeles"
        },
        "postal" => %{"code" => "92336"},
        "registered_country" => %{
          "geoname_id" => 6252001,
          "iso_code" => "US",
          "names" => %{
            "de" => "Vereinigte Staaten",
            "en" => "United States",
            "es" => "Estados Unidos",
            "fr" => "États Unis",
            "ja" => "アメリカ",
            "pt-BR" => "EUA",
            "ru" => "США",
            "zh-CN" => "美国"
          }
        },
        "subdivisions" => [
          %{
            "geoname_id" => 5332921,
            "iso_code" => "CA",
            "names" => %{
              "de" => "Kalifornien",
              "en" => "California",
              "es" => "California",
              "fr" => "Californie",
              "ja" => "カリフォルニア州",
              "pt-BR" => "Califórnia",
              "ru" => "Калифорния",
              "zh-CN" => "加州"
            }
          }
        ]
      }

  """
  def lookup(ip_address) do
    outcome = :locus.lookup(@db, ip_address)

    case outcome do
      :not_found ->
        nil

      {:error, reason} ->
        Logger.error("failed to lookup ip address: #{inspect(reason)}")
        nil

      {:ok, geo_entry} ->
        geo_entry
    end
  end
end
2 changes: 1 addition & 1 deletion lib/plausible/ingestion/event.ex
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ defmodule Plausible.Ingestion.Event do
end

defp put_geolocation(%__MODULE__{} = event) do
result = Plausible.Ingestion.Geolocation.lookup(event.request.remote_ip)
result = Plausible.Ingestion.Geolocation.lookup(event.request.remote_ip) || %{}

update_attrs(event, result)
end
Expand Down
44 changes: 25 additions & 19 deletions lib/plausible/ingestion/geolocation.ex
Original file line number Diff line number Diff line change
@@ -1,33 +1,39 @@
defmodule Plausible.Ingestion.Geolocation do
@moduledoc false
alias Plausible.Ingestion.CityOverrides

def lookup(remote_ip) do
result = Geolix.lookup(remote_ip, where: :geolocation)

country_code =
get_in(result, [:country, :iso_code])
|> ignore_unknown_country()

city_geoname_id = country_code && get_in(result, [:city, :geoname_id])
city_geoname_id = CityOverrides.get(city_geoname_id, city_geoname_id)

%{
country_code: country_code,
subdivision1_code: subdivision1_code(country_code, result),
subdivision2_code: subdivision2_code(country_code, result),
city_geoname_id: city_geoname_id
}
def lookup(ip_address) do
case Plausible.Geo.lookup(ip_address) do
%{} = entry ->
country_code =
entry
|> get_in(["country", "iso_code"])
|> ignore_unknown_country()

city_geoname_id = country_code && get_in(entry, ["city", "geoname_id"])
city_geoname_id = Plausible.Ingestion.CityOverrides.get(city_geoname_id, city_geoname_id)

%{
country_code: country_code,
subdivision1_code: subdivision1_code(country_code, entry),
subdivision2_code: subdivision2_code(country_code, entry),
city_geoname_id: city_geoname_id
}

nil ->
nil
end
end

defp subdivision1_code(country_code, %{subdivisions: [%{iso_code: iso_code} | _rest]})
defp subdivision1_code(country_code, %{"subdivisions" => [%{"iso_code" => iso_code} | _rest]})
when not is_nil(country_code) do
country_code <> "-" <> iso_code
end

defp subdivision1_code(_, _), do: nil

defp subdivision2_code(country_code, %{subdivisions: [_first, %{iso_code: iso_code} | _rest]})
defp subdivision2_code(country_code, %{
"subdivisions" => [_first, %{"iso_code" => iso_code} | _rest]
})
when not is_nil(country_code) do
country_code <> "-" <> iso_code
end
Expand Down
9 changes: 1 addition & 8 deletions lib/plausible_web/controllers/api/external_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,7 @@ defmodule PlausibleWeb.Api.ExternalController do
|> Keyword.take([:version, :commit, :created, :tags])
|> Map.new()

geo_database =
case Geolix.metadata(where: :geolocation) do
%{database_type: type} ->
type

_ ->
"(not configured)"
end
geo_database = Plausible.Geo.database_type() || "(not configured)"

json(conn, %{
geo_database: geo_database,
Expand Down
Loading