Skip to content

Commit

Permalink
Support proxy for a page (#300)
Browse files Browse the repository at this point in the history
* refactor: parse options with a dedicated Options class

* chore!: drop support for proxy `server: true` option

* fix: support options for xvfb

* style: fix rubocop warnings

* fix: missing Ferrum module

* feat: support :proxy option for Browser#create_page
  • Loading branch information
route authored Oct 20, 2022
1 parent e530a4d commit e78f223
Show file tree
Hide file tree
Showing 43 changed files with 3,555 additions and 3,515 deletions.
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -605,21 +605,24 @@ You can set a proxy with the `proxy` option.

```ruby
browser = Ferrum::Browser.new(proxy: { host: "x.x.x.x", port: "8800" })
browser = Ferrum::Browser.new(proxy: { host: "x.x.x.x", port: "8800", user: "user", pasword: "pa$$" })
browser = Ferrum::Browser.new(proxy: { host: "x.x.x.x", port: "8800", user: "user", password: "pa$$" })
```

Chrome Devtools Protocol does not support changing proxies after the browser is launched. If you want to change proxies, you must restart your browser, which may not be convenient. There is a workaround. Ferrum provides a wrapper for a proxy server that can rotate proxies. We can run a proxy in the same process and rotate proxies inside this proxy server:
Chrome Devtools Protocol does not support changing proxies after the browser is launched. If you want to change proxies,
you must restart your browser, which may not be convenient. There is a workaround. Ferrum provides a wrapper for a proxy
server that can rotate proxies. We can run a proxy in the same process and rotate proxies inside this proxy server:

```ruby
browser = Ferrum::Browser.new(proxy: { server: true })
proxy = Ferrum::Proxy.start(**options)
browser = Ferrum::Browser.new(proxy: { host: proxy.host, port: proxy.port })

browser.proxy_server.rotate(host: "x.x.x.x", port: 31337, user: "user", password: "password")
proxy.rotate(host: "x.x.x.x", port: 31337, user: "user", password: "password")
browser.create_page(new_context: true) do |page|
page.go_to("https://api.ipify.org?format=json")
page.body # => "x.x.x.x"
end

browser.proxy_server.rotate(host: "y.y.y.y", port: 31337, user: "user", password: "password")
proxy.rotate(host: "y.y.y.y", port: 31337, user: "user", password: "password")
browser.create_page(new_context: true) do |page|
page.go_to("https://api.ipify.org?format=json")
page.body # => "y.y.y.y"
Expand All @@ -633,7 +636,6 @@ You can specify semi-colon-separated list of hosts for which proxy shouldn't be

```ruby
browser = Ferrum::Browser.new(proxy: { host: "x.x.x.x", port: "8800", bypass: "*.google.com;*foo.com" })
browser = Ferrum::Browser.new(proxy: { server: true, bypass: "*.google.com;*foo.com" })
```


Expand Down
105 changes: 41 additions & 64 deletions lib/ferrum/browser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@
require "ferrum/proxy"
require "ferrum/contexts"
require "ferrum/browser/xvfb"
require "ferrum/browser/options"
require "ferrum/browser/process"
require "ferrum/browser/client"
require "ferrum/browser/binary"
require "ferrum/browser/version_info"

module Ferrum
class Browser
DEFAULT_TIMEOUT = ENV.fetch("FERRUM_DEFAULT_TIMEOUT", 5).to_i
WINDOW_SIZE = [1024, 768].freeze
BASE_URL_SCHEMA = %w[http https].freeze

extend Forwardable
delegate %i[default_context] => :contexts
delegate %i[targets create_target page pages windows] => :default_context
Expand All @@ -33,10 +30,8 @@ class Browser
playback_rate playback_rate=] => :page
delegate %i[default_user_agent] => :process

attr_reader :client, :process, :contexts, :logger, :js_errors, :pending_connection_errors,
:slowmo, :base_url, :options, :window_size, :ws_max_receive_size, :proxy_options,
:proxy_server
attr_writer :timeout
attr_reader :client, :process, :contexts, :options, :window_size, :base_url
attr_accessor :timeout

#
# Initializes the browser.
Expand All @@ -63,7 +58,7 @@ class Browser
#
# @option options [Integer, Float] :slowmo
# Set a delay in seconds to wait before sending command.
# Usefull companion of headless option, so that you have time to see
# Useful companion of headless option, so that you have time to see
# changes.
#
# @option options [Numeric] :timeout (5)
Expand Down Expand Up @@ -126,43 +121,12 @@ class Browser
# Environment variables you'd like to pass through to the process.
#
def initialize(options = nil)
options ||= {}

@client = nil
@window_size = options.fetch(:window_size, WINDOW_SIZE)
@original_window_size = @window_size

@options = Hash(options.merge(window_size: @window_size))
@logger, @timeout, @ws_max_receive_size =
@options.values_at(:logger, :timeout, :ws_max_receive_size)
@js_errors = @options.fetch(:js_errors, false)

if @options[:proxy]
@proxy_options = @options[:proxy]

if @proxy_options[:server]
@proxy_server = Proxy.start(**@proxy_options.slice(:host, :port, :user, :password))
@proxy_options.merge!(host: @proxy_server.host, port: @proxy_server.port)
end

@options[:browser_options] ||= {}
address = "#{@proxy_options[:host]}:#{@proxy_options[:port]}"
@options[:browser_options].merge!("proxy-server" => address)
@options[:browser_options].merge!("proxy-bypass-list" => @proxy_options[:bypass]) if @proxy_options[:bypass]
end

@pending_connection_errors = @options.fetch(:pending_connection_errors, true)
@slowmo = @options[:slowmo].to_f

self.base_url = @options[:base_url] if @options.key?(:base_url)

if ENV.fetch("FERRUM_DEBUG", nil) && !@logger
$stdout.sync = true
@logger = $stdout
@options[:logger] = @logger
end
@options = Options.new(options)
@client = @process = @contexts = nil

@options.freeze
@timeout = @options.timeout
@window_size = @options.window_size
@base_url = @options.base_url if @options.base_url

start
end
Expand All @@ -173,22 +137,39 @@ def initialize(options = nil)
# @param [String] value
# The new base URL value.
#
# @return [String]
# The base URL value.
# @raise [ArgumentError] when path is not absolute or doesn't include schema
#
# @return [Addressable::URI]
# The parsed base URI value.
#
def base_url=(value)
parsed = Addressable::URI.parse(value)
unless BASE_URL_SCHEMA.include?(parsed.normalized_scheme)
raise "Set `base_url` should be absolute and include schema: #{BASE_URL_SCHEMA}"
end

@base_url = parsed
@base_url = options.parse_base_url(value)
end

def create_page(new_context: false)
page = if new_context
context = contexts.create
context.create_page
#
# Creates a new page.
#
# @param [Boolean] new_context
# Whether to create a page in a new context or not.
#
# @param [Hash] proxy
# Proxy options for the page (`:host`, `:port`, optional `:bypass`). The page will be created in a new context if given.
#
# @return [Ferrum::Page]
# Created page.
#
def create_page(new_context: false, proxy: nil)
page = if new_context || proxy
params = {}

if proxy
options.parse_proxy(proxy)
params.merge!(proxyServer: "#{proxy[:host]}:#{proxy[:port]}")
params.merge!(proxyBypassList: proxy[:bypass]) if proxy[:bypass]
end

context = contexts.create(**params)
context.create_page(proxy: proxy)
else
default_context.create_page
end
Expand All @@ -202,7 +183,7 @@ def create_page(new_context: false)
end

def extensions
@extensions ||= Array(@options[:extensions]).map do |ext|
@extensions ||= Array(options.extensions).map do |ext|
(ext.is_a?(Hash) && ext[:source]) || File.read(ext)
end
end
Expand All @@ -224,10 +205,6 @@ def evaluate_on_new_document(expression)
extensions << expression
end

def timeout
@timeout || DEFAULT_TIMEOUT
end

def command(*args)
@client.command(*args)
rescue DeadBrowserError
Expand All @@ -250,7 +227,7 @@ def command(*args)
# browser.quit
#
def reset
@window_size = @original_window_size
@window_size = options.window_size
contexts.reset
end

Expand Down Expand Up @@ -289,7 +266,7 @@ def version

def start
Utils::ElapsedTime.start
@process = Process.start(@options)
@process = Process.start(options)
@client = Client.new(self, @process.ws_url)
@contexts = Contexts.new(self)
end
Expand Down
2 changes: 1 addition & 1 deletion lib/ferrum/browser/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def initialize(browser, ws_url, id_starts_with: 0)
@browser = browser
@command_id = id_starts_with
@pendings = Concurrent::Hash.new
@ws = WebSocket.new(ws_url, @browser.ws_max_receive_size, @browser.logger)
@ws = WebSocket.new(ws_url, @browser.options.ws_max_receive_size, @browser.options.logger)
@subscriber, @interrupter = Subscriber.build(2)

@thread = Thread.new do
Expand Down
11 changes: 5 additions & 6 deletions lib/ferrum/browser/command.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Command
# Currently only these browsers support CDP:
# https://github.com/cyrus-and/chrome-remote-interface#implementations
def self.build(options, user_data_dir)
defaults = case options[:browser_name]
defaults = case options.browser_name
when :firefox
Options::Firefox.options
when :chrome, :opera, :edge, nil
Expand All @@ -29,14 +29,14 @@ def initialize(defaults, options, user_data_dir)
@defaults = defaults
@options = options
@user_data_dir = user_data_dir
@path = options[:browser_path] || ENV.fetch("BROWSER_PATH", nil) || defaults.detect_path
@path = options.browser_path || ENV.fetch("BROWSER_PATH", nil) || defaults.detect_path
raise BinaryNotFoundError, NOT_FOUND unless @path

merge_options
end

def xvfb?
!!options[:xvfb]
!!options.xvfb
end

def to_a
Expand All @@ -47,9 +47,8 @@ def to_a

def merge_options
@flags = defaults.merge_required(@flags, options, @user_data_dir)
@flags = defaults.merge_default(@flags, options) unless options[:ignore_default_browser_options]

@flags.merge!(options.fetch(:browser_options, {}))
@flags = defaults.merge_default(@flags, options) unless options.ignore_default_browser_options
@flags.merge!(options.browser_options)
end
end
end
Expand Down
84 changes: 84 additions & 0 deletions lib/ferrum/browser/options.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# frozen_string_literal: true

module Ferrum
  class Browser
    #
    # Parses the raw options given to the browser once and exposes each
    # recognised option through a reader. Instances keep a frozen copy of the
    # options hash (see #to_h).
    #
    class Options
      HEADLESS = true
      BROWSER_PORT = "0"
      BROWSER_HOST = "127.0.0.1"
      WINDOW_SIZE = [1024, 768].freeze
      BASE_URL_SCHEMA = %w[http https].freeze
      DEFAULT_TIMEOUT = ENV.fetch("FERRUM_DEFAULT_TIMEOUT", 5).to_i
      PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 10).to_i
      # True whenever the FERRUM_DEBUG environment variable is set at load time.
      DEBUG_MODE = !ENV.fetch("FERRUM_DEBUG", nil).nil?

      attr_reader :window_size, :timeout, :logger, :ws_max_receive_size,
                  :js_errors, :base_url, :slowmo, :pending_connection_errors,
                  :url, :env, :process_timeout, :browser_name, :browser_path,
                  :save_path, :extensions, :proxy, :port, :host, :headless,
                  :ignore_default_browser_options, :browser_options, :xvfb

      #
      # Reads every known option, applying defaults where one is defined.
      #
      # @param [Hash, nil] options
      #   Raw options. The hash itself is copied and never modified.
      #
      # @raise [ArgumentError]
      #   When `:proxy` or `:base_url` is given but invalid
      #   (see #parse_proxy and #parse_base_url).
      #
      def initialize(options = nil)
        @options = Hash(options&.dup)
        @port = @options.fetch(:port, BROWSER_PORT)
        @host = @options.fetch(:host, BROWSER_HOST)
        @timeout = @options.fetch(:timeout, DEFAULT_TIMEOUT)
        @window_size = @options.fetch(:window_size, WINDOW_SIZE)
        @js_errors = @options.fetch(:js_errors, false)
        @headless = @options.fetch(:headless, HEADLESS)
        @pending_connection_errors = @options.fetch(:pending_connection_errors, true)
        @process_timeout = @options.fetch(:process_timeout, PROCESS_TIMEOUT)
        @slowmo = @options[:slowmo].to_f

        # The copy above is shallow, so the nested :browser_options hash is
        # still the caller's object. Take a snapshot and freeze that instead
        # of freezing a hash we do not own.
        @browser_options = @options.fetch(:browser_options, {}).dup
        # Keep #to_h and #browser_options pointing at the same object, but only
        # when the key was supplied so #to_h does not gain a key it never had.
        @options[:browser_options] = @browser_options if @options.key?(:browser_options)

        # Options without a default: nil when the key is absent.
        @ws_max_receive_size, @env, @browser_name, @browser_path,
          @save_path, @extensions, @ignore_default_browser_options, @xvfb = @options.values_at(
            :ws_max_receive_size, :env, :browser_name, :browser_path, :save_path, :extensions,
            :ignore_default_browser_options, :xvfb
          )

        # #to_h always reports the effective window size, default included.
        @options[:window_size] = @window_size
        @proxy = parse_proxy(@options[:proxy])
        @logger = parse_logger(@options[:logger])
        @base_url = parse_base_url(@options[:base_url]) if @options[:base_url]
        @url = @options[:url].to_s if @options[:url]

        @options.freeze
        @browser_options.freeze
      end

      #
      # @return [Hash]
      #   The frozen copy of the options this object was built from.
      #
      def to_h
        @options
      end

      #
      # Parses and validates a base URL.
      #
      # @param [String] value
      #   The URL to parse.
      #
      # @raise [ArgumentError]
      #   When the URL scheme is not one of BASE_URL_SCHEMA.
      #
      # @return [Addressable::URI]
      #   The parsed URI.
      #
      def parse_base_url(value)
        parsed = Addressable::URI.parse(value)
        unless BASE_URL_SCHEMA.include?(parsed&.normalized_scheme)
          raise ArgumentError, "`base_url` should be absolute and include schema: #{BASE_URL_SCHEMA.join(' | ')}"
        end

        parsed
      end

      #
      # Validates proxy options.
      #
      # @param [Hash, nil] options
      #   Proxy options; nil (or false) means "no proxy".
      #
      # @raise [ArgumentError]
      #   When options is not a Hash, or has neither :host nor :port.
      #
      # @return [Hash, nil]
      #   The same hash that was passed in, or nil when no proxy was given.
      #
      def parse_proxy(options)
        return unless options

        raise ArgumentError, "proxy options must be a Hash" unless options.is_a?(Hash)

        if options[:host].nil? && options[:port].nil?
          raise ArgumentError, "proxy options must be a Hash with at least :host | :port"
        end

        options
      end

      private

      #
      # Picks the logger: an explicitly given one wins; otherwise $stdout
      # (switched to sync mode) in debug mode; otherwise the given falsy value.
      #
      def parse_logger(logger)
        return logger if logger

        DEBUG_MODE ? $stdout.tap { |s| s.sync = true } : logger
      end
    end
  end
end
5 changes: 1 addition & 4 deletions lib/ferrum/browser/options/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,8 @@

module Ferrum
class Browser
module Options
class Options
class Base
BROWSER_HOST = "127.0.0.1"
BROWSER_PORT = "0"

include Singleton

def self.options
Expand Down
Loading

0 comments on commit e78f223

Please sign in to comment.