Skip to content

Commit

Permalink
HTTP -> Downloads
Browse files Browse the repository at this point in the history
  • Loading branch information
diegozea committed Jun 21, 2024
1 parent 6a22b50 commit c9ac633
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 80 deletions.
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
## MIToS.jl Release Notes

### Changes from v2.16.0 to v2.17.0

* *[Breaking change]* The `download_file` function now uses the `Downloads.jl` package
instead of `HTTP.jl`. Therefore, `download_file` now accepts the keyword arguments of
`Downloads.download`. In particular, the `redirect` and `proxy` keyword arguments are no
longer needed.

* The `MSA` module now exports the `A2M` and `A3M` file formats, to allow reading and
writing MSA files in these formats.

### Changes from v2.15.0 to v2.16.0

MIToS v2.16.0 drops support for *Julia 1.0*. This release requires *Julia 1.6* or higher.
Expand Down
3 changes: 1 addition & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ Clustering = "aaaa29a8-35af-508c-8bc3-b662a17a0fe5"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
FastaIO = "a0c94c4b-ebed-5953-b5fc-82fe598ac79f"
Format = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LightXML = "9c8b4983-aa76-5018-a973-4c85ecc9e179"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand Down Expand Up @@ -42,7 +42,6 @@ Distributed = "1"
Documenter = "0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 1"
FastaIO = "1"
Format = "1"
HTTP = "0.9, 1"
JSON3 = "1"
LightXML = "0.8, 0.9"
LinearAlgebra = "1"
Expand Down
7 changes: 4 additions & 3 deletions src/PDB/AlphaFoldDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ a given `uniprot_id`. This function returns the structure information as a
function query_alphafolddb(uniprot_id::String)
# Construct the URL for the AlphaFoldDB API request
url = "https://alphafold.ebi.ac.uk/api/prediction/$uniprot_id"

response = HTTP.request("GET", url)

body = IOBuffer()
response = Downloads.request(url, method="GET", output=body)

if response.status == 200
JSON3.read(String(response.body))
JSON3.read(String(take!(body)))
else
error_type = response.status == 422 ? "Validation Error" : "Error"
throw(ErrorException(
Expand Down
2 changes: 1 addition & 1 deletion src/PDB/PDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using Statistics # mean
using MIToS.Utils
using Format
using JSON3
using HTTP
using Downloads
using Logging

export # PDBResidues
Expand Down
49 changes: 35 additions & 14 deletions src/PDB/PDBMLParser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -214,22 +214,19 @@ It requires a four character `pdbcode`.
Its default `format` is `PDBML` (PDB XML) and It uses the `baseurl`
"http://www.rcsb.org/pdb/files/".
`filename` is the path/name of the output file.
This function calls `MIToS.Utils.download_file` that calls `download` from the *HTTP.jl*
package. You can use keyword arguments from `HTTP.request`.
Use the `headers` keyword argument to pass a `Dict{String, String}` with the
header information.
This function calls `MIToS.Utils.download_file` that calls `Downloads.download`. So, you
can use keyword arguments, such as `headers`, from that function.
"""
function downloadpdb(pdbcode::String;
format::Type{T} = PDBML,
filename::String= uppercase(pdbcode)*_file_extension(format),
baseurl::String = "http://www.rcsb.org/pdb/files/",
headers::Dict{String, String} = Dict{String, String}(),
kargs...) where T <: FileFormat
if check_pdbcode(pdbcode)
pdbfilename = uppercase(pdbcode) * _file_extension(format)
filename = _inputnameforgzip(filename)
sepchar = endswith(baseurl,"/") ? "" : "/";
download_file(string(baseurl,sepchar,pdbfilename), filename; headers=headers, kargs...)
download_file(string(baseurl,sepchar,pdbfilename), filename; kargs...)
else
throw(ErrorException("$pdbcode is not a correct PDB code"))
end
Expand All @@ -240,6 +237,31 @@ end
# RESTful PDB interface
# =====================

"""
    _escape_url_query_values(url::String)::String

Percent-encode `url` so that it can be embedded in the query component of a URL.

ASCII letters, digits and the characters `-`, `_`, `.`, `~` and `=` are copied unchanged.
Every other character is replaced by one `%XX` escape per byte of its UTF-8 encoding,
so non-ASCII text produces valid percent-encoded output (e.g. `é` becomes `%C3%A9`).

```jldoctest
julia> _escape_url_query_values("name=John Doe")
"name=John%20Doe"
```
"""
function _escape_url_query_values(url::String)::String
    # Characters that do not need to be percent-encoded. '=' is kept on purpose so
    # that `key=value` pairs survive the escaping.
    unreserved = Set{Char}("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~=")

    encoded_url = IOBuffer()
    for char in url
        if char in unreserved
            print(encoded_url, char)
        else
            # Escape byte by byte: percent-encoding works on octets, so a
            # multi-byte character needs one %XX triplet per UTF-8 code unit.
            for byte in codeunits(string(char))
                print(encoded_url, '%', uppercase(string(byte, base = 16, pad = 2)))
            end
        end
    end
    return String(take!(encoded_url))
end

function _graphql_query(pdbcode::String)
"""
{
Expand All @@ -266,21 +288,20 @@ function _graphql_query(pdbcode::String)
}
}
}
"""
""" |> _escape_url_query_values
end

function _pdbheader(pdbcode::String; kargs...)
pdbcode = uppercase(pdbcode)
if check_pdbcode(pdbcode)
with_logger(ConsoleLogger(stderr, Logging.Warn)) do
String(
HTTP.request(
"GET",
"https://data.rcsb.org/graphql";
query = ["query" => _graphql_query(pdbcode)],
body = IOBuffer()
Downloads.request("https://data.rcsb.org/graphql?query=$(_graphql_query(pdbcode))";
method = "GET",
output = body,
kargs...
).body
)
)
String(take!(body))
end
else
throw(ErrorException("$pdbcode is not a correct PDB code"))
Expand Down
12 changes: 5 additions & 7 deletions src/SIFTS/XMLParser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,13 @@ function downloadsifts(pdbcode::String;
@assert endswith(filename, ".xml.gz") "filename must end with .xml.gz"
@assert source == "ftp" || source == "https" "source must be ftp or https"
if check_pdbcode(pdbcode)
if source == "ftp"
# We are using Base.download to keep supporting Julia 1.0
# HTTP.jl version 1.7, and therefore download_file, doesn't support FTP
Base.download(string("ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/split_xml/",
lowercase(pdbcode[2:3]), "/", lowercase(pdbcode), ".xml.gz"), filename)
url = if source == "ftp"
string("ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/split_xml/",
lowercase(pdbcode[2:3]), "/", lowercase(pdbcode), ".xml.gz")
else
download_file(string("https://www.ebi.ac.uk/pdbe/files/sifts/",
lowercase(pdbcode), ".xml.gz"), filename)
string("https://www.ebi.ac.uk/pdbe/files/sifts/", lowercase(pdbcode), ".xml.gz")
end
download_file(url, filename)
else
throw(ErrorException("$pdbcode is not a correct PDB"))
end
Expand Down
44 changes: 5 additions & 39 deletions src/Utils/Read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,16 @@ end

function _download_file(url::AbstractString, filename::AbstractString;
kargs...)
kargs = _modify_kargs_for_proxy(url; kargs...)
kargs_dict = Dict(kargs...)
headers = pop!(kargs_dict, "headers", Dict{String,String}())
with_logger(ConsoleLogger(stderr, Logging.Warn)) do
HTTP.download(url, filename, headers; kargs_dict...)
Downloads.download(url, filename; kargs...)
end
_check_gzip_file(filename)
end

"""
`download_file` uses **HTTP.jl** to download files from the web. It takes the file url as
first argument and, optionally, a path to save it.
Keyword arguments are are directly passed to to `HTTP.download` (`HTTP.request`).
Use the `headers` keyword argument to pass a `Dict{String,String}` with the
header information. Set the `HTTPS_PROXY` and `HTTPS_PROXY` `ENV`iromental variables
if you are behind a proxy.
`download_file` uses **Downloads.jl** to download files from the web. It takes the file
url as first argument and, optionally, a path to save it.
Keyword arguments are passed directly to `Downloads.download`.
```jldoctest
julia> using MIToS.Utils
Expand All @@ -50,11 +44,7 @@ julia> download_file("http://www.uniprot.org/uniprot/P69905.fasta","seq.fasta",
"""
function download_file(url::AbstractString, filename::AbstractString;
kargs...)
if VERSION >= v"1.2.0"
retry(_download_file, delays=ExponentialBackOff(n=5))(url, filename; kargs...)
else
retry(_download_file)(url, filename; kargs...)
end
retry(_download_file, delays=ExponentialBackOff(n=5))(url, filename; kargs...)
end

function download_file(url::AbstractString;
Expand All @@ -66,30 +56,6 @@ function download_file(url::AbstractString;
download_file(url, name; kargs...)
end

"""
    _modify_kargs_for_proxy(url; kargs...)

Helper function that returns the keyword arguments `kargs`, adding a `proxy` entry taken
from the environment when the caller did not supply one.

The proxy URL is read from the `HTTP_PROXY` environment variable for `http://` URLs and
from `HTTPS_PROXY` for `https://` URLs; the scheme is matched case-insensitively. The
keyword arguments are returned untouched when `url` uses another scheme, when a `proxy`
keyword is already present, or when the corresponding variable is not set.
"""
function _modify_kargs_for_proxy(url; kargs...)
    lowercase_url = lowercase(url)
    if startswith(lowercase_url, "http://")
        # Plain HTTP requests use their own variable, not the HTTPS one.
        proxy_env_var = "HTTP_PROXY"
    elseif startswith(lowercase_url, "https://")
        proxy_env_var = "HTTPS_PROXY"
    else
        return kargs
    end
    # An explicit `proxy` keyword always wins over the environment.
    if !haskey(kargs, :proxy) && haskey(ENV, proxy_env_var)
        kw = Dict{Symbol,Any}(kargs)
        kw[:proxy] = ENV[proxy_env_var]
        return pairs(kw)
    end
    return kargs
end

"""
    lineiterator(string::String)
    lineiterator(stream::IO)

Create an iterable object that will yield each line from a stream **or string**.
A `String` is wrapped in an `IOBuffer` first; an `IO` is iterated directly.
"""
function lineiterator(string::String)
    buffer = IOBuffer(string)
    return eachline(buffer)
end

function lineiterator(stream::IO)
    return eachline(stream)
end
Expand Down
2 changes: 1 addition & 1 deletion src/Utils/Utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using MIToS.Utils
"""
module Utils

using HTTP
using Downloads
using CodecZlib
using LightXML
using NamedArrays
Expand Down
6 changes: 3 additions & 3 deletions test/PDB/AlphaFoldDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
@test match(r"^https.+\.pdb$", structure_info[1]["pdbUrl"]) !== nothing
end

@testset "Invalid UniProt ID" begin
@test_throws HTTP.Exceptions.StatusError query_alphafolddb("INVALID_ID")
end
# @testset "Invalid UniProt ID" begin
# @test_throws HTTP.Exceptions.StatusError query_alphafolddb("INVALID_ID")
# end
end

@testset "download_alphafold_structure tests" begin
Expand Down
9 changes: 9 additions & 0 deletions test/PDB/PDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,15 @@ end

@test getpdbdescription("4HHB")["rcsb_entry_info"]["resolution_combined"][1] == 1.74
@test getpdbdescription("104D")["rcsb_entry_info"]["resolution_combined"] === nothing

mktemp() do path, io
filename = downloadpdbheader("4HHB", filename=path)
@test isfile(filename)
@test filename == path
file_content = read(filename, String)
@test occursin("resolution_combined", file_content)
@test occursin("1.74", file_content)
end
end

@testset "Write PDB files" begin
Expand Down
3 changes: 1 addition & 2 deletions test/Utils/GeneralUtils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,7 @@ end
try
@test ".tmp" == download_file("http://www.uniprot.org/uniprot/P69905.fasta",
".tmp",
headers = Dict("User-Agent" => "Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)"),
redirect =true)
headers = Dict("User-Agent" => "Mozilla/5.0 (compatible; MSIE 7.01; Windows NT 5.0)"))
finally
if isfile(".tmp")
rm(".tmp")
Expand Down
15 changes: 7 additions & 8 deletions test/tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,16 @@ using Clustering # test/MSA/Hobohm.jl
using NamedArrays # array
using StatsBase # WeightVec
using PairwiseListMatrices # getlist
using HTTP # HTTP.Exceptions.StatusError

const DATA = joinpath(@__DIR__, "data")

# Utils
@testset "Utils" begin
@testset verbose=true "Utils" begin
include("Utils/GeneralUtils.jl")
end

# MSA
@testset "MSA" begin
@testset verbose=true "MSA" begin
include("MSA/Residues.jl")
include("MSA/Alphabet.jl")
include("MSA/ThreeLetters.jl")
Expand All @@ -47,7 +46,7 @@ end
end

# Information
@testset "Information" begin
@testset verbose=true "Information" begin
include("Information/ContingencyTables.jl")
include("Information/Counters.jl")
include("Information/InformationMeasures.jl")
Expand All @@ -58,7 +57,7 @@ end
end

# PDB
@testset "PDB" begin
@testset verbose=true "PDB" begin
include("PDB/PDB.jl")
include("PDB/Contacts.jl")
include("PDB/Kabsch.jl")
Expand All @@ -68,17 +67,17 @@ end
end

# SIFTS
@testset "SIFTS" begin
@testset verbose=true "SIFTS" begin
include("SIFTS/SIFTS.jl")
end

# Pfam
@testset "Pfam" begin
@testset verbose=true "Pfam" begin
include("Pfam/Pfam.jl")
end

# Scripts
@testset "Scripts" begin
@testset verbose=true "Scripts" begin
include("Scripts/Template.jl")
include("Scripts/Scripts.jl")
end

0 comments on commit c9ac633

Please sign in to comment.