Skip to content

Commit

Permalink
Merge pull request #623 from metanivek/improve_raw_decode
Browse files Browse the repository at this point in the history
Fix raw parsing in middle of buffer
  • Loading branch information
dinosaure authored Aug 27, 2023
2 parents 1a1121e + 16fd8b4 commit 138b789
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 5 deletions.
24 changes: 19 additions & 5 deletions src/git/value.ml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ module type S = sig
include S.BASE with type t := t

val length : t -> int64
val length_with_header : t -> int64
val to_raw : t -> string
val to_raw_without_header : t -> string

Expand Down Expand Up @@ -185,6 +186,17 @@ module Make (Hash : S.HASH) : S with type hash = Hash.t = struct
| Tree tree -> Tree.length tree
| Blob blob -> Blob.length blob

(** [length_with_header t] is [length t] plus the size of the header
    (["<kind> <length>\000"]) that precedes the payload. *)
let length_with_header t =
  let payload = length t in
  let kind =
    match t with
    | Commit _ -> "commit"
    | Tree _ -> "tree"
    | Blob _ -> "blob"
    | Tag _ -> "tag"
  in
  (* header = kind, ' ', decimal payload length, '\000' *)
  let header =
    String.length kind + 1 + String.length (Int64.to_string payload) + 1
  in
  Int64.add (Int64.of_int header) payload

let digest = function
| Blob blob -> Blob.digest blob
| Commit commit -> Commit.digest commit
Expand Down Expand Up @@ -271,23 +283,25 @@ module Make (Hash : S.HASH) : S with type hash = Hash.t = struct

let fiber =
cut ~sep:(v " ") sub >>= fun (kind, rest) ->
cut ~sep:(v "\000") rest >>= fun (_length, rest) ->
cut ~sep:(v "\000") rest >>= fun (length, rest) ->
let length = to_string length |> Int64.of_string |> Int64.to_int in
let rest = with_range ~len:length rest |> to_string in
match to_string kind with
| "commit" ->
let decoder = Encore.to_angstrom Commit.format in
Stdlib.Result.to_option
(Angstrom.parse_string ~consume:All decoder (to_string rest))
(Angstrom.parse_string ~consume:All decoder rest)
>>| commit
| "tree" ->
let decoder = Encore.to_angstrom Tree.format in
Stdlib.Result.to_option
(Angstrom.parse_string ~consume:All decoder (to_string rest))
(Angstrom.parse_string ~consume:All decoder rest)
>>| tree
| "blob" -> Some (Blob (Blob.of_string (to_string rest)))
| "blob" -> Some (Blob (Blob.of_string rest))
| "tag" ->
let decoder = Encore.to_angstrom Tag.format in
Stdlib.Result.to_option
(Angstrom.parse_string ~consume:All decoder (to_string rest))
(Angstrom.parse_string ~consume:All decoder rest)
>>| tag
| _ -> None
in
Expand Down
4 changes: 4 additions & 0 deletions src/git/value.mli
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ module type S = sig
include S.BASE with type t := t

val length : t -> int64

val length_with_header : t -> int64
(** [length_with_header t] is [length t] plus the length of the header. *)

val to_raw : t -> string
val to_raw_without_header : t -> string

Expand Down
11 changes: 11 additions & 0 deletions test/value/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; Test executable built from test.ml in this directory; links the `git`
; library under test together with Alcotest and the helper libraries it uses.
(executable
(name test)
(libraries rresult digestif.c fmt git alcotest))

; Attach the test executable to the `runtest` alias for the `git` package.
; The binary is named via the `:test` dependency and is run with coloured
; output forced on.
(rule
(alias runtest)
(package git)
(deps
(:test test.exe))
(action
(run %{test} --color=always)))
70 changes: 70 additions & 0 deletions test/value/test.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
(* Instantiate the Git hash and value modules over SHA-1 for these tests. *)
module Hash = Git.Hash.Make (Digestif.SHA1)
module Value = Git.Value.Make (Hash)

(* Seed the global PRNG so the random payloads built below differ between
   runs. *)
let () = Random.self_init ()

(** [random_string len] is a string of [len] bytes, each drawn from the global
    [Random] generator. It is the empty string when [len <= 0]. *)
let random_string len =
  let random_byte _ = Char.chr (Random.int 256) in
  (* [max 0 len] keeps non-positive lengths yielding [""] instead of raising. *)
  String.init (max 0 len) random_byte

(** Alcotest testable for Git values: printed with [Value.pp] and compared
    with [Value.equal]. *)
let value : Value.t Alcotest.testable =
  Alcotest.testable Value.pp Value.equal

(** Labelled sample values, one per Git object kind (blob, tree, commit, tag),
    shared by the tests in this file. The blob payload and the commit/tag
    message are random bytes produced by [random_string]. *)
let examples : (string * Value.t) list =
  let blob = Value.blob (Value.Blob.of_string (random_string 100)) in
  let tree = Value.tree (Value.Tree.v []) in
  let author = Git.User.{ name = "me"; email = "me@me.com"; date = 0L, None } in
  let tree_hash = Value.digest tree in
  let message = Some (random_string 255) in
  let commit =
    Value.commit
      (Value.Commit.make ~author ~committer:author ~tree:tree_hash message)
  in
  let tag =
    Value.tag
      (Value.Tag.make tree_hash Tree ~tagger:author ~tag:"v1.0" message)
  in
  [ ("blob", blob); ("tree", tree); ("commit", commit); ("tag", tag) ]

(** Serialising a value with [Value.to_raw] and decoding the result with
    [Value.of_raw_with_header] must give back an equal value, for every entry
    of [examples]. *)
let test_to_and_of_raw =
  let roundtrip (label, expected) =
    let encoded = Value.to_raw expected in
    let decoded =
      Rresult.R.failwith_error_msg (Value.of_raw_with_header encoded)
    in
    Alcotest.check value label expected decoded
  in
  Alcotest.test_case "to_raw and of_raw_with_header roundtrip" `Quick (fun () ->
      List.iter roundtrip examples)

(** [Value.length_with_header v] must equal the byte length of
    [Value.to_raw v], for every entry of [examples]. *)
let test_length_with_header =
  (* Test-case name fixed: it was misspelled "lenght_with_header". *)
  Alcotest.test_case "length_with_header" `Quick @@ fun () ->
  let check_length_with_header (msg, v) =
    (* The serialised form is the reference; the computed length is checked
       against it. *)
    let expected = String.length (Value.to_raw v) in
    let actual = Int64.to_int (Value.length_with_header v) in
    Alcotest.(check int) msg expected actual
  in
  List.iter check_length_with_header examples

(** Decoding must also work when the raw value sits in the middle of a larger
    string: each entry of [examples] is wrapped between ["hello"] and
    ["world"] and decoded starting at the offset just past the prefix. *)
let test_of_raw_middle_of_buffer =
  let decode_embedded (label, expected) =
    let buffer = Fmt.str "hello%sworld" (Value.to_raw expected) in
    let decoded =
      (* [~off:5] skips the 5-byte "hello" prefix. *)
      Rresult.R.failwith_error_msg (Value.of_raw_with_header ~off:5 buffer)
    in
    Alcotest.check value label expected decoded
  in
  Alcotest.test_case "of_raw when in middle of buffer" `Quick (fun () ->
      List.iter decode_embedded examples)

(* Entry point: register the "raw" and "value" suites with Alcotest and run
   them. The PRNG is already seeded at the top of this file, before [examples]
   is built, and nothing random is drawn from here on, so it is not re-seeded. *)
let () =
  Alcotest.run "git-value"
    [
      ("raw", [ test_to_and_of_raw; test_of_raw_middle_of_buffer ]);
      ("value", [ test_length_with_header ]);
    ]

0 comments on commit 138b789

Please sign in to comment.