diff --git a/lib/ast.ml b/lib/ast.ml index 0da9f10b..6976466f 100644 --- a/lib/ast.ml +++ b/lib/ast.ml @@ -1,9 +1,10 @@ open Import -type ('a, _) ast = +type ('a, _) ast : immutable_data with 'a = | Alternative : 'a list -> ('a, [> `Uncased ]) ast | No_case : 'a -> ('a, [> `Cased ]) ast | Case : 'a -> ('a, [> `Cased ]) ast +[@@unsafe_allow_any_mode_crossing] let dyn_of_ast f = let open Dyn in diff --git a/lib/ast.mli b/lib/ast.mli index 04a89e9b..1f675842 100644 --- a/lib/ast.mli +++ b/lib/ast.mli @@ -1,7 +1,10 @@ -type ('a, _) ast = private +@@ portable + +type ('a, _) ast : immutable_data with 'a = private | Alternative : 'a list -> ('a, [> `Uncased ]) ast | No_case : 'a -> ('a, [> `Cased ]) ast | Case : 'a -> ('a, [> `Cased ]) ast +[@@unsafe_allow_any_mode_crossing] type cset = private | Cset of Cset.t diff --git a/lib/automata.ml b/lib/automata.ml index 8623436f..2671cdde 100644 --- a/lib/automata.ml +++ b/lib/automata.ml @@ -24,9 +24,9 @@ open Import let hash_combine h accu = (accu * 65599) + h -module Ids : sig +module Ids : sig @@ portable module Id : sig - type t + type t : immediate val equal : t -> t -> bool val zero : t @@ -105,7 +105,7 @@ module Rep_kind = struct let pp fmt t = Format.pp_print_string fmt (to_string t) end -module Mark : sig +module Mark : sig @@ portable type t = private int val compare : t -> t -> int @@ -136,7 +136,7 @@ end = struct ;; end -module Idx : sig +module Idx : sig @@ portable type t = private int val pp : t Fmt.t @@ -292,10 +292,12 @@ type expr = Expr.t include Expr module Marks = struct - type t = + type t : immutable_data = { marks : (Mark.t * Idx.t) list ; pmarks : Pmark.Set.t } + [@@unsafe_allow_any_mode_crossing + (* SAFETY: [Pmark.Set.t] is actually immutable, but the stdlib doesn't say that *)] let to_dyn { marks; pmarks } : Dyn.t = let open Dyn in @@ -376,8 +378,8 @@ module Status = struct | Running end -module Desc : sig - type t +module Desc : sig @@ portable + type t : immutable_data val pp : t Fmt.t @@ -458,7 +460,7 @@ end = struct | TMatch m -> variant "TMarks" [ Marks.to_dyn m ] ;; - let to_dyn = to_dyn None + let to_dyn t = to_dyn None t open E @@ -517,8 +519,8 @@ end = struct | _ :: r -> first_match r ;; - let remove_matches = - List.filter ~f:(function + let remove_matches t = + List.filter t ~f:(function | TMatch _ -> false | _ -> true) ;; @@ -590,13 +592,14 @@ end module E = Desc.E module State = struct - type t = + type t : immutable_data = { idx : Idx.t ; category : Category.t ; desc : Desc.t ; mutable status : Status.t option ; hash : int } + [@@unsafe_allow_any_mode_crossing] (* Thread-safety: We use double-checked locking to access field [status] in function [status] below. *) @@ -651,7 +654,7 @@ module State = struct st ;; - module Table = Hashtbl.Make (struct + module Table = Hashtbl.MakePortable (struct type nonrec t = t let equal = equal diff --git a/lib/automata.mli b/lib/automata.mli index 95b57c57..d014a2a2 100644 --- a/lib/automata.mli +++ b/lib/automata.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library @@ -23,7 +25,7 @@ (* Regular expressions *) module Mark : sig - type t [@@immediate] + type t : immediate val compare : t -> t -> int val start : t diff --git a/lib/bit_vector.mli b/lib/bit_vector.mli index 8f07063f..b88f920d 100644 --- a/lib/bit_vector.mli +++ b/lib/bit_vector.mli @@ -1,3 +1,5 @@ +@@ portable + type t val length : t -> int diff --git a/lib/category.mli b/lib/category.mli index 6be2910a..48f63cca 100644 --- a/lib/category.mli +++ b/lib/category.mli @@ -1,3 +1,5 @@ +@@ portable + (** Categories represent the various kinds of characters that can be tested by look-ahead and look-behind operations. diff --git a/lib/color_map.mli b/lib/color_map.mli index 98a41d64..46c25f84 100644 --- a/lib/color_map.mli +++ b/lib/color_map.mli @@ -1,3 +1,5 @@ +@@ portable + (* Color maps exists to provide an optimization for the regex engine. The fact that some characters are entirely equivalent for some regexes means that we can use them interchangeably. diff --git a/lib/compile.ml b/lib/compile.ml index 00681af8..057fda66 100644 --- a/lib/compile.ml +++ b/lib/compile.ml @@ -2,7 +2,7 @@ open Import let rec iter n f v = if Int.equal n 0 then v else iter (n - 1) f (f v) -module Idx : sig +module Idx : sig @@ portable type t [@@immediate] val unknown : t @@ -54,8 +54,8 @@ type state_info = color. For performance reason, to avoid an indirection, we manually unbox the transition table: we allocate a single array, with the state information at index 0, followed by the transitions. *) -module State : sig - type t +module State : sig @@ portable + type t : mutable_data val make : ncol:int -> state_info -> t val make_break : state_info -> t @@ -93,7 +93,7 @@ end = struct let unknown_state = dummy { idx = Idx.unknown; final = []; desc = Automata.State.dummy } let make ~ncol state = - let st = Table (Array.make (ncol + 1) unknown_state) in + let st = Table (Array.make (ncol + 1) (Stdlib.Obj.magic_uncontended unknown_state)) in set_info st state; st ;; @@ -152,6 +152,9 @@ module Positions = struct } let empty = { positions = [||]; length = 0 } + let get_empty () = + (* SAFETY: this record is never mutated. *) + Stdlib.Obj.magic_uncontended empty let length t = t.length let unsafe_set t idx pos = Array.unsafe_set t.positions idx pos @@ -181,7 +184,7 @@ module Positions = struct always checking whether it is large enough before modifying it. *) let length = Automata.Working_area.index_count re.tbl + 1 in { positions = Array.make length 0; length }) - else empty + else get_empty () ;; end @@ -466,7 +469,7 @@ module Stream = struct let finalize t s ~pos ~len = (* TODO bound checks? *) let last = pos + len in - let state = scan_str t.re Positions.empty s t.state ~last ~pos ~groups:false in + let state = scan_str t.re (Positions.get_empty ()) s t.state ~last ~pos ~groups:false in let info = State.get_info state in match let _idx, res = @@ -630,7 +633,7 @@ let match_str_no_bounds ~groups ~partial re s ~pos ~len = let match_str_p re s ~pos ~len = if pos < 0 || len < -1 || pos + len > String.length s then invalid_arg "Re.exec: out of bounds"; - match make_match_str re Positions.empty ~len ~groups:false ~partial:false s ~pos with + match make_match_str re (Positions.get_empty ()) ~len ~groups:false ~partial:false s ~pos with | Match _ -> true | _ -> false ;; @@ -812,7 +815,7 @@ let compile_1 regexp = ; greedy = `Greedy ; pos = ref A.Mark.start ; names = ref [] - ; cache = ref Cset.CSetMap.empty + ; cache = ref (Stdlib.Obj.magic_uncontended Cset.CSetMap.empty) ; colors } in diff --git a/lib/compile.mli b/lib/compile.mli index 5e1bde3c..eda96227 100644 --- a/lib/compile.mli +++ b/lib/compile.mli @@ -1,3 +1,5 @@ +@@ portable + type re module Stream : sig diff --git a/lib/core.mli b/lib/core.mli index a3d09297..0b80cc24 100644 --- a/lib/core.mli +++ b/lib/core.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library diff --git a/lib/cset.ml b/lib/cset.ml index 9183761e..071a2642 100644 --- a/lib/cset.ml +++ b/lib/cset.ml @@ -41,8 +41,8 @@ let compare_pair (x, y) (x', y') = ;; let equal_pair (x, y) (x', y') = Int.equal x x' && Int.equal y y' -let equal = List.equal ~eq:equal_pair -let compare : t -> t -> int = List.compare ~cmp:compare_pair +let equal x y = List.equal ~eq:equal_pair x y +let compare x y = List.compare ~cmp:compare_pair x y let rec union l l' = match l, l' with @@ -87,7 +87,7 @@ let rec diff l l' = ;; let single = - let single c = [ c, c ] in + let single (c : c) = [ c, c ] in Dense_map.make (* an extra color for lnl *) ~size:257 ~f:single ;; @@ -101,10 +101,10 @@ let rec offset o l = | (c1, c2) :: r -> (c1 + o, c2 + o) :: offset o r ;; -let empty = [] +let empty : t = [] let cany = [ 0, 255 ] -let union_all : t list -> t = List.fold_left ~init:empty ~f:union -let intersect_all : t list -> t = List.fold_left ~init:cany ~f:inter +let union_all ts = List.fold_left ~init:empty ~f:union ts +let intersect_all ts = List.fold_left ~init:cany ~f:inter ts let rec mem (c : int) s = match s with @@ -127,7 +127,7 @@ let print_one ch (c1, c2) = if Int.equal c1 c2 then Format.fprintf ch "%d" c1 else Format.fprintf ch "%d-%d" c1 c2 ;; -let pp = Fmt.list ~pp_sep:(Fmt.lit ", ") print_one +let pp ts = Fmt.list ~pp_sep:(Fmt.lit ", ") print_one ts let to_dyn t = let open Dyn in @@ -149,7 +149,7 @@ let one_char = function | _ -> None ;; -module CSetMap = Map.Make (struct +module CSetMap = Map.MakePortable (struct type t = int * (int * int) list let compare (i, u) (j, v) = diff --git a/lib/cset.mli b/lib/cset.mli index e875d072..7f551f4d 100644 --- a/lib/cset.mli +++ b/lib/cset.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library @@ -22,7 +24,7 @@ (* Character sets, represented as sorted list of intervals *) -type c [@@immediate] +type c : immediate val equal_c : c -> c -> bool val to_int : c -> int @@ -30,7 +32,7 @@ val of_int : int -> c val to_char : c -> char val of_char : char -> c -type t +type t : immutable_data (** special characters which isn't present in any set (not even in [cany]) *) val null_char : c diff --git a/lib/dense_map.ml b/lib/dense_map.ml index 052469c8..1552d726 100644 --- a/lib/dense_map.ml +++ b/lib/dense_map.ml @@ -1,4 +1,9 @@ -let make ~size ~f = - let cache = Array.init size f in - fun i -> cache.(i) +let make : size:int -> f:(int -> 'a @ portable) -> (int -> 'a @ contended) @ portable = + fun ~size ~f -> + let cache = + (* SAFETY: Interface requires that we never put anything nonportable in here *) + Obj.magic_portable (Array.init size f) + in + fun i -> + (((* SAFETY: we never mutate this array. *) Obj.magic_uncontended cache).(i)) ;; diff --git a/lib/dense_map.mli b/lib/dense_map.mli index f83f663d..5e11e123 100644 --- a/lib/dense_map.mli +++ b/lib/dense_map.mli @@ -1 +1,3 @@ -val make : size:int -> f:(int -> 'a) -> int -> 'a +@@ portable + +val make : size:int -> f:(int -> 'a @ portable) -> (int -> 'a @ contended) @ portable diff --git a/lib/emacs.mli b/lib/emacs.mli index 5092a200..7da1d526 100644 --- a/lib/emacs.mli +++ b/lib/emacs.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library diff --git a/lib/fmt.mli b/lib/fmt.mli index 3dbadf46..44edf810 100644 --- a/lib/fmt.mli +++ b/lib/fmt.mli @@ -1,3 +1,5 @@ +@@ portable + type formatter := Format.formatter type 'a t = formatter -> 'a -> unit diff --git a/lib/glob.mli b/lib/glob.mli index 04968066..fcadbc9d 100644 --- a/lib/glob.mli +++ b/lib/glob.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library diff --git a/lib/group.mli b/lib/group.mli index 2e0263a9..e16efbdc 100644 --- a/lib/group.mli +++ b/lib/group.mli @@ -1,3 +1,5 @@ +@@ portable + (** Information about groups in a match. *) (** Result of a successful match. *) diff --git a/lib/hash_set.ml b/lib/hash_set.ml index c49ca15c..f249e483 100644 --- a/lib/hash_set.ml +++ b/lib/hash_set.ml @@ -60,7 +60,7 @@ let () = assert (Array.unsafe_get x 0 = absent) ;; -let create () = ref Option.none +let create () = ref None let[@inline] index_of_offset slots index i = let i = index + !i in diff --git a/lib/hash_set.mli b/lib/hash_set.mli index f2f82215..11518f1d 100644 --- a/lib/hash_set.mli +++ b/lib/hash_set.mli @@ -1,3 +1,5 @@ +@@ portable + type t val create : unit -> t diff --git a/lib/mark_infos.ml b/lib/mark_infos.ml index f4a4251e..69dd6d08 100644 --- a/lib/mark_infos.ml +++ b/lib/mark_infos.ml @@ -5,7 +5,8 @@ type t = int array let make marks = let len = 1 + List.fold_left ~f:(fun ma (i, _) -> max ma i) ~init:(-1) marks in let t = Array.make len (-1) in - List.iter ~f:(fun (i, v) -> t.(i) <- v) marks; + let set (i, v) = t.(i) <- v in + List.iter ~f:set marks; t ;; diff --git a/lib/mark_infos.mli b/lib/mark_infos.mli index f729058f..a6783a45 100644 --- a/lib/mark_infos.mli +++ b/lib/mark_infos.mli @@ -1,3 +1,5 @@ +@@ portable + (** store mark information for groups in an array *) type t diff --git a/lib/parse_buffer.mli b/lib/parse_buffer.mli index 202b54f7..cedebc49 100644 --- a/lib/parse_buffer.mli +++ b/lib/parse_buffer.mli @@ -1,3 +1,5 @@ +@@ portable + type t exception Parse_error diff --git a/lib/pcre.mli b/lib/pcre.mli index 0591211c..34d7d9f5 100644 --- a/lib/pcre.mli +++ b/lib/pcre.mli @@ -1,3 +1,5 @@ +@@ portable + (** NOTE: Only a subset of the PCRE spec is supported *) exception Parse_error diff --git a/lib/perl.mli b/lib/perl.mli index 1044812e..e8af45e7 100644 --- a/lib/perl.mli +++ b/lib/perl.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library diff --git a/lib/pmark.ml b/lib/pmark.ml index 9686b245..eec7c2cb 100644 --- a/lib/pmark.ml +++ b/lib/pmark.ml @@ -3,20 +3,15 @@ module Pmark = struct let equal (x : int) (y : int) = x = y let compare (x : int) (y : int) = compare x y - let r = ref 0 - - let gen () = - incr r; - !r - ;; - + let r = Atomic.make 1 + let gen () = Atomic.fetch_and_add r 1 let pp = Format.pp_print_int end include Pmark module Set = struct - module Set = Set.Make (Pmark) + module Set = Set.MakePortable (Pmark) let[@warning "-32"] to_list x = let open Set in diff --git a/lib/pmark.mli b/lib/pmark.mli index 100ee4ca..3cc498a3 100644 --- a/lib/pmark.mli +++ b/lib/pmark.mli @@ -1,3 +1,5 @@ +@@ portable + type t = private int val equal : t -> t -> bool diff --git a/lib/posix.mli b/lib/posix.mli index a2213b0d..ac7463a0 100644 --- a/lib/posix.mli +++ b/lib/posix.mli @@ -1,3 +1,5 @@ +@@ portable + (* RE - A regular expression library diff --git a/lib/posix_class.mli b/lib/posix_class.mli index a06c4f7b..ad1cd23e 100644 --- a/lib/posix_class.mli +++ b/lib/posix_class.mli @@ -1,3 +1,5 @@ +@@ portable + val names : string list val of_name : string -> Core.t val parse : Parse_buffer.t -> Core.t option diff --git a/lib/replace.mli b/lib/replace.mli index 387ee951..84d38891 100644 --- a/lib/replace.mli +++ b/lib/replace.mli @@ -1,3 +1,5 @@ +@@ portable + (** [replace ~all re ~f s] iterates on [s], and replaces every occurrence of [re] with [f substring] where [substring] is the current match. If [all = false], then only the first occurrence of [re] is replaced. *) diff --git a/lib/slice.mli b/lib/slice.mli index 67d9ec20..dc837ff9 100644 --- a/lib/slice.mli +++ b/lib/slice.mli @@ -1,3 +1,5 @@ +@@ portable + type t = { s : string ; pos : int diff --git a/lib/view.mli b/lib/view.mli index d069f8c4..26e49bc0 100644 --- a/lib/view.mli +++ b/lib/view.mli @@ -1,3 +1,5 @@ +@@ portable + (** A view of the top-level of a regex. This type is unstable and may change *) module Cset : sig diff --git a/lib_test/concurrency/suppress.txt b/lib_test/concurrency/suppress.txt index 513d95e1..8357a0fd 100644 --- a/lib_test/concurrency/suppress.txt +++ b/lib_test/concurrency/suppress.txt @@ -16,3 +16,7 @@ race_top:^camlRe__Automata.status # Race within Compile.final race_top:^camlRe__Compile.final + +# Spurious data race due to the two-step initialization in Mark_info.make +# (between Mark_info.make and other functions in module Mark_infos) +race_top:^camlRe__Mark_infos.set diff --git a/lib_test/concurrency/test.ml b/lib_test/concurrency/test.ml index 0ee42c4a..2fffb2b7 100644 --- a/lib_test/concurrency/test.ml +++ b/lib_test/concurrency/test.ml @@ -81,12 +81,16 @@ let execute ~short re a = (inverse_permutation a) (Array.map (fun i -> - try Some (Re.exec ~pos:(if short then 30 - 7 else 0) re strings.(i)) with + try + Some + (Re.Group.all_offset + @@ Re.exec ~pos:(if short then 30 - 7 else 0) re strings.(i)) + with | Not_found -> None) a) ;; -let compare_groups g g' = Re.Group.(all_offset g = all_offset g') +let compare_groups g g' = g = g' let concurrent f f' = let barrier = Barrier.create 2 in