Skip to content

Commit

Permalink
bugfix: check that timer servicing worked (#4846)
Browse files Browse the repository at this point in the history
This fixes a disappearing timer situation described by Timo in https://dfinity.slack.com/archives/CPL67E7MX/p1736347600078339.

It turns out that under high message load the `async` timer servicing routine cannot be run. The fix is simple, check if the self-call succeeded (causes a `throw` already), and if not, set a very near global timer to retry ASAP (in the top-level `catch`).

TODO:
- [x] `catch` send errors for user workers (and mitigate) — see #4852
- [ ] document that the user thunk may be called more than once, and thus should have no side effects other than submitting the self-call — see dfinity/motoko-base#682
  • Loading branch information
ggreif authored Jan 17, 2025
1 parent 607ecfd commit a9bc214
Show file tree
Hide file tree
Showing 10 changed files with 69 additions and 29 deletions.
7 changes: 7 additions & 0 deletions src/ir_def/construct.ml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ let primE prim es =
| DeserializePrim ts -> T.seq ts
| DeserializeOptPrim ts -> T.Opt (T.seq ts)
| OtherPrim "trap" -> T.Non
| OtherPrim "global_timer_set" -> T.nat64
| OtherPrim "call_perform_status" -> T.(Prim Nat32)
| OtherPrim "call_perform_message" -> T.text
| OtherPrim "array_len"
Expand Down Expand Up @@ -269,6 +270,12 @@ let nat32E n =
note = Note.{ def with typ = T.(Prim Nat32) }
}

let nat64E n =
{ it = LitE (Nat64Lit n);
at = no_region;
note = Note.{ def with typ = T.nat64 }
}

let natE n =
{ it = LitE (NatLit n);
at = no_region;
Expand Down
1 change: 1 addition & 0 deletions src/ir_def/construct.mli
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ val let_else_switch : pat -> exp -> exp -> exp
val natE : Mo_values.Numerics.Nat.t -> exp
val intE : Mo_values.Numerics.Int.t -> exp
val nat32E : Mo_values.Numerics.Nat32.t -> exp
val nat64E : Mo_values.Numerics.Nat64.t -> exp
val textE : string -> exp
val blobE : string -> exp
val letE : var -> exp -> exp -> exp
Expand Down
16 changes: 13 additions & 3 deletions src/ir_passes/await.ml
Original file line number Diff line number Diff line change
Expand Up @@ -653,14 +653,14 @@ and t_comp_unit context = function
preupgrade = t_exp LabelEnv.empty preupgrade;
postupgrade = t_exp LabelEnv.empty postupgrade;
heartbeat = t_ignore_throw LabelEnv.empty heartbeat;
timer = t_ignore_throw LabelEnv.empty timer;
timer = t_timer_throw LabelEnv.empty timer;
inspect = t_exp LabelEnv.empty inspect;
stable_record = t_exp LabelEnv.empty stable_record;
stable_type;
},
t)

and t_ignore_throw context exp =
and t_on_throw context exp t_exp =
match exp.it with
| Ir.PrimE (Ir.TupPrim, []) ->
exp
Expand All @@ -671,14 +671,24 @@ and t_ignore_throw context exp =
(LabelEnv.add Throw (Cont throw) context) in
let e = fresh_var "e" T.catch in
{ (blockE [
funcD throw e (tupE[]);
funcD throw e t_exp;
]
(c_exp context' exp (meta (T.unit) (fun v1 -> tupE []))))
(* timer logic requires us to preserve any source location,
or timer won't be initialized in compile.ml *)
with at = exp.at
}

and t_ignore_throw context exp = t_on_throw context exp (tupE[])

(* if self-call queue full: expire global timer soon and retry *)
and t_timer_throw context exp =
t_on_throw context exp
(blockE
[expD (primE
(OtherPrim "global_timer_set")
[Mo_values.Numerics.Nat64.of_int 1 |> nat64E])]
(tupE[]))

and t_prog (prog, flavor) =
(t_comp_unit LabelEnv.empty prog, { flavor with has_await = false })
Expand Down
8 changes: 4 additions & 4 deletions src/lowering/desugar.ml
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,8 @@ and call_system_func_opt name es obj_typ =
let timer =
blockE
[ expD T.(callE (varE (var id.it note)) [Any]
(varE (var "@set_global_timer" (Func (Local, Returns, [], [Prim Nat64], []))))) ]
(unitE ()) in
(varE (var "@set_global_timer" T.global_timer_set_type))) ]
(unitE()) in
{ timer with at }
| "heartbeat" ->
blockE
Expand Down Expand Up @@ -605,8 +605,8 @@ and build_actor at ts self_id es obj_typ =
| Some call -> call
| None when !Mo_config.Flags.global_timer ->
blockE
[ expD T.(callE (varE (var "@timer_helper" Mo_frontend.Typing.heartbeat_type)) [unit] (unitE())) ]
(unitE ())
[ expD T.(callE (varE (var "@timer_helper" T.heartbeat_type)) [unit] (unitE())) ]
(unitE())
| None -> tupE []);
inspect =
(match call_system_func_opt "inspect" es obj_typ with
Expand Down
12 changes: 2 additions & 10 deletions src/mo_frontend/typing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -386,18 +386,10 @@ let infer_mut mut : T.typ -> T.typ =

(* System method types *)

let heartbeat_type =
T.(Func (Local, Returns, [scope_bind], [], [Async (Fut, Var (default_scope_var, 0), unit)]))

let timer_type =
T.(Func (Local, Returns, [scope_bind],
[Func (Local, Returns, [], [Prim Nat64], [])],
[Async (Fut, Var (default_scope_var, 0), unit)]))

let system_funcs tfs =
[
("heartbeat", heartbeat_type);
("timer", timer_type);
("heartbeat", T.heartbeat_type);
("timer", T.timer_type);
T.("preupgrade", Func (Local, Returns, [scope_bind], [], []));
T.("postupgrade", Func (Local, Returns, [scope_bind], [], []));
("inspect",
Expand Down
2 changes: 0 additions & 2 deletions src/mo_frontend/typing.mli
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,3 @@ val infer_prog : ?viper_mode:bool -> scope -> string option -> Async_cap.async_c
val check_lib : scope -> string option -> Syntax.lib -> scope Diag.result
val check_actors : ?viper_mode:bool -> ?check_actors:bool -> scope -> Syntax.prog list -> unit Diag.result
val check_stab_sig : scope -> Syntax.stab_sig -> (field list) Diag.result

val heartbeat_type : typ
16 changes: 15 additions & 1 deletion src/mo_types/type.ml
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ let compare_field f1 f2 =
| {lab = l1; typ = _; _}, {lab = l2; typ = _; _} -> compare l1 l2


(* Short-hands *)
(* Shorthands *)

let unit = Tup []
let bool = Prim Bool
Expand All @@ -321,6 +321,7 @@ let char = Prim Char
let principal = Prim Principal
let region = Prim Region


let fields flds =
List.sort compare_field
(List.map (fun (lab, typ) -> {lab; typ; src = empty_src}) flds)
Expand Down Expand Up @@ -1341,6 +1342,19 @@ let default_scope_var = scope_var ""
let scope_bound = Any
let scope_bind = { var = default_scope_var; sort = Scope; bound = scope_bound }

(* Shorthands for replica callbacks *)

let heartbeat_type =
Func (Local, Returns, [scope_bind], [], [Async (Fut, Var (default_scope_var, 0), unit)])

let global_timer_set_type = Func (Local, Returns, [], [Prim Nat64], [])

let timer_type =
Func (Local, Returns, [scope_bind],
[global_timer_set_type],
[Async (Fut, Var (default_scope_var, 0), unit)])


(* Well-known fields *)

let motoko_async_helper_fld =
Expand Down
5 changes: 4 additions & 1 deletion src/mo_types/type.mli
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ end
val is_shared_sort : 'a shared -> bool


(* Short-hands *)
(* Shorthands *)

val unit : typ
val bool : typ
Expand All @@ -97,6 +97,9 @@ val error : typ
val char : typ
val principal : typ
val region : typ
val heartbeat_type : typ
val timer_type : typ
val global_timer_set_type : typ

val sum : (lab * typ) list -> typ
val obj : obj_sort -> (lab * typ) list -> typ
Expand Down
21 changes: 18 additions & 3 deletions src/prelude/internals.mo
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ func @prune(n : ?@Node) : ?@Node = switch n {
if (n.expire[0] == 0) {
@prune(n.post) // by corollary
} else {
?{ n with pre = @prune(n.pre); post = @prune(n.post) }
?{ n with pre = @prune(n.pre) }
}
}
};
Expand Down Expand Up @@ -612,9 +612,25 @@ func @timer_helper() : async () {
ignore (prim "global_timer_set" : Nat64 -> Nat64) exp;
if (exp == 0) @timers := null;
var failed : Nat64 = 0;
func reinsert(job : () -> async ()) {
if (failed == 0) {
@timers := @prune @timers;
ignore (prim "global_timer_set" : Nat64 -> Nat64) 1
};
failed += 1;
@timers := ?(switch @timers {
case (?{ id = 0; pre; post; job = j; expire; delay })
// push top node's contents into pre
({ expire = [var failed]; id = 0; delay; job; post
; pre = ?{ id = 0; expire; pre; post = null; delay; job = j } });
case _ ({ expire = [var failed]; id = 0; delay = null; job; pre = null; post = @timers })
})
};
for (o in thunks.vals()) {
switch o {
case (?thunk) ignore thunk();
case (?thunk) try ignore thunk() catch _ reinsert thunk;
case _ return
}
}
Expand Down Expand Up @@ -675,5 +691,4 @@ func @cancelTimer(id : Nat) {
}
};
func @set_global_timer(time : Nat64) = ignore (prim "global_timer_set" : Nat64 -> Nat64) time;
10 changes: 5 additions & 5 deletions test/fail/ok/illegal-await.tc.ok
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ illegal-await.mo:24.11: info, start of scope $@anon-async-24.11 mentioned in err
illegal-await.mo:26.5: info, end of scope $@anon-async-24.11 mentioned in error at illegal-await.mo:25.7-25.14
illegal-await.mo:22.10: info, start of scope $@anon-async-22.10 mentioned in error at illegal-await.mo:25.7-25.14
illegal-await.mo:27.3: info, end of scope $@anon-async-22.10 mentioned in error at illegal-await.mo:25.7-25.14
illegal-await.mo:35.11-35.12: type error [M0087], ill-scoped await: expected async type from current scope $Rec, found async type from other scope $__15
illegal-await.mo:35.11-35.12: type error [M0087], ill-scoped await: expected async type from current scope $Rec, found async type from other scope $__19
scope $Rec is illegal-await.mo:33.44-40.2
scope $__15 is illegal-await.mo:33.1-40.2
scope $__19 is illegal-await.mo:33.1-40.2
illegal-await.mo:33.44: info, start of scope $Rec mentioned in error at illegal-await.mo:35.5-35.12
illegal-await.mo:40.1: info, end of scope $Rec mentioned in error at illegal-await.mo:35.5-35.12
illegal-await.mo:33.1: info, start of scope $__15 mentioned in error at illegal-await.mo:35.5-35.12
illegal-await.mo:40.1: info, end of scope $__15 mentioned in error at illegal-await.mo:35.5-35.12
illegal-await.mo:33.1: info, start of scope $__19 mentioned in error at illegal-await.mo:35.5-35.12
illegal-await.mo:40.1: info, end of scope $__19 mentioned in error at illegal-await.mo:35.5-35.12
illegal-await.mo:38.20-38.21: type error [M0096], expression of type
async<$__15> ()
async<$__19> ()
cannot produce expected type
async<$Rec> ()

0 comments on commit a9bc214

Please sign in to comment.