From 7ed9b73f4d4c994b603cd369758c79eacdafc62f Mon Sep 17 00:00:00 2001 From: Mac Malone Date: Thu, 13 Jun 2024 19:15:49 -0400 Subject: [PATCH] feat: lake: reliably cache logs and hashes (#4402) Moves the cached log into the trace file (no more `.log.json`). This means logs are no longer cached on fatal errors and this ensures that an out-of-date log is not associated with an up-to-date trace. Separately, `.hash` file generation was changed to be more reliable as well. `.hash` files are deleted as part of the build and always regenerate with `--rehash`. Closes #2751. (cherry picked from commit db74ee9e83c631dd4bab9b7b6a10d0793cb0cbef) --- src/lake/Lake/Build/Common.lean | 182 +++++++++++++++++-------------- src/lake/Lake/Build/Module.lean | 36 +++--- src/lake/Lake/Build/Trace.lean | 38 +++++-- src/lake/Lake/Config/Module.lean | 6 +- src/lake/Lake/Util/IO.lean | 2 +- 5 files changed, 159 insertions(+), 105 deletions(-) diff --git a/src/lake/Lake/Build/Common.lean b/src/lake/Lake/Build/Common.lean index 71f1ce9947ca..c52aa2728d13 100644 --- a/src/lake/Lake/Build/Common.lean +++ b/src/lake/Lake/Build/Common.lean @@ -25,109 +25,137 @@ and will be rebuilt on different host platforms. def platformTrace := pureHash System.Platform.target /-- -Checks if the `info` is up-to-date by comparing `depTrace` with `traceFile`. -If old mode is enabled (e.g., `--old`), uses the modification time of `oldTrace` +The build trace file format, +which stores information about a (successful) build. +-/ +structure BuildMetadata where + depHash : Hash + log : Log + deriving ToJson, FromJson + +/-- Read persistent trace data from a file. -/ +def readTraceFile? (path : FilePath) : LogIO (Option BuildMetadata) := OptionT.run do + match (← IO.FS.readFile path |>.toBaseIO) with + | .ok contents => + match Json.parse contents >>= fromJson? with + | .ok contents => return contents + | .error e => logVerbose s!"{path}: invalid trace file: {e}"; failure + | .error (.noFileOrDirectory ..) => failure + | .error e => logWarning s!"{path}: read failed: {e}"; failure + +/-- Write persistent trace data to a file. -/ +def writeTraceFile (path : FilePath) (depTrace : BuildTrace) (log : Log) := do + createParentDirs path + let data := {log, depHash := depTrace.hash : BuildMetadata} + IO.FS.writeFile path (toJson data).pretty + +/-- +Checks if the `info` is up-to-date by comparing `depTrace` with `depHash`. +If old mode is enabled (e.g., `--old`), uses the `oldTrace` modification time as the point of comparison instead. -/ -@[specialize] def BuildTrace.checkUpToDate +@[specialize] def checkHashUpToDate [CheckExists ι] [GetMTime ι] - (info : ι) (depTrace : BuildTrace) (traceFile : FilePath) - (oldTrace := depTrace) + (info : ι) (depTrace : BuildTrace) (depHash : Option Hash) + (oldTrace := depTrace.mtime) : JobM Bool := do - if (← getIsOldMode) then - if (← oldTrace.checkAgainstTime info) then - return true - else if let some hash ← Hash.load? traceFile then - depTrace.checkAgainstHash info hash - else - return false + if depTrace.hash == depHash then + checkExists info + else if (← getIsOldMode) then + oldTrace.checkUpToDate info else - depTrace.checkAgainstFile info traceFile + return false /-- -Build `info` unless it already exists and `depTrace` matches that -of the `traceFile`. If rebuilt, save the new `depTrace` to the `tracefile`. +Checks whether `info` is up-to-date, and runs `build` to recreate it if not. +If rebuilt, saves the new `depTrace` and build log to `traceFile`. Returns whether `info` was already up-to-date. + +**Up-to-date Checking** + +If `traceFile` exists, checks that the hash in `depTrace` matches that +of the `traceFile`. If not, and old mode is enabled (e.g., `--old`), falls back +to the `oldTrace` modification time as the point of comparison. +If up-to-date, replay the build log stored in `traceFile`. + +If `traceFile` does not exist, checks that `info` has a newer modification time +then `depTrace` / `oldTrace`. No log will be replayed. -/ -@[inline] def buildUnlessUpToDate? +@[specialize] def buildUnlessUpToDate? [CheckExists ι] [GetMTime ι] (info : ι) (depTrace : BuildTrace) (traceFile : FilePath) (build : JobM PUnit) - (action : JobAction := .build) (oldTrace := depTrace) + (action : JobAction := .build) (oldTrace := depTrace.mtime) : JobM Bool := do - if (← depTrace.checkUpToDate info traceFile oldTrace) then + if let some data ← readTraceFile? traceFile then + if (← checkHashUpToDate info depTrace data.depHash oldTrace) then + updateAction .replay + data.log.replay + return true + else if (← getIsOldMode) then + if (← oldTrace.checkUpToDate info) then + return true + else if (← depTrace.checkAgainstTime info) then return true - else if (← getNoBuild) then + if (← getNoBuild) then IO.Process.exit noBuildCode.toUInt8 else updateAction action - build - depTrace.writeToFile traceFile + let iniPos ← getLogPos + build -- fatal errors will not produce a trace (or cache their log) + let log := (← getLog).takeFrom iniPos + writeTraceFile traceFile depTrace log return false /-- -Build `info` unless it already exists and `depTrace` matches that -of the `traceFile`. If rebuilt, save the new `depTrace` to the `tracefile`. +Checks whether `info` is up-to-date, and runs `build` to recreate it if not. +If rebuilt, saves the new `depTrace` and build log to `traceFile`. + +See `buildUnlessUpToDate?` for more details on how Lake determines whether +`info` is up-to-date. -/ @[inline] def buildUnlessUpToDate [CheckExists ι] [GetMTime ι] (info : ι) (depTrace : BuildTrace) (traceFile : FilePath) (build : JobM PUnit) - (action : JobAction := .build) (oldTrace := depTrace) + (action : JobAction := .build) (oldTrace := depTrace.mtime) : JobM PUnit := do discard <| buildUnlessUpToDate? info depTrace traceFile build action oldTrace -/-- Fetch the trace of a file that may have its hash already cached in a `.hash` file. -/ -def fetchFileTrace (file : FilePath) : JobM BuildTrace := do +/-- Computes the hash of a file and saves it to a `.hash` file. -/ +def cacheFileHash (file : FilePath) : IO Unit := do + let hash ← computeHash file + let hashFile := FilePath.mk <| file.toString ++ ".hash" + createParentDirs hashFile + IO.FS.writeFile hashFile hash.toString + +/-- Remove the cached hash of a file (its `.hash` file). -/ +def clearFileHash (file : FilePath) : IO Unit := do + try + IO.FS.removeFile <| file.toString ++ ".hash" + catch + | .noFileOrDirectory .. => pure () + | e => throw e + +/-- +Fetches the hash of a file that may already be cached in a `.hash` file. +If the `.hash` file does not exist or hash files are not trusted +(e.g., with `--rehash`), creates it with a newly computed hash. +-/ +def fetchFileHash (file : FilePath) : JobM Hash := do + let hashFile := FilePath.mk <| file.toString ++ ".hash" if (← getTrustHash) then - let hashFile := FilePath.mk <| file.toString ++ ".hash" if let some hash ← Hash.load? hashFile then - return .mk hash (← getMTime file) - else - let hash ← computeHash file - IO.FS.writeFile hashFile hash.toString - return .mk hash (← getMTime file) - else - computeTrace file - -/-- Compute the hash of a file and save it to a `.hash` file. -/ -def cacheFileHash (file : FilePath) : IO Hash := do + return hash let hash ← computeHash file - let hashFile := FilePath.mk <| file.toString ++ ".hash" + createParentDirs hashFile IO.FS.writeFile hashFile hash.toString return hash -/-- The build log file format. -/ -structure BuildLog where - depHash : Hash - log : Log - deriving ToJson, FromJson - /-- -Replays the JSON log in `logFile` (if it exists). -If the log file is malformed, logs a warning. +Fetches the trace of a file that may have already have its hash cached +in a `.hash` file. If no such `.hash` file exists, recomputes and creates it. -/ -def replayBuildLog (logFile : FilePath) (depTrace : BuildTrace) : LogIO PUnit := do - match (← IO.FS.readFile logFile |>.toBaseIO) with - | .ok contents => - match Json.parse contents >>= fromJson? with - | .ok {log, depHash : BuildLog} => - if depTrace.hash == depHash then - log.replay - | .error e => logWarning s!"{logFile}: invalid build log: {e}" - | .error (.noFileOrDirectory ..) => pure () - | .error e => logWarning s!"{logFile}: read failed: {e}" - -/-- Saves the log produce by `build` as JSON to `logFile`. -/ -def cacheBuildLog - (logFile : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) -: JobM PUnit := do - let iniPos ← getLogPos - let errPos? ← try build; pure none catch errPos => pure (some errPos) - let log := (← getLog).takeFrom iniPos - unless log.isEmpty do - let log := {log, depHash := depTrace.hash : BuildLog} - IO.FS.writeFile logFile (toJson log).pretty - if let some errPos := errPos? then - throw errPos +def fetchFileTrace (file : FilePath) : JobM BuildTrace := do + return .mk (← fetchFileHash file) (← getMTime file) /-- Builds `file` using `build` unless it already exists and `depTrace` matches @@ -137,21 +165,17 @@ the `.hash` file using `fetchFileTrace`. Build logs (if any) are saved to a `.log.json` file and replayed from there if the build is skipped. For example, given `file := "foo.c"`, compares `depTrace` with that in -`foo.c.trace` with the hash cache in `foo.c.hash` and the log cache in -`foo.c.log.json`. +`foo.c.trace` with the hash cached in `foo.c.hash` and the log cached in +`foo.c.trace`. -/ def buildFileUnlessUpToDate (file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit) : JobM BuildTrace := do let traceFile := FilePath.mk <| file.toString ++ ".trace" - let logFile := FilePath.mk <| file.toString ++ ".log.json" - let build := cacheBuildLog logFile depTrace build - if (← buildUnlessUpToDate? file depTrace traceFile build) then - updateAction .replay - replayBuildLog logFile depTrace - fetchFileTrace file - else - return .mk (← cacheFileHash file) (← getMTime file) + buildUnlessUpToDate file depTrace traceFile do + build + clearFileHash file + fetchFileTrace file /-- Build `file` using `build` after `dep` completes if the dependency's diff --git a/src/lake/Lake/Build/Module.lean b/src/lake/Lake/Build/Module.lean index 63a99af92d5f..784f9bf9846f 100644 --- a/src/lake/Lake/Build/Module.lean +++ b/src/lake/Lake/Build/Module.lean @@ -143,6 +143,22 @@ def Module.recBuildDeps (mod : Module) : FetchM (BuildJob (SearchPath × Array F def Module.depsFacetConfig : ModuleFacetConfig depsFacet := mkFacetJobConfig (·.recBuildDeps) +/-- Remove cached file hashes of the module build outputs (in `.hash` files). -/ +def Module.clearOutputHashes (mod : Module) : IO PUnit := do + clearFileHash mod.oleanFile + clearFileHash mod.ileanFile + clearFileHash mod.cFile + if Lean.Internal.hasLLVMBackend () then + clearFileHash mod.bcFile + +/-- Cache the file hashes of the module build outputs in `.hash` files. -/ +def Module.cacheOutputHashes (mod : Module) : IO PUnit := do + cacheFileHash mod.oleanFile + cacheFileHash mod.ileanFile + cacheFileHash mod.cFile + if Lean.Internal.hasLLVMBackend () then + cacheFileHash mod.bcFile + /-- Recursively build a Lean module. Fetch its dependencies and then elaborate the Lean source file, producing @@ -154,20 +170,12 @@ def Module.recBuildLean (mod : Module) : FetchM (BuildJob Unit) := do let argTrace : BuildTrace := pureHash mod.leanArgs let srcTrace : BuildTrace ← computeTrace { path := mod.leanFile : TextFilePath } let modTrace := (← getLeanTrace).mix <| argTrace.mix <| srcTrace.mix depTrace - let upToDate ← buildUnlessUpToDate? (oldTrace := srcTrace) mod modTrace mod.traceFile do - let hasLLVM := Lean.Internal.hasLLVMBackend () - let bcFile? := if hasLLVM then some mod.bcFile else none - cacheBuildLog mod.logFile modTrace do - compileLeanModule mod.leanFile mod.oleanFile mod.ileanFile mod.cFile bcFile? - (← getLeanPath) mod.rootDir dynlibs dynlibPath (mod.weakLeanArgs ++ mod.leanArgs) (← getLean) - discard <| cacheFileHash mod.oleanFile - discard <| cacheFileHash mod.ileanFile - discard <| cacheFileHash mod.cFile - if hasLLVM then - discard <| cacheFileHash mod.bcFile - if upToDate then - updateAction .replay - replayBuildLog mod.logFile modTrace + let upToDate ← buildUnlessUpToDate? (oldTrace := srcTrace.mtime) mod modTrace mod.traceFile do + compileLeanModule mod.leanFile mod.oleanFile mod.ileanFile mod.cFile mod.bcFile? + (← getLeanPath) mod.rootDir dynlibs dynlibPath (mod.weakLeanArgs ++ mod.leanArgs) (← getLean) + mod.clearOutputHashes + unless upToDate && (← getTrustHash) do + mod.cacheOutputHashes return ((), depTrace) /-- The `ModuleFacetConfig` for the builtin `leanArtsFacet`. -/ diff --git a/src/lake/Lake/Build/Trace.lean b/src/lake/Lake/Build/Trace.lean index eaa7123ed6a7..0eb7bf1d9987 100644 --- a/src/lake/Lake/Build/Trace.lean +++ b/src/lake/Lake/Build/Trace.lean @@ -201,6 +201,17 @@ instance [GetMTime α] : ComputeTrace α IO MTime := ⟨getMTime⟩ instance : GetMTime FilePath := ⟨getFileMTime⟩ instance : GetMTime TextFilePath := ⟨(getFileMTime ·.path)⟩ +/-- +Check if `info` is up-to-date using modification time. +That is, check if the info is newer than `self`. +-/ +@[specialize] def MTime.checkUpToDate + [GetMTime i] (info : i) (self : MTime) +: BaseIO Bool := do + match (← getMTime info |>.toBaseIO) with + | .ok mtime => return self < mtime + | .error _ => return false + -------------------------------------------------------------------------------- /-! # Lake Build Trace (Hash + MTIme) -/ -------------------------------------------------------------------------------- @@ -242,7 +253,7 @@ instance : MixTrace BuildTrace := ⟨mix⟩ Check if the info is up-to-date using a hash. That is, check that info exists and its input hash matches this trace's hash. -/ -@[inline] def checkAgainstHash [CheckExists i] +@[specialize] def checkAgainstHash [CheckExists i] (info : i) (hash : Hash) (self : BuildTrace) : BaseIO Bool := pure (hash == self.hash) <&&> checkExists info @@ -250,24 +261,35 @@ That is, check that info exists and its input hash matches this trace's hash. Check if the info is up-to-date using modification time. That is, check if the info is newer than this input trace's modification time. -/ -@[inline] def checkAgainstTime [GetMTime i] -(info : i) (self : BuildTrace) : BaseIO Bool := - EIO.catchExceptions (h := fun _ => pure false) do - return self.mtime < (← getMTime info) +@[inline] def checkAgainstTime + [GetMTime i] (info : i) (self : BuildTrace) +: BaseIO Bool := do + self.mtime.checkUpToDate info /-- Check if the info is up-to-date using a trace file. If the file exists, match its hash to this trace's hash. If not, check if the info is newer than this trace's modification time. + +**Deprecated:** Should not be done manually, +but as part of `buildUnlessUpToDate`. -/ -@[inline] def checkAgainstFile [CheckExists i] [GetMTime i] -(info : i) (traceFile : FilePath) (self : BuildTrace) : BaseIO Bool := do +@[deprecated, specialize] def checkAgainstFile + [CheckExists i] [GetMTime i] + (info : i) (traceFile : FilePath) (self : BuildTrace) +: BaseIO Bool := do if let some hash ← Hash.load? traceFile then self.checkAgainstHash info hash else self.checkAgainstTime info -@[inline] def writeToFile (traceFile : FilePath) (self : BuildTrace) : IO PUnit := do +/-- +Write trace to a file. + +**Deprecated:** Should not be done manually, +but as part of `buildUnlessUpToDate`. +-/ +@[deprecated] def writeToFile (traceFile : FilePath) (self : BuildTrace) : IO PUnit := do createParentDirs traceFile IO.FS.writeFile traceFile self.hash.toString diff --git a/src/lake/Lake/Config/Module.lean b/src/lake/Lake/Config/Module.lean index 22e8e81b70a4..c65893221cbf 100644 --- a/src/lake/Lake/Config/Module.lean +++ b/src/lake/Lake/Config/Module.lean @@ -80,9 +80,6 @@ abbrev pkg (self : Module) : Package := @[inline] def ileanFile (self : Module) : FilePath := self.leanLibPath "ilean" -@[inline] def logFile (self : Module) : FilePath := - self.leanLibPath "log.json" - @[inline] def traceFile (self : Module) : FilePath := self.leanLibPath "trace" @@ -101,6 +98,9 @@ abbrev pkg (self : Module) : Package := @[inline] def bcFile (self : Module) : FilePath := self.irPath "bc" +def bcFile? (self : Module) : Option FilePath := + if Lean.Internal.hasLLVMBackend () then some self.bcFile else none + @[inline] def bcoFile (self : Module) : FilePath := self.irPath "bc.o" diff --git a/src/lake/Lake/Util/IO.lean b/src/lake/Lake/Util/IO.lean index e67ec54aa735..7fae188e340b 100644 --- a/src/lake/Lake/Util/IO.lean +++ b/src/lake/Lake/Util/IO.lean @@ -7,5 +7,5 @@ Authors: Mac Malone namespace Lake /-- Creates any missing parent directories of `path`. -/ -@[inline] def createParentDirs (path : System.FilePath) : IO Unit := do +def createParentDirs (path : System.FilePath) : IO Unit := do if let some dir := path.parent then IO.FS.createDirAll dir