Skip to content

Commit

Permalink
feat: lake: reliably cache logs and hashes (#4402)
Browse files Browse the repository at this point in the history
Moves the cached log into the trace file (no more `.log.json`). This
means logs are no longer cached on fatal errors and this ensures that an
out-of-date log is not associated with an up-to-date trace. Separately,
`.hash` file generation was changed to be more reliable as well. `.hash`
files are deleted as part of the build and always regenerate with
`--rehash`.

Closes #2751.

(cherry picked from commit db74ee9)
  • Loading branch information
tydeu authored and kim-em committed Jun 14, 2024
1 parent ddf11b4 commit 7ed9b73
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 105 deletions.
182 changes: 103 additions & 79 deletions src/lake/Lake/Build/Common.lean
Original file line number Diff line number Diff line change
Expand Up @@ -25,109 +25,137 @@ and will be rebuilt on different host platforms.
def platformTrace := pureHash System.Platform.target

/--
Checks if the `info` is up-to-date by comparing `depTrace` with `traceFile`.
If old mode is enabled (e.g., `--old`), uses the modification time of `oldTrace`
The build trace file format,
which stores information about a (successful) build.
-/
structure BuildMetadata where
depHash : Hash
log : Log
deriving ToJson, FromJson

/-- Read persistent trace data from a file. -/
def readTraceFile? (path : FilePath) : LogIO (Option BuildMetadata) := OptionT.run do
match (← IO.FS.readFile path |>.toBaseIO) with
| .ok contents =>
match Json.parse contents >>= fromJson? with
| .ok contents => return contents
| .error e => logVerbose s!"{path}: invalid trace file: {e}"; failure
| .error (.noFileOrDirectory ..) => failure
| .error e => logWarning s!"{path}: read failed: {e}"; failure

/-- Write persistent trace data to a file. -/
def writeTraceFile (path : FilePath) (depTrace : BuildTrace) (log : Log) := do
createParentDirs path
let data := {log, depHash := depTrace.hash : BuildMetadata}
IO.FS.writeFile path (toJson data).pretty

/--
Checks if the `info` is up-to-date by comparing `depTrace` with `depHash`.
If old mode is enabled (e.g., `--old`), uses the `oldTrace` modification time
as the point of comparison instead.
-/
@[specialize] def BuildTrace.checkUpToDate
@[specialize] def checkHashUpToDate
[CheckExists ι] [GetMTime ι]
(info : ι) (depTrace : BuildTrace) (traceFile : FilePath)
(oldTrace := depTrace)
(info : ι) (depTrace : BuildTrace) (depHash : Option Hash)
(oldTrace := depTrace.mtime)
: JobM Bool := do
if (← getIsOldMode) then
if (← oldTrace.checkAgainstTime info) then
return true
else if let some hash ← Hash.load? traceFile then
depTrace.checkAgainstHash info hash
else
return false
if depTrace.hash == depHash then
checkExists info
else if (← getIsOldMode) then
oldTrace.checkUpToDate info
else
depTrace.checkAgainstFile info traceFile
return false

/--
Build `info` unless it already exists and `depTrace` matches that
of the `traceFile`. If rebuilt, save the new `depTrace` to the `tracefile`.
Checks whether `info` is up-to-date, and runs `build` to recreate it if not.
If rebuilt, saves the new `depTrace` and build log to `traceFile`.
Returns whether `info` was already up-to-date.
**Up-to-date Checking**
If `traceFile` exists, checks that the hash in `depTrace` matches that
of the `traceFile`. If not, and old mode is enabled (e.g., `--old`), falls back
to the `oldTrace` modification time as the point of comparison.
If up-to-date, replay the build log stored in `traceFile`.
If `traceFile` does not exist, checks that `info` has a newer modification time
then `depTrace` / `oldTrace`. No log will be replayed.
-/
@[inline] def buildUnlessUpToDate?
@[specialize] def buildUnlessUpToDate?
[CheckExists ι] [GetMTime ι] (info : ι)
(depTrace : BuildTrace) (traceFile : FilePath) (build : JobM PUnit)
(action : JobAction := .build) (oldTrace := depTrace)
(action : JobAction := .build) (oldTrace := depTrace.mtime)
: JobM Bool := do
if (← depTrace.checkUpToDate info traceFile oldTrace) then
if let some data ← readTraceFile? traceFile then
if (← checkHashUpToDate info depTrace data.depHash oldTrace) then
updateAction .replay
data.log.replay
return true
else if (← getIsOldMode) then
if (← oldTrace.checkUpToDate info) then
return true
else if (← depTrace.checkAgainstTime info) then
return true
else if (← getNoBuild) then
if (← getNoBuild) then
IO.Process.exit noBuildCode.toUInt8
else
updateAction action
build
depTrace.writeToFile traceFile
let iniPos ← getLogPos
build -- fatal errors will not produce a trace (or cache their log)
let log := (← getLog).takeFrom iniPos
writeTraceFile traceFile depTrace log
return false

/--
Build `info` unless it already exists and `depTrace` matches that
of the `traceFile`. If rebuilt, save the new `depTrace` to the `tracefile`.
Checks whether `info` is up-to-date, and runs `build` to recreate it if not.
If rebuilt, saves the new `depTrace` and build log to `traceFile`.
See `buildUnlessUpToDate?` for more details on how Lake determines whether
`info` is up-to-date.
-/
@[inline] def buildUnlessUpToDate
[CheckExists ι] [GetMTime ι] (info : ι)
(depTrace : BuildTrace) (traceFile : FilePath) (build : JobM PUnit)
(action : JobAction := .build) (oldTrace := depTrace)
(action : JobAction := .build) (oldTrace := depTrace.mtime)
: JobM PUnit := do
discard <| buildUnlessUpToDate? info depTrace traceFile build action oldTrace

/-- Fetch the trace of a file that may have its hash already cached in a `.hash` file. -/
def fetchFileTrace (file : FilePath) : JobM BuildTrace := do
/-- Computes the hash of a file and saves it to a `.hash` file. -/
def cacheFileHash (file : FilePath) : IO Unit := do
let hash ← computeHash file
let hashFile := FilePath.mk <| file.toString ++ ".hash"
createParentDirs hashFile
IO.FS.writeFile hashFile hash.toString

/-- Remove the cached hash of a file (its `.hash` file). -/
def clearFileHash (file : FilePath) : IO Unit := do
try
IO.FS.removeFile <| file.toString ++ ".hash"
catch
| .noFileOrDirectory .. => pure ()
| e => throw e

/--
Fetches the hash of a file that may already be cached in a `.hash` file.
If the `.hash` file does not exist or hash files are not trusted
(e.g., with `--rehash`), creates it with a newly computed hash.
-/
def fetchFileHash (file : FilePath) : JobM Hash := do
let hashFile := FilePath.mk <| file.toString ++ ".hash"
if (← getTrustHash) then
let hashFile := FilePath.mk <| file.toString ++ ".hash"
if let some hash ← Hash.load? hashFile then
return .mk hash (← getMTime file)
else
let hash ← computeHash file
IO.FS.writeFile hashFile hash.toString
return .mk hash (← getMTime file)
else
computeTrace file

/-- Compute the hash of a file and save it to a `.hash` file. -/
def cacheFileHash (file : FilePath) : IO Hash := do
return hash
let hash ← computeHash file
let hashFile := FilePath.mk <| file.toString ++ ".hash"
createParentDirs hashFile
IO.FS.writeFile hashFile hash.toString
return hash

/-- The build log file format. -/
structure BuildLog where
depHash : Hash
log : Log
deriving ToJson, FromJson

/--
Replays the JSON log in `logFile` (if it exists).
If the log file is malformed, logs a warning.
Fetches the trace of a file that may have already have its hash cached
in a `.hash` file. If no such `.hash` file exists, recomputes and creates it.
-/
def replayBuildLog (logFile : FilePath) (depTrace : BuildTrace) : LogIO PUnit := do
match (← IO.FS.readFile logFile |>.toBaseIO) with
| .ok contents =>
match Json.parse contents >>= fromJson? with
| .ok {log, depHash : BuildLog} =>
if depTrace.hash == depHash then
log.replay
| .error e => logWarning s!"{logFile}: invalid build log: {e}"
| .error (.noFileOrDirectory ..) => pure ()
| .error e => logWarning s!"{logFile}: read failed: {e}"

/-- Saves the log produce by `build` as JSON to `logFile`. -/
def cacheBuildLog
(logFile : FilePath) (depTrace : BuildTrace) (build : JobM PUnit)
: JobM PUnit := do
let iniPos ← getLogPos
let errPos? ← try build; pure none catch errPos => pure (some errPos)
let log := (← getLog).takeFrom iniPos
unless log.isEmpty do
let log := {log, depHash := depTrace.hash : BuildLog}
IO.FS.writeFile logFile (toJson log).pretty
if let some errPos := errPos? then
throw errPos
def fetchFileTrace (file : FilePath) : JobM BuildTrace := do
return .mk (← fetchFileHash file) (← getMTime file)

/--
Builds `file` using `build` unless it already exists and `depTrace` matches
Expand All @@ -137,21 +165,17 @@ the `.hash` file using `fetchFileTrace`. Build logs (if any) are saved to
a `.log.json` file and replayed from there if the build is skipped.
For example, given `file := "foo.c"`, compares `depTrace` with that in
`foo.c.trace` with the hash cache in `foo.c.hash` and the log cache in
`foo.c.log.json`.
`foo.c.trace` with the hash cached in `foo.c.hash` and the log cached in
`foo.c.trace`.
-/
def buildFileUnlessUpToDate
(file : FilePath) (depTrace : BuildTrace) (build : JobM PUnit)
: JobM BuildTrace := do
let traceFile := FilePath.mk <| file.toString ++ ".trace"
let logFile := FilePath.mk <| file.toString ++ ".log.json"
let build := cacheBuildLog logFile depTrace build
if (← buildUnlessUpToDate? file depTrace traceFile build) then
updateAction .replay
replayBuildLog logFile depTrace
fetchFileTrace file
else
return .mk (← cacheFileHash file) (← getMTime file)
buildUnlessUpToDate file depTrace traceFile do
build
clearFileHash file
fetchFileTrace file

/--
Build `file` using `build` after `dep` completes if the dependency's
Expand Down
36 changes: 22 additions & 14 deletions src/lake/Lake/Build/Module.lean
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,22 @@ def Module.recBuildDeps (mod : Module) : FetchM (BuildJob (SearchPath × Array F
def Module.depsFacetConfig : ModuleFacetConfig depsFacet :=
mkFacetJobConfig (·.recBuildDeps)

/-- Remove cached file hashes of the module build outputs (in `.hash` files). -/
def Module.clearOutputHashes (mod : Module) : IO PUnit := do
clearFileHash mod.oleanFile
clearFileHash mod.ileanFile
clearFileHash mod.cFile
if Lean.Internal.hasLLVMBackend () then
clearFileHash mod.bcFile

/-- Cache the file hashes of the module build outputs in `.hash` files. -/
def Module.cacheOutputHashes (mod : Module) : IO PUnit := do
cacheFileHash mod.oleanFile
cacheFileHash mod.ileanFile
cacheFileHash mod.cFile
if Lean.Internal.hasLLVMBackend () then
cacheFileHash mod.bcFile

/--
Recursively build a Lean module.
Fetch its dependencies and then elaborate the Lean source file, producing
Expand All @@ -154,20 +170,12 @@ def Module.recBuildLean (mod : Module) : FetchM (BuildJob Unit) := do
let argTrace : BuildTrace := pureHash mod.leanArgs
let srcTrace : BuildTrace ← computeTrace { path := mod.leanFile : TextFilePath }
let modTrace := (← getLeanTrace).mix <| argTrace.mix <| srcTrace.mix depTrace
let upToDate ← buildUnlessUpToDate? (oldTrace := srcTrace) mod modTrace mod.traceFile do
let hasLLVM := Lean.Internal.hasLLVMBackend ()
let bcFile? := if hasLLVM then some mod.bcFile else none
cacheBuildLog mod.logFile modTrace do
compileLeanModule mod.leanFile mod.oleanFile mod.ileanFile mod.cFile bcFile?
(← getLeanPath) mod.rootDir dynlibs dynlibPath (mod.weakLeanArgs ++ mod.leanArgs) (← getLean)
discard <| cacheFileHash mod.oleanFile
discard <| cacheFileHash mod.ileanFile
discard <| cacheFileHash mod.cFile
if hasLLVM then
discard <| cacheFileHash mod.bcFile
if upToDate then
updateAction .replay
replayBuildLog mod.logFile modTrace
let upToDate ← buildUnlessUpToDate? (oldTrace := srcTrace.mtime) mod modTrace mod.traceFile do
compileLeanModule mod.leanFile mod.oleanFile mod.ileanFile mod.cFile mod.bcFile?
(← getLeanPath) mod.rootDir dynlibs dynlibPath (mod.weakLeanArgs ++ mod.leanArgs) (← getLean)
mod.clearOutputHashes
unless upToDate && (← getTrustHash) do
mod.cacheOutputHashes
return ((), depTrace)

/-- The `ModuleFacetConfig` for the builtin `leanArtsFacet`. -/
Expand Down
38 changes: 30 additions & 8 deletions src/lake/Lake/Build/Trace.lean
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,17 @@ instance [GetMTime α] : ComputeTrace α IO MTime := ⟨getMTime⟩
instance : GetMTime FilePath := ⟨getFileMTime⟩
instance : GetMTime TextFilePath := ⟨(getFileMTime ·.path)⟩

/--
Check if `info` is up-to-date using modification time.
That is, check if the info is newer than `self`.
-/
@[specialize] def MTime.checkUpToDate
[GetMTime i] (info : i) (self : MTime)
: BaseIO Bool := do
match (← getMTime info |>.toBaseIO) with
| .ok mtime => return self < mtime
| .error _ => return false

--------------------------------------------------------------------------------
/-! # Lake Build Trace (Hash + MTIme) -/
--------------------------------------------------------------------------------
Expand Down Expand Up @@ -242,32 +253,43 @@ instance : MixTrace BuildTrace := ⟨mix⟩
Check if the info is up-to-date using a hash.
That is, check that info exists and its input hash matches this trace's hash.
-/
@[inline] def checkAgainstHash [CheckExists i]
@[specialize] def checkAgainstHash [CheckExists i]
(info : i) (hash : Hash) (self : BuildTrace) : BaseIO Bool :=
pure (hash == self.hash) <&&> checkExists info

/--
Check if the info is up-to-date using modification time.
That is, check if the info is newer than this input trace's modification time.
-/
@[inline] def checkAgainstTime [GetMTime i]
(info : i) (self : BuildTrace) : BaseIO Bool :=
EIO.catchExceptions (h := fun _ => pure false) do
return self.mtime < (← getMTime info)
@[inline] def checkAgainstTime
[GetMTime i] (info : i) (self : BuildTrace)
: BaseIO Bool := do
self.mtime.checkUpToDate info

/--
Check if the info is up-to-date using a trace file.
If the file exists, match its hash to this trace's hash.
If not, check if the info is newer than this trace's modification time.
**Deprecated:** Should not be done manually,
but as part of `buildUnlessUpToDate`.
-/
@[inline] def checkAgainstFile [CheckExists i] [GetMTime i]
(info : i) (traceFile : FilePath) (self : BuildTrace) : BaseIO Bool := do
@[deprecated, specialize] def checkAgainstFile
[CheckExists i] [GetMTime i]
(info : i) (traceFile : FilePath) (self : BuildTrace)
: BaseIO Bool := do
if let some hash ← Hash.load? traceFile then
self.checkAgainstHash info hash
else
self.checkAgainstTime info

@[inline] def writeToFile (traceFile : FilePath) (self : BuildTrace) : IO PUnit := do
/--
Write trace to a file.
**Deprecated:** Should not be done manually,
but as part of `buildUnlessUpToDate`.
-/
@[deprecated] def writeToFile (traceFile : FilePath) (self : BuildTrace) : IO PUnit := do
createParentDirs traceFile
IO.FS.writeFile traceFile self.hash.toString

Expand Down
6 changes: 3 additions & 3 deletions src/lake/Lake/Config/Module.lean
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,6 @@ abbrev pkg (self : Module) : Package :=
@[inline] def ileanFile (self : Module) : FilePath :=
self.leanLibPath "ilean"

@[inline] def logFile (self : Module) : FilePath :=
self.leanLibPath "log.json"

@[inline] def traceFile (self : Module) : FilePath :=
self.leanLibPath "trace"

Expand All @@ -101,6 +98,9 @@ abbrev pkg (self : Module) : Package :=
@[inline] def bcFile (self : Module) : FilePath :=
self.irPath "bc"

def bcFile? (self : Module) : Option FilePath :=
if Lean.Internal.hasLLVMBackend () then some self.bcFile else none

@[inline] def bcoFile (self : Module) : FilePath :=
self.irPath "bc.o"

Expand Down
2 changes: 1 addition & 1 deletion src/lake/Lake/Util/IO.lean
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ Authors: Mac Malone
namespace Lake

/-- Creates any missing parent directories of `path`. -/
@[inline] def createParentDirs (path : System.FilePath) : IO Unit := do
def createParentDirs (path : System.FilePath) : IO Unit := do
if let some dir := path.parent then IO.FS.createDirAll dir

0 comments on commit 7ed9b73

Please sign in to comment.