bazel C++ coverage for Envoy crashes JVM

Created on 28 Jan 2019  路  22Comments  路  Source: bazelbuild/bazel

With Bazel 0.22.0 and gcc 7.3.0, if you clone https://github.com/envoyproxy/envoy and run:

bazel coverage --experimental_cc_coverage test/...  --instrumentation_filter=//source/...,//include/... --coverage_report_generator=@bazel_tools//tools/test/CoverageOutputGenerator/java/com/google/devtools/coverageoutputgenerator:Main --combined_report=lcov

the following stack trace is emitted:

Internal error thrown during build. Printing stack trace: java.lang.RuntimeException: Unrecoverable error while evaluating node 'ActionLookupData{actionLookupKey=com.google.devtools.build.lib.skyframe.CoverageReportValue$CoverageReportKey@2fe2e16c, actionIndex=1}' (requested by nodes 'File:[[<execution
_root>]bazel-out]_coverage/_coverage_report.dat')                                                              
        at com.google.devtools.build.skyframe.AbstractParallelEvaluator$Evaluate.run(AbstractParallelEvaluator.java:514)                              
        at com.google.devtools.build.lib.concurrent.AbstractQueueVisitor$WrappedRunnable.run(AbstractQueueVisitor.java:368)         
        at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)                                     
        at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)                                    
        at java.base/java.lang.Thread.run(Unknown Source)                                              
Caused by: java.lang.IllegalStateException: null for File:[[<execution_root>]bazel-out/host/bin]external/bazel_tools/tools/test/CoverageOutputGenerator/java/com/google/devtools/coverageoutputgenerator/Main.jar
        at com.google.devtools.build.lib.skyframe.ActionMetadataHandler.getMetadata(ActionMetadataHandler.java:206)                                   
        at com.google.devtools.build.lib.skyframe.SkyframeActionExecutor$DelegatingPairFileCache.getMetadata(SkyframeActionExecutor.java:1377)
        at com.google.devtools.build.lib.remote.TreeNodeRepository.getInputMetadata(TreeNodeRepository.java:500)           
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:330)            
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)                                                                                                                                                                                       
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)         
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)                         
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)    
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)         
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)                                                                                                  
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)    
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)                               
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348) 
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)
...
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildParentNode(TreeNodeRepository.java:348)
        at com.google.devtools.build.lib.remote.TreeNodeRepository.buildFromActionInputs(TreeNodeRepository.java:285)
        at com.google.devtools.build.lib.remote.RemoteSpawnCache.lookup(RemoteSpawnCache.java:104)
        at com.google.devtools.build.lib.exec.AbstractSpawnStrategy.exec(AbstractSpawnStrategy.java:102)
        at com.google.devtools.build.lib.exec.AbstractSpawnStrategy.exec(AbstractSpawnStrategy.java:76)
        at com.google.devtools.build.lib.exec.ProxySpawnActionContext.exec(ProxySpawnActionContext.java:44)
        at com.google.devtools.build.lib.bazel.coverage.CoverageReportActionBuilder$CoverageReportAction.execute(CoverageReportActionBuilder.java:125)
        at com.google.devtools.build.lib.skyframe.SkyframeActionExecutor$ActionRunner.executeAction(SkyframeActionExecutor.java:889)
        at com.google.devtools.build.lib.skyframe.SkyframeActionExecutor$ActionRunner.call(SkyframeActionExecutor.java:854)
        at com.google.devtools.build.lib.skyframe.SkyframeActionExecutor$ActionRunner.call(SkyframeActionExecutor.java:787)
        at java.base/java.util.concurrent.FutureTask.run(Unknown Source)
        at com.google.devtools.build.lib.skyframe.SkyframeActionExecutor.executeAction(SkyframeActionExecutor.java:541)
        at com.google.devtools.build.lib.skyframe.ActionExecutionFunction.checkCacheAndExecuteIfNeeded(ActionExecutionFunction.java:697)
        at com.google.devtools.build.lib.skyframe.ActionExecutionFunction.compute(ActionExecutionFunction.java:240)
        at com.google.devtools.build.skyframe.AbstractParallelEvaluator$Evaluate.run(AbstractParallelEvaluator.java:437)
        ... 4 more

This seems to occur when running hot_restart_test, which isn't even a direct C++ unit test, but instead a shell script integration test wrapped around the Envoy binary, which is executed multiple times under the script. I'm guessing there might be some issue related to that.

team-Rules-Server bug untriaged

All 22 comments

CC @dslomov @iirina

@lberki please take a look or assign someone to take a look - apparently java/com/google/devtools/coverageoutputgenerator/Main.jar is not bundled with bazel?

Erm, sorry for the slow reply. I have an inbox problem these days. @buchgr , can you take a look? doesn't seem to be specific to coverage collection.

@htuch : do I need to add some remote execution flags? The above command line doesn't reproduce the issue by itself.

@lberki nup, that's the CLI I used. Can you make sense of the JVM strack trace without repro? This happens to me consistently.

Well, I have ideas, but without a reproduction, it's hard to test whether any fix I can conceive fixes the problem. @buchgr , any ideas about reproduction this on our workstations? (it appears to require remote execution)

@lberki there is no remote execution happening; I'm doing this on a gLinux workstation which should be pretty similar to what you have :)

I am getting this exact same error when running locally, but for rules_scala with some work-in-progress support for jacoco code coverage.

I can't reproduce this yet but it looks like an issue with what bazel embeds into @bazel_tools. By a quick look I already found a bug at https://github.com/bazelbuild/bazel/blob/master/tools/test/BUILD#L124. It's a leftover from renaming LcovMerger to CoverageOutputGenerator. It's not clear to me why this wasn't a bigger problem until now. I'll send a PR, however I don't know how to test if it fixes the issue since I can't reproduce.

@iirina I will patch this PR onto master locally and try again today.

After thinking more about it I'm not confident anymore that this is the issue. I think the "bug" I found is actually not relevant because the embedding of CoverageOutputGenerator/ happens someplace else (in //tools:embedded_tools_srcs).

The CoverageReportGeneratorAction tries to access a file that does not exist: external/bazel_tools/tools/test/CoverageOutputGenerator/java/com/google/devtools/coverageoutputgenerator/Main.jar. The file shouldn't exist. I don't understand how the action generates that file path, since it is never referenced... From the refrenced label tools/test/CoverageOutputGenerator/java/com/google/devtools/coverageoutputgenerator:Main (a java_binary) it tries to look for bazel_tools/tools/test/CoverageOutputGenerator/java/com/google/devtools/coverageoutputgenerator/Main.jar. And that doesn't happen in every environment (I can't reproduce). Maybe the function that computes the tools label is different. I'll investigate further...

@iirina yeah, no luck at HEAD + #7412. I'll setup a 1:1 meeting to debug this in real-time.

@andyscott do you also get the similar error when using --combined_report?

@htuch From the logs I can see remote execution is enabled. Do you have some flags in some bazelrc maybe that enables it (e.g. --disk_cache, --remote_executor)? I was able to replicate the issue when using --disk_cache. Code coverage doesn't work well with remote execution at this point.

Hmm how --disk_cache is relevant? The execution is not remote... (just trying to understand)

Local disk cache also (confusingly) behind the RemoteSpawnCache interface. And yes, this confused me, too :)

Yes, I had build --remote_http_cache=http://localhost:9090 enabled for just caching. There is no remote execution happening though. Thanks for spotting this, if I disable that then I can get a coverage report.

@iirina I get the same error with the same command, but for the Scala rules.

In a few days I'll have PRs up where I can share a repo, if needed.

I should add that I also have a disk cache enabled for all of my local builds.

This seems like a simple case of the full CoverageGenerator inputs not actually being declared. I did not try this, but I bet something like this would do the trick:

diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD
index fa0c6af555..c252ee850d 100644
--- a/src/main/java/com/google/devtools/build/lib/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/BUILD
@@ -902,6 +902,7 @@ java_library(
         ":util",
         "//src/main/java/com/google/devtools/build/lib/actions",
         "//src/main/java/com/google/devtools/build/lib/actions:localhost_capacity",
+        "//src/main/java/com/google/devtools/build/lib/collect/nestedset",
         "//src/main/java/com/google/devtools/build/lib/concurrent",
         "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
         "//src/main/java/com/google/devtools/common/options",
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/coverage/CoverageReportActionBuilder.java b/src/main/java/com/google/devtools/build/lib/bazel/coverage/CoverageReportActionBuilder.java
index ba1ef1deec..e375bcfdb1 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/coverage/CoverageReportActionBuilder.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/coverage/CoverageReportActionBuilder.java
@@ -19,6 +19,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import com.google.devtools.build.lib.actions.AbstractAction;
 import com.google.devtools.build.lib.actions.Action;
+import com.google.devtools.build.lib.actions.ActionEnvironment;
 import com.google.devtools.build.lib.actions.ActionExecutionContext;
 import com.google.devtools.build.lib.actions.ActionExecutionException;
 import com.google.devtools.build.lib.actions.ActionKeyContext;
@@ -32,7 +33,6 @@ import com.google.devtools.build.lib.actions.BaseSpawn;
 import com.google.devtools.build.lib.actions.ExecException;
 import com.google.devtools.build.lib.actions.NotifyOnActionCacheHit;
 import com.google.devtools.build.lib.actions.ResourceSet;
-import com.google.devtools.build.lib.actions.RunfilesSupplier;
 import com.google.devtools.build.lib.actions.Spawn;
 import com.google.devtools.build.lib.actions.SpawnActionContext;
 import com.google.devtools.build.lib.actions.SpawnResult;
@@ -44,6 +44,7 @@ import com.google.devtools.build.lib.analysis.test.CoverageReportActionFactory.C
 import com.google.devtools.build.lib.analysis.test.TestProvider;
 import com.google.devtools.build.lib.analysis.test.TestProvider.TestParams;
 import com.google.devtools.build.lib.analysis.test.TestRunnerAction;
+import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
 import com.google.devtools.build.lib.concurrent.ThreadSafety.Immutable;
 import com.google.devtools.build.lib.events.Event;
 import com.google.devtools.build.lib.events.EventHandler;
@@ -95,16 +96,25 @@ public final class CoverageReportActionBuilder {
     private final ImmutableList<String> command;
     private final boolean remotable;
     private final String locationMessage;
-    private final RunfilesSupplier runfilesSupplier;

-    protected CoverageReportAction(ActionOwner owner, Iterable<Artifact> inputs,
-        Iterable<Artifact> outputs, ImmutableList<String> command, String locationMessage,
-        boolean remotable, RunfilesSupplier runfilesSupplier) {
-      super(owner, inputs, outputs);
+    protected CoverageReportAction(
+        ActionOwner owner,
+        Iterable<Artifact> inputs,
+        Iterable<Artifact> outputs,
+        ImmutableList<String> command,
+        String locationMessage,
+        boolean remotable,
+        FilesToRunProvider reportGenerator) {
+      super(
+          owner,
+          reportGenerator.getFilesToRun(),
+          inputs,
+          reportGenerator.getRunfilesSupplier(),
+          outputs,
+          ActionEnvironment.EMPTY);
       this.command = command;
       this.remotable = remotable;
       this.locationMessage = locationMessage;
-      this.runfilesSupplier = runfilesSupplier;
     }

     @Override
@@ -114,13 +124,14 @@ public final class CoverageReportActionBuilder {
         ImmutableMap<String, String> executionInfo = remotable
             ? ImmutableMap.<String, String>of()
             : ImmutableMap.<String, String>of("local", "");
-        Spawn spawn = new BaseSpawn(
-            command,
-            ImmutableMap.<String, String>of(),
-            executionInfo,
-            runfilesSupplier,
-            this,
-            LOCAL_RESOURCES);
+        Spawn spawn =
+            new BaseSpawn(
+                command,
+                ImmutableMap.<String, String>of(),
+                executionInfo,
+                getRunfilesSupplier(),
+                this,
+                LOCAL_RESOURCES);
         List<SpawnResult> spawnResults =
             actionExecutionContext.getContext(SpawnActionContext.class)
                 .exec(spawn, actionExecutionContext);
@@ -239,15 +250,15 @@ public final class CoverageReportActionBuilder {
     PathFragment coverageDir = TestRunnerAction.COVERAGE_TMP_ROOT;
     Artifact lcovOutput = args.factory().getDerivedArtifact(
         coverageDir.getRelative("_coverage_report.dat"), root, args.artifactOwner());
-    Artifact reportGeneratorExec = args.reportGenerator().getExecutable();
     args = CoverageArgs.createCopyWithCoverageDirAndLcovOutput(args, coverageDir, lcovOutput);
     ImmutableList<String> actionArgs = argsFunc.apply(args);

-    ImmutableList<Artifact> inputs = ImmutableList.<Artifact>builder()
-        .addAll(args.coverageArtifacts())
-        .add(reportGeneratorExec)
-        .add(args.lcovArtifact())
-        .build();
+    Iterable<Artifact> inputs =
+        NestedSetBuilder.<Artifact>stableOrder()
+            .addAll(args.coverageArtifacts())
+            .addTransitive(args.reportGenerator().getFilesToRun())
+            .add(args.lcovArtifact())
+            .build();
     return new CoverageReportAction(
         ACTION_OWNER,
         inputs,
@@ -255,6 +266,6 @@ public final class CoverageReportActionBuilder {
         actionArgs,
         locationFunc.apply(args),
         !args.htmlReport(),
-        args.reportGenerator().getRunfilesSupplier());
+        args.reportGenerator());
   }
 }

@htuch is this still an issue with current Bazel?

We've switched out coverage technique to llvm-cov and no longer use gcc, so not sure if it's going to possible to validate this on the Envoy tree.

Was this page helpful?
0 / 5 - 0 ratings