The fundamental issue is here:
https://github.com/pantsbuild/pants/blob/ef9e0543a80568ee270f63e1bbbeddb92c4faa08/src/python/pants/engine/target.py#L334-L339
The cls.PluginField will always be exactly Target.PluginField unless some subclass shadows the inner class member. I.E.:
$ python
Python 3.8.3 (default, May 17 2020, 18:15:42)
[GCC 10.1.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> class Outer:
... class Inner:
... ...
...
>>> class SubclassNaive(Outer):
... ...
...
>>> class SubclassVerbose(Outer):
... class Inner:
... ...
...
>>> Outer.Inner is SubclassNaive.Inner
True
>>> Outer.Inner is SubclassVerbose.Inner
False
>>>
The effect of which is probably best shown with a few tests:
$ git diff src/python/pants/engine/target_test.py
diff --git a/src/python/pants/engine/target_test.py b/src/python/pants/engine/target_test.py
index f2646d33c..16a3a9060 100644
--- a/src/python/pants/engine/target_test.py
+++ b/src/python/pants/engine/target_test.py
@@ -13,7 +13,7 @@ from pants.base.specs import FilesystemLiteralSpec
from pants.engine.addresses import Address, Addresses
from pants.engine.fs import EMPTY_DIGEST, FileContent, InputFilesContent, PathGlobs, Snapshot
from pants.engine.internals.scheduler import ExecutionError
-from pants.engine.rules import RootRule, rule
+from pants.engine.rules import RootRule, RuleIndex, rule
from pants.engine.selectors import Get, Params
from pants.engine.target import (
AmbiguousCodegenImplementationsException,
@@ -270,35 +270,75 @@ def test_async_field() -> None:
assert "//:lib" in str(exc)
-def test_add_custom_fields() -> None:
- class CustomField(BoolField):
- alias: ClassVar = "custom_field"
- default: ClassVar = False
+class RustTarget(Target):
+ alias: ClassVar = "rust"
+ core_fields: ClassVar = (Dependencies, Sources)
+
+
+class RustEditionCustomField(StringField):
+ alias: ClassVar = "rust_edition"
+ required: ClassVar = True
+
+
+class FortranCustomField(BoolField):
+ alias: ClassVar = "custom_field"
+ default: ClassVar = False
- union_membership = UnionMembership({FortranTarget.PluginField: [CustomField]})
- tgt_values = {CustomField.alias: True}
+
+def assert_fortran_target_fields(union_membership: UnionMembership) -> None:
+ assert {FortranExtensions, FortranSources, FortranCustomField} == set(
+ FortranTarget.class_field_types(union_membership)
+ )
+
+
+def test_add_custom_fields_last_wins_bug() -> None:
+ union_membership = UnionMembership(
+ {
+ FortranTarget.PluginField: [FortranCustomField],
+ RustTarget.PluginField: [RustEditionCustomField],
+ }
+ )
+ assert_fortran_target_fields(union_membership)
+
+
+def test_add_custom_fields_full_union_bug() -> None:
+ rule_index = RuleIndex.create(
+ [
+ UnionRule(FortranTarget.PluginField, FortranCustomField),
+ UnionRule(RustTarget.PluginField, RustEditionCustomField),
+ ]
+ )
+ union_membership = UnionMembership(rule_index.normalized_rules().union_rules)
+ assert_fortran_target_fields(union_membership)
+
+
+def test_add_custom_fields() -> None:
+ union_membership = UnionMembership({FortranTarget.PluginField: [FortranCustomField]})
+ tgt_values = {FortranCustomField.alias: True}
tgt = FortranTarget(
tgt_values, address=Address.parse(":lib"), union_membership=union_membership
)
- assert tgt.field_types == (FortranExtensions, FortranSources, CustomField)
+ assert tgt.field_types == (FortranExtensions, FortranSources, FortranCustomField)
assert tgt.core_fields == (FortranExtensions, FortranSources)
- assert tgt.plugin_fields == (CustomField,)
- assert tgt.has_field(CustomField) is True
+ assert tgt.plugin_fields == (FortranCustomField,)
+ assert tgt.has_field(FortranCustomField) is True
assert FortranTarget.class_field_types(union_membership=union_membership) == (
FortranExtensions,
FortranSources,
- CustomField,
+ FortranCustomField,
+ )
+ assert (
+ FortranTarget.class_has_field(FortranCustomField, union_membership=union_membership) is True
)
- assert FortranTarget.class_has_field(CustomField, union_membership=union_membership) is True
- assert tgt[CustomField].value is True
+ assert tgt[FortranCustomField].value is True
default_tgt = FortranTarget(
{}, address=Address.parse(":default"), union_membership=union_membership
)
- assert default_tgt[CustomField].value is False
+ assert default_tgt[FortranCustomField].value is False
def test_override_preexisting_field_via_new_target() -> None:
Which reveals:
$ NO_REGEN_PEX=true ./v2 test src/python/pants/engine/target_test.py --pytest-args=-vvktest_add_custom_fields_bug_
01:37:06:659 [INFO] Starting tests: src/python/pants/engine:tests
01:37:11:732 [INFO] Tests failed: src/python/pants/engine:tests
饜剛 src/python/pants/engine:tests
============================= test session starts ==============================
platform linux -- Python 3.8.3, pytest-5.3.5, py-1.8.1, pluggy-0.13.1 -- /home/jsirois/dev/pantsbuild/jsirois-pants/build-support/virtualenvs/Linux/pants_dev_deps.py38.venv/bin/python
cachedir: .pytest_cache
rootdir: /tmp/process-executionfueO7a
plugins: cov-2.8.1, icdiff-0.5, timeout-1.3.4
collecting ... collected 32 items / 30 deselected / 2 selected
pants/engine/target_test.py::test_add_custom_fields_bug_last_wins FAILED [ 50%]
pants/engine/target_test.py::test_add_custom_fields_bug_full_union FAILED [100%]
=================================== FAILURES ===================================
_____________________ test_add_custom_fields_bug_last_wins _____________________
def test_add_custom_fields_bug_last_wins() -> None:
union_membership = UnionMembership(
{
FortranTarget.PluginField: [FortranCustomField],
RustTarget.PluginField: [RustEditionCustomField],
}
)
> assert_fortran_target_fields(union_membership)
pants/engine/target_test.py:301:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
union_membership = UnionMembership(union_rules=FrozenDict({<class 'pants.util.meta.PluginField'>: FrozenOrderedSet([<class 'pants.engine.target_test.RustEditionCustomField'>])}))
def assert_fortran_target_fields(union_membership: UnionMembership) -> None:
> assert {FortranExtensions, FortranSources, FortranCustomField} == set(
FortranTarget.class_field_types(union_membership)
)
E AssertionError: assert equals failed
E set([ set([
E <class 'pants.engine.target_t <class 'pants.engine.target_t
E est.FortranExtensions'>, est.FortranExtensions'>,
E <class 'pants.engine.target_t <class 'pants.engine.target_t
E est.FortranCustomField'>, est.RustEditionCustomField'>,
E <class 'pants.engine.target_t <class 'pants.engine.target_t
E est.FortranSources'>, est.FortranSources'>,
E ]) ])
pants/engine/target_test.py:289: AssertionError
____________________ test_add_custom_fields_bug_full_union _____________________
def test_add_custom_fields_bug_full_union() -> None:
rule_index = RuleIndex.create(
[
UnionRule(FortranTarget.PluginField, FortranCustomField),
UnionRule(RustTarget.PluginField, RustEditionCustomField),
]
)
union_membership = UnionMembership(rule_index.normalized_rules().union_rules)
> assert_fortran_target_fields(union_membership)
pants/engine/target_test.py:312:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
union_membership = UnionMembership(union_rules=FrozenDict({<class 'pants.util.meta.PluginField'>: FrozenOrderedSet([<class 'pants.engine.target_test.FortranCustomField'>, <class 'pants.engine.target_test.RustEditionCustomField'>])}))
def assert_fortran_target_fields(union_membership: UnionMembership) -> None:
> assert {FortranExtensions, FortranSources, FortranCustomField} == set(
FortranTarget.class_field_types(union_membership)
)
E AssertionError: assert equals failed
E set([ set([
E <class 'pants.engine.target_t <class 'pants.engine.target_t
E est.FortranExtensions'>, est.FortranExtensions'>,
E <class 'pants.engine.target_t
E est.RustEditionCustomField'>,
E <class 'pants.engine.target_t <class 'pants.engine.target_t
E est.FortranCustomField'>, est.FortranCustomField'>,
E <class 'pants.engine.target_t <class 'pants.engine.target_t
E est.FortranSources'>, est.FortranSources'>,
E ]) ])
pants/engine/target_test.py:289: AssertionError
======================= 2 failed, 30 deselected in 1.19s =======================
Noting that this is the same weeds Goal.Options waded into in the past. The only known way to save the typing intended here is to introduce magic deemed not worthwhile in the past. I'm not sure if there are solutions from left field, but it seems wise to search for those; i.e.: is there another way altogether to support convenient custom fields with the current targeted capabilities and light syntax that does not use unions is one direction.
Somewhat-sortof... Goal.Options _was_ in fact a unique anonymous class, defined within a method named def Options. So it didn't have this same issue of "technically being the same class": each instance was unique to its outer class.
The issue that Goal.Options _did_ have though, was that mypy didn't like it.
Somewhat-sortof... Goal.Options was in fact a unique anonymous class, defined within a method named def Options. So it didn't have this same issue of "technically being the same class": each instance was unique to its outer class.
That was my point, thats what Target.PluginField needs to work today, a unique class per Target subclass.
The other issue with Goal.Options is it was inscrutable. Well before mypy issues came to the fore, my brain had a very hard time re-parsing each time I came across it.
That aside I think this is easy to solve by just not using unions ... more after a quick experiment.
Ah, got it. Thanks!
I think the most straightforward way to right the ship here using existing code is to just plumb Target.plugin_fields via a dedicated registry for those instead of trying to use the UnionMembership registry for this [1]. The registry could be built up like UnionMembership is built up with new psuedo-rules:
def rules():
return [
*other_rules(),
PluginField(PythonTarget, JvmVersion),
PluginField(PythonTarget, Flavor)
]
Or we could even start breaking out new plugin entrypoints to not muddle rules.
That PluginField(PythonTarget, JvmVersion) suggestion reads nicely. I like it. The only downside I see is adding more concepts to rules.py; in the past, Stu has been (rightfully) cautious of adding new register.py entry points to avoid an explosion of concepts.
I think this proposal would be acceptable, though, in part because PluginFields are very infrequently used.
--
There is a bug in the current plumbing
Oof. Reminder to self that unit tests do not replace stress testing things in the wild.
--
Alternative proposal is to use the Goal.Options approach to still have PythonLibrary.PluginField.
This did not work very well with Goal.Options because we frequently used Goal.Options in type hints via rule signature. But here, the only interaction with PluginField is saying UnionRule() and then the Target constructor inspecting the union membership. I don't think MyPy will complain because there are so few call sites. And I don't think users will be too confused because this is such a niche topic.
OK - given these two I vote for elevating plugin field registration to its own concept. Its still a concept only folks that need it need to learn. And - unlike today, they do not need to learn an unrelated concept - unions. Which picking this path would make truly unrelated since they (unions) would now truly be able to capture subclass relationships always and possibly be eliminated boilerplate via __subclasses__.
Sounds good! I agree. Thanks for putting this all together.
And a plug for not listening to reads nicely too much as a guid post. Although PluginField(...) does read nicely when focusing on just that line, when you read more widely and see a few lines above this is a "rule", the niceness evaporates. Its not a "rule" as has been conceptually indoctrinated - its something a good bit different. I do agree its convenient though and punts which entrypoints v2 plugins should have for a while.
I'm slightly negative on new entrypoints as Eric said, as I think it would be preferable to adapt the usecases to avoid (or use more gently) subclassing rather than add new facilities. But not worth blocking over.
and possibly be eliminated boilerplate via __subclasses__.
This I'm more skeptical of. __subclasses__ is a global mutable singleton, which we have otherwise made great progress on removing from the codebase. They're harder to test, more magical, make it impossible to share a process with another run, etc.
This I'm more skeptical of. __subclasses__ is a global mutable singleton, which we have otherwise made great progress on removing from the codebase. They're harder to test, more magical, make it impossible to share a process with another run, etc.
+1 on this skepticism. I don't think the verbosity of UnionRules is too offensive. A bit boiler-platey, but it's not very common to register a UnionRule and I'm not sure how much of a problem we're solving. I appreciate the transparency of explicitly marking UnionRules.
This is no different in spirit to @frozen_after_init. If we're ok with that I think we're ok with this:
from typing import ClassVar, Optional, Tuple, Type
class Freezable:
__frozen_subclasses: ClassVar[Optional[Tuple[Type["Freezable"]]]] = None
@classmethod
def freeze(cls) -> Tuple[Type["Freezable"]]:
if cls.__frozen_subclasses is None:
cls.__frozen_subclasses = tuple(cls.__subclasses__())
return cls.__frozen_subclasses
@classmethod
def __init_subclass__(cls, **kwargs):
if Freezable.__frozen_subclasses is not None:
raise Exception("No soup for you!")
super().__init_subclass__(**kwargs)
class Sub1(Freezable):
...
print(f"{Freezable.freeze()}")
try:
class Sub2(Freezable):
...
except Exception as e:
print(str(e))
print(f"{Freezable.freeze()}")
Gives:
(<class '__main__.Sub1'>,)
No soup for you!
(<class '__main__.Sub1'>,)
I.E.: We can control the exact point in time before engine construction when subclassing is no longer allowed.
I.E.: We can control the exact point in time before engine construction when subclassing is no longer allowed.
Unless it races someone else doing the same thing? You'd need locking around the mutable bits.
@frozen_after_init doesn't suffer from this because the mutable state isn't shared... while __init__ is running, it has the only reference to the mutable thing.
Well, that time is after calling plugin entrypoints and before collecting all the info to construct a scheduler with - so there is no race right? That is single-threaded.
Well, that time is after calling plugin entrypoints and before collecting all the info to construct a scheduler with - so there is no race right? That is single-threaded.
It is single threaded per instance of config initialization. Parsing config for multiple clients in parallel in pantsd, or running pytest tests in parallel in the same process would cause collisions.
The latter is not a thing I'm aware of. Pants doesn't do that today in v1 or v2 and pytest xdist doesn't support threading as a distribution model.
The former is a hypothetical future-looking argument, right? Today if the options that add or remove plugins are altered, a new pants is started.
The former is a hypothetical future-looking argument, right? Today if the options that add or remove plugins are altered, a new pants is started.
Yea. And I'm not sure that that precise usecase is even desirable. But many other globals/singletons (that were more likely to change run-over-run) have stood between us and concurrent runs of pants within one process, and we've made good progress on removing them.
Ok - its good to know the source of the pushback is future looking and not based on present facts.
Tackling future looking, I completely understand the general sentiment but I believe this case logically excludes itself in any future where we hold the invariant that a new universe of rules should force a new scheduler to be constructed.
Do you see that changing to support concurrent requests better somehow? AFAICT the scheduler is concurrent today.
More generally - we should be afraid and push back against mutable objects being passed to the scheculer constructor, but any object that is frozen before then is fine in principle without the need to soul search each time. That seems true to me but perhaps missing something.
Ok - its good to know the source of the pushback is future looking and not based on present facts.
1.30.x. So the general idea (if not this instance) is pretty immediate.Do you see that changing to support concurrent requests better somehow? AFAICT the scheduler is concurrent today.
I don't think that the ability to dynamically add/remove rules is in the cards, no. We will likely continue to need to know about all of them before constructing the scheduler.
More generally - we should be afraid and push back against mutable objects being passed to the scheculer constructor, but any object that is frozen before then is fine in principle without the need to soul search each time. That seems true to me but perhaps missing something.
If it works for tests (TestBase), where multiple schedulers are used within the same process (although not concurrently: test method at a time), then it might be ok. But the mutability of BuildConfigInitializer and OptionsInitializer still bites us periodically in unit tests. It feels like a pretty significant increase in usability/understandability would be needed to justify the magic involved.
OK - to be clear, from a user perspective there is 0 magic. They expect subclass relationships to work. If an existing rule registers a @union plugin point, they expect subclassing that plugin point to add their variation should just work. I don't think they understand the engine doesn't know subclasses without reading the right readme. Instead, I have to imagine its strange to have to spell out that this is a subclass of that again with a thing not even named after that description - ie: UnionRule, which - further - is not even really a rule as they've come to understand them from writing a function and decorating it with @rule.