Skip to content

Commit e34594f

Browse files
committed
[WIP] Implement an invalid-external-ref lint rule
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
1 parent 57ba169 commit e34594f

9 files changed

Lines changed: 723 additions & 0 deletions

src/extension/alterschema/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ sourcemeta_library(NAMESPACE sourcemeta PROJECT core NAME alterschema
8383
linter/enum_to_const.h
8484
linter/equal_numeric_bounds_to_const.h
8585
linter/forbid_empty_enum.h
86+
linter/invalid_external_ref.h
8687
linter/items_array_default.h
8788
linter/items_schema_default.h
8889
linter/multiple_of_default.h

src/extension/alterschema/alterschema.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <algorithm> // std::sort, std::unique
66
#include <cmath> // std::floor
77
#include <iterator> // std::back_inserter
8+
#include <memory> // std::unique_ptr, std::make_unique
89
#include <unordered_map> // std::unordered_map
910
#include <unordered_set> // std::unordered_set
1011
#include <utility> // std::move
@@ -111,6 +112,7 @@ inline auto APPLIES_TO_POINTERS(std::vector<Pointer> &&keywords)
111112
#include "linter/enum_to_const.h"
112113
#include "linter/equal_numeric_bounds_to_const.h"
113114
#include "linter/forbid_empty_enum.h"
115+
#include "linter/invalid_external_ref.h"
114116
#include "linter/items_array_default.h"
115117
#include "linter/items_schema_default.h"
116118
#include "linter/multiple_of_default.h"
@@ -239,6 +241,7 @@ auto add(SchemaTransformer &bundle, const AlterSchemaMode mode) -> void {
239241
bundle.add<CommentTrim>();
240242
bundle.add<DuplicateExamples>();
241243
bundle.add<SimplePropertiesIdentifiers>();
244+
bundle.add<InvalidExternalRef>();
242245
}
243246

244247
bundle.add<UnnecessaryAllOfRefWrapperModern>();
src/extension/alterschema/linter/invalid_external_ref.h

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
class InvalidExternalRef final : public SchemaTransformRule {
2+
public:
3+
using mutates = std::false_type;
4+
using reframe_after_transform = std::false_type;
5+
InvalidExternalRef()
6+
: SchemaTransformRule{
7+
"invalid_external_ref",
8+
"External references must point to schemas that can be "
9+
"resolved"} {};
10+
11+
[[nodiscard]] auto
12+
condition(const JSON &schema, const JSON &, const Vocabularies &vocabularies,
13+
const SchemaFrame &frame, const SchemaFrame::Location &location,
14+
const SchemaWalker &walker, const SchemaResolver &resolver) const
15+
-> SchemaTransformRule::Result override {
16+
ONLY_CONTINUE_IF(!frame.standalone());
17+
ONLY_CONTINUE_IF(vocabularies.contains_any(
18+
{Vocabularies::Known::JSON_Schema_2020_12_Core,
19+
Vocabularies::Known::JSON_Schema_2019_09_Core,
20+
Vocabularies::Known::JSON_Schema_Draft_7,
21+
Vocabularies::Known::JSON_Schema_Draft_6,
22+
Vocabularies::Known::JSON_Schema_Draft_4,
23+
Vocabularies::Known::JSON_Schema_Draft_3}));
24+
ONLY_CONTINUE_IF(schema.is_object() && schema.defines(KEYWORD) &&
25+
schema.at(KEYWORD).is_string());
26+
27+
auto keyword_pointer{location.pointer};
28+
keyword_pointer.push_back(std::cref(KEYWORD));
29+
const auto reference_entry{
30+
frame.reference(SchemaReferenceType::Static, keyword_pointer)};
31+
ONLY_CONTINUE_IF(reference_entry.has_value());
32+
33+
// If the destination exists in the frame, it is an internal reference
34+
ONLY_CONTINUE_IF(
35+
!frame.traverse(reference_entry->get().destination).has_value());
36+
37+
const auto &reference_base{reference_entry->get().base};
38+
39+
// Empty base with unresolvable destination is a local reference problem
40+
ONLY_CONTINUE_IF(!reference_base.empty());
41+
42+
// Known official metaschemas are always resolvable
43+
ONLY_CONTINUE_IF(!is_known_schema(reference_base));
44+
45+
// If the base exists in the frame, the reference is internal (e.g. an
46+
// embedded $id). A bad fragment on an internal base is handled by the
47+
// unknown_local_ref rule instead
48+
ONLY_CONTINUE_IF(!frame.traverse(reference_base).has_value());
49+
50+
const auto &has_fragment{reference_entry->get().fragment.has_value()};
51+
52+
// Check the resolver cache to avoid redundant lookups
53+
const auto cached{this->resolver_cache_.find(JSON::String{reference_base})};
54+
if (cached != this->resolver_cache_.end()) {
55+
if (!cached->second.has_value()) {
56+
return APPLIES_TO_KEYWORDS(KEYWORD);
57+
}
58+
59+
if (has_fragment) {
60+
return this->is_fragment_invalid(reference_entry->get(), cached->second,
61+
walker, resolver, location)
62+
? APPLIES_TO_KEYWORDS(KEYWORD)
63+
: false;
64+
}
65+
66+
return false;
67+
}
68+
69+
auto remote{resolver(reference_base)};
70+
const auto &[entry, _]{this->resolver_cache_.emplace(
71+
JSON::String{reference_base}, std::move(remote))};
72+
if (!entry->second.has_value()) {
73+
return APPLIES_TO_KEYWORDS(KEYWORD);
74+
}
75+
76+
if (has_fragment) {
77+
return this->is_fragment_invalid(reference_entry->get(), entry->second,
78+
walker, resolver, location)
79+
? APPLIES_TO_KEYWORDS(KEYWORD)
80+
: false;
81+
}
82+
83+
return false;
84+
}
85+
86+
private:
87+
static inline const std::string KEYWORD{"$ref"};
88+
mutable std::unordered_map<JSON::String, std::optional<JSON>> resolver_cache_;
89+
mutable std::unordered_map<JSON::String, std::unique_ptr<SchemaFrame>>
90+
frame_cache_;
91+
92+
[[nodiscard]] auto
93+
is_fragment_invalid(const SchemaFrame::ReferencesEntry &reference_entry,
94+
const std::optional<JSON> &remote,
95+
const SchemaWalker &walker,
96+
const SchemaResolver &resolver,
97+
const SchemaFrame::Location &location) const -> bool {
98+
const JSON::String base_key{reference_entry.base};
99+
auto frame_iterator{this->frame_cache_.find(base_key)};
100+
if (frame_iterator == this->frame_cache_.end()) {
101+
auto remote_frame{
102+
std::make_unique<SchemaFrame>(SchemaFrame::Mode::Locations)};
103+
remote_frame->analyse(remote.value(), walker, resolver, location.dialect,
104+
base_key);
105+
frame_iterator =
106+
this->frame_cache_.emplace(base_key, std::move(remote_frame)).first;
107+
}
108+
109+
return !frame_iterator->second->traverse(reference_entry.destination)
110+
.has_value();
111+
}
112+
};

test/alterschema/alterschema_lint_2019_09_test.cc

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4774,3 +4774,40 @@ TEST(AlterSchema_lint_2019_09, forbid_empty_enum_10) {
47744774

47754775
EXPECT_EQ(document, expected);
47764776
}
4777+
4778+
// Reporting case: a 2019-09 schema whose `$ref` is the absolute URI
// https://unknown.example.com/nonexistent. The test expects `result.first`
// to be false and exactly one trace, at pointer "", named
// `invalid_external_ref` with the rule's message.
// NOTE(review): presumably the resolver used by LINT_WITHOUT_FIX cannot
// serve that URI -- confirm in the test utilities.
TEST(AlterSchema_lint_2019_09, invalid_external_ref_1) {
4779+
const sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({
4780+
"$schema": "https://json-schema.org/draft/2019-09/schema",
4781+
"title": "Test",
4782+
"description": "Test description",
4783+
"examples": [{}],
4784+
"$ref": "https://unknown.example.com/nonexistent"
4785+
})JSON");
4786+
4787+
// `result` and `traces` are not declared in this test; they appear to be
// introduced by the macro below
LINT_WITHOUT_FIX(document, result, traces);
4788+
4789+
EXPECT_FALSE(result.first);
4790+
EXPECT_EQ(traces.size(), 1);
4791+
EXPECT_LINT_TRACE(traces, 0, "", "invalid_external_ref",
4792+
"External references must point to schemas that can be "
4793+
"resolved",
4794+
false);
4795+
}
4796+
4797+
// Non-reporting case: the `$ref` is `#/$defs/foo`, and `$defs/foo` is
// defined in the same document. The test expects `result.first` to be true
// and no traces at all, i.e. a reference into the document itself must not
// be reported as an invalid external reference.
TEST(AlterSchema_lint_2019_09, invalid_external_ref_2) {
4798+
const sourcemeta::core::JSON document = sourcemeta::core::parse_json(R"JSON({
4799+
"$schema": "https://json-schema.org/draft/2019-09/schema",
4800+
"title": "Test",
4801+
"description": "Test description",
4802+
"examples": [{}],
4803+
"$defs": {
4804+
"foo": { "type": "string" }
4805+
},
4806+
"$ref": "#/$defs/foo"
4807+
})JSON");
4808+
4809+
// `result` and `traces` are not declared in this test; they appear to be
// introduced by the macro below
LINT_WITHOUT_FIX(document, result, traces);
4810+
4811+
EXPECT_TRUE(result.first);
4812+
EXPECT_EQ(traces.size(), 0);
4813+
}

0 commit comments

Comments
 (0)