Skip to content

Commit acb4e29

Browse files
authored
Merge pull request #4015 from ruby/restructure
Restructure
2 parents aa6bce6 + ba16ae2 commit acb4e29

140 files changed

Lines changed: 12381 additions & 5652 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/cpp-bindings.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,6 @@ jobs:
2929
- name: Compile prism
3030
run: bundle exec rake compile
3131
- name: Compile C++
32-
run: g++ -o ./cpp_test cpp/test.cpp build/static/*.o build/static/util/*.o -Iinclude
32+
run: g++ -o ./cpp_test cpp/test.cpp build/static/*.o -Iinclude
3333
- name: Run C++
3434
run: ./cpp_test

.gitignore

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ out.svg
3131
/fuzz/output/
3232
/gemfiles/typecheck/bin/
3333
/include/prism/ast.h
34-
/include/prism/diagnostic.h
35-
/include/prism/node_new.h
34+
/include/prism/internal/diagnostic.h
3635
/javascript/node_modules/
3736
/javascript/package-lock.json
3837
/javascript/src/deserialize.js
@@ -58,7 +57,7 @@ out.svg
5857
/src/node.c
5958
/src/prettyprint.c
6059
/src/serialize.c
61-
/src/token_type.c
60+
/src/tokens.c
6261
/src/**/*.o
6362
/rbi/prism/dsl.rbi
6463
/rbi/prism/node.rbi

Doxyfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ PROJECT_NAME = "Prism Ruby parser"
2323
OUTPUT_DIRECTORY = doc
2424
JAVADOC_AUTOBRIEF = YES
2525
OPTIMIZE_OUTPUT_FOR_C = YES
26-
INPUT = src src/util include include/prism include/prism/util
27-
EXCLUDE = include/prism/debug_allocator.h
26+
INPUT = include/prism.h include/prism
27+
EXCLUDE = include/prism/internal
2828
HTML_OUTPUT = c
2929
SORT_MEMBER_DOCS = NO
3030
GENERATE_LATEX = NO

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,12 @@ build/fuzz.%: $(SOURCES) fuzz/%.c fuzz/fuzz.c
7070
$(ECHO) "building $* fuzzer"
7171
$(Q) $(MAKEDIRS) $(@D)
7272
$(ECHO) "building main fuzz binary"
73-
$(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize-ignorelist=fuzz/asan.ignore -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^
73+
$(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^
7474
$(ECHO) "building cmplog binary"
75-
$(Q) AFL_LLVM_CMPLOG=1 afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize-ignorelist=fuzz/asan.ignore -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@.cmplog $^
75+
$(Q) AFL_LLVM_CMPLOG=1 afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@.cmplog $^
7676

7777
build/fuzz.heisenbug.%: $(SOURCES) fuzz/%.c fuzz/heisenbug.c
78-
$(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize-ignorelist=fuzz/asan.ignore -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^
78+
$(Q) afl-clang-lto $(DEBUG_FLAGS) $(CPPFLAGS) $(CFLAGS) $(FUZZ_FLAGS) -O0 -fsanitize=fuzzer,address -ggdb3 -std=c99 -Iinclude -o $@ $^
7979

8080
fuzz-debug:
8181
$(ECHO) "entering debug shell"

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ The repository contains the infrastructure for both a shared library (libprism)
4444
│ └── prism Sample code that uses the Ruby API for documentation purposes
4545
├── sig RBS type signatures for the Ruby library
4646
├── src
47-
│   ├── util various utility files
4847
│   └── prism.c main entrypoint for the shared library
4948
├── templates contains ERB templates generated by templates/template.rb
5049
│   └── template.rb generates code from the nodes and tokens configured by config.yml

cpp/test.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,20 @@ extern "C" {
55
#include <iostream>
66

77
int main() {
8-
pm_arena_t arena = { 0 };
9-
pm_parser_t parser;
10-
pm_parser_init(&arena, &parser, reinterpret_cast<const uint8_t *>("1 + 2"), 5, NULL);
8+
pm_arena_t *arena = pm_arena_new();
9+
pm_parser_t *parser = pm_parser_new(arena, reinterpret_cast<const uint8_t *>("1 + 2"), 5, NULL);
1110

12-
pm_node_t *root = pm_parse(&parser);
13-
pm_buffer_t buffer = { 0 };
11+
pm_node_t *root = pm_parse(parser);
12+
pm_buffer_t *buffer = pm_buffer_new();
1413

15-
pm_prettyprint(&buffer, &parser, root);
16-
pm_buffer_append_byte(&buffer, '\0');
14+
pm_prettyprint(buffer, parser, root);
1715

18-
std::cout << buffer.value << std::endl;
16+
std::string_view view(pm_buffer_value(buffer), pm_buffer_length(buffer));
17+
std::cout << view << std::endl;
1918

20-
pm_buffer_free(&buffer);
21-
pm_parser_free(&parser);
22-
pm_arena_free(&arena);
19+
pm_buffer_free(buffer);
20+
pm_parser_free(parser);
21+
pm_arena_free(arena);
2322

2423
return 0;
2524
}

docs/build_system.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ If you need to use memory allocation functions implemented outside of the standa
8787
* Additionally, include `-I [path/to/custom_allocator]` where your `prism_xallocator.h` is located
8888
* Link the implementation of `prism_xallocator.c` that contains functions declared in `prism_xallocator.h`
8989

90-
For further clarity, refer to `include/prism/defines.h`.
90+
For further clarity, refer to `include/prism/internal/allocator.h`.
9191

9292
### Building prism from source as a C library
9393

docs/encoding.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ For each of these encodings, prism provides functions for checking if the subseq
107107

108108
## Getting notified when the encoding changes
109109

110-
You may want to get notified when the encoding changes based on the result of parsing an encoding comment. We use this internally for our `lex` function in order to provide the correct encodings for the tokens that are returned. For that you can register a callback with `pm_parser_register_encoding_changed_callback`. The callback will be called with a pointer to the parser. The encoding can be accessed through `parser->encoding`.
110+
You may want to be notified when the encoding changes based on the result of parsing an encoding comment. We use this internally for our `lex` function in order to provide the correct encodings for the tokens that are returned. To do so, register a callback with `pm_parser_encoding_changed_callback_set`. The callback will be called with a pointer to the parser. The encoding can be accessed through `parser->encoding`.
111111

112112
```c
113113
// When the encoding that is being used to parse the source is changed by prism,
@@ -117,5 +117,5 @@ typedef void (*pm_encoding_changed_callback_t)(pm_parser_t *parser);
117117
// Register a callback that will be called whenever prism changes the encoding
118118
// it is using to parse based on the magic comment.
119119
PRISM_EXPORTED_FUNCTION void
120-
pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
120+
pm_parser_encoding_changed_callback_set(pm_parser_t *parser, pm_encoding_changed_callback_t callback);
121121
```

docs/fuzzing.md

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,34 +5,29 @@ We use fuzzing to test the various entrypoints to the library. The fuzzer we use
55
```
66
fuzz
77
├── corpus
8-
│   ├── parse fuzzing corpus for parsing (a symlink to our fixtures)
9-
│   └── regexp fuzzing corpus for regexp
8+
│   └── parse fuzzing corpus for parsing (a symlink to our fixtures)
109
├── dict an AFL++ dictionary containing various tokens
1110
├── docker
1211
│   └── Dockerfile for building a container with the fuzzer toolchain
1312
├── fuzz.c generic entrypoint for fuzzing
1413
├── heisenbug.c entrypoint for reproducing a crash or hang
1514
├── parse.c fuzz handler for parsing
1615
├── parse.sh script to run parsing fuzzer
17-
├── regexp.c fuzz handler for regular expression parsing
18-
├── regexp.sh script to run regexp fuzzer
1916
└── tools
2017
   ├── backtrace.sh generates backtrace files for a crash directory
2118
   └── minimize.sh generates minimized crash or hang files
2219
```
2320

2421
## Usage
2522

26-
There are currently three fuzzing targets
23+
There is currently one fuzz target:
2724

2825
- `pm_serialize_parse` (parse)
29-
- `pm_regexp_parse` (regexp)
3026

31-
Respectively, fuzzing can be performed with
27+
Fuzzing can be performed with
3228

3329
```
3430
make fuzz-run-parse
35-
make fuzz-run-regexp
3631
```
3732

3833
To end a fuzzing job, interrupt with CTRL+C. To enter a container with the fuzzing toolchain and debug utilities, run
@@ -43,8 +38,6 @@ make fuzz-debug
4338

4439
# Out-of-bounds reads
4540

46-
Currently, encoding functionality implementing the `pm_encoding_t` interface can read outside of inputs. For the time being, ASAN instrumentation is disabled for functions from src/enc. See `fuzz/asan.ignore`.
47-
4841
To disable ASAN read instrumentation globally, use the `FUZZ_FLAGS` environment variable, e.g.:
4942

5043
```
@@ -55,7 +48,7 @@ Note, that this may make reproducing bugs difficult as they may depend on memory
5548

5649
```
5750
make fuzz-debug # enter the docker container with build tools
58-
make build/fuzz.heisenbug.parse # or .regexp
51+
make build/fuzz.heisenbug.parse
5952
./build/fuzz.heisenbug.parse path-to-problem-input
6053
```
6154

docs/serialization.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ typedef struct {
159159
size_t capacity;
160160
} pm_buffer_t;
161161

162-
// Free the memory associated with the buffer.
163-
void pm_buffer_free(pm_buffer_t *);
162+
// Free the memory held by the buffer.
163+
void pm_buffer_cleanup(pm_buffer_t *);
164164

165165
// Parse and serialize the AST represented by the given source to the given
166166
// buffer.
@@ -172,12 +172,12 @@ Typically you would use a stack-allocated `pm_buffer_t` and call `pm_serialize_p
172172
```c
173173
void
174174
serialize(const uint8_t *source, size_t length) {
175-
pm_buffer_t buffer = { 0 };
176-
pm_serialize_parse(&buffer, source, length, NULL);
175+
pm_buffer_t *buffer = pm_buffer_new();
176+
pm_serialize_parse(buffer, source, length, NULL);
177177
178178
// Do something with the serialized string.
179179
180-
pm_buffer_free(&buffer);
180+
pm_buffer_free(buffer);
181181
}
182182
```
183183

0 commit comments

Comments
 (0)