From 87791ec94bd39abb2d1cb4d120dd4d9afcac7d67 Mon Sep 17 00:00:00 2001 From: Tim Stack Date: Sun, 28 Aug 2022 18:55:32 -0700 Subject: [PATCH] [markdown] add support for going to an anchor --- .github/workflows/c-cpp.yml | 1 + NEWS | 12 + README.md | 6 +- demo/loggen.py | 54 + .../configs/tutorial1/config.json | 18 + .../lnav-tutorial-key-handler.lnav | 29 + .../formats/tutorial-lib/tutorial.sql | 141 + docs/tutorials/tutorial1/index.md | 96 + docs/tutorials/tutorial1/tutorial1.glog | 100 + src/CMakeLists.txt | 8 +- src/Makefile.am | 4 + src/base/auto_mem.hh | 10 +- src/base/intern_string.hh | 5 +- src/base/result.h | 14 +- src/base/string_util.cc | 59 +- src/base/string_util.hh | 8 + src/command_executor.cc | 57 +- src/data_scanner_re.cc | 2 +- src/document.sections.hh | 4 + src/field_overlay_source.cc | 42 +- src/file_collection.cc | 2 + src/file_collection.hh | 8 + src/file_vtab.cc | 129 +- src/formats/glog_log.json | 7 + src/fstat_vtab.cc | 18 +- src/help.md | 4 +- src/highlighter.cc | 8 + src/hotkeys.cc | 73 +- src/internals/cmd-ref.rst | 14 +- src/internals/sql-ref.rst | 34 +- src/lnav.cc | 138 +- src/lnav.hh | 7 +- src/lnav.indexing.cc | 47 +- src/lnav_commands.cc | 95 +- src/lnav_config.cc | 20 +- src/log_format_loader.cc | 6 +- src/log_format_loader.hh | 2 + src/logfile.hh | 16 + src/logfile_sub_source.cc | 15 +- src/md2attr_line.cc | 84 +- src/md2attr_line.hh | 1 + src/md4cpp.cc | 432 +- src/plain_text_source.cc | 68 + src/plain_text_source.hh | 12 +- src/readline_callbacks.cc | 8 +- src/shlex.resolver.hh | 6 +- src/sql_util.cc | 205 +- src/sql_util.hh | 16 +- src/sqlite-extension-func.cc | 20 +- src/sqlite-extension-func.hh | 6 +- src/sqlitepp.hh | 43 + src/static_file_vtab.cc | 333 + src/static_file_vtab.hh | 39 + src/styling.hh | 127 +- src/tailer/tailer.looper.cc | 14 +- src/text_format.hh | 16 +- src/textfile_sub_source.cc | 169 + src/textfile_sub_source.hh | 11 +- src/textview_curses.cc | 31 +- src/textview_curses.hh | 23 +- 
src/third-party/rapidyaml/ryml_all.hpp | 30945 ++++++++++++++++ src/time_formats.am | 1 + src/view_curses.cc | 147 +- src/view_curses.hh | 49 +- src/view_helpers.cc | 2 + src/views_vtab.cc | 50 + src/vtab_module.hh | 4 + src/yajlpp/yajlpp.cc | 5 +- src/yajlpp/yajlpp.hh | 31 +- src/yajlpp/yajlpp_def.hh | 51 +- src/yaml-extension-functions.cc | 103 + test/Makefile.am | 2 + test/document.sections.tests.cc | 3 + test/expected/expected.am | 14 + ...a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out | 45 +- ...b4431dd0cc36c6201d263b727b3305e8cda6b1.err | 2 +- ...e85ba0c0945a5085fb4ee255771406061a9c17.err | 0 ...e85ba0c0945a5085fb4ee255771406061a9c17.out | 6 + ...acc1a8bb5028636fdbf08f077f9a835ab51bec.err | 0 ...acc1a8bb5028636fdbf08f077f9a835ab51bec.out | 19 + ...c6abde708a69e74f5b7fde865d88fa75f91e0a.err | 4 + ...c6abde708a69e74f5b7fde865d88fa75f91e0a.out | 0 ...69c22dcfa37b5c3e8490a6026eacb7ca953998.err | 2 + ...69c22dcfa37b5c3e8490a6026eacb7ca953998.out | 0 ...51b55dff7332c5bee2c9b797c401c5614d574a.out | 8 +- ...24078983cf1b7a80b6fb65d5186cd125498136.err | 0 ...24078983cf1b7a80b6fb65d5186cd125498136.out | 149 + ...486314c4e02e480d829ea2f077b86c49fedcec.err | 0 ...486314c4e02e480d829ea2f077b86c49fedcec.out | 4 + ...a346ca1da2da4346f1d310212e166767993ce9.out | 1 + ...88ea61a5382458cc48a2607e2639e52b0be1da.err | 0 ...88ea61a5382458cc48a2607e2639e52b0be1da.out | 149 + test/test_meta.sh | 7 + test/test_sql.sh | 4 +- test/test_sql_views_vtab.sh | 4 + test/test_sql_yaml_func.sh | 5 + test/test_text_file.sh | 9 + 97 files changed, 33976 insertions(+), 826 deletions(-) create mode 100755 demo/loggen.py create mode 100644 docs/tutorials/tutorial-lib/configs/tutorial1/config.json create mode 100644 docs/tutorials/tutorial-lib/formats/tutorial-lib/lnav-tutorial-key-handler.lnav create mode 100644 docs/tutorials/tutorial-lib/formats/tutorial-lib/tutorial.sql create mode 100644 docs/tutorials/tutorial1/index.md create mode 100644 docs/tutorials/tutorial1/tutorial1.glog create mode 100644 
src/static_file_vtab.cc create mode 100644 src/static_file_vtab.hh create mode 100644 src/third-party/rapidyaml/ryml_all.hpp create mode 100644 src/yaml-extension-functions.cc create mode 100644 test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.err create mode 100644 test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.out create mode 100644 test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.err create mode 100644 test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.out create mode 100644 test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err create mode 100644 test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out create mode 100644 test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err create mode 100644 test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.out create mode 100644 test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err create mode 100644 test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out create mode 100644 test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.err create mode 100644 test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.out create mode 100644 test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.err create mode 100644 test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.out create mode 100644 test/test_sql_yaml_func.sh diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 66660984..19a8ce0c 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -45,6 +45,7 @@ jobs: --exclude src/ww898 --exclude src/yajl --exclude test + --exclude src/data_scanner_re.cc --gcov-options '\-lp' build: diff --git a/NEWS b/NEWS index f7253cb9..8c4edffa 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,16 @@ lnav v0.11.0: 
TEXT view. The breadcrumb bar at the top will also be updated depending on the section of the document that you are in and you can use it to jump to different parts of the doc. + * The ":goto" command will now accept anchor links (i.e. #section-id) + as an argument when the text file being viewed has sections. You + can also specify an anchor when opening a file by appending + "#<link-name>". For example, "README.md#screenshot". + * Log message comments are now treated as markdown and rendered + accordingly in the overlay. Multi-line comments are now supported + as well. + * Metadata embedded in files can now be accessed by the + "lnav_file_metadata" table. Currently, only the front-matter in + Markdown files is supported. * Added an integration with regex101.com to make it easier to edit log message regular expressions. Using the new "management CLI" (activated by the -m option), a log format can be created from @@ -88,6 +98,8 @@ lnav v0.11.0: * Added an lnav_views_echo table that is a real SQLite table that you can create TRIGGERs on in order to perform actions when scrolling in a view. + * Added a "yaml_to_json()" SQL function that converts a YAML + document to the equivalent JSON. 
Breaking Changes: * Formats definitions are now checked to ensure that values have a diff --git a/README.md b/README.md index 706eabe3..372d25ac 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ + + [![Build](https://github.com/tstack/lnav/workflows/ci-build/badge.svg)](https://github.com/tstack/lnav/actions?query=workflow%3Aci-build) [![Docs](https://readthedocs.org/projects/lnav/badge/?version=latest&style=plastic)](https://docs.lnav.org) [![Coverage Status](https://coveralls.io/repos/github/tstack/lnav/badge.svg?branch=master)](https://coveralls.io/github/tstack/lnav?branch=master) -[![lnav](https://snapcraft.io//lnav/badge.svg)](https://snapcraft.io/lnav) +[![lnav](https://snapcraft.io/lnav/badge.svg)](https://snapcraft.io/lnav) -[](https://discord.gg/erBPnKwz7R) +[](https://discord.gg/erBPnKwz7R) _This is the source repository for **lnav**, visit [https://lnav.org](https://lnav.org) for a high level overview._ diff --git a/demo/loggen.py b/demo/loggen.py new file mode 100755 index 00000000..6790a095 --- /dev/null +++ b/demo/loggen.py @@ -0,0 +1,54 @@ +import datetime +import os +import random +import shutil +import sys + +MSGS = [ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.", + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.", + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.", + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.", +] + +GLOG_DATE_FMT = "%Y%m%d %H:%M:%S" + +START_TIME = datetime.datetime.fromtimestamp(1490191111) + +try: + shutil.rmtree("/tmp/demo", ignore_errors=True) # a missing /tmp/demo must not skip the makedirs below + os.makedirs("/tmp/demo") +except OSError: + pass + +PIDS = [ + "123", + "123", + "123", + "121", + "124", + "123", + "61456", + "61456", + "61457", +] + +LOG_LOCS = [ + "demo.cc:123", + "demo.cc:352", + "loader.cc:13", + 
"loader.cc:552", + "blaster.cc:352", + "blaster.cc:112", + "blaster.cc:6782", +] + +CURR_TIME = START_TIME +for _index in range(0, int(sys.argv[1])): + CURR_TIME += datetime.timedelta(seconds=random.randrange(1, 22)) + print("I%s.%06d %s %s] %s" % ( + CURR_TIME.strftime(GLOG_DATE_FMT), + random.randrange(0, 100000), + random.choice(PIDS), + random.choice(LOG_LOCS), + random.choice(MSGS))) diff --git a/docs/tutorials/tutorial-lib/configs/tutorial1/config.json b/docs/tutorials/tutorial-lib/configs/tutorial1/config.json new file mode 100644 index 00000000..4407368d --- /dev/null +++ b/docs/tutorials/tutorial-lib/configs/tutorial1/config.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://lnav.org/schemas/config-v1.schema.json", + "global": { + "lnav_tutorial_name": "tutorial1" + }, + "ui": { + "keymap-defs": { + "default": { + "x79": { + "command": "|lnav-tutorial-key-handler next" + }, + "x59": { + "command": "|lnav-tutorial-key-handler prev" + } + } + } + } +} \ No newline at end of file diff --git a/docs/tutorials/tutorial-lib/formats/tutorial-lib/lnav-tutorial-key-handler.lnav b/docs/tutorials/tutorial-lib/formats/tutorial-lib/lnav-tutorial-key-handler.lnav new file mode 100644 index 00000000..9a98b5cc --- /dev/null +++ b/docs/tutorials/tutorial-lib/formats/tutorial-lib/lnav-tutorial-key-handler.lnav @@ -0,0 +1,29 @@ + +;SELECT filepath AS tutorial_path FROM lnav_file + WHERE filepath GLOB '*/tutorial1/index.md' LIMIT 1 +;SELECT CASE + WHEN $1 = 'next' AND + step < (SELECT max(step) FROM lnav_tutorial_steps WHERE name = $lnav_tutorial_name) + THEN step + 1 + WHEN $1 = 'prev' AND step > 1 THEN step - 1 + ELSE step + END AS new_step + FROM lnav_tutorial_step WHERE name = $lnav_tutorial_name +;SELECT CASE + WHEN $1 = 'next' AND + step = (SELECT max(step) FROM lnav_tutorial_steps WHERE name = $lnav_tutorial_name) + THEN '#conclusion' + ELSE '#step-' || $new_step + END AS new_anchor + FROM lnav_tutorial_step WHERE name = $lnav_tutorial_name +;UPDATE lnav_tutorial_step SET 
step = $new_step WHERE name = $lnav_tutorial_name +;UPDATE lnav_views SET top_meta = json_object( + 'file', $tutorial_path, + 'anchor', $new_anchor +) + WHERE name = 'text' +:switch-to-view text +;UPDATE lnav_views SET top = 0 + WHERE name = 'log' +;REPLACE INTO lnav_user_notifications (id, views, message) + SELECT * FROM lnav_tutorial_log_notification; \ No newline at end of file diff --git a/docs/tutorials/tutorial-lib/formats/tutorial-lib/tutorial.sql b/docs/tutorials/tutorial-lib/formats/tutorial-lib/tutorial.sql new file mode 100644 index 00000000..80af3629 --- /dev/null +++ b/docs/tutorials/tutorial-lib/formats/tutorial-lib/tutorial.sql @@ -0,0 +1,141 @@ +CREATE TABLE lnav_tutorial_step +( + name TEXT NOT NULL PRIMARY KEY, + step INTEGER NOT NULL +); + +INSERT INTO lnav_tutorial_step + VALUES ('tutorial1', 1); + +CREATE TABLE lnav_tutorial_steps +( + name TEXT NOT NULL, + step INTEGER NOT NULL, + achievements TEXT NOT NULL, + PRIMARY KEY (name, step) +); + +CREATE TABLE IF NOT EXISTS lnav_tutorial_progress +( + name TEXT NOT NULL, + step INTEGER NOT NULL, + achieved TEXT NOT NULL, + + PRIMARY KEY (name, step, achieved) +); + +CREATE TABLE IF NOT EXISTS lnav_tutorial_lines +( + name TEXT NOT NULL, + step INTEGER NOT NULL, + view_ptr TEXT NOT NULL, + view_value TEXT NOT NULL, + achievement TEXT NOT NULL, + log_comment TEXT +); + +CREATE TRIGGER IF NOT EXISTS add_tutorial_data + AFTER INSERT + ON lnav_events + WHEN jget(new.content, '/$schema') = 'https://lnav.org/event-file-format-detected-v1.schema.json' AND + jget(new.content, '/format') = 'text/markdown' +BEGIN + INSERT INTO lnav_tutorial_steps + SELECT jget(tutorial_meta, '/name'), + key + 1, + value + FROM (SELECT yaml_to_json(lnav_file_metadata.content) AS tutorial_meta + FROM lnav_file_metadata + WHERE filepath = jget(new.content, '/filename')) AS meta_content, + json_each(jget(meta_content.tutorial_meta, '/steps')); + + REPLACE INTO lnav_tutorial_lines + SELECT name, + step, + jget(value, '/view_ptr'), 
+ jget(value, '/view_value'), + key, + jget(value, '/comment') + FROM lnav_tutorial_steps, + json_each(achievements) + WHERE jget(value, '/view_ptr') IS NOT NULL; + + REPLACE INTO lnav_user_notifications (id, views, message) + SELECT * + FROM lnav_tutorial_log_notification; + +END; + +CREATE TRIGGER IF NOT EXISTS lnav_tutorial_view_listener UPDATE OF top + ON lnav_views_echo + WHEN new.name = 'log' +BEGIN + INSERT OR IGNORE INTO lnav_tutorial_progress + SELECT lnav_tutorial_lines.name, + lnav_tutorial_lines.step, + achievement + FROM lnav_tutorial_step, + lnav_tutorial_lines + WHERE lnav_tutorial_step.step = lnav_tutorial_lines.step + AND jget(json_object('top', new.top, + 'left', new.left, + 'search', new.search), + view_ptr) = view_value; + UPDATE all_logs + SET log_comment = (SELECT log_comment + FROM lnav_tutorial_step, + lnav_tutorial_lines + WHERE lnav_tutorial_step.step = lnav_tutorial_lines.step + AND lnav_tutorial_lines.log_comment IS NOT NULL + AND jget(json_object('top', new.top, + 'left', new.left, + 'search', new.search), view_ptr) = view_value) + WHERE log_line = new.top + AND log_comment IS NULL; +END; + +CREATE TABLE lnav_tutorial_message +( + msgid INTEGER PRIMARY KEY, + msg TEXT +); + +CREATE VIEW lnav_tutorial_current_achievements AS +SELECT key AS achievement, value + FROM lnav_tutorial_step, + lnav_tutorial_steps, json_each(lnav_tutorial_steps.achievements) + WHERE lnav_tutorial_step.step = lnav_tutorial_steps.step; + +CREATE VIEW lnav_tutorial_current_progress AS +SELECT achieved + FROM lnav_tutorial_step, + lnav_tutorial_progress + WHERE lnav_tutorial_step.step = lnav_tutorial_progress.step; + +CREATE VIEW lnav_tutorial_remaining_achievements AS +SELECT * + FROM lnav_tutorial_current_achievements + WHERE achievement NOT IN (SELECT * FROM lnav_tutorial_current_progress); + +CREATE VIEW lnav_tutorial_log_notification AS +SELECT * + FROM (SELECT 'org.lnav.tutorial.log' AS id, '["log"]' AS views, jget(value, '/notification') AS message + FROM 
lnav_tutorial_remaining_achievements + UNION ALL + SELECT 'org.lnav.tutorial.log' AS id, + '["log"]' AS views, + 'Press y to go to the next step in the tutorial' AS message) + LIMIT 1; + +CREATE TRIGGER IF NOT EXISTS lnav_tutorial_progress_listener + AFTER INSERT + ON lnav_tutorial_progress +BEGIN + DELETE FROM lnav_user_notifications WHERE id = 'org.lnav.tutorial.log'; + REPLACE INTO lnav_user_notifications (id, views, message) + SELECT * + FROM lnav_tutorial_log_notification; +END; + +REPLACE INTO lnav_user_notifications (id, views, message) + VALUES ('org.lnav.tutorial.text', '["text"]', 'Press "q" to go to the log view') diff --git a/docs/tutorials/tutorial1/index.md b/docs/tutorials/tutorial1/index.md new file mode 100644 index 00000000..477111b1 --- /dev/null +++ b/docs/tutorials/tutorial1/index.md @@ -0,0 +1,96 @@ +--- +name: tutorial1 +steps: + - move-to-error: + description: "Move to an error" + view_ptr: /top + view_value: 6 + notification: "Press e/Shift+E to move through the errors" + comment: | + You found the error! + [Log formats](https://docs.lnav.org/en/latest/formats.html#format-file-reference) + can define the log levels for a given message. + The [theme](https://docs.lnav.org/en/latest/config.html#theme-definitions) defines + how the levels are displayed. + move-to-warning: + description: "Move to a warning" + notification: "Press w/Shift+W to move through the warnings" + view_ptr: /top + view_value: 3 + comment: | + You found the warning! The scrollbar on the right is highlighted + to show the position of + warnings and + errors in this + view. 
+ - search-for-term: + description: "Search for something" + notification: "Press / to search for '1AF9...'" + view_ptr: /search + view_value: 1AF9293A-F42D-4318-BCDF-60234B240955 + move-to-next-hit: + description: "Move to the next hit" + notification: "Press n/Shift+N to move through the search hits" + view_ptr: /top + view_value: 53 + comment: | + The matching text in a search is highlighted in + reverse-video. + However, the text is not always on-screen, so the bar on the + left will also be highlighted. You can then press `>` to + move right to the next (horizontal) search hit. Pressing + `<` will move left to the previous (horizontal) hit or all + the way back to the start of the line. + move-right: + description: "Move to the right" + notification: "Press > to move horizontally to view the search hit" + view_ptr: /left + view_value: 150 +--- +# Tutorial 1 + +Welcome to the first _interactive_ **lnav** tutorial! + +This tutorial will guide you through the basics of navigating log files. + +## Step 1 + +Finding errors quickly is one of the main use-cases for **lnav**. To +make that quick and easy, **lnav** parses the log messages in log files +as they are loaded and builds indexes of the errors and warnings. You +can then use the following hotkeys to jump to them in the log view: + +| Key | Action | +|-----------|----------------------------------------------------------------------------------| +| `e` | Move to the next error | +| `Shift+E` | Move to the previous error | +| `w` | Move to the next warning | +| `Shift+W` | Move to the previous warning | + +To complete this step in the tutorial, you'll need to navigate to the +errors and warnings in the sample log file. You can check the upper-right +status bar for tips on what you need to do next. Now, press `q` to switch +to the log view and begin navigating the sample log file. + +## Step 2 + +To search for text in files, you can press `/` to enter the search +prompt. 
To make it easier to search for text that is on-screen, you +can press `TAB` to complete values that are shown on screen. For +example, to search for the UUID "1AF9293A-F42D-4318-BCDF-60234B240955" +that is in one of the error messages, you can enter "1AF9" and then +press `TAB` to complete the rest of the UUID. + +Press `q` to switch to the log view and try searching for the UUID. + +## Conclusion + +That's all for now, visit https://lnav.org/downloads to find how to +download/install a copy of lnav for your system. The full documentation +is available at https://docs.lnav.org. + +## Colophon + +The source for this tutorial is available here: + +https://github.com/tstack/lnav/tree/master/docs/tutorials/tutorial1 diff --git a/docs/tutorials/tutorial1/tutorial1.glog b/docs/tutorials/tutorial1/tutorial1.glog new file mode 100644 index 00000000..9a078eec --- /dev/null +++ b/docs/tutorials/tutorial1/tutorial1.glog @@ -0,0 +1,100 @@ +I20170322 06:58:47.082758 61456 blaster.cc:112] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 06:58:58.019562 121 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 06:58:59.059175 123 blaster.cc:6782] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +W20170322 06:59:16.062826 61456 demo.cc:352] Ut enim ad minim veniam, quis nostrud exercitation 1AF9293A-F42D-4318-BCDF-60234B240955 ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 06:59:28.084062 124 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 06:59:32.053551 123 loader.cc:13] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
+E20170322 06:59:53.084969 123 loader.cc:552] Excepteur sint occaecat cupidatat non proident 1AF9293A-F42D-4318-BCDF-60234B240955, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:00:00.096693 123 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:00:03.049849 123 demo.cc:352] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:00:08.070575 123 blaster.cc:6782] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:00:23.019849 123 blaster.cc:352] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:00:28.022692 61457 loader.cc:552] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:00:29.058438 61456 blaster.cc:352] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:00:30.028483 123 loader.cc:13] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:00:49.070676 123 demo.cc:352] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:00:56.095214 123 loader.cc:552] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +W20170322 07:01:14.042785 123 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:01:31.083704 123 blaster.cc:112] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
+I20170322 07:01:44.013733 121 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:01:55.024085 121 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:02:02.027811 121 blaster.cc:6782] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:02:14.022939 61456 blaster.cc:112] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:02:30.035925 123 loader.cc:13] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:02:49.024985 123 loader.cc:13] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:03:09.056478 121 blaster.cc:6782] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:03:15.023777 123 demo.cc:352] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:03:32.066107 123 blaster.cc:352] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:03:48.028662 124 blaster.cc:6782] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:03:54.027078 123 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:04:09.041478 123 demo.cc:123] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
+I20170322 07:04:14.068162 121 blaster.cc:6782] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:04:28.099513 124 blaster.cc:112] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:04:40.063473 124 loader.cc:552] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:04:50.024030 123 loader.cc:552] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:04:56.081415 121 blaster.cc:352] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:05:14.096304 123 blaster.cc:352] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:05:21.086331 123 demo.cc:352] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:05:33.039503 123 loader.cc:13] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:05:43.092657 124 blaster.cc:6782] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:05:59.002644 123 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:06:01.022102 123 demo.cc:352] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:06:22.005675 123 blaster.cc:6782] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
+I20170322 07:06:37.088974 123 blaster.cc:112] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:06:44.043938 61457 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:06:47.060703 123 loader.cc:13] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:06:49.052185 61456 demo.cc:123] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:06:52.074424 61457 demo.cc:352] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:07:02.063191 123 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:07:10.030327 61457 blaster.cc:112] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:07:11.011338 123 loader.cc:13] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:07:27.078391 123 blaster.cc:352] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:07:41.061684 123 blaster.cc:112] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:07:53.076558 121 blaster.cc:112] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
+I20170322 07:08:04.055174 121 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur: 1AF9293A-F42D-4318-BCDF-60234B240955 +I20170322 07:08:18.046756 123 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:08:28.004198 123 loader.cc:552] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:08:36.032193 61457 blaster.cc:352] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:08:50.028964 61456 loader.cc:13] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:08:56.074576 124 blaster.cc:112] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:08:57.090258 123 loader.cc:13] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:09:00.067690 121 blaster.cc:352] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:09:19.036483 61457 blaster.cc:112] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:09:40.048046 123 blaster.cc:352] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:09:52.051526 123 loader.cc:13] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:10:11.003845 61456 loader.cc:552] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 
+I20170322 07:10:27.094133 123 blaster.cc:6782] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:10:43.027892 121 blaster.cc:352] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:10:57.078489 124 demo.cc:352] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:11:09.014685 123 demo.cc:123] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:11:18.029203 61456 blaster.cc:352] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:11:24.067068 121 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:11:38.053891 61456 loader.cc:552] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:11:59.027292 61457 blaster.cc:112] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:12:10.069054 61457 loader.cc:13] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:12:22.018053 123 loader.cc:552] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:12:39.000436 123 demo.cc:352] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:12:53.009916 123 loader.cc:13] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
+I20170322 07:13:13.051890 121 demo.cc:123] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:13:24.076724 123 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:13:34.075980 123 blaster.cc:6782] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:13:35.096130 61456 blaster.cc:6782] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:13:49.087790 121 demo.cc:123] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:14:08.033671 61457 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:14:23.091358 61456 blaster.cc:112] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:14:35.088133 61456 demo.cc:123] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:14:55.005577 123 blaster.cc:352] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:14:58.008392 61457 demo.cc:123] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:15:05.004789 123 loader.cc:552] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:15:07.070013 123 blaster.cc:6782] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
+I20170322 07:15:08.012805 123 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:15:25.042509 61456 loader.cc:552] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +E20170322 07:15:32.027688 123 blaster.cc:6782] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:15:41.020299 61456 blaster.cc:6782] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:15:42.021039 124 loader.cc:552] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:15:59.063918 123 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +I20170322 07:16:19.082250 123 loader.cc:552] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:16:20.026445 61457 loader.cc:13] Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +I20170322 07:16:41.048447 123 blaster.cc:6782] Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. +I20170322 07:16:52.097215 61456 demo.cc:123] Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +I20170322 07:17:01.020663 61456 blaster.cc:112] Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b3361b88..6853f52e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -406,6 +406,7 @@ add_library( sequence_matcher.cc shlex.cc sqlite-extension-func.cc + static_file_vtab.cc statusview_curses.cc string-extension-functions.cc sysclip.cc @@ -435,6 +436,7 @@ add_library( xml_util.cc xpath_vtab.cc xterm_mouse.cc + yaml-extension-functions.cc third-party/md4c/md4c.c third-party/sqlite/ext/series.c third-party/sqlite/ext/dbdump.c @@ -525,6 +527,7 @@ add_library( sqlitepp.hh sql_help.hh sql_util.hh + static_file_vtab.hh strong_int.hh sysclip.hh sysclip.cfg.hh @@ -591,7 +594,10 @@ add_library( set(lnav_SRCS lnav.cc) target_include_directories(diag PUBLIC . fmtlib ${CMAKE_CURRENT_BINARY_DIR} - third-party third-party/base64/include) + third-party + third-party/base64/include + third-party/rapidyaml + ) target_link_libraries( diag diff --git a/src/Makefile.am b/src/Makefile.am index 4fbdbcd6..568621c1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -122,6 +122,7 @@ AM_CPPFLAGS = \ -I$(srcdir)/fmtlib \ -I$(srcdir)/third-party \ -I$(srcdir)/third-party/base64/include \ + -I$(srcdir)/third-party/rapidyaml \ -I$(top_srcdir)/src/third-party/scnlib/include \ -Wall \ $(CODE_COVERAGE_CPPFLAGS) \ @@ -284,6 +285,7 @@ noinst_HEADERS = \ sql_help.hh \ sql_util.hh \ sqlite-extension-func.hh \ + static_file_vtab.hh \ styling.hh \ statusview_curses.hh \ strong_int.hh \ @@ -436,6 +438,7 @@ libdiag_a_SOURCES = \ spectro_source.cc \ sqlitepp.cc \ sqlite-extension-func.cc \ + static_file_vtab.cc \ statusview_curses.cc \ string-extension-functions.cc \ styling.cc \ @@ -461,6 +464,7 @@ libdiag_a_SOURCES = \ xml_util.cc \ xpath_vtab.cc \ xterm_mouse.cc \ + yaml-extension-functions.cc \ spookyhash/SpookyV2.cpp PLUGIN_SRCS = \ diff --git a/src/base/auto_mem.hh b/src/base/auto_mem.hh index 56dfa893..e0d3122d 100644 --- a/src/base/auto_mem.hh +++ b/src/base/auto_mem.hh @@ -34,6 +34,7 @@ #include #include +#include #include 
#include @@ -238,10 +239,15 @@ public: const char* begin() const { return this->ab_buffer; } - void push_back(char ch) + auto_buffer& push_back(char ch) { + if (this->ab_size == this->ab_capacity) { + this->expand_by(256); + } this->ab_buffer[this->ab_size] = ch; this->ab_size += 1; + + return *this; } void pop_back() { this->ab_size -= 1; } @@ -370,6 +376,8 @@ public: this->expand_to(this->ab_capacity + amount); } + std::string to_string() const { return {this->ab_buffer, this->ab_size}; } + private: auto_buffer(char* buffer, size_t capacity) : ab_buffer(buffer), ab_capacity(capacity) diff --git a/src/base/intern_string.hh b/src/base/intern_string.hh index daf18ec7..3493bbce 100644 --- a/src/base/intern_string.hh +++ b/src/base/intern_string.hh @@ -266,8 +266,11 @@ struct string_fragment { template string_fragment find_left_boundary(size_t start, P&& predicate) const { - assert((int) start < this->length()); + assert((int) start <= this->length()); + if (start > 0 && start == this->length()) { + start -= 1; + } while (start > 0) { if (predicate(this->data()[start])) { start += 1; diff --git a/src/base/result.h b/src/base/result.h index 1eaa2a29..d0b617dc 100644 --- a/src/base/result.h +++ b/src/base/result.h @@ -1,6 +1,6 @@ -/* +/* Mathieu Stefani, 03 mai 2016 - + This header provides a Result type that can be used to replace exceptions in code that has to handle error. 
@@ -774,7 +774,7 @@ struct Result { { if (!isOk()) { ::fprintf(stderr, "%s\n", str); - std::terminate(); + abort(); } return expect_impl(std::is_same()); } @@ -887,7 +887,7 @@ struct Result { } ::fprintf(stderr, "Attempting to unwrap an error Result\n"); - std::terminate(); + abort(); } template @@ -901,7 +901,7 @@ struct Result { } ::fprintf(stderr, "Attempting to unwrap an error Result\n"); - std::terminate(); + abort(); } template @@ -913,7 +913,7 @@ struct Result { } ::fprintf(stderr, "Attempting to unwrap an error Result\n"); - std::terminate(); + abort(); } E unwrapErr() const @@ -923,7 +923,7 @@ struct Result { } ::fprintf(stderr, "Attempting to unwrapErr an ok Result\n"); - std::terminate(); + abort(); } private: diff --git a/src/base/string_util.cc b/src/base/string_util.cc index a48b54bd..130b2d6b 100644 --- a/src/base/string_util.cc +++ b/src/base/string_util.cc @@ -57,19 +57,46 @@ scrub_to_utf8(char* buffer, size_t length) } } -size_t -unquote(char* dst, const char* str, size_t len) +void +quote_content(auto_buffer& buf, const string_fragment& sf, char quote_char) { - if (str[0] == 'r' || str[0] == 'u') { - str += 1; - len -= 1; + for (char ch : sf) { + if (ch == quote_char) { + buf.push_back('\\').push_back(ch); + continue; + } + switch (ch) { + case '\\': + buf.push_back('\\').push_back('\\'); + break; + case '\n': + buf.push_back('\\').push_back('n'); + break; + case '\t': + buf.push_back('\\').push_back('t'); + break; + case '\r': + buf.push_back('\\').push_back('r'); + break; + case '\a': + buf.push_back('\\').push_back('a'); + break; + case '\b': + buf.push_back('\\').push_back('b'); + break; + default: + buf.push_back(ch); + break; + } } - char quote_char = str[0]; +} + +size_t +unquote_content(char* dst, const char* str, size_t len, char quote_char) +{ size_t index = 0; - require(str[0] == '\'' || str[0] == '"'); - - for (size_t lpc = 1; lpc < (len - 1); lpc++, index++) { + for (size_t lpc = 0; lpc < len; lpc++, index++) { dst[index] = str[lpc]; 
if (str[lpc] == quote_char) { lpc += 1; @@ -96,6 +123,20 @@ unquote(char* dst, const char* str, size_t len) return index; } +size_t +unquote(char* dst, const char* str, size_t len) +{ + if (str[0] == 'r' || str[0] == 'u') { + str += 1; + len -= 1; + } + char quote_char = str[0]; + + require(str[0] == '\'' || str[0] == '"'); + + return unquote_content(dst, &str[1], len - 2, quote_char); +} + size_t unquote_w3c(char* dst, const char* str, size_t len) { diff --git a/src/base/string_util.hh b/src/base/string_util.hh index 7ca22b07..73a8b87c 100644 --- a/src/base/string_util.hh +++ b/src/base/string_util.hh @@ -35,6 +35,8 @@ #include +#include "auto_mem.hh" +#include "intern_string.hh" #include "ww898/cp_utf8.hpp" void scrub_to_utf8(char* buffer, size_t length); @@ -45,6 +47,12 @@ is_line_ending(char ch) return ch == '\r' || ch == '\n'; } +void quote_content(auto_buffer& buf, + const string_fragment& sf, + char quote_char); + +size_t unquote_content(char* dst, const char* str, size_t len, char quote_char); + size_t unquote(char* dst, const char* str, size_t len); size_t unquote_w3c(char* dst, const char* str, size_t len); diff --git a/src/command_executor.cc b/src/command_executor.cc index 2c56717c..2c5de4ae 100644 --- a/src/command_executor.cc +++ b/src/command_executor.cc @@ -63,48 +63,6 @@ SELECT count(*) AS total, min(log_line) AS log_line, log_msg_format ORDER BY total DESC )"; -struct bind_visitor { - bind_visitor(sqlite3_stmt* stmt, int index) : bv_stmt(stmt), bv_index(index) - { - } - - void operator()(const std::string& str) const - { - sqlite3_bind_text(this->bv_stmt, - this->bv_index, - str.c_str(), - str.size(), - SQLITE_TRANSIENT); - } - - void operator()(const string_fragment& str) const - { - sqlite3_bind_text(this->bv_stmt, - this->bv_index, - str.data(), - str.length(), - SQLITE_TRANSIENT); - } - - void operator()(null_value_t) const - { - sqlite3_bind_null(this->bv_stmt, this->bv_index); - } - - void operator()(int64_t value) const - { - 
sqlite3_bind_int64(this->bv_stmt, this->bv_index, value); - } - - void operator()(double value) const - { - sqlite3_bind_double(this->bv_stmt, this->bv_index, value); - } - - sqlite3_stmt* bv_stmt; - int bv_index; -}; - int sql_progress(const struct log_cursor& lc) { @@ -272,12 +230,12 @@ bind_sql_parameters(exec_context& ec, sqlite3_stmt* stmt) } if ((local_var = lvars.find(&name[1])) != lvars.end()) { - mapbox::util::apply_visitor(bind_visitor(stmt, lpc + 1), - local_var->second); + mapbox::util::apply_visitor( + sqlitepp::bind_visitor(stmt, lpc + 1), local_var->second); retval[name] = local_var->second; } else if ((global_var = gvars.find(&name[1])) != gvars.end()) { - mapbox::util::apply_visitor(bind_visitor(stmt, lpc + 1), - global_var->second); + mapbox::util::apply_visitor( + sqlitepp::bind_visitor(stmt, lpc + 1), global_var->second); retval[name] = global_var->second; } else if ((env_value = getenv(&name[1])) != nullptr) { sqlite3_bind_text(stmt, lpc + 1, env_value, -1, SQLITE_STATIC); @@ -620,6 +578,9 @@ execute_file_contents(exec_context& ec, line_number += 1; if (trim(line.in()).empty()) { + if (multiline && cmdline) { + cmdline = cmdline.value() + "\n"; + } continue; } if (line[0] == '#') { @@ -920,7 +881,7 @@ execute_init_commands( .with_fd(std::move(fd_copy)) .with_include_in_session(false) .with_detect_format(false); - lnav_data.ld_files_to_front.emplace_back(OUTPUT_NAME, 0); + lnav_data.ld_files_to_front.emplace_back(OUTPUT_NAME, 0_vl); if (lnav_data.ld_rl_view != nullptr) { lnav_data.ld_rl_view->set_alt_value( @@ -1063,7 +1024,7 @@ pipe_callback(exec_context& ec, const std::string& cmdline, auto_fd& fd) .with_fd(pp->get_fd()) .with_include_in_session(false) .with_detect_format(false); - lnav_data.ld_files_to_front.emplace_back(desc, 0); + lnav_data.ld_files_to_front.emplace_back(desc, 0_vl); if (lnav_data.ld_rl_view != nullptr) { lnav_data.ld_rl_view->set_alt_value(HELP_MSG_1(X, "to close the file")); } diff --git a/src/data_scanner_re.cc 
b/src/data_scanner_re.cc index 1fec4264..201c5867 100644 --- a/src/data_scanner_re.cc +++ b/src/data_scanner_re.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 3.0 on Sat Aug 20 21:19:55 2022 */ +/* Generated by re2c 3.0 on Mon Aug 22 22:00:24 2022 */ #line 1 "../../lnav/src/data_scanner_re.re" /** * Copyright (c) 2015, Timothy Stack diff --git a/src/document.sections.hh b/src/document.sections.hh index 9224d02b..94cd01ac 100644 --- a/src/document.sections.hh +++ b/src/document.sections.hh @@ -87,6 +87,10 @@ struct hier_node { template static void depth_first(hier_node* root, F func) { + if (root == nullptr) { + return; + } + for (auto& child : root->hn_children) { depth_first(child.get(), func); } diff --git a/src/field_overlay_source.cc b/src/field_overlay_source.cc index 21fdc5d0..288d0736 100644 --- a/src/field_overlay_source.cc +++ b/src/field_overlay_source.cc @@ -35,6 +35,7 @@ #include "config.h" #include "log_format_ext.hh" #include "log_vtab_impl.hh" +#include "md2attr_line.hh" #include "readline_highlighters.hh" #include "relative_time.hh" #include "vtab_module.hh" @@ -459,20 +460,41 @@ field_overlay_source::build_meta_line(const listview_curses& lv, if (!line_meta.bm_comment.empty()) { const auto* lead = line_meta.bm_tags.empty() ? 
" \u2514 " : " \u251c "; + md2attr_line mdal; attr_line_t al; - al.with_string(lead).append(lnav::roles::comment(line_meta.bm_comment)); - al.insert(0, filename_width, ' '); - if (tc != nullptr) { - auto hl = tc->get_highlights(); - auto hl_iter = hl.find({highlight_source_t::PREVIEW, "search"}); - - if (hl_iter != hl.end()) { - hl_iter->second.annotate(al, filename_width); - } + auto parse_res = md4cpp::parse(line_meta.bm_comment, mdal); + if (parse_res.isOk()) { + al = parse_res.unwrap(); + } else { + log_error("%d: cannot convert comment to markdown: %s", + (int) row, + parse_res.unwrapErr().c_str()); + al = line_meta.bm_comment; } - dst.emplace_back(al); + auto comment_lines = al.rtrim().split_lines(); + for (size_t lpc = 0; lpc < comment_lines.size(); lpc++) { + auto& comment_line = comment_lines[lpc]; + + if (lpc == 0 && comment_line.empty()) { + continue; + } + comment_line.with_attr_for_all(VC_ROLE.value(role_t::VCR_COMMENT)); + comment_line.insert( + 0, lpc == comment_lines.size() - 1 ? 
lead : " \u2502 "); + comment_line.insert(0, filename_width, ' '); + if (tc != nullptr) { + auto hl = tc->get_highlights(); + auto hl_iter = hl.find({highlight_source_t::PREVIEW, "search"}); + + if (hl_iter != hl.end()) { + hl_iter->second.annotate(comment_line, filename_width); + } + } + + dst.emplace_back(comment_line); + } } if (!line_meta.bm_tags.empty()) { attr_line_t al; diff --git a/src/file_collection.cc b/src/file_collection.cc index f490f79f..a0208c9d 100644 --- a/src/file_collection.cc +++ b/src/file_collection.cc @@ -66,6 +66,8 @@ child_poller::poll(file_collection& fc) return child_poll_result_t::ALIVE; }, [this, &fc](auto_pid& finished) { + require(this->cp_finalizer); + this->cp_finalizer(fc, finished); return child_poll_result_t::FINISHED; }); diff --git a/src/file_collection.hh b/src/file_collection.hh index 47a3788f..926f8f17 100644 --- a/src/file_collection.hh +++ b/src/file_collection.hh @@ -88,16 +88,20 @@ public: auto_pid&)> finalizer) : cp_child(std::move(child)), cp_finalizer(std::move(finalizer)) { + ensure(this->cp_finalizer); } child_poller(child_poller&& other) noexcept : cp_child(std::move(other.cp_child)), cp_finalizer(std::move(other.cp_finalizer)) { + ensure(this->cp_finalizer); } child_poller& operator=(child_poller&& other) noexcept { + require(other.cp_finalizer); + this->cp_child = std::move(other.cp_child); this->cp_finalizer = std::move(other.cp_finalizer); @@ -106,6 +110,10 @@ public: ~child_poller() noexcept = default; + child_poller(const child_poller&) = delete; + + child_poller& operator=(const child_poller&) = delete; + child_poll_result_t poll(file_collection& fc); private: diff --git a/src/file_vtab.cc b/src/file_vtab.cc index 919c39fc..f635936c 100644 --- a/src/file_vtab.cc +++ b/src/file_vtab.cc @@ -63,15 +63,9 @@ CREATE TABLE lnav_file ( explicit lnav_file(file_collection& fc) : lf_collection(fc) {} - iterator begin() - { - return this->lf_collection.fc_files.begin(); - } + iterator begin() { return 
this->lf_collection.fc_files.begin(); } - iterator end() - { - return this->lf_collection.fc_files.end(); - } + iterator end() { return this->lf_collection.fc_files.end(); } int get_column(const cursor& vc, sqlite3_context* ctx, int col) { @@ -170,14 +164,14 @@ CREATE TABLE lnav_file ( { vt->zErrMsg = sqlite3_mprintf("Rows cannot be deleted from this table"); return SQLITE_ERROR; - }; + } int insert_row(sqlite3_vtab* tab, sqlite3_int64& rowid_out) { tab->zErrMsg = sqlite3_mprintf("Rows cannot be inserted into this table"); return SQLITE_ERROR; - }; + } int update_row(sqlite3_vtab* tab, sqlite3_int64& rowid, @@ -223,15 +217,128 @@ CREATE TABLE lnav_file ( } return SQLITE_OK; - }; + } file_collection& lf_collection; }; +struct lnav_file_metadata { + static constexpr const char* NAME = "lnav_file_metadata"; + static constexpr const char* CREATE_STMT = R"( +-- Access the metadata embedded in open files +CREATE TABLE lnav_file_metadata ( + filepath text, -- The path to the file. + descriptor text, -- The descriptor that identifies the source of the metadata. + mimetype text, -- The MIME type of the metadata. + content text -- The metadata itself. 
+); +)"; + + struct cursor { + struct metadata_row { + metadata_row(std::shared_ptr lf, std::string desc) + : mr_logfile(lf), mr_descriptor(std::move(desc)) + { + } + std::shared_ptr mr_logfile; + std::string mr_descriptor; + }; + + sqlite3_vtab_cursor base; + lnav_file_metadata& c_meta; + std::vector::iterator c_iter; + std::vector c_rows; + + cursor(sqlite3_vtab* vt) + : base({vt}), + c_meta(((vtab_module::vtab*) vt)->v_impl) + { + for (auto& lf : this->c_meta.lfm_collection.fc_files) { + auto& lf_meta = lf->get_embedded_metadata(); + + for (const auto& meta_pair : lf_meta) { + this->c_rows.emplace_back(lf, meta_pair.first); + } + } + } + + ~cursor() { this->c_iter = this->c_rows.end(); } + + int next() + { + if (this->c_iter != this->c_rows.end()) { + ++this->c_iter; + } + return SQLITE_OK; + } + + int eof() { return this->c_iter == this->c_rows.end(); } + + int reset() + { + this->c_iter = this->c_rows.begin(); + return SQLITE_OK; + } + + int get_rowid(sqlite3_int64& rowid_out) + { + rowid_out = this->c_iter - this->c_rows.begin(); + + return SQLITE_OK; + } + }; + + explicit lnav_file_metadata(file_collection& fc) : lfm_collection(fc) {} + + int get_column(const cursor& vc, sqlite3_context* ctx, int col) + { + auto& mr = *vc.c_iter; + + switch (col) { + case 0: + to_sqlite(ctx, mr.mr_logfile->get_filename()); + break; + case 1: + to_sqlite(ctx, mr.mr_descriptor); + break; + case 2: + to_sqlite( + ctx, + fmt::to_string( + mr.mr_logfile->get_embedded_metadata()[mr.mr_descriptor] + .m_format)); + break; + case 3: + to_sqlite( + ctx, + fmt::to_string( + mr.mr_logfile->get_embedded_metadata()[mr.mr_descriptor] + .m_value)); + break; + default: + ensure(0); + break; + } + + return SQLITE_OK; + } + + file_collection& lfm_collection; +}; + struct injectable_lnav_file : vtab_module { using vtab_module::vtab_module; using injectable = injectable_lnav_file(file_collection&); }; +struct injectable_lnav_file_metadata + : vtab_module> { + using vtab_module>::vtab_module; + 
using injectable = injectable_lnav_file_metadata(file_collection&); +}; + static auto file_binder = injector::bind_multiple().add(); + +static auto file_meta_binder = injector::bind_multiple() + .add(); diff --git a/src/formats/glog_log.json b/src/formats/glog_log.json index 3413d61b..09fcff4d 100644 --- a/src/formats/glog_log.json +++ b/src/formats/glog_log.json @@ -7,6 +7,9 @@ "regex": { "std": { "pattern": "^(?[IWECF])(?\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}) +(?\\d+) (?[^:]+):(?\\d+)\\] (?.*)" + }, + "std-with-year": { + "pattern": "^(?[IWECF])(?\\d{8} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}) +(?\\d+) (?[^:]+):(?\\d+)\\] (?.*)" } }, "level-field": "level", @@ -39,6 +42,10 @@ }, { "line": "E0517 15:04:22.619632 52992 logging_unittest.cc:253] Log every 3, iteration 19" + }, + { + "line": "I20200308 23:47:32.089828 400441 config.cc:27] Loading user configuration: /home/aesophor/.config/wmderland/config", + "level": "info" } ] } diff --git a/src/fstat_vtab.cc b/src/fstat_vtab.cc index dd4b4985..6013c50d 100644 --- a/src/fstat_vtab.cc +++ b/src/fstat_vtab.cc @@ -104,7 +104,7 @@ CREATE TABLE fstat ( cursor(sqlite3_vtab* vt) : base({vt}) { memset(&this->c_stat, 0, sizeof(this->c_stat)); - }; + } void load_stat() { @@ -115,7 +115,7 @@ CREATE TABLE fstat ( { this->c_path_index += 1; } - }; + } int next() { @@ -124,25 +124,19 @@ CREATE TABLE fstat ( this->load_stat(); } - return SQLITE_OK; - }; - - int reset() - { return SQLITE_OK; } - int eof() - { - return this->c_path_index >= this->c_glob->gl_pathc; - }; + int reset() { return SQLITE_OK; } + + int eof() { return this->c_path_index >= this->c_glob->gl_pathc; } int get_rowid(sqlite3_int64& rowid_out) { rowid_out = this->c_path_index; return SQLITE_OK; - }; + } }; int get_column(const cursor& vc, sqlite3_context* ctx, int col) diff --git a/src/help.md b/src/help.md index 9eb8998a..d49cefd6 100644 --- a/src/help.md +++ b/src/help.md @@ -107,8 +107,8 @@ down to display the new lines, much like `tail -f`. 
On color displays, the lines will be highlighted as follows: -* Errors will be colored in ${ansi_red}red${ansi_norm}; -* warnings will be ${ansi_yellow}yellow${ansi_norm}; +* Errors will be colored in red; +* warnings will be yellow; * boundaries between days will be ${ansi_underline}underlined${ansi_norm}; and * various color highlights will be applied to: IP addresses, SQL keywords, XML tags, file and line numbers in Java backtraces, and quoted strings. diff --git a/src/highlighter.cc b/src/highlighter.cc index e684ba5c..636e1b14 100644 --- a/src/highlighter.cc +++ b/src/highlighter.cc @@ -100,12 +100,20 @@ highlighter::annotate_capture(attr_line_t& al, const line_range& lr) const void highlighter::annotate(attr_line_t& al, int start) const { + if (!this->h_regex) { + return; + } + auto& vc = view_colors::singleton(); const auto& str = al.get_string(); auto& sa = al.get_attrs(); auto sf = string_fragment::from_str_range( str, start, std::min(size_t{8192}, str.size())); + if (!sf.is_valid()) { + return; + } + pcre_context_static<60> pc; pcre_input pi(sf); diff --git a/src/hotkeys.cc b/src/hotkeys.cc index 9a8bc5a6..3237b0a9 100644 --- a/src/hotkeys.cc +++ b/src/hotkeys.cc @@ -162,6 +162,8 @@ handle_keyseq(const char* keyseq) auto& var_stack = ec.ec_local_vars; ec.ec_global_vars = lnav_data.ld_exec_context.ec_global_vars; + ec.ec_error_callback_stack + = lnav_data.ld_exec_context.ec_error_callback_stack; var_stack.push(std::map()); auto& vars = var_stack.top(); vars["keyseq"] = keyseq; @@ -174,11 +176,7 @@ handle_keyseq(const char* keyseq) } else { auto um = result.unwrapErr(); - um.um_snippets.clear(); - um.um_reason.clear(); - um.um_notes.clear(); - um.um_help.clear(); - lnav_data.ld_rl_view->set_attr_value(um.to_attr_line()); + ec.ec_error_callback_stack.back()(um); } if (!kc.kc_alt_msg.empty()) { @@ -295,12 +293,10 @@ handle_paging_key(int ch) break; case '>': { - std::pair range; - - tc->horiz_shift( - tc->get_top(), tc->get_bottom(), tc->get_left(), range); - 
if (range.second != INT_MAX) { - tc->set_left(range.second); + auto range_opt = tc->horiz_shift( + tc->get_top(), tc->get_bottom(), tc->get_left()); + if (range_opt && range_opt.value().second != INT_MAX) { + tc->set_left(range_opt.value().second); lnav_data.ld_rl_view->set_alt_value( HELP_MSG_1(m, "to bookmark a line")); } else { @@ -312,12 +308,10 @@ handle_paging_key(int ch) if (tc->get_left() == 0) { alerter::singleton().chime("no more search hits to the left"); } else { - std::pair range; - - tc->horiz_shift( - tc->get_top(), tc->get_bottom(), tc->get_left(), range); - if (range.first != -1) { - tc->set_left(range.first); + auto range_opt = tc->horiz_shift( + tc->get_top(), tc->get_bottom(), tc->get_left()); + if (range_opt && range_opt.value().first != -1) { + tc->set_left(range_opt.value().first); } else { tc->set_left(0); } @@ -395,8 +389,7 @@ handle_paging_key(int ch) tc->shift_top(1_vl); } if (lnav_data.ld_last_user_mark[tc] + 1 - >= tc->get_inner_height()) - { + >= tc->get_inner_height()) { break; } lnav_data.ld_last_user_mark[tc] += 1; @@ -442,8 +435,7 @@ handle_paging_key(int ch) case 'M': if (lnav_data.ld_last_user_mark.find(tc) - == lnav_data.ld_last_user_mark.end()) - { + == lnav_data.ld_last_user_mark.end()) { alerter::singleton().chime("no lines have been marked"); } else { int start_line = std::min((int) tc->get_top(), @@ -459,20 +451,20 @@ handle_paging_key(int ch) break; #if 0 - case 'S': - { - bookmark_vector::iterator iter; + case 'S': + { + bookmark_vector::iterator iter; - for (iter = bm[&textview_curses::BM_SEARCH].begin(); - iter != bm[&textview_curses::BM_SEARCH].end(); - ++iter) { - tc->toggle_user_mark(&textview_curses::BM_USER, *iter); - } + for (iter = bm[&textview_curses::BM_SEARCH].begin(); + iter != bm[&textview_curses::BM_SEARCH].end(); + ++iter) { + tc->toggle_user_mark(&textview_curses::BM_USER, *iter); + } - lnav_data.ld_last_user_mark[tc] = -1; - tc->reload_data(); - } - break; + lnav_data.ld_last_user_mark[tc] = -1; + 
tc->reload_data(); + } + break; #endif case 's': @@ -487,8 +479,7 @@ handle_paging_key(int ch) while (next_top < tc->get_inner_height()) { if (!lss->find_line(lss->at(next_top))->is_message()) { } else if (lss->get_line_accel_direction(next_top) - == log_accel::A_DECEL) - { + == log_accel::A_DECEL) { --next_top; tc->set_top(next_top); break; @@ -511,8 +502,7 @@ handle_paging_key(int ch) while (0 <= next_top && next_top < tc->get_inner_height()) { if (!lss->find_line(lss->at(next_top))->is_message()) { } else if (lss->get_line_accel_direction(next_top) - == log_accel::A_DECEL) - { + == log_accel::A_DECEL) { --next_top; tc->set_top(next_top); break; @@ -625,8 +615,7 @@ handle_paging_key(int ch) while (true) { if (ch == 'o') { if (++next_helper.lh_current_line - >= tc->get_inner_height()) - { + >= tc->get_inner_height()) { break; } } else { @@ -782,8 +771,7 @@ handle_paging_key(int ch) for (row = 0; row < dls.dls_rows.size(); row++) { if (strcmp(dls.dls_rows[row][log_line_index.value()], linestr.data()) - == 0) - { + == 0) { vis_line_t db_line(row); db_tc->set_top(db_line); @@ -821,8 +809,7 @@ handle_paging_key(int ch) size_t col_len = strlen(col_value); if (dts.scan(col_value, col_len, nullptr, &tm, tv) - != nullptr) - { + != nullptr) { lnav_data.ld_log_source.find_from_time(tv) | [tc](auto vl) { tc->set_top(vl); diff --git a/src/internals/cmd-ref.rst b/src/internals/cmd-ref.rst index 02154115..f23a31d1 100644 --- a/src/internals/cmd-ref.rst +++ b/src/internals/cmd-ref.rst @@ -161,7 +161,7 @@ :comment *text* ^^^^^^^^^^^^^^^ - Attach a comment to the top log line + Attach a comment to the top log line. The comment will be displayed right below the log message it is associated with. The comment can be formatted using markdown and you can add new-lines with '\n'. **Parameters** * **text\*** --- The comment text @@ -571,13 +571,13 @@ .. 
_goto: -:goto *line#|N%|timestamp* -^^^^^^^^^^^^^^^^^^^^^^^^^^ +:goto *line#|N%|timestamp|#anchor* +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Go to the given location in the top view **Parameters** - * **line#|N%|timestamp\*** --- A line number, percent into the file, or a timestamp + * **line#|N%|timestamp|#anchor\*** --- A line number, percent into the file, timestamp, or an anchor in a text file **Examples** To go to line 22: @@ -598,6 +598,12 @@ :goto 2017-01-01 + To go to the Screenshots section: + + .. code-block:: lnav + + :goto #screenshots + **See Also** :ref:`next_location`, :ref:`next_mark`, :ref:`prev_location`, :ref:`prev_mark`, :ref:`relative_goto` diff --git a/src/internals/sql-ref.rst b/src/internals/sql-ref.rst index 5200d980..6519b9ec 100644 --- a/src/internals/sql-ref.rst +++ b/src/internals/sql-ref.rst @@ -1626,7 +1626,7 @@ jget(*json*, *ptr*, *\[default\]*) Hello **See Also** - :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_array`, :ref:`json_group_object` + :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_array`, :ref:`json_group_object`, :ref:`yaml_to_json` ---- @@ -1710,7 +1710,7 @@ json_concat(*json*, *value*) [1,2,3,4,5] **See Also** - :ref:`jget`, :ref:`json_contains`, :ref:`json_group_array`, :ref:`json_group_object` + :ref:`jget`, :ref:`json_contains`, :ref:`json_group_array`, :ref:`json_group_object`, :ref:`yaml_to_json` ---- @@ -1742,7 +1742,7 @@ json_contains(*json*, *value*) 1 **See Also** - :ref:`jget`, :ref:`json_concat`, :ref:`json_group_array`, :ref:`json_group_object` + :ref:`jget`, :ref:`json_concat`, :ref:`json_group_array`, :ref:`json_group_object`, :ref:`yaml_to_json` ---- @@ -1773,7 +1773,7 @@ json_group_array(*value*) [1,2,3] **See Also** - :ref:`jget`, :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_object` + :ref:`jget`, :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_object`, :ref:`yaml_to_json` ---- @@ -1805,7 +1805,7 @@ json_group_object(*name*, *value*) {"a":1,"b":2} **See Also** 
- :ref:`jget`, :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_array` + :ref:`jget`, :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_array`, :ref:`yaml_to_json` ---- @@ -3731,6 +3731,30 @@ xpath(*xpath*, *xmldoc*) ---- +.. _yaml_to_json: + +yaml_to_json(*yaml*) +^^^^^^^^^^^^^^^^^^^^ + + Convert a YAML document to a JSON-encoded string + + **Parameters** + * **yaml\*** --- The YAML value to convert to JSON. + + **Examples** + To convert the document "abc: def": + + .. code-block:: custsqlite + + ;SELECT yaml_to_json('abc: def') + {"abc": "def"} + + **See Also** + :ref:`jget`, :ref:`json_concat`, :ref:`json_contains`, :ref:`json_group_array`, :ref:`json_group_object` + +---- + + .. _zeroblob: zeroblob(*N*) diff --git a/src/lnav.cc b/src/lnav.cc index 83045cb9..2e850942 100644 --- a/src/lnav.cc +++ b/src/lnav.cc @@ -118,6 +118,7 @@ #include "readline_curses.hh" #include "readline_highlighters.hh" #include "regexp_vtab.hh" +#include "scn/scn.h" #include "service_tags.hh" #include "session_data.hh" #include "spectro_source.hh" @@ -125,6 +126,7 @@ #include "sql_util.hh" #include "sqlite-extension-func.hh" #include "sqlitepp.client.hh" +#include "static_file_vtab.hh" #include "tailer/tailer.looper.hh" #include "term_extra.hh" #include "termios_guard.hh" @@ -1079,6 +1081,60 @@ looper() lnav_data.ld_rl_view->add_possibility( ln_mode_t::COMMAND, "levelname", level_names); + auto echo_views_stmt_res = prepare_stmt(lnav_data.ld_db, +#if SQLITE_VERSION_NUMBER < 3033000 + R"( + UPDATE lnav_views_echo + SET top = (SELECT top FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), + left = (SELECT left FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), + height = (SELECT height FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), + inner_height = (SELECT inner_height FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), + top_time = (SELECT top_time FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), + search = 
(SELECT search FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name) + WHERE EXISTS (SELECT * FROM lnav_views WHERE name = lnav_views_echo.name AND + ( + lnav_views.top != lnav_views_echo.top OR + lnav_views.left != lnav_views_echo.left OR + lnav_views.height != lnav_views_echo.height OR + lnav_views.inner_height != lnav_views_echo.inner_height OR + lnav_views.top_time != lnav_views_echo.top_time OR + lnav_views.search != lnav_views_echo.search + )) + )" +#else + R"( + UPDATE lnav_views_echo + SET top = orig.top, + left = orig.left, + height = orig.height, + inner_height = orig.inner_height, + top_time = orig.top_time, + search = orig.search + FROM (SELECT * FROM lnav_views) AS orig + WHERE orig.name = lnav_views_echo.name AND + ( + orig.top != lnav_views_echo.top OR + orig.left != lnav_views_echo.left OR + orig.height != lnav_views_echo.height OR + orig.inner_height != lnav_views_echo.inner_height OR + orig.top_time != lnav_views_echo.top_time OR + orig.search != lnav_views_echo.search + ) + )" +#endif + ); + + if (echo_views_stmt_res.isErr()) { + lnav::console::print( + stderr, + lnav::console::user_message::error( + "unable to prepare UPDATE statement for lnav_views_echo " + "table") + .with_reason(echo_views_stmt_res.unwrapErr())); + return; + } + auto echo_views_stmt = echo_views_stmt_res.unwrap(); + (void) signal(SIGINT, sigint); (void) signal(SIGTERM, sigint); (void) signal(SIGWINCH, sigwinch); @@ -1370,33 +1426,6 @@ looper() auto next_status_update_time = next_rebuild_time; auto next_rescan_time = next_rebuild_time; - auto echo_views_stmt = prepare_stmt(lnav_data.ld_db, -#if SQLITE_VERSION_NUMBER < 3033000 - R"( -UPDATE lnav_views_echo - SET top = (SELECT top FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), - left = (SELECT left FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), - height = (SELECT height FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), - inner_height = (SELECT inner_height FROM lnav_views 
WHERE lnav_views.name = lnav_views_echo.name), - top_time = (SELECT top_time FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name), - search = (SELECT search FROM lnav_views WHERE lnav_views.name = lnav_views_echo.name) -)" -#else - R"( -UPDATE lnav_views_echo - SET top = orig.top, - left = orig.left, - height = orig.height, - inner_height = orig.inner_height, - top_time = orig.top_time, - search = orig.search - FROM (SELECT * FROM lnav_views) AS orig - WHERE orig.name = lnav_views_echo.name -)" -#endif - ) - .unwrap(); - while (lnav_data.ld_looping) { auto loop_deadline = ui_clock::now() + (session_stage == 0 ? 3s : 50ms); @@ -1723,7 +1752,6 @@ UPDATE lnav_views_echo && lnav_data.ld_text_source.text_line_count() > 0) { ensure_view(&lnav_data.ld_views[LNV_TEXT]); - lnav_data.ld_views[LNV_TEXT].set_top(0_vl); lnav_data.ld_rl_view->set_alt_value(HELP_MSG_2( f, F, "to switch to the next/previous file")); } @@ -1811,8 +1839,12 @@ UPDATE lnav_views_echo { const auto& vs = session_data.sd_view_states[view_index]; + auto& tview = lnav_data.ld_views[view_index]; - if (vs.vs_top > 0) { + if (vs.vs_top > 0 && tview.get_top() == 0_vl) { + log_info("restoring %s view top: %d", + lnav_view_strings[view_index], + vs.vs_top); lnav_data.ld_views[view_index].set_top( vis_line_t(vs.vs_top)); } @@ -1820,7 +1852,7 @@ UPDATE lnav_views_echo if (lnav_data.ld_mode == ln_mode_t::FILES) { if (lnav_data.ld_active_files.fc_name_to_errors.empty()) { - log_debug("switching to paging!"); + log_info("switching to paging!"); lnav_data.ld_mode = ln_mode_t::PAGING; lnav_data.ld_active_files.fc_files | lnav::itertools::for_each( @@ -2017,6 +2049,7 @@ main(int argc, char* argv[]) } register_environ_vtab(lnav_data.ld_db.in()); + register_static_file_vtab(lnav_data.ld_db.in()); { static auto vtab_modules = injector::get>>(); @@ -2476,7 +2509,7 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' lnav_data.ld_pipers.push_back(pp); 
lnav_data.ld_active_files.fc_file_names[desc].with_fd( pp->get_fd()); - lnav_data.ld_files_to_front.template emplace_back(desc, 0); + lnav_data.ld_files_to_front.template emplace_back(desc, 0_vl); })) .add_input_delegate(lnav_data.ld_log_source) .set_tail_space(2_vl) @@ -2557,8 +2590,10 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' } } - load_format_extra( - lnav_data.ld_db.in(), lnav_data.ld_config_paths, loader_errors); + load_format_extra(lnav_data.ld_db.in(), + ec.ec_global_vars, + lnav_data.ld_config_paths, + loader_errors); load_format_vtabs(lnav_data.ld_vtab_manager.get(), loader_errors); if (!loader_errors.empty()) { @@ -2622,9 +2657,36 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' } for (auto& file_path : file_args) { + auto file_path_without_trailer = file_path; + auto file_loc = file_location_t{mapbox::util::no_init{}}; auto_mem abspath; struct stat st; + auto colon_index = file_path.rfind(':'); + if (colon_index != std::string::npos) { + file_path_without_trailer = file_path.substr(0, colon_index); + auto top_range = scn::string_view{&file_path[colon_index + 1], + &(*file_path.cend())}; + auto scan_res = scn::scan_value(top_range); + + if (scan_res) { + file_path_without_trailer = file_path.substr(0, colon_index); + file_loc = vis_line_t(scan_res.value()); + } else { + log_warning( + "failed to parse line number from file path with colon: %s", + file_path.c_str()); + } + } + auto hash_index = file_path.rfind('#'); + if (hash_index != std::string::npos) { + file_loc = file_path.substr(hash_index); + file_path_without_trailer = file_path.substr(0, hash_index); + } + if (stat(file_path_without_trailer.c_str(), &st) == 0) { + file_path = file_path_without_trailer; + } + if (file_path == "-") { load_stdin = true; } @@ -2711,6 +2773,10 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' } else { lnav_data.ld_active_files.fc_file_names.emplace( abspath.in(), 
logfile_open_options()); + if (file_loc.valid()) { + lnav_data.ld_files_to_front.emplace_back(abspath.in(), + file_loc); + } } } @@ -2933,13 +2999,13 @@ SELECT tbl_name FROM sqlite_master WHERE sql LIKE 'CREATE VIRTUAL TABLE%' log_tc->set_top(0_vl); text_tc = &lnav_data.ld_views[LNV_TEXT]; - text_tc->set_top(0_vl); - text_tc->set_height(vis_line_t(text_tc->get_inner_height())); + text_tc->set_height(vis_line_t(text_tc->get_inner_height() + - text_tc->get_top())); setup_highlights(lnav_data.ld_views[LNV_TEXT].get_highlights()); if (lnav_data.ld_log_source.text_line_count() == 0 && lnav_data.ld_text_source.text_line_count() > 0) { - toggle_view(&lnav_data.ld_views[LNV_TEXT]); + ensure_view(&lnav_data.ld_views[LNV_TEXT]); } log_info("Executing initial commands"); diff --git a/src/lnav.hh b/src/lnav.hh index 2f3022c1..9ed478e3 100644 --- a/src/lnav.hh +++ b/src/lnav.hh @@ -158,6 +158,8 @@ struct key_repeat_history { }; }; +using file_location_t = mapbox::util::variant; + struct lnav_data_t { std::map> ld_session_id; time_t ld_session_time; @@ -171,7 +173,7 @@ struct lnav_data_t { std::vector ld_config_paths; file_collection ld_active_files; std::list ld_child_pollers; - std::list> ld_files_to_front; + std::list> ld_files_to_front; bool ld_stdout_used; sig_atomic_t ld_looping; sig_atomic_t ld_winched; @@ -258,8 +260,7 @@ struct lnav_data_t { bool ld_show_help_view{false}; }; -struct static_service { -}; +struct static_service {}; class main_looper : public isc::service diff --git a/src/lnav.indexing.cc b/src/lnav.indexing.cc index 0bce9568..af771f4c 100644 --- a/src/lnav.indexing.cc +++ b/src/lnav.indexing.cc @@ -150,8 +150,8 @@ public: auto iter = session_data.sd_file_states.find(lf->get_filename()); if (iter != session_data.sd_file_states.end()) { - log_debug("found state for log file %d", - iter->second.fs_is_visible); + log_info(" found visibility state for log file: %d", + iter->second.fs_is_visible); lnav_data.ld_log_source.find_data(lf) | [&iter](auto ld) { 
ld->set_visibility(iter->second.fs_is_visible); @@ -181,7 +181,7 @@ public: } std::shared_ptr front_file; - int front_top{-1}; + file_location_t front_top; bool did_promotion{false}; }; @@ -219,12 +219,35 @@ rebuild_indexes(nonstd::optional deadline) old_bottoms[LNV_TEXT] = -1_vl; } - if (cb.front_top < 0) { - cb.front_top += text_view.get_inner_height(); - } - if (cb.front_top < text_view.get_inner_height()) { - text_view.set_top(vis_line_t(cb.front_top)); + nonstd::optional new_top_opt; + cb.front_top.match( + [&new_top_opt](vis_line_t vl) { + log_info("file open request to jump to line: %d", (int) vl); + if (vl < 0_vl) { + vl += lnav_data.ld_views[LNV_TEXT].get_inner_height(); + } + if (vl < lnav_data.ld_views[LNV_TEXT].get_inner_height()) { + new_top_opt = vl; + } + }, + [&new_top_opt](const std::string& loc) { + log_info("file open request to jump to anchor: %s", + loc.c_str()); + auto* ta = dynamic_cast( + lnav_data.ld_views[LNV_TEXT].get_sub_source()); + + if (ta != nullptr) { + new_top_opt = ta->row_for_anchor(loc); + } + }); + if (new_top_opt) { + log_info(" setting requested top line: %d", + (int) new_top_opt.value()); + text_view.set_top(new_top_opt.value()); + log_info(" actual top is now: %d", (int) text_view.get_top()); scroll_downs[LNV_TEXT] = false; + } else { + log_warning("could not jump to requested line"); } } if (cb.did_promotion && deadline) { @@ -364,6 +387,7 @@ update_active_files(file_collection& new_files) lnav_data.ld_active_files.fc_child_pollers.begin()), std::make_move_iterator( lnav_data.ld_active_files.fc_child_pollers.end())); + lnav_data.ld_active_files.fc_child_pollers.clear(); lnav::events::publish( lnav_data.ld_db.in(), new_files.fc_files, [](const auto& lf) { @@ -393,12 +417,19 @@ rescan_files(bool req) continue; } + if (lnav_data.ld_active_files.fc_name_to_errors.count(pair.first)) { + continue; + } + if (lnav_data.ld_active_files.fc_synced_files.count(pair.first) == 0) { all_synced = false; } } + if 
(!lnav_data.ld_active_files.fc_name_to_errors.empty()) { + return false; + } if (!all_synced) { delay = 30ms; } diff --git a/src/lnav_commands.cc b/src/lnav_commands.cc index 679041be..4228a2c4 100644 --- a/src/lnav_commands.cc +++ b/src/lnav_commands.cc @@ -335,10 +335,25 @@ com_goto(exec_context& ec, std::string cmdline, std::vector& args) std::string retval; if (args.empty()) { - args.emplace_back("move-time"); + args.emplace_back("move-args"); } else if (args.size() > 1) { std::string all_args = remaining_args(cmdline, args); auto* tc = *lnav_data.ld_view_stack.top(); + nonstd::optional dst_vl; + + if (startswith(all_args, "#")) { + auto* ta = dynamic_cast(tc->get_sub_source()); + + if (ta == nullptr) { + return ec.make_error("view does not support anchor links"); + } + + dst_vl = ta->row_for_anchor(all_args); + if (!dst_vl) { + return ec.make_error("unable to find anchor: {}", all_args); + } + } + auto* ttt = dynamic_cast(tc->get_sub_source()); int line_number, consumed; date_time_scanner dts; @@ -346,10 +361,12 @@ com_goto(exec_context& ec, std::string cmdline, std::vector& args) struct timeval tv; struct exttm tm; float value; - nonstd::optional dst_vl; auto parse_res = relative_time::from_str(all_args); - if (parse_res.isOk()) { + if (dst_vl) { + + } + else if (parse_res.isOk()) { if (ttt == nullptr) { return ec.make_error( "relative time values only work in a time-indexed view"); @@ -2355,7 +2372,6 @@ com_open(exec_context& ec, std::string cmdline, std::vector& args) } std::vector word_exp; - size_t colon_index; std::string pat; file_collection fc; @@ -2372,17 +2388,27 @@ com_open(exec_context& ec, std::string cmdline, std::vector& args) return ec.make_error("unable to parse arguments"); } - std::vector> files_to_front; + std::vector> files_to_front; std::vector closed_files; for (auto fn : split_args) { - int top = 0; + file_location_t file_loc; - if (access(fn.c_str(), R_OK) != 0 - && (colon_index = fn.rfind(':')) != std::string::npos) - { - if 
(sscanf(&fn.c_str()[colon_index + 1], "%d", &top) == 1) { - fn = fn.substr(0, colon_index); + if (access(fn.c_str(), R_OK) != 0) { + auto colon_index = fn.rfind(':'); + auto hash_index = fn.rfind('#'); + if (colon_index != std::string::npos) { + auto top_range = scn::string_view{ + &fn[colon_index + 1], &(*fn.cend())}; + auto scan_res = scn::scan_value(top_range); + + if (scan_res) { + fn = fn.substr(0, colon_index); + file_loc = vis_line_t(scan_res.value()); + } + } else if (hash_index != std::string::npos) { + file_loc = fn.substr(hash_index); + fn = fn.substr(0, hash_index); } } @@ -2398,7 +2424,7 @@ com_open(exec_context& ec, std::string cmdline, std::vector& args) break; } - files_to_front.emplace_back(fn, top); + files_to_front.emplace_back(fn, file_loc); retval = ""; break; } @@ -2418,7 +2444,7 @@ com_open(exec_context& ec, std::string cmdline, std::vector& args) ul->copy_fd()); isc::to().send( [ul](auto& clooper) { clooper.add_request(ul); }); - lnav_data.ld_files_to_front.emplace_back(fn, top); + lnav_data.ld_files_to_front.emplace_back(fn, file_loc); retval = "info: opened URL"; } else { retval = ""; @@ -2520,7 +2546,7 @@ com_open(exec_context& ec, std::string cmdline, std::vector& args) fn = abspath.in(); fc.fc_file_names.emplace(fn, logfile_open_options()); retval = "info: opened -- " + fn; - files_to_front.emplace_back(fn, top); + files_to_front.emplace_back(fn, file_loc); closed_files.push_back(fn); if (lnav_data.ld_rl_view != nullptr) { @@ -2862,7 +2888,7 @@ com_comment(exec_context& ec, if (ec.ec_dry_run) { return Ok(std::string()); } - textview_curses* tc = *lnav_data.ld_view_stack.top(); + auto* tc = *lnav_data.ld_view_stack.top(); if (tc != &lnav_data.ld_views[LNV_LOG]) { return ec.make_error( @@ -2871,12 +2897,15 @@ com_comment(exec_context& ec, auto& lss = lnav_data.ld_log_source; args[1] = trim(remaining_args(cmdline, args)); + auto unquoted = auto_buffer::alloc(args[1].size() + 1); + auto unquoted_len = unquote_content(unquoted.in(), 
args[1].c_str(), args[1].size(), 0); + unquoted.resize(unquoted_len + 1); tc->set_user_mark(&textview_curses::BM_META, tc->get_top(), true); auto& line_meta = lss.get_bookmark_metadata(tc->get_top()); - line_meta.bm_comment = args[1]; + line_meta.bm_comment = unquoted.in(); lss.set_line_meta_changed(); lss.text_filters_changed(); tc->reload_data(); @@ -2902,7 +2931,11 @@ com_comment_prompt(exec_context& ec, const std::string& cmdline) auto line_meta_opt = lss.find_bookmark_metadata(tc->get_top()); if (line_meta_opt && !line_meta_opt.value()->bm_comment.empty()) { - return trim(cmdline) + " " + trim(line_meta_opt.value()->bm_comment); + auto trimmed_comment = trim(line_meta_opt.value()->bm_comment); + auto buf = auto_buffer::alloc(trimmed_comment.size() + 16); + quote_content(buf, trimmed_comment, 0); + + return trim(cmdline) + " " + buf.to_string(); } return ""; @@ -4428,8 +4461,10 @@ com_quit(exec_context& ec, std::string cmdline, std::vector& args) static void command_prompt(std::vector& args) { - textview_curses* tc = *lnav_data.ld_view_stack.top(); + auto* tc = *lnav_data.ld_view_stack.top(); + auto* rlc = lnav_data.ld_rl_view; + rlc->clear_possibilities(ln_mode_t::COMMAND, "move-args"); if (lnav_data.ld_views[LNV_LOG].get_inner_height() > 0) { static const char* MOVE_TIMES[] = {"here", "now", "today", "yesterday", nullptr}; @@ -4497,10 +4532,9 @@ command_prompt(std::vector& args) ldh.clear(); - readline_curses* rlc = lnav_data.ld_rl_view; - rlc->clear_possibilities(ln_mode_t::COMMAND, "move-time"); rlc->add_possibility(ln_mode_t::COMMAND, "move-time", MOVE_TIMES); + rlc->add_possibility(ln_mode_t::COMMAND, "move-args", MOVE_TIMES); rlc->clear_possibilities(ln_mode_t::COMMAND, "line-time"); { struct timeval tv = lf->get_time_offset(); @@ -4509,6 +4543,7 @@ command_prompt(std::vector& args) sql_strftime( buffer, sizeof(buffer), ll->get_time(), ll->get_millis(), 'T'); rlc->add_possibility(ln_mode_t::COMMAND, "line-time", buffer); + 
rlc->add_possibility(ln_mode_t::COMMAND, "move-args", buffer); rlc->add_possibility(ln_mode_t::COMMAND, "move-time", buffer); sql_strftime(buffer, sizeof(buffer), @@ -4516,6 +4551,7 @@ command_prompt(std::vector& args) ll->get_millis() - (tv.tv_usec / 1000), 'T'); rlc->add_possibility(ln_mode_t::COMMAND, "line-time", buffer); + rlc->add_possibility(ln_mode_t::COMMAND, "move-args", buffer); rlc->add_possibility(ln_mode_t::COMMAND, "move-time", buffer); } } @@ -4537,6 +4573,11 @@ command_prompt(std::vector& args) add_file_possibilities(); add_recent_netlocs_possibilities(); + auto *ta = dynamic_cast(tc->get_sub_source()); + if (ta != nullptr) { + rlc->add_possibility(ln_mode_t::COMMAND, "move-args", ta->get_anchors()); + } + if (tc == &lnav_data.ld_views[LNV_LOG]) { add_filter_expr_possibilities( lnav_data.ld_rl_view, ln_mode_t::COMMAND, "filter-expr-syms"); @@ -4797,13 +4838,15 @@ readline_context::command_t STD_COMMANDS[] = { help_text(":goto") .with_summary("Go to the given location in the top view") .with_parameter( - help_text("line#|N%|timestamp", - "A line number, percent into the file, or a timestamp")) + help_text("line#|N%|timestamp|#anchor", + "A line number, percent into the file, timestamp, " + "or an anchor in a text file")) .with_examples( {{"To go to line 22", "22"}, {"To go to the line 75% of the way into the view", "75%"}, {"To go to the first message on the first day of 2017", - "2017-01-01"}}) + "2017-01-01"}, + {"To go to the Screenshots section", "#screenshots"}}) .with_tags({"navigation"})}, {"relative-goto", com_relative_goto, @@ -5326,7 +5369,11 @@ readline_context::command_t STD_COMMANDS[] = { com_comment, help_text(":comment") - .with_summary("Attach a comment to the top log line") + .with_summary( + "Attach a comment to the top log line. The comment will be " + "displayed right below the log message it is associated with. 
" + "The comment can be formatted using markdown and you can add " + "new-lines with '\\n'.") .with_parameter(help_text("text", "The comment text")) .with_example({"To add the comment 'This is where it all went " "wrong' to the top line", diff --git a/src/lnav_config.cc b/src/lnav_config.cc index e452a2bd..19f17552 100644 --- a/src/lnav_config.cc +++ b/src/lnav_config.cc @@ -620,42 +620,42 @@ static const struct json_path_container theme_styles_handlers = { .with_description("Styling for top-level headers") .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { - return &root->lt_style_header[0]; + return &root->lt_style_header[0].pp_value; }) .with_children(style_config_handlers), yajlpp::property_handler("h2") .with_description("Styling for 2nd-level headers") .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { - return &root->lt_style_header[1]; + return &root->lt_style_header[1].pp_value; }) .with_children(style_config_handlers), yajlpp::property_handler("h3") .with_description("Styling for 3rd-level headers") .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { - return &root->lt_style_header[2]; + return &root->lt_style_header[2].pp_value; }) .with_children(style_config_handlers), yajlpp::property_handler("h4") .with_description("Styling for 4th-level headers") .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { - return &root->lt_style_header[3]; + return &root->lt_style_header[3].pp_value; }) .with_children(style_config_handlers), yajlpp::property_handler("h5") .with_description("Styling for 5th-level headers") .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { - return &root->lt_style_header[4]; + return &root->lt_style_header[4].pp_value; }) .with_children(style_config_handlers), yajlpp::property_handler("h6") .with_description("Styling for 6th-level headers") .with_obj_provider( [](const yajlpp_provider_context& ypc, 
lnav_theme* root) { - return &root->lt_style_header[5]; + return &root->lt_style_header[5].pp_value; }) .with_children(style_config_handlers), yajlpp::property_handler("hr") @@ -844,10 +844,14 @@ static const struct json_path_container theme_log_level_styles_handlers = { "warning|error|critical|fatal|invalid)") .with_obj_provider( [](const yajlpp_provider_context& ypc, lnav_theme* root) { - style_config& sc = root->lt_level_styles[string2level( + auto& sc = root->lt_level_styles[string2level( ypc.ypc_extractor.get_substr_i("level").get())]; - return ≻ + if (ypc.ypc_parse_context != nullptr && sc.pp_path.empty()) { + sc.pp_path = ypc.ypc_parse_context->get_full_path(); + } + + return &sc.pp_value; }) .with_path_provider( [](struct lnav_theme* cfg, std::vector& paths_out) { diff --git a/src/log_format_loader.cc b/src/log_format_loader.cc index 04503aa8..ce65b165 100644 --- a/src/log_format_loader.cc +++ b/src/log_format_loader.cc @@ -1304,6 +1304,7 @@ load_formats(const std::vector& extra_paths, static void exec_sql_in_path(sqlite3* db, + const std::map& global_vars, const ghc::filesystem::path& path, std::vector& errors) { @@ -1321,7 +1322,7 @@ exec_sql_in_path(sqlite3* db, auto content = read_res.unwrap(); sql_execute_script( - db, filename.c_str(), content.c_str(), errors); + db, global_vars, filename.c_str(), content.c_str(), errors); } else { errors.emplace_back( lnav::console::user_message::error( @@ -1335,11 +1336,12 @@ exec_sql_in_path(sqlite3* db, void load_format_extra(sqlite3* db, + const std::map& global_vars, const std::vector& extra_paths, std::vector& errors) { for (const auto& extra_path : extra_paths) { - exec_sql_in_path(db, extra_path, errors); + exec_sql_in_path(db, global_vars, extra_path, errors); } } diff --git a/src/log_format_loader.hh b/src/log_format_loader.hh index 3c495db5..2e3e6bc3 100644 --- a/src/log_format_loader.hh +++ b/src/log_format_loader.hh @@ -40,6 +40,7 @@ #include "base/intern_string.hh" #include "base/lnav.console.hh" 
#include "ghc/filesystem.hpp" +#include "shlex.resolver.hh" class log_vtab_manager; @@ -54,6 +55,7 @@ void load_format_vtabs(log_vtab_manager* vtab_manager, std::vector& errors); void load_format_extra(sqlite3* db, + const std::map& global_vars, const std::vector& extra_paths, std::vector& errors); diff --git a/src/logfile.hh b/src/logfile.hh index 6ab6e0c0..264b63e3 100644 --- a/src/logfile.hh +++ b/src/logfile.hh @@ -100,6 +100,11 @@ public: using iterator = std::vector::iterator; using const_iterator = std::vector::const_iterator; + struct metadata { + text_format_t m_format; + std::string m_value; + }; + /** * Construct a logfile with the given arguments. * @@ -360,6 +365,16 @@ public: return this->lf_bookmark_metadata; } + std::map& get_embedded_metadata() + { + return this->lf_embedded_metadata; + } + + const std::map& get_embedded_metadata() const + { + return this->lf_embedded_metadata; + } + protected: /** * Process a line from the file. @@ -416,6 +431,7 @@ private: robin_hood::unordered_map lf_bookmark_metadata; std::vector> lf_applicable_taggers; + std::map lf_embedded_metadata; }; class logline_observer { diff --git a/src/logfile_sub_source.cc b/src/logfile_sub_source.cc index 8862ed9a..40c9d442 100644 --- a/src/logfile_sub_source.cc +++ b/src/logfile_sub_source.cc @@ -46,6 +46,7 @@ #include "lnav.events.hh" #include "log_accel.hh" #include "logfile_sub_source.cfg.hh" +#include "md2attr_line.hh" #include "readline_highlighters.hh" #include "relative_time.hh" #include "sql_util.hh" @@ -1908,9 +1909,19 @@ logfile_sub_source::meta_grepper::grep_value_for_line(vis_line_t line, if (!line_meta_opt) { value_out.clear(); } else { - bookmark_metadata& bm = *(line_meta_opt.value()); + auto& bm = *(line_meta_opt.value()); + + { + md2attr_line mdal; + + auto parse_res = md4cpp::parse(bm.bm_comment, mdal); + if (parse_res.isOk()) { + value_out.append(parse_res.unwrap().get_string()); + } else { + value_out.append(bm.bm_comment); + } + } - 
value_out.append(bm.bm_comment); value_out.append("\x1c"); for (const auto& tag : bm.bm_tags) { value_out.append(tag); diff --git a/src/md2attr_line.cc b/src/md2attr_line.cc index 4bfd47bc..4e1ddec0 100644 --- a/src/md2attr_line.cc +++ b/src/md2attr_line.cc @@ -33,6 +33,7 @@ #include "base/itertools.hh" #include "base/lnav_log.hh" #include "pcrepp/pcrepp.hh" +#include "pugixml/pugixml.hpp" #include "readline_highlighters.hh" #include "view_curses.hh" @@ -145,8 +146,7 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) last_block.append("\n"); } if (this->ml_list_stack.empty() - && !endswith(last_block.get_string(), "\n\n")) - { + && !endswith(last_block.get_string(), "\n\n")) { last_block.append("\n"); } } @@ -208,8 +208,7 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) for (auto line : block_text.split_lines()) { if (!cmd_block.empty() - && endswith(cmd_block.get_string(), "\\\n")) - { + && endswith(cmd_block.get_string(), "\\\n")) { cmd_block.append(line).append("\n"); continue; } @@ -361,8 +360,7 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) } } for (size_t line_index = 0; line_index < max_cell_lines; - line_index++) - { + line_index++) { size_t col = 0; for (const auto& cell : cells) { block_text.append(" "); @@ -392,6 +390,12 @@ md2attr_line::leave_block(const md4cpp::event_handler::block& bl) } else if (bl.is()) { this->ml_tables.back().t_rows.back().r_columns.push_back(block_text); } else { + if (bl.is()) { + if (startswith(block_text.get_string(), " triplet of output bytes + for(size_t rpos = 0; rpos < encoded.len; rpos += 4, d += 4) + { + if(d[2] == '=' || d[3] == '=') // skip the last quartet if it is padded + { + C4_ASSERT(d + 4 == encoded.str + encoded.len); + break; + } + uint32_t val = 0; + c4appendval_(d[3], 0); + c4appendval_(d[2], 1); + c4appendval_(d[1], 2); + c4appendval_(d[0], 3); + c4append_((val >> (2 * 8)) & full_byte); + c4append_((val >> (1 * 8)) & full_byte); + c4append_((val ) & 
full_byte); + } + // deal with the last quartet when it is padded + if(d == encoded.str + encoded.len) + return wpos; + if(d[2] == '=') // 2 padding chars + { + C4_ASSERT(d + 4 == encoded.str + encoded.len); + C4_ASSERT(d[3] == '='); + uint32_t val = 0; + c4appendval_(d[1], 2); + c4appendval_(d[0], 3); + c4append_((val >> (2 * 8)) & full_byte); + } + else if(d[3] == '=') // 1 padding char + { + C4_ASSERT(d + 4 == encoded.str + encoded.len); + uint32_t val = 0; + c4appendval_(d[2], 1); + c4appendval_(d[1], 2); + c4appendval_(d[0], 3); + c4append_((val >> (2 * 8)) & full_byte); + c4append_((val >> (1 * 8)) & full_byte); + } + return wpos; + #undef c4append_ + #undef c4appendval_ +} + +} // namespace c4 + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* C4CORE_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/c4core/src/c4/base64.cpp) + +#define C4_WINDOWS_POP_HPP_ + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/windows_push.hpp +// https://github.com/biojppm/c4core/src/c4/windows_push.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_WINDOWS_PUSH_HPP_ +#define _C4_WINDOWS_PUSH_HPP_ + +/** @file windows_push.hpp sets up macros to include windows header files + * without pulling in all of + * + * @see #include windows_pop.hpp to undefine these macros + * + * @see https://aras-p.info/blog/2018/01/12/Minimizing-windows.h/ */ + + +#if defined(_WIN64) || defined(_WIN32) + +#if defined(_M_AMD64) +# ifndef _AMD64_ +# define _c4_AMD64_ +# define _AMD64_ +# endif +#elif defined(_M_IX86) +# ifndef _X86_ +# define _c4_X86_ +# define _X86_ +# endif +#elif defined(_M_ARM64) +# ifndef _ARM64_ +# define _c4_ARM64_ +# define 
_ARM64_ +# endif +#elif defined(_M_ARM) +# ifndef _ARM_ +# define _c4_ARM_ +# define _ARM_ +# endif +#endif + +#ifndef NOMINMAX +# define _c4_NOMINMAX +# define NOMINMAX +#endif + +#ifndef NOGDI +# define _c4_NOGDI +# define NOGDI +#endif + +#ifndef VC_EXTRALEAN +# define _c4_VC_EXTRALEAN +# define VC_EXTRALEAN +#endif + +#ifndef WIN32_LEAN_AND_MEAN +# define _c4_WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +/* If defined, the following flags inhibit definition + * of the indicated items. + * + * NOGDICAPMASKS - CC_*, LC_*, PC_*, CP_*, TC_*, RC_ + * NOVIRTUALKEYCODES - VK_* + * NOWINMESSAGES - WM_*, EM_*, LB_*, CB_* + * NOWINSTYLES - WS_*, CS_*, ES_*, LBS_*, SBS_*, CBS_* + * NOSYSMETRICS - SM_* + * NOMENUS - MF_* + * NOICONS - IDI_* + * NOKEYSTATES - MK_* + * NOSYSCOMMANDS - SC_* + * NORASTEROPS - Binary and Tertiary raster ops + * NOSHOWWINDOW - SW_* + * OEMRESOURCE - OEM Resource values + * NOATOM - Atom Manager routines + * NOCLIPBOARD - Clipboard routines + * NOCOLOR - Screen colors + * NOCTLMGR - Control and Dialog routines + * NODRAWTEXT - DrawText() and DT_* + * NOGDI - All GDI defines and routines + * NOKERNEL - All KERNEL defines and routines + * NOUSER - All USER defines and routines + * NONLS - All NLS defines and routines + * NOMB - MB_* and MessageBox() + * NOMEMMGR - GMEM_*, LMEM_*, GHND, LHND, associated routines + * NOMETAFILE - typedef METAFILEPICT + * NOMINMAX - Macros min(a,b) and max(a,b) + * NOMSG - typedef MSG and associated routines + * NOOPENFILE - OpenFile(), OemToAnsi, AnsiToOem, and OF_* + * NOSCROLL - SB_* and scrolling routines + * NOSERVICE - All Service Controller routines, SERVICE_ equates, etc. + * NOSOUND - Sound driver routines + * NOTEXTMETRIC - typedef TEXTMETRIC and associated routines + * NOWH - SetWindowsHook and WH_* + * NOWINOFFSETS - GWL_*, GCL_*, associated routines + * NOCOMM - COMM driver routines + * NOKANJI - Kanji support stuff. + * NOHELP - Help engine interface. + * NOPROFILER - Profiler interface. 
+ * NODEFERWINDOWPOS - DeferWindowPos routines + * NOMCX - Modem Configuration Extensions + */ + +#endif /* defined(_WIN64) || defined(_WIN32) */ + +#endif /* _C4_WINDOWS_PUSH_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/windows_push.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/windows.hpp +// https://github.com/biojppm/c4core/src/c4/windows.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_WINDOWS_HPP_ +#define _C4_WINDOWS_HPP_ + +#if defined(_WIN64) || defined(_WIN32) +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/windows_push.hpp +//#include "c4/windows_push.hpp" +#if !defined(C4_WINDOWS_PUSH_HPP_) && !defined(_C4_WINDOWS_PUSH_HPP_) +#error "amalgamate: file c4/windows_push.hpp must have been included at this point" +#endif /* C4_WINDOWS_PUSH_HPP_ */ + +#include +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/windows_pop.hpp +//#include "c4/windows_pop.hpp" +#if !defined(C4_WINDOWS_POP_HPP_) && !defined(_C4_WINDOWS_POP_HPP_) +#error "amalgamate: file c4/windows_pop.hpp must have been included at this point" +#endif /* C4_WINDOWS_POP_HPP_ */ + +#endif + +#endif /* _C4_WINDOWS_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/windows.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/windows_pop.hpp +// https://github.com/biojppm/c4core/src/c4/windows_pop.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_WINDOWS_POP_HPP_ 
+#define _C4_WINDOWS_POP_HPP_ + +#if defined(_WIN64) || defined(_WIN32) + +#ifdef _c4_AMD64_ +# undef _c4_AMD64_ +# undef _AMD64_ +#endif +#ifdef _c4_X86_ +# undef _c4_X86_ +# undef _X86_ +#endif +#ifdef _c4_ARM_ +# undef _c4_ARM_ +# undef _ARM_ +#endif + +#ifdef _c4_NOMINMAX +# undef _c4_NOMINMAX +# undef NOMINMAX +#endif + +#ifdef NOGDI +# undef _c4_NOGDI +# undef NOGDI +#endif + +#ifdef VC_EXTRALEAN +# undef _c4_VC_EXTRALEAN +# undef VC_EXTRALEAN +#endif + +#ifdef WIN32_LEAN_AND_MEAN +# undef _c4_WIN32_LEAN_AND_MEAN +# undef WIN32_LEAN_AND_MEAN +#endif + +#endif /* defined(_WIN64) || defined(_WIN32) */ + +#endif /* _C4_WINDOWS_POP_HPP_ */ + + +// (end https://github.com/biojppm/c4core/src/c4/windows_pop.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/error.cpp +// https://github.com/biojppm/c4core/src/c4/error.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef C4CORE_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + + +//included above: +//#include +//included above: +//#include +//included above: +//#include + +#define C4_LOGF_ERR(...) fprintf(stderr, __VA_ARGS__); fflush(stderr) +#define C4_LOGF_WARN(...) fprintf(stderr, __VA_ARGS__); fflush(stderr) +#define C4_LOGP(msg, ...) 
printf(msg) + +#if defined(C4_XBOX) || (defined(C4_WIN) && defined(C4_MSVC)) +// amalgamate: removed include of +// https://github.com/biojppm/c4core/src/c4/windows.hpp +//# include "c4/windows.hpp" +#if !defined(C4_WINDOWS_HPP_) && !defined(_C4_WINDOWS_HPP_) +#error "amalgamate: file c4/windows.hpp must have been included at this point" +#endif /* C4_WINDOWS_HPP_ */ + +#elif defined(C4_PS4) +# include +#elif defined(C4_UNIX) || defined(C4_LINUX) +# include +//included above: +//# include +# include +#elif defined(C4_MACOS) || defined(C4_IOS) +//included above: +//# include +# include +# include +# include +#endif +// the amalgamation tool is dumb and was omitting this include under MACOS. +// So do it only once: +#if defined(C4_UNIX) || defined(C4_LINUX) || defined(C4_MACOS) || defined(C4_IOS) +# include +#endif + +#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) +# include +#endif + +#ifdef __clang__ +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wformat-nonliteral" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-nonliteral" +#endif + + +//----------------------------------------------------------------------------- +namespace c4 { + +static error_flags s_error_flags = ON_ERROR_DEFAULTS; +static error_callback_type s_error_callback = nullptr; + +//----------------------------------------------------------------------------- + +error_flags get_error_flags() +{ + return s_error_flags; +} +void set_error_flags(error_flags flags) +{ + s_error_flags = flags; +} + +error_callback_type get_error_callback() +{ + return s_error_callback; +} +/** Set the function which is called when an error occurs. */ +void set_error_callback(error_callback_type cb) +{ + s_error_callback = cb; +} + +//----------------------------------------------------------------------------- + +void handle_error(srcloc where, const char *fmt, ...) 
+{ + char buf[1024]; + size_t msglen = 0; + if(s_error_flags & (ON_ERROR_LOG|ON_ERROR_CALLBACK)) + { + va_list args; + va_start(args, fmt); + int ilen = vsnprintf(buf, sizeof(buf), fmt, args); // ss.vprintf(fmt, args); + va_end(args); + msglen = ilen >= 0 && ilen < (int)sizeof(buf) ? static_cast(ilen) : sizeof(buf)-1; + } + + if(s_error_flags & ON_ERROR_LOG) + { + C4_LOGF_ERR("\n"); +#if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_ERR("%s:%d: ERROR: %s\n", where.file, where.line, buf); + C4_LOGF_ERR("%s:%d: ERROR here: %s\n", where.file, where.line, where.func); +#elif defined(C4_ERROR_SHOWS_FILELINE) + C4_LOGF_ERR("%s:%d: ERROR: %s\n", where.file, where.line, buf); +#elif ! defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_ERR("ERROR: %s\n", buf); +#endif + } + + if(s_error_flags & ON_ERROR_CALLBACK) + { + if(s_error_callback) + { + s_error_callback(buf, msglen/*ss.c_strp(), ss.tellp()*/); + } + } + + if(s_error_flags & ON_ERROR_ABORT) + { + abort(); + } + + if(s_error_flags & ON_ERROR_THROW) + { +#if defined(C4_EXCEPTIONS_ENABLED) && defined(C4_ERROR_THROWS_EXCEPTION) + throw Exception(buf); +#else + abort(); +#endif + } +} + +//----------------------------------------------------------------------------- + +void handle_warning(srcloc where, const char *fmt, ...) +{ + va_list args; + char buf[1024]; //sstream ss; + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + C4_LOGF_WARN("\n"); +#if defined(C4_ERROR_SHOWS_FILELINE) && defined(C4_ERROR_SHOWS_FUNC) + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); + C4_LOGF_WARN("%s:%d: WARNING: here: %s\n", where.file, where.line, where.func); +#elif defined(C4_ERROR_SHOWS_FILELINE) + C4_LOGF_WARN("%s:%d: WARNING: %s\n", where.file, where.line, buf/*ss.c_strp()*/); +#elif ! 
/** Query whether a debugger is attached to the current process.
 *
 * - Unix/Linux: parses the "TracerPid:" field of /proc/self/status. The
 *   result is computed on the first call and cached in a function-local
 *   static for subsequent calls.
 * - PS4 / Windows: defers to the platform API.
 * - macOS/iOS: checks the P_TRACED flag reported by sysctl().
 * - any other platform: always returns false.
 *
 * @return true when a debugger was detected
 */
bool is_debugger_attached()
{
#if defined(C4_UNIX) || defined(C4_LINUX)
    static bool first_call = true;
    static bool first_call_result = false;
    if(first_call)
    {
        first_call = false;
        //! @see http://stackoverflow.com/questions/3596781/how-to-detect-if-the-current-process-is-being-run-by-gdb
        //! (this answer: http://stackoverflow.com/a/24969863/3968589 )
        char buf[1024] = "";

        int status_fd = open("/proc/self/status", O_RDONLY);
        if (status_fd == -1)
        {
            return false;
        }

        // Read at most sizeof(buf)-1 bytes so there is always room for the
        // terminator required by strstr() below.
        ssize_t num_read = ::read(status_fd, buf, sizeof(buf) - 1);
        // The descriptor is no longer needed; it was previously leaked.
        ::close(status_fd);

        if (num_read > 0)
        {
            static const char TracerPid[] = "TracerPid:";
            buf[num_read] = '\0';
            const char *tracer_pid = strstr(buf, TracerPid);
            if (tracer_pid)
            {
                // a non-zero tracer pid means some process is tracing us
                first_call_result = !!::atoi(tracer_pid + sizeof(TracerPid) - 1);
            }
        }
    }
    return first_call_result;
#elif defined(C4_PS4)
    return (sceDbgIsDebuggerAttached() != 0);
#elif defined(C4_XBOX) || (defined(C4_WIN) && defined(C4_MSVC))
    return IsDebuggerPresent() != 0;
#elif defined(C4_MACOS) || defined(C4_IOS)
    // https://stackoverflow.com/questions/2200277/detecting-debugger-on-mac-os-x
    // Returns true if the current process is being debugged (either
    // running under the debugger or has a debugger attached post facto).
    int junk;
    int mib[4];
    struct kinfo_proc info;
    size_t size;

    // Initialize the flags so that, if sysctl fails for some bizarre
    // reason, we get a predictable result.

    info.kp_proc.p_flag = 0;

    // Initialize mib, which tells sysctl the info we want, in this case
    // we're looking for information about a specific process ID.

    mib[0] = CTL_KERN;
    mib[1] = KERN_PROC;
    mib[2] = KERN_PROC_PID;
    mib[3] = getpid();

    // Call sysctl.

    size = sizeof(info);
    junk = sysctl(mib, sizeof(mib) / sizeof(*mib), &info, &size, NULL, 0);
    assert(junk == 0);
    (void)junk; // silence the unused-variable warning when NDEBUG removes the assert

    // We're being debugged if the P_TRACED flag is set.
    return ((info.kp_proc.p_flag & P_TRACED) != 0);
#else
    return false;
#endif
} // is_debugger_attached()
/** Runtime check that is evaluated whenever the macro is expanded
 * (RYML_ASSERT forwards to it only when RYML_USE_ASSERT is non-zero).
 * When @p cond is false, C4_DEBUG_BREAK() is invoked and then
 * c4::yml::error() is called with the message "check failed: <cond>" and
 * the current __FILE__/__LINE__ as location.
 * The do/while(0) wrapper makes the expansion behave as a single statement. */
#define RYML_CHECK(cond)                                                \
    do {                                                                \
        if(!(cond))                                                     \
        {                                                               \
            C4_DEBUG_BREAK();                                           \
            c4::yml::error("check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
        }                                                               \
    } while(0)

/** Same as RYML_CHECK, but prefixes the reported text with @p msg.
 * @p msg is pasted next to other string literals, so it must itself be a
 * string literal. */
#define RYML_CHECK_MSG(cond, msg)                                       \
    do                                                                  \
    {                                                                   \
        if(!(cond))                                                     \
        {                                                               \
            C4_DEBUG_BREAK();                                           \
            c4::yml::error(msg ": check failed: " #cond, c4::yml::Location(__FILE__, __LINE__, 0)); \
        }                                                               \
    } while(0)
+//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +//! holds a position into a source buffer +struct RYML_EXPORT LineCol +{ + //! number of bytes from the beginning of the source buffer + size_t offset; + //! line + size_t line; + //! column + size_t col; + + LineCol() : offset(), line(), col() {} + //! construct from line and column + LineCol(size_t l, size_t c) : offset(0), line(l), col(c) {} + //! construct from offset, line and column + LineCol(size_t o, size_t l, size_t c) : offset(o), line(l), col(c) {} +}; + + +//! a source file position +struct RYML_EXPORT Location : public LineCol +{ + csubstr name; + + operator bool () const { return !name.empty() || line != 0 || offset != 0; } + + Location() : LineCol(), name() {} + Location( size_t l, size_t c) : LineCol{ l, c}, name( ) {} + Location( csubstr n, size_t l, size_t c) : LineCol{ l, c}, name(n) {} + Location( csubstr n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(n) {} + Location(const char *n, size_t l, size_t c) : LineCol{ l, c}, name(to_csubstr(n)) {} + Location(const char *n, size_t b, size_t l, size_t c) : LineCol{b, l, c}, name(to_csubstr(n)) {} +}; + + +//----------------------------------------------------------------------------- + +/** the type of the function used to report errors. This function must + * interrupt execution, either by raising an exception or calling + * std::abort(). */ +using pfn_error = void (*)(const char* msg, size_t msg_len, Location location, void *user_data); +/** the type of the function used to allocate memory */ +using pfn_allocate = void* (*)(size_t len, void* hint, void *user_data); +/** the type of the function used to free memory */ +using pfn_free = void (*)(void* mem, size_t size, void *user_data); + +/** trigger an error: call the current error callback. 
/** Unconditionally report @p msg_literal through the error function of the
 * callbacks object @p cb. @p msg_literal must be a string literal. The
 * length passed to the callback excludes the terminating NUL, matching the
 * N-1 convention of the error() overloads taking a char array. */
#define _RYML_CB_ERR(cb, msg_literal)                                   \
do                                                                      \
{                                                                       \
    const char msg[] = msg_literal;                                     \
    C4_DEBUG_BREAK();                                                   \
    (cb).m_error(msg, sizeof(msg) - 1, c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \
} while(0)
/** Report "check failed: <cond>" through @p cb when @p cond is false.
 * Always active, independently of RYML_USE_ASSERT. */
#define _RYML_CB_CHECK(cb, cond)                                        \
    do                                                                  \
    {                                                                   \
        if(!(cond))                                                     \
        {                                                               \
            const char msg[] = "check failed: " #cond;                  \
            C4_DEBUG_BREAK();                                           \
            (cb).m_error(msg, sizeof(msg) - 1, c4::yml::Location(__FILE__, 0, __LINE__, 0), (cb).m_user_data); \
        }                                                               \
    } while(0)
// RYML_USE_ASSERT is always defined at this point (it defaults to
// C4_USE_ASSERT), so its VALUE must be tested: `#ifdef` would keep the
// assertions enabled even when they were explicitly turned off.
#if RYML_USE_ASSERT
#define _RYML_CB_ASSERT(cb, cond) _RYML_CB_CHECK((cb), (cond))
#else
#define _RYML_CB_ASSERT(cb, cond) do {} while(0)
#endif
(cb).m_allocate((num) * sizeof(T), (hint), (cb).m_user_data) +#define _RYML_CB_ALLOC(cb, T, num) _RYML_CB_ALLOC_HINT((cb), (T), (num), nullptr) +#define _RYML_CB_FREE(cb, buf, T, num) \ + do { \ + (cb).m_free((buf), (num) * sizeof(T), (cb).m_user_data); \ + (buf) = nullptr; \ + } while(0) + + + +namespace detail { +template +struct _charconstant_t + : public std::conditional::value, + std::integral_constant, + std::integral_constant>::type +{}; +#define _RYML_CHCONST(signedval, unsignedval) ::c4::yml::detail::_charconstant_t::value +} // namespace detail + + +namespace detail { +struct _SubstrWriter +{ + substr buf; + size_t pos; + _SubstrWriter(substr buf_, size_t pos_=0) : buf(buf_), pos(pos_) {} + void append(csubstr s) + { + C4_ASSERT(!s.overlaps(buf)); + if(pos + s.len <= buf.len) + memcpy(buf.str + pos, s.str, s.len); + pos += s.len; + } + void append(char c) + { + if(pos < buf.len) + buf.str[pos] = c; + ++pos; + } + void append_n(char c, size_t numtimes) + { + if(pos + numtimes < buf.len) + memset(buf.str + pos, c, numtimes); + pos += numtimes; + } + size_t slack() const { return pos <= buf.len ? buf.len - pos : 0; } + size_t excess() const { return pos > buf.len ? pos - buf.len : 0; } + //! get the part written so far + csubstr curr() const { return pos <= buf.len ? buf.first(pos) : buf; } + //! get the part that is still free to write to (the remainder) + substr rem() { return pos < buf.len ? 
buf.sub(pos) : buf.last(0); } + + size_t advance(size_t more) { pos += more; return pos; } +}; +} // namespace detail + +/// @endcond + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_COMMON_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tree.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_TREE_HPP_ +#define _C4_YML_TREE_HPP_ + + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/types.hpp +//#include "c4/types.hpp" +#if !defined(C4_TYPES_HPP_) && !defined(_C4_TYPES_HPP_) +#error "amalgamate: file c4/types.hpp must have been included at this point" +#endif /* C4_TYPES_HPP_ */ + +#ifndef _C4_YML_COMMON_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ + +#endif + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/charconv.hpp +//#include +#if !defined(C4_CHARCONV_HPP_) && !defined(_C4_CHARCONV_HPP_) +#error "amalgamate: file c4/charconv.hpp must have been included at this point" +#endif /* C4_CHARCONV_HPP_ */ + +//included above: 
+//#include +//included above: +//#include + + +C4_SUPPRESS_WARNING_MSVC_PUSH +C4_SUPPRESS_WARNING_MSVC(4251) // needs to have dll-interface to be used by clients of struct +C4_SUPPRESS_WARNING_MSVC(4296) // expression is always 'boolean_value' +C4_SUPPRESS_WARNING_GCC_CLANG_PUSH +C4_SUPPRESS_WARNING_GCC("-Wtype-limits") + + +namespace c4 { +namespace yml { + +struct NodeScalar; +struct NodeInit; +struct NodeData; +class NodeRef; +class Tree; + + +/** encode a floating point value to a string. */ +template +size_t to_chars_float(substr buf, T val) +{ + C4_SUPPRESS_WARNING_GCC_CLANG_WITH_PUSH("-Wfloat-equal"); + static_assert(std::is_floating_point::value, "must be floating point"); + if(C4_UNLIKELY(std::isnan(val))) + return to_chars(buf, csubstr(".nan")); + else if(C4_UNLIKELY(val == std::numeric_limits::infinity())) + return to_chars(buf, csubstr(".inf")); + else if(C4_UNLIKELY(val == -std::numeric_limits::infinity())) + return to_chars(buf, csubstr("-.inf")); + return to_chars(buf, val); + C4_SUPPRESS_WARNING_GCC_CLANG_POP +} + + +/** decode a floating point from string. 
/** identifiers for the YAML type tags; the underlying type is tag_bits.
 * The descriptions are those of the YAML type repository at
 * https://yaml.org/type/ */
typedef enum : tag_bits {
    // container types
    TAG_NONE      =  0,
    TAG_MAP       =  1, /**< !!map Unordered set of key: value pairs without duplicates. @see https://yaml.org/type/map.html */
    TAG_OMAP      =  2, /**< !!omap Ordered sequence of key: value pairs without duplicates. @see https://yaml.org/type/omap.html */
    TAG_PAIRS     =  3, /**< !!pairs Ordered sequence of key: value pairs allowing duplicates. @see https://yaml.org/type/pairs.html */
    TAG_SET       =  4, /**< !!set Unordered set of non-equal values. @see https://yaml.org/type/set.html */
    TAG_SEQ       =  5, /**< !!seq Sequence of arbitrary values. @see https://yaml.org/type/seq.html */
    // scalar types
    TAG_BINARY    =  6, /**< !!binary A sequence of zero or more octets (8 bit values). @see https://yaml.org/type/binary.html */
    TAG_BOOL      =  7, /**< !!bool Mathematical Booleans. @see https://yaml.org/type/bool.html */
    TAG_FLOAT     =  8, /**< !!float Floating-point approximation to real numbers. @see https://yaml.org/type/float.html */
    TAG_INT       =  9, /**< !!int Mathematical integers. @see https://yaml.org/type/int.html */
    TAG_MERGE     = 10, /**< !!merge Specify one or more mapping to be merged with the current one. @see https://yaml.org/type/merge.html */
    TAG_NULL      = 11, /**< !!null Devoid of value. @see https://yaml.org/type/null.html */
    TAG_STR       = 12, /**< !!str A sequence of zero or more Unicode characters. @see https://yaml.org/type/str.html */
    TAG_TIMESTAMP = 13, /**< !!timestamp A point in time. @see https://yaml.org/type/timestamp.html */
    TAG_VALUE     = 14, /**< !!value Specify the default value of a mapping. @see https://yaml.org/type/value.html */
    TAG_YAML      = 15, /**< !!yaml Keys for encoding YAML in YAML. @see https://yaml.org/type/yaml.html */
} YamlTag_e;
/** a bit mask for marking node types.
 * Every enumerator is either a single bit of type_bits or a combination
 * of other enumerators. Note that STREAM contains the SEQ bit, so a
 * stream node also tests positive for SEQ. */
typedef enum : type_bits {
    // a convenience define, undefined below
    #define c4bit(v) (type_bits(1) << v)
    NOTYPE  = 0,            ///< no node type is set
    VAL     = c4bit(0),     ///< a leaf node, has a (possibly empty) value
    KEY     = c4bit(1),     ///< is member of a map, must have non-empty key
    MAP     = c4bit(2),     ///< a map: a parent of keyvals
    SEQ     = c4bit(3),     ///< a seq: a parent of vals
    DOC     = c4bit(4),     ///< a document
    STREAM  = c4bit(5)|SEQ, ///< a stream: a seq of docs
    KEYREF  = c4bit(6),     ///< a *reference: the key references an &anchor
    VALREF  = c4bit(7),     ///< a *reference: the val references an &anchor
    KEYANCH = c4bit(8),     ///< the key has an &anchor
    VALANCH = c4bit(9),     ///< the val has an &anchor
    KEYTAG  = c4bit(10),    ///< the key has an explicit tag/type
    VALTAG  = c4bit(11),    ///< the val has an explicit tag/type
    _TYMASK = c4bit(12)-1,  // all the bits up to here (bits 0 to 11)
    VALQUO  = c4bit(12),    ///< the val is quoted by '', "", > or |
    KEYQUO  = c4bit(13),    ///< the key is quoted by '', "", > or |
    // convenience combinations of the bits above
    KEYVAL  = KEY|VAL,
    KEYSEQ  = KEY|SEQ,
    KEYMAP  = KEY|MAP,
    DOCMAP  = DOC|MAP,
    DOCSEQ  = DOC|SEQ,
    DOCVAL  = DOC|VAL,
    // these flags are from a work in progress and should not be used yet
    _WIP_STYLE_FLOW_SL = c4bit(14), ///< mark container with single-line flow format (seqs as '[val1,val2]', maps as '{key: val, key2: val2}')
    _WIP_STYLE_FLOW_ML = c4bit(15), ///< mark container with multi-line flow format (seqs as '[val1,\nval2]', maps as '{key: val,\nkey2: val2}')
    _WIP_STYLE_BLOCK   = c4bit(16), ///< mark container with block format (seqs as '- val\n', maps as 'key: val')
    _WIP_KEY_LITERAL   = c4bit(17), ///< mark key scalar as multiline, block literal |
    _WIP_VAL_LITERAL   = c4bit(18), ///< mark val scalar as multiline, block literal |
    _WIP_KEY_FOLDED    = c4bit(19), ///< mark key scalar as multiline, block folded >
    _WIP_VAL_FOLDED    = c4bit(20), ///< mark val scalar as multiline, block folded >
    _WIP_KEY_SQUO      = c4bit(21), ///< mark key scalar as single quoted
    _WIP_VAL_SQUO      = c4bit(22), ///< mark val scalar as single quoted
    _WIP_KEY_DQUO      = c4bit(23), ///< mark key scalar as double quoted
    _WIP_VAL_DQUO      = c4bit(24), ///< mark val scalar as double quoted
    _WIP_KEY_PLAIN     = c4bit(25), ///< mark key scalar as plain scalar (unquoted, even when multiline)
    _WIP_VAL_PLAIN     = c4bit(26), ///< mark val scalar as plain scalar (unquoted, even when multiline)
    // masks covering every key (resp. val) scalar style flag
    _WIP_KEY_STYLE     = _WIP_KEY_LITERAL|_WIP_KEY_FOLDED|_WIP_KEY_SQUO|_WIP_KEY_DQUO|_WIP_KEY_PLAIN,
    _WIP_VAL_STYLE     = _WIP_VAL_LITERAL|_WIP_VAL_FOLDED|_WIP_VAL_SQUO|_WIP_VAL_DQUO|_WIP_VAL_PLAIN,
    _WIP_KEY_FT_NL     = c4bit(27), ///< features: mark key scalar as having \n in its contents
    _WIP_VAL_FT_NL     = c4bit(28), ///< features: mark val scalar as having \n in its contents
    _WIP_KEY_FT_SQ     = c4bit(29), ///< features: mark key scalar as having single quotes in its contents
    _WIP_VAL_FT_SQ     = c4bit(30), ///< features: mark val scalar as having single quotes in its contents
    _WIP_KEY_FT_DQ     = c4bit(31), ///< features: mark key scalar as having double quotes in its contents
    _WIP_VAL_FT_DQ     = c4bit(32), ///< features: mark val scalar as having double quotes in its contents
    #undef c4bit
} NodeType_e;
/** wraps a NodeType_e element with some syntactic sugar and predicates.
 * The wrapper converts implicitly to (a reference to) the wrapped
 * NodeType_e, so it can be used directly in bitwise expressions. */
struct NodeType
{
public:

    NodeType_e type; ///< the wrapped bit mask

public:

    C4_ALWAYS_INLINE operator NodeType_e      & C4_RESTRICT ()       { return type; }
    C4_ALWAYS_INLINE operator NodeType_e const& C4_RESTRICT () const { return type; }

    C4_ALWAYS_INLINE NodeType() : type(NOTYPE) {}
    C4_ALWAYS_INLINE NodeType(NodeType_e t) : type(t) {}
    C4_ALWAYS_INLINE NodeType(type_bits t) : type((NodeType_e)t) {}

    /// string describing the current type; forwards to the static overload
    C4_ALWAYS_INLINE const char *type_str() const { return type_str(type); }
    static const char* type_str(NodeType_e t);

    /// replace the whole mask
    C4_ALWAYS_INLINE void set(NodeType_e t) { type = t; }
    C4_ALWAYS_INLINE void set(type_bits  t) { type = (NodeType_e)t; }

    /// set the given bits, keeping the bits already set
    C4_ALWAYS_INLINE void add(NodeType_e t) { type = (NodeType_e)(type|t); }
    C4_ALWAYS_INLINE void add(type_bits  t) { type = (NodeType_e)(type|t); }

    /// clear the given bits, keeping all other bits
    C4_ALWAYS_INLINE void rem(NodeType_e t) { type = (NodeType_e)(type & ~t); }
    C4_ALWAYS_INLINE void rem(type_bits  t) { type = (NodeType_e)(type & ~t); }

    /// reset to NOTYPE
    C4_ALWAYS_INLINE void clear() { type = NOTYPE; }

public:

    #if defined(__clang__)
    #   pragma clang diagnostic push
    #   pragma clang diagnostic ignored "-Wnull-dereference"
    #elif defined(__GNUC__)
    #   pragma GCC diagnostic push
    #   if __GNUC__ >= 6
    #       pragma GCC diagnostic ignored "-Wnull-dereference"
    #   endif
    #endif

    // STREAM is a multi-bit value (it includes SEQ): every one of its bits must be set
    C4_ALWAYS_INLINE bool is_stream() const { return ((type & STREAM) == STREAM) != 0; }
    C4_ALWAYS_INLINE bool is_doc() const { return (type & DOC) != 0; }
    C4_ALWAYS_INLINE bool is_container() const { return (type & (MAP|SEQ|STREAM)) != 0; }
    C4_ALWAYS_INLINE bool is_map() const { return (type & MAP) != 0; }
    C4_ALWAYS_INLINE bool is_seq() const { return (type & SEQ) != 0; }
    C4_ALWAYS_INLINE bool has_val() const { return (type & VAL) != 0; }
    C4_ALWAYS_INLINE bool has_key() const { return (type & KEY) != 0; }
    // true when VAL is set and KEY is not
    C4_ALWAYS_INLINE bool is_val() const { return (type & (KEYVAL)) == VAL; }
    // true when both KEY and VAL are set
    C4_ALWAYS_INLINE bool is_keyval() const { return (type & KEYVAL) == KEYVAL; }
    C4_ALWAYS_INLINE bool has_key_tag() const { return (type & (KEY|KEYTAG)) == (KEY|KEYTAG); }
    // a val tag counts on leaf values as well as on containers
    C4_ALWAYS_INLINE bool has_val_tag() const { return ((type & (VALTAG)) && (type & (VAL|MAP|SEQ))); }
    // has_key_anchor()/is_key_anchor() are implemented identically;
    // the same holds for the val and the generic anchor pairs below
    C4_ALWAYS_INLINE bool has_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); }
    C4_ALWAYS_INLINE bool is_key_anchor() const { return (type & (KEY|KEYANCH)) == (KEY|KEYANCH); }
    C4_ALWAYS_INLINE bool has_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; }
    C4_ALWAYS_INLINE bool is_val_anchor() const { return (type & VALANCH) != 0 && (type & (VAL|SEQ|MAP)) != 0; }
    C4_ALWAYS_INLINE bool has_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; }
    C4_ALWAYS_INLINE bool is_anchor() const { return (type & (KEYANCH|VALANCH)) != 0; }
    C4_ALWAYS_INLINE bool is_key_ref() const { return (type & KEYREF) != 0; }
    C4_ALWAYS_INLINE bool is_val_ref() const { return (type & VALREF) != 0; }
    C4_ALWAYS_INLINE bool is_ref() const { return (type & (KEYREF|VALREF)) != 0; }
    C4_ALWAYS_INLINE bool is_anchor_or_ref() const { return (type & (KEYANCH|VALANCH|KEYREF|VALREF)) != 0; }
    C4_ALWAYS_INLINE bool is_key_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO); }
    C4_ALWAYS_INLINE bool is_val_quoted() const { return (type & (VAL|VALQUO)) == (VAL|VALQUO); }
    // true when either the key or the val is quoted
    C4_ALWAYS_INLINE bool is_quoted() const { return (type & (KEY|KEYQUO)) == (KEY|KEYQUO) || (type & (VAL|VALQUO)) == (VAL|VALQUO); }

    // these predicates are a work in progress and subject to change. Don't use yet.
    // default_block() is true when none of the three container style flags is set
    C4_ALWAYS_INLINE bool default_block() const { return (type & (_WIP_STYLE_BLOCK|_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) == 0; }
    C4_ALWAYS_INLINE bool marked_block() const { return (type & (_WIP_STYLE_BLOCK)) != 0; }
    C4_ALWAYS_INLINE bool marked_flow_sl() const { return (type & (_WIP_STYLE_FLOW_SL)) != 0; }
    C4_ALWAYS_INLINE bool marked_flow_ml() const { return (type & (_WIP_STYLE_FLOW_ML)) != 0; }
    C4_ALWAYS_INLINE bool marked_flow() const { return (type & (_WIP_STYLE_FLOW_ML|_WIP_STYLE_FLOW_SL)) != 0; }
    C4_ALWAYS_INLINE bool key_marked_literal() const { return (type & (_WIP_KEY_LITERAL)) != 0; }
    C4_ALWAYS_INLINE bool val_marked_literal() const { return (type & (_WIP_VAL_LITERAL)) != 0; }
    C4_ALWAYS_INLINE bool key_marked_folded() const { return (type & (_WIP_KEY_FOLDED)) != 0; }
    C4_ALWAYS_INLINE bool val_marked_folded() const { return (type & (_WIP_VAL_FOLDED)) != 0; }
    C4_ALWAYS_INLINE bool key_marked_squo() const { return (type & (_WIP_KEY_SQUO)) != 0; }
    C4_ALWAYS_INLINE bool val_marked_squo() const { return (type & (_WIP_VAL_SQUO)) != 0; }
    C4_ALWAYS_INLINE bool key_marked_dquo() const { return (type & (_WIP_KEY_DQUO)) != 0; }
    C4_ALWAYS_INLINE bool val_marked_dquo() const { return (type & (_WIP_VAL_DQUO)) != 0; }
    C4_ALWAYS_INLINE bool key_marked_plain() const { return (type & (_WIP_KEY_PLAIN)) != 0; }
    C4_ALWAYS_INLINE bool val_marked_plain() const { return (type & (_WIP_VAL_PLAIN)) != 0; }

    #if defined(__clang__)
    #   pragma clang diagnostic pop
    #elif defined(__GNUC__)
    #   pragma GCC diagnostic pop
    #endif

};
*/ +struct NodeScalar +{ + csubstr tag; + csubstr scalar; + csubstr anchor; + +public: + + /// initialize as an empty scalar + inline NodeScalar() noexcept : tag(), scalar(), anchor() {} + + /// initialize as an untagged scalar + template + inline NodeScalar(const char (&s)[N]) noexcept : tag(), scalar(s), anchor() {} + inline NodeScalar(csubstr s ) noexcept : tag(), scalar(s), anchor() {} + + /// initialize as a tagged scalar + template + inline NodeScalar(const char (&t)[N], const char (&s)[N]) noexcept : tag(t), scalar(s), anchor() {} + inline NodeScalar(csubstr t , csubstr s ) noexcept : tag(t), scalar(s), anchor() {} + +public: + + ~NodeScalar() noexcept = default; + NodeScalar(NodeScalar &&) noexcept = default; + NodeScalar(NodeScalar const&) noexcept = default; + NodeScalar& operator= (NodeScalar &&) noexcept = default; + NodeScalar& operator= (NodeScalar const&) noexcept = default; + +public: + + bool empty() const noexcept { return tag.empty() && scalar.empty() && anchor.empty(); } + + void clear() noexcept { tag.clear(); scalar.clear(); anchor.clear(); } + + void set_ref_maybe_replacing_scalar(csubstr ref, bool has_scalar) noexcept + { + csubstr trimmed = ref.begins_with('*') ? 
ref.sub(1) : ref; + anchor = trimmed; + if((!has_scalar) || !scalar.ends_with(trimmed)) + scalar = ref; + } +}; +C4_MUST_BE_TRIVIAL_COPY(NodeScalar); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** convenience class to initialize nodes */ +struct NodeInit +{ + + NodeType type; + NodeScalar key; + NodeScalar val; + +public: + + /// initialize as an empty node + NodeInit() : type(NOTYPE), key(), val() {} + /// initialize as a typed node + NodeInit(NodeType_e t) : type(t), key(), val() {} + /// initialize as a sequence member + NodeInit(NodeScalar const& v) : type(VAL), key(), val(v) { _add_flags(); } + /// initialize as a mapping member + NodeInit( NodeScalar const& k, NodeScalar const& v) : type(KEYVAL), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + /// initialize as a mapping member with explicit type + NodeInit(NodeType_e t, NodeScalar const& k, NodeScalar const& v) : type(t ), key(k.tag, k.scalar), val(v.tag, v.scalar) { _add_flags(); } + /// initialize as a mapping member with explicit type (eg SEQ or MAP) + NodeInit(NodeType_e t, NodeScalar const& k ) : type(t ), key(k.tag, k.scalar), val( ) { _add_flags(KEY); } + +public: + + void clear() + { + type.clear(); + key.clear(); + val.clear(); + } + + void _add_flags(type_bits more_flags=0) + { + type = (type|more_flags); + if( ! key.tag.empty()) + type = (type|KEYTAG); + if( ! val.tag.empty()) + type = (type|VALTAG); + if( ! key.anchor.empty()) + type = (type|KEYANCH); + if( ! val.anchor.empty()) + type = (type|VALANCH); + } + + bool _check() const + { + // key cannot be empty + RYML_ASSERT(key.scalar.empty() == ((type & KEY) == 0)); + // key tag cannot be empty + RYML_ASSERT(key.tag.empty() == ((type & KEYTAG) == 0)); + // val may be empty even though VAL is set. 
But when VAL is not set, val must be empty + RYML_ASSERT(((type & VAL) != 0) || val.scalar.empty()); + // val tag cannot be empty + RYML_ASSERT(val.tag.empty() == ((type & VALTAG) == 0)); + return true; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** contains the data for each YAML node. */ +struct NodeData +{ + NodeType m_type; + + NodeScalar m_key; + NodeScalar m_val; + + size_t m_parent; + size_t m_first_child; + size_t m_last_child; + size_t m_next_sibling; + size_t m_prev_sibling; +}; +C4_MUST_BE_TRIVIAL_COPY(NodeData); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +class RYML_EXPORT Tree +{ +public: + + /** @name construction and assignment */ + /** @{ */ + + Tree() : Tree(get_callbacks()) {} + Tree(Callbacks const& cb); + Tree(size_t node_capacity, size_t arena_capacity=0) : Tree(node_capacity, arena_capacity, get_callbacks()) {} + Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb); + + ~Tree(); + + Tree(Tree const& that) noexcept; + Tree(Tree && that) noexcept; + + Tree& operator= (Tree const& that) noexcept; + Tree& operator= (Tree && that) noexcept; + + /** @} */ + +public: + + /** @name memory and sizing */ + /** @{ */ + + void reserve(size_t node_capacity); + + /** clear the tree and zero every node + * @note does NOT clear the arena + * @see clear_arena() */ + void clear(); + inline void clear_arena() { m_arena_pos = 0; } + + inline bool empty() const { return m_size == 0; } + + inline size_t size () const { return m_size; } + inline size_t capacity() const { return m_cap; } + inline size_t slack() const { RYML_ASSERT(m_cap 
>= m_size); return m_cap - m_size; } + + inline size_t arena_size() const { return m_arena_pos; } + inline size_t arena_capacity() const { return m_arena.len; } + inline size_t arena_slack() const { RYML_ASSERT(m_arena.len >= m_arena_pos); return m_arena.len - m_arena_pos; } + + Callbacks const& callbacks() const { return m_callbacks; } + void callbacks(Callbacks const& cb) { m_callbacks = cb; } + + /** @} */ + +public: + + /** @name node getters */ + /** @{ */ + + //! get the index of a node belonging to this tree. + //! @p n can be nullptr, in which case NONE is returned + size_t id(NodeData const* n) const + { + if( ! n) + { + return NONE; + } + RYML_ASSERT(n >= m_buf && n < m_buf + m_cap); + return static_cast(n - m_buf); + } + + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned + inline NodeData *get(size_t i) + { + if(i == NONE) + return nullptr; + RYML_ASSERT(i >= 0 && i < m_cap); + return m_buf + i; + } + //! get a pointer to a node's NodeData. + //! i can be NONE, in which case a nullptr is returned. + inline NodeData const *get(size_t i) const + { + if(i == NONE) + return nullptr; + RYML_ASSERT(i >= 0 && i < m_cap); + return m_buf + i; + } + + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData * _p(size_t i) { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } + //! An if-less form of get() that demands a valid node index. + //! This function is implementation only; use at your own risk. + inline NodeData const * _p(size_t i) const { RYML_ASSERT(i != NONE && i >= 0 && i < m_cap); return m_buf + i; } + + //! Get the id of the root node + size_t root_id() { if(m_cap == 0) { reserve(16); } RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } + //! Get the id of the root node + size_t root_id() const { RYML_ASSERT(m_cap > 0 && m_size > 0); return 0; } + + //! Get a NodeRef of a node by id + NodeRef ref(size_t id); + //! 
Get a NodeRef of a node by id + NodeRef const ref(size_t id) const; + + //! Get the root as a NodeRef + NodeRef rootref(); + //! Get the root as a NodeRef + NodeRef const rootref() const; + + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + NodeRef operator[] (csubstr key); + //! find a root child by name, return it as a NodeRef + //! @note requires the root to be a map. + NodeRef const operator[] (csubstr key) const; + + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @p i is NOT the node id, but the child's position + NodeRef operator[] (size_t i); + //! find a root child by index: return the root node's @p i-th child as a NodeRef + //! @note @p i is NOT the node id, but the child's position + NodeRef const operator[] (size_t i) const; + + //! get the i-th document of the stream + //! @note @p i is NOT the node id, but the doc position within the stream + NodeRef docref(size_t i); + //! get the i-th document of the stream + //! @note @p i is NOT the node id, but the doc position within the stream + NodeRef const docref(size_t i) const; + + /** @} */ + +public: + + /** @name node property getters */ + /** @{ */ + + NodeType type(size_t node) const { return _p(node)->m_type; } + const char* type_str(size_t node) const { return NodeType::type_str(_p(node)->m_type); } + + csubstr const& key (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key.scalar; } + csubstr const& key_tag (size_t node) const { RYML_ASSERT(has_key_tag(node)); return _p(node)->m_key.tag; } + csubstr const& key_ref (size_t node) const { RYML_ASSERT(is_key_ref(node) && ! has_key_anchor(node)); return _p(node)->m_key.anchor; } + csubstr const& key_anchor(size_t node) const { RYML_ASSERT( ! 
is_key_ref(node) && has_key_anchor(node)); return _p(node)->m_key.anchor; } + NodeScalar const& keysc (size_t node) const { RYML_ASSERT(has_key(node)); return _p(node)->m_key; } + + csubstr const& val (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val.scalar; } + csubstr const& val_tag (size_t node) const { RYML_ASSERT(has_val_tag(node)); return _p(node)->m_val.tag; } + csubstr const& val_ref (size_t node) const { RYML_ASSERT(is_val_ref(node) && ! has_val_anchor(node)); return _p(node)->m_val.anchor; } + csubstr const& val_anchor(size_t node) const { RYML_ASSERT( ! is_val_ref(node) && has_val_anchor(node)); return _p(node)->m_val.anchor; } + NodeScalar const& valsc (size_t node) const { RYML_ASSERT(has_val(node)); return _p(node)->m_val; } + + bool key_is_null(size_t node) const { RYML_ASSERT(has_key(node)); if(is_key_quoted(node)) return false; csubstr s = _p(node)->m_key.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; } + bool val_is_null(size_t node) const { RYML_ASSERT(has_val(node)); if(is_val_quoted(node)) return false; csubstr s = _p(node)->m_val.scalar; return s == nullptr || s == "~" || s == "null" || s == "Null" || s == "NULL"; } + + /** @} */ + +public: + + /** @name node type predicates */ + /** @{ */ + + C4_ALWAYS_INLINE bool is_stream(size_t node) const { return _p(node)->m_type.is_stream(); } + C4_ALWAYS_INLINE bool is_doc(size_t node) const { return _p(node)->m_type.is_doc(); } + C4_ALWAYS_INLINE bool is_container(size_t node) const { return _p(node)->m_type.is_container(); } + C4_ALWAYS_INLINE bool is_map(size_t node) const { return _p(node)->m_type.is_map(); } + C4_ALWAYS_INLINE bool is_seq(size_t node) const { return _p(node)->m_type.is_seq(); } + C4_ALWAYS_INLINE bool has_key(size_t node) const { return _p(node)->m_type.has_key(); } + C4_ALWAYS_INLINE bool has_val(size_t node) const { return _p(node)->m_type.has_val(); } + C4_ALWAYS_INLINE bool is_val(size_t node) const { return 
_p(node)->m_type.is_val(); } + C4_ALWAYS_INLINE bool is_keyval(size_t node) const { return _p(node)->m_type.is_keyval(); } + C4_ALWAYS_INLINE bool has_key_tag(size_t node) const { return _p(node)->m_type.has_key_tag(); } + C4_ALWAYS_INLINE bool has_val_tag(size_t node) const { return _p(node)->m_type.has_val_tag(); } + C4_ALWAYS_INLINE bool has_key_anchor(size_t node) const { return _p(node)->m_type.has_key_anchor(); } + C4_ALWAYS_INLINE bool is_key_anchor(size_t node) const { return _p(node)->m_type.is_key_anchor(); } + C4_ALWAYS_INLINE bool has_val_anchor(size_t node) const { return _p(node)->m_type.has_val_anchor(); } + C4_ALWAYS_INLINE bool is_val_anchor(size_t node) const { return _p(node)->m_type.is_val_anchor(); } + C4_ALWAYS_INLINE bool has_anchor(size_t node) const { return _p(node)->m_type.has_anchor(); } + C4_ALWAYS_INLINE bool is_anchor(size_t node) const { return _p(node)->m_type.is_anchor(); } + C4_ALWAYS_INLINE bool is_key_ref(size_t node) const { return _p(node)->m_type.is_key_ref(); } + C4_ALWAYS_INLINE bool is_val_ref(size_t node) const { return _p(node)->m_type.is_val_ref(); } + C4_ALWAYS_INLINE bool is_ref(size_t node) const { return _p(node)->m_type.is_ref(); } + C4_ALWAYS_INLINE bool is_anchor_or_ref(size_t node) const { return _p(node)->m_type.is_anchor_or_ref(); } + C4_ALWAYS_INLINE bool is_key_quoted(size_t node) const { return _p(node)->m_type.is_key_quoted(); } + C4_ALWAYS_INLINE bool is_val_quoted(size_t node) const { return _p(node)->m_type.is_val_quoted(); } + C4_ALWAYS_INLINE bool is_quoted(size_t node) const { return _p(node)->m_type.is_quoted(); } + + C4_ALWAYS_INLINE bool parent_is_seq(size_t node) const { RYML_ASSERT(has_parent(node)); return is_seq(_p(node)->m_parent); } + C4_ALWAYS_INLINE bool parent_is_map(size_t node) const { RYML_ASSERT(has_parent(node)); return is_map(_p(node)->m_parent); } + + /** true when key and val are empty, and has no children */ + bool empty(size_t node) const { return ! 
has_children(node) && _p(node)->m_key.empty() && (( ! (_p(node)->m_type & VAL)) || _p(node)->m_val.empty()); } + /** true when the node has an anchor named a */ + bool has_anchor(size_t node, csubstr a) const { return _p(node)->m_key.anchor == a || _p(node)->m_val.anchor == a; } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + bool is_root(size_t node) const { RYML_ASSERT(_p(node)->m_parent != NONE || node == 0); return _p(node)->m_parent == NONE; } + + bool has_parent(size_t node) const { return _p(node)->m_parent != NONE; } + + bool has_child(size_t node, csubstr key) const { return find_child(node, key) != npos; } + bool has_child(size_t node, size_t ch) const { return child_pos(node, ch) != npos; } + bool has_children(size_t node) const { return _p(node)->m_first_child != NONE; } + + bool has_sibling(size_t node, size_t sib) const { return is_root(node) ? sib==node : child_pos(_p(node)->m_parent, sib) != npos; } + bool has_sibling(size_t node, csubstr key) const { return find_sibling(node, key) != npos; } + /** counts with *this */ + bool has_siblings(size_t /*node*/) const { return true; } + /** does not count with *this */ + bool has_other_siblings(size_t node) const { return is_root(node) ? 
false : (_p(_p(node)->m_parent)->m_first_child != _p(_p(node)->m_parent)->m_last_child); } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ */ + + size_t parent(size_t node) const { return _p(node)->m_parent; } + + size_t prev_sibling(size_t node) const { return _p(node)->m_prev_sibling; } + size_t next_sibling(size_t node) const { return _p(node)->m_next_sibling; } + + /** O(#num_children) */ + size_t num_children(size_t node) const; + size_t child_pos(size_t node, size_t ch) const; + size_t first_child(size_t node) const { return _p(node)->m_first_child; } + size_t last_child(size_t node) const { return _p(node)->m_last_child; } + size_t child(size_t node, size_t pos) const; + size_t find_child(size_t node, csubstr const& key) const; + + /** O(#num_siblings) */ + /** counts with this */ + size_t num_siblings(size_t node) const { return is_root(node) ? 1 : num_children(_p(node)->m_parent); } + /** does not count with this */ + size_t num_other_siblings(size_t node) const { size_t ns = num_siblings(node); RYML_ASSERT(ns > 0); return ns-1; } + size_t sibling_pos(size_t node, size_t sib) const { RYML_ASSERT( ! is_root(node) || node == root_id()); return child_pos(_p(node)->m_parent, sib); } + size_t first_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_first_child; } + size_t last_sibling(size_t node) const { return is_root(node) ? node : _p(_p(node)->m_parent)->m_last_child; } + size_t sibling(size_t node, size_t pos) const { return child(_p(node)->m_parent, pos); } + size_t find_sibling(size_t node, csubstr const& key) const { return find_child(_p(node)->m_parent, key); } + + size_t doc(size_t i) const { size_t rid = root_id(); RYML_ASSERT(is_stream(rid)); return child(rid, i); } //!< gets the @p i document node index. requires that the root node is a stream. 
+ + /** @} */ + +public: + + /** @name node modifiers */ + /** @{ */ + + void to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags=0); + void to_map(size_t node, csubstr key, type_bits more_flags=0); + void to_seq(size_t node, csubstr key, type_bits more_flags=0); + void to_val(size_t node, csubstr val, type_bits more_flags=0); + void to_map(size_t node, type_bits more_flags=0); + void to_seq(size_t node, type_bits more_flags=0); + void to_doc(size_t node, type_bits more_flags=0); + void to_stream(size_t node, type_bits more_flags=0); + + void set_key(size_t node, csubstr key) { RYML_ASSERT(has_key(node)); _p(node)->m_key.scalar = key; } + void set_val(size_t node, csubstr val) { RYML_ASSERT(has_val(node)); _p(node)->m_val.scalar = val; } + + void set_key_tag(size_t node, csubstr tag) { RYML_ASSERT(has_key(node)); _p(node)->m_key.tag = tag; _add_flags(node, KEYTAG); } + void set_val_tag(size_t node, csubstr tag) { RYML_ASSERT(has_val(node) || is_container(node)); _p(node)->m_val.tag = tag; _add_flags(node, VALTAG); } + + void set_key_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_key_ref(node)); _p(node)->m_key.anchor = anchor.triml('&'); _add_flags(node, KEYANCH); } + void set_val_anchor(size_t node, csubstr anchor) { RYML_ASSERT( ! is_val_ref(node)); _p(node)->m_val.anchor = anchor.triml('&'); _add_flags(node, VALANCH); } + void set_key_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! has_key_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_key.set_ref_maybe_replacing_scalar(ref, n->m_type.has_key()); _add_flags(node, KEY|KEYREF); } + void set_val_ref (size_t node, csubstr ref ) { RYML_ASSERT( ! 
has_val_anchor(node)); NodeData* C4_RESTRICT n = _p(node); n->m_val.set_ref_maybe_replacing_scalar(ref, n->m_type.has_val()); _add_flags(node, VAL|VALREF); } + + void rem_key_anchor(size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYANCH); } + void rem_val_anchor(size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALANCH); } + void rem_key_ref (size_t node) { _p(node)->m_key.anchor.clear(); _rem_flags(node, KEYREF); } + void rem_val_ref (size_t node) { _p(node)->m_val.anchor.clear(); _rem_flags(node, VALREF); } + void rem_anchor_ref(size_t node) { _p(node)->m_key.anchor.clear(); _p(node)->m_val.anchor.clear(); _rem_flags(node, KEYANCH|VALANCH|KEYREF|VALREF); } + + /** @} */ + +public: + + /** @name tree modifiers */ + /** @{ */ + + /** reorder the tree in memory so that all the nodes are stored + * in a linear sequence when visited in depth-first order. + * This will invalidate existing ids, since the node id is its + * position in the node array. */ + void reorder(); + + /** Resolve references (aliases <- anchors) in the tree. + * + * Dereferencing is opt-in; after parsing, Tree::resolve() + * has to be called explicitly for obtaining resolved references in the + * tree. This method will resolve all references and substitute the + * anchored values in place of the reference. + * + * This method first does a full traversal of the tree to gather all + * anchors and references in a separate collection, then it goes through + * that collection to locate the names, which it does by obeying the YAML + * standard diktat that "an alias node refers to the most recent node in + * the serialization having the specified anchor" + * + * So, depending on the number of anchor/alias nodes, this is a + * potentially expensive operation, with a best-case linear complexity + * (from the initial traversal). This potential cost is the reason for + * requiring an explicit call. 
+ */ + void resolve(); + + /** @} */ + +public: + + /** @name tag directives */ + /** @{ */ + + void resolve_tags(); + + size_t num_tag_directives() const; + size_t add_tag_directive(TagDirective const& td); + void clear_tag_directives(); + + size_t resolve_tag(substr output, csubstr tag, size_t node_id) const; + csubstr resolve_tag_sub(substr output, csubstr tag, size_t node_id) const + { + size_t needed = resolve_tag(output, tag, node_id); + return needed <= output.len ? output.first(needed) : output; + } + + using tag_directive_const_iterator = TagDirective const*; + tag_directive_const_iterator begin_tag_directives() const { return m_tag_directives; } + tag_directive_const_iterator end_tag_directives() const { return m_tag_directives + num_tag_directives(); } + + struct TagDirectiveProxy + { + tag_directive_const_iterator b, e; + tag_directive_const_iterator begin() const { return b; } + tag_directive_const_iterator end() const { return e; } + }; + + TagDirectiveProxy tag_directives() const { return TagDirectiveProxy{begin_tag_directives(), end_tag_directives()}; } + + /** @} */ + +public: + + /** @name modifying hierarchy */ + /** @{ */ + + /** create and insert a new child of "parent". insert after the (to-be) + * sibling "after", which must be a child of "parent". 
To insert as the + * first child, set after to NONE */ + inline size_t insert_child(size_t parent, size_t after) + { + RYML_ASSERT(parent != NONE); + RYML_ASSERT(is_container(parent) || is_root(parent)); + RYML_ASSERT(after == NONE || has_child(parent, after)); + size_t child = _claim(); + _set_hierarchy(child, parent, after); + return child; + } + inline size_t prepend_child(size_t parent) { return insert_child(parent, NONE); } + inline size_t append_child(size_t parent) { return insert_child(parent, last_child(parent)); } + +public: + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + //! create and insert a new sibling of n. insert after "after" + inline size_t insert_sibling(size_t node, size_t after) + { + RYML_ASSERT(node != NONE); + RYML_ASSERT( ! is_root(node)); + RYML_ASSERT(parent(node) != NONE); + RYML_ASSERT(after == NONE || (has_sibling(node, after) && has_sibling(after, node))); + RYML_ASSERT(get(node) != nullptr); + return insert_child(get(node)->m_parent, after); + } + inline size_t prepend_sibling(size_t node) { return insert_sibling(node, NONE); } + inline size_t append_sibling(size_t node) { return insert_sibling(node, last_sibling(node)); } + +public: + + /** remove an entire branch at once: ie remove the children and the node itself */ + inline void remove(size_t node) + { + remove_children(node); + _release(node); + } + + /** remove all the node's children, but keep the node itself */ + void remove_children(size_t node); + + /** change the @p type of the node to one of MAP, SEQ or VAL. @p + * type must have one and only one of MAP,SEQ,VAL; @p type may + * possibly have KEY, but if it does, then the @p node must also + * have KEY. Changing to the same type is a no-op. 
Otherwise, + * changing to a different type will initialize the node with an + * empty value of the desired type: changing to VAL will + * initialize with a null scalar (~), changing to MAP will + * initialize with an empty map ({}), and changing to SEQ will + * initialize with an empty seq ([]). */ + bool change_type(size_t node, NodeType type); + + bool change_type(size_t node, type_bits type) + { + return change_type(node, (NodeType)type); + } + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + +public: + + /** change the node's position in the parent */ + void move(size_t node, size_t after); + + /** change the node's parent and position */ + void move(size_t node, size_t new_parent, size_t after); + + /** change the node's parent and position to a different tree + * @return the index of the new node in the destination tree */ + size_t move(Tree * src, size_t node, size_t new_parent, size_t after); + + /** ensure the first node is a stream. Eg, change this tree + * + * DOCMAP + * MAP + * KEYVAL + * KEYVAL + * SEQ + * VAL + * + * to + * + * STREAM + * DOCMAP + * MAP + * KEYVAL + * KEYVAL + * SEQ + * VAL + * + * If the root is already a stream, this is a no-op. 
+ */ + void set_root_as_stream(); + +public: + + /** recursively duplicate a node from this tree into a new parent, + * placing it after one of its children + * @return the index of the copy */ + size_t duplicate(size_t node, size_t new_parent, size_t after); + /** recursively duplicate a node from a different tree into a new parent, + * placing it after one of its children + * @return the index of the copy */ + size_t duplicate(Tree const* src, size_t node, size_t new_parent, size_t after); + + /** recursively duplicate the node's children (but not the node) + * @return the index of the last duplicated child */ + size_t duplicate_children(size_t node, size_t parent, size_t after); + /** recursively duplicate the node's children (but not the node), where + * the node is from a different tree + * @return the index of the last duplicated child */ + size_t duplicate_children(Tree const* src, size_t node, size_t parent, size_t after); + + void duplicate_contents(size_t node, size_t where); + void duplicate_contents(Tree const* src, size_t node, size_t where); + + /** duplicate the node's children (but not the node) in a new parent, but + * omit repetitions where a duplicated node has the same key (in maps) or + * value (in seqs). If one of the duplicated children has the same key + * (in maps) or value (in seqs) as one of the parent's children, the one + * that is placed closest to the end will prevail. 
*/ + size_t duplicate_children_no_rep(size_t node, size_t parent, size_t after); + size_t duplicate_children_no_rep(Tree const* src, size_t node, size_t parent, size_t after); + +public: + + void merge_with(Tree const* src, size_t src_node=NONE, size_t dst_root=NONE); + + /** @} */ + +public: + + /** @name internal string arena */ + /** @{ */ + + /** get the current size of the tree's internal arena */ + size_t arena_pos() const { return m_arena_pos; } + + /** get the current arena */ + substr arena() const { return m_arena.first(m_arena_pos); } + + /** return true if the given substring is part of the tree's string arena */ + bool in_arena(csubstr s) const + { + return m_arena.is_super(s); + } + + /** serialize the given non-floating-point variable to the tree's arena, growing it as + * needed to accommodate the serialization. + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. + * @see alloc_arena() */ + template + typename std::enable_if::value, csubstr>::type + to_arena(T const& C4_RESTRICT a) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars(rem, a); + RYML_ASSERT(num <= rem.len); + } + rem = _request_span(num); + return rem; + } + + /** serialize the given floating-point variable to the tree's arena, growing it as + * needed to accommodate the serialization. + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. 
+ * @see alloc_arena() */ + template + typename std::enable_if::value, csubstr>::type + to_arena(T const& C4_RESTRICT a) + { + substr rem(m_arena.sub(m_arena_pos)); + size_t num = to_chars_float(rem, a); + if(num > rem.len) + { + rem = _grow_arena(num); + num = to_chars_float(rem, a); + RYML_ASSERT(num <= rem.len); + } + rem = _request_span(num); + return rem; + } + + /** copy the given substr to the tree's arena, growing it by the required size + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. + * @see alloc_arena() */ + substr copy_to_arena(csubstr s) + { + substr cp = alloc_arena(s.len); + RYML_ASSERT(cp.len == s.len); + RYML_ASSERT(!s.overlaps(cp)); + #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) + C4_SUPPRESS_WARNING_GCC_PUSH + C4_SUPPRESS_WARNING_GCC("-Wstringop-overflow=") // no need for terminating \0 + C4_SUPPRESS_WARNING_GCC( "-Wrestrict") // there's an assert to ensure no violation of restrict behavior + #endif + memcpy(cp.str, s.str, s.len); + #if (!defined(__clang__)) && (defined(__GNUC__) && __GNUC__ >= 10) + C4_SUPPRESS_WARNING_GCC_POP + #endif + return cp; + } + + /** grow the tree's string arena by the given size and return a substr + * of the added portion + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. */ + substr alloc_arena(size_t sz) + { + if(sz > arena_slack()) + _grow_arena(sz - arena_slack()); + substr s = _request_span(sz); + return s; + } + + /** ensure the tree's internal string arena is at least the given capacity + * @note Growing the arena may cause relocation of the entire + * existing arena, and thus change the contents of individual nodes. 
*/ + void reserve_arena(size_t arena_cap) + { + if(arena_cap > m_arena.len) + { + substr buf; + buf.str = (char*) m_callbacks.m_allocate(arena_cap, m_arena.str, m_callbacks.m_user_data); + buf.len = arena_cap; + if(m_arena.str) + { + RYML_ASSERT(m_arena.len >= 0); + _relocate(buf); // does a memcpy and changes nodes using the arena + m_callbacks.m_free(m_arena.str, m_arena.len, m_callbacks.m_user_data); + } + m_arena = buf; + } + } + + /** @} */ + +private: + + substr _grow_arena(size_t more) + { + size_t cap = m_arena_pos + more; + cap = cap < 2 * m_arena.len ? 2 * m_arena.len : cap; + cap = cap < 64 ? 64 : cap; + reserve_arena(cap); + return m_arena.sub(m_arena_pos); + } + + substr _request_span(size_t sz) + { + substr s; + s = m_arena.sub(m_arena_pos, sz); + m_arena_pos += sz; + return s; + } + + substr _relocated(csubstr s, substr next_arena) const + { + RYML_ASSERT(m_arena.is_super(s)); + RYML_ASSERT(m_arena.sub(0, m_arena_pos).is_super(s)); + auto pos = (s.str - m_arena.str); + substr r(next_arena.str + pos, s.len); + RYML_ASSERT(r.str - next_arena.str == pos); + RYML_ASSERT(next_arena.sub(0, m_arena_pos).is_super(r)); + return r; + } + +public: + + /** @name lookup */ + /** @{ */ + + struct lookup_result + { + size_t target; + size_t closest; + size_t path_pos; + csubstr path; + + inline operator bool() const { return target != NONE; } + + lookup_result() : target(NONE), closest(NONE), path_pos(0), path() {} + lookup_result(csubstr path_, size_t start) : target(NONE), closest(start), path_pos(0), path(path_) {} + + /** get the part of the input path that was resolved */ + csubstr resolved() const; + /** get the part of the input path that was unresolved */ + csubstr unresolved() const; + }; + + /** for example foo.bar[0].baz */ + lookup_result lookup_path(csubstr path, size_t start=NONE) const; + + /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify + * the tree so that the corresponding lookup_path() would return the + * default 
value. + * @see lookup_path() */ + size_t lookup_path_or_modify(csubstr default_value, csubstr path, size_t start=NONE); + + /** defaulted lookup: lookup @p path; if the lookup fails, recursively modify + * the tree so that the corresponding lookup_path() would return the + * branch @p src_node (from the tree @p src). + * @see lookup_path() */ + size_t lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start=NONE); + + /** @} */ + +private: + + struct _lookup_path_token + { + csubstr value; + NodeType type; + _lookup_path_token() : value(), type() {} + _lookup_path_token(csubstr v, NodeType t) : value(v), type(t) {} + inline operator bool() const { return type != NOTYPE; } + bool is_index() const { return value.begins_with('[') && value.ends_with(']'); } + }; + + size_t _lookup_path_or_create(csubstr path, size_t start); + + void _lookup_path (lookup_result *r) const; + void _lookup_path_modify(lookup_result *r); + + size_t _next_node (lookup_result *r, _lookup_path_token *parent) const; + size_t _next_node_modify(lookup_result *r, _lookup_path_token *parent); + + void _advance(lookup_result *r, size_t more) const; + + _lookup_path_token _next_token(lookup_result *r, _lookup_path_token const& parent) const; + +private: + + void _clear(); + void _free(); + void _copy(Tree const& that); + void _move(Tree & that); + + void _relocate(substr next_arena); + +public: + + #if ! 
RYML_USE_ASSERT + C4_ALWAYS_INLINE void _check_next_flags(size_t, type_bits) {} + #else + void _check_next_flags(size_t node, type_bits f) + { + auto n = _p(node); + type_bits o = n->m_type; // old + C4_UNUSED(o); + if(f & MAP) + { + RYML_ASSERT_MSG((f & SEQ) == 0, "cannot mark simultaneously as map and seq"); + RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as map and val"); + RYML_ASSERT_MSG((o & SEQ) == 0, "cannot turn a seq into a map; clear first"); + RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a map; clear first"); + } + else if(f & SEQ) + { + RYML_ASSERT_MSG((f & MAP) == 0, "cannot mark simultaneously as seq and map"); + RYML_ASSERT_MSG((f & VAL) == 0, "cannot mark simultaneously as seq and val"); + RYML_ASSERT_MSG((o & MAP) == 0, "cannot turn a map into a seq; clear first"); + RYML_ASSERT_MSG((o & VAL) == 0, "cannot turn a val into a seq; clear first"); + } + if(f & KEY) + { + RYML_ASSERT(!is_root(node)); + auto pid = parent(node); C4_UNUSED(pid); + RYML_ASSERT(is_map(pid)); + } + if((f & VAL) && !is_root(node)) + { + auto pid = parent(node); C4_UNUSED(pid); + RYML_ASSERT(is_map(pid) || is_seq(pid)); + } + } + #endif + + inline void _set_flags(size_t node, NodeType_e f) { _check_next_flags(node, f); _p(node)->m_type = f; } + inline void _set_flags(size_t node, type_bits f) { _check_next_flags(node, f); _p(node)->m_type = f; } + + inline void _add_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = f | d->m_type; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _add_flags(size_t node, type_bits f) { NodeData *d = _p(node); f |= d->m_type; _check_next_flags(node, f); d->m_type = f; } + + inline void _rem_flags(size_t node, NodeType_e f) { NodeData *d = _p(node); type_bits fb = d->m_type & ~f; _check_next_flags(node, fb); d->m_type = (NodeType_e) fb; } + inline void _rem_flags(size_t node, type_bits f) { NodeData *d = _p(node); f = d->m_type & ~f; _check_next_flags(node, f); d->m_type = f; 
} + + void _set_key(size_t node, csubstr key, type_bits more_flags=0) + { + _p(node)->m_key.scalar = key; + _add_flags(node, KEY|more_flags); + } + void _set_key(size_t node, NodeScalar const& key, type_bits more_flags=0) + { + _p(node)->m_key = key; + _add_flags(node, KEY|more_flags); + } + + void _set_val(size_t node, csubstr val, type_bits more_flags=0) + { + RYML_ASSERT(num_children(node) == 0); + RYML_ASSERT(!is_seq(node) && !is_map(node)); + _p(node)->m_val.scalar = val; + _add_flags(node, VAL|more_flags); + } + void _set_val(size_t node, NodeScalar const& val, type_bits more_flags=0) + { + RYML_ASSERT(num_children(node) == 0); + RYML_ASSERT( ! is_container(node)); + _p(node)->m_val = val; + _add_flags(node, VAL|more_flags); + } + + void _set(size_t node, NodeInit const& i) + { + RYML_ASSERT(i._check()); + NodeData *n = _p(node); + RYML_ASSERT(n->m_key.scalar.empty() || i.key.scalar.empty() || i.key.scalar == n->m_key.scalar); + _add_flags(node, i.type); + if(n->m_key.scalar.empty()) + { + if( ! i.key.scalar.empty()) + { + _set_key(node, i.key.scalar); + } + } + n->m_key.tag = i.key.tag; + n->m_val = i.val; + } + + void _set_parent_as_container_if_needed(size_t in) + { + NodeData const* n = _p(in); + size_t ip = parent(in); + if(ip != NONE) + { + if( ! (is_seq(ip) || is_map(ip))) + { + if((in == first_child(ip)) && (in == last_child(ip))) + { + if( ! 
n->m_key.empty() || has_key(in)) + { + _add_flags(ip, MAP); + } + else + { + _add_flags(ip, SEQ); + } + } + } + } + } + + void _seq2map(size_t node) + { + RYML_ASSERT(is_seq(node)); + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + NodeData *C4_RESTRICT ch = _p(i); + if(ch->m_type.is_keyval()) + continue; + ch->m_type.add(KEY); + ch->m_key = ch->m_val; + } + auto *C4_RESTRICT n = _p(node); + n->m_type.rem(SEQ); + n->m_type.add(MAP); + } + + size_t _do_reorder(size_t *node, size_t count); + + void _swap(size_t n_, size_t m_); + void _swap_props(size_t n_, size_t m_); + void _swap_hierarchy(size_t n_, size_t m_); + void _copy_hierarchy(size_t dst_, size_t src_); + + void _copy_props(size_t dst_, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *_p(src_); + dst.m_type = src.m_type; + dst.m_key = src.m_key; + dst.m_val = src.m_val; + } + + void _copy_props_wo_key(size_t dst_, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *_p(src_); + dst.m_type = src.m_type; + dst.m_val = src.m_val; + } + + void _copy_props(size_t dst_, Tree const* that_tree, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = src.m_type; + dst.m_key = src.m_key; + dst.m_val = src.m_val; + } + + void _copy_props_wo_key(size_t dst_, Tree const* that_tree, size_t src_) + { + auto & C4_RESTRICT dst = *_p(dst_); + auto const& C4_RESTRICT src = *that_tree->_p(src_); + dst.m_type = src.m_type; + dst.m_val = src.m_val; + } + + inline void _clear_type(size_t node) + { + _p(node)->m_type = NOTYPE; + } + + inline void _clear(size_t node) + { + auto *C4_RESTRICT n = _p(node); + n->m_type = NOTYPE; + n->m_key.clear(); + n->m_val.clear(); + n->m_parent = NONE; + n->m_first_child = NONE; + n->m_last_child = NONE; + } + + inline void _clear_key(size_t node) + { + _p(node)->m_key.clear(); + _rem_flags(node, KEY); + } + + inline void 
_clear_val(size_t node) + { + _p(node)->m_key.clear(); + _rem_flags(node, VAL); + } + +private: + + void _clear_range(size_t first, size_t num); + + size_t _claim(); + void _claim_root(); + void _release(size_t node); + void _free_list_add(size_t node); + void _free_list_rem(size_t node); + + void _set_hierarchy(size_t node, size_t parent, size_t after_sibling); + void _rem_hierarchy(size_t node); + +public: + + // members are exposed, but you should NOT access them directly + + NodeData * m_buf; + size_t m_cap; + + size_t m_size; + + size_t m_free_head; + size_t m_free_tail; + + substr m_arena; + size_t m_arena_pos; + + Callbacks m_callbacks; + + TagDirective m_tag_directives[RYML_MAX_TAG_DIRECTIVES]; + +}; + +} // namespace yml +} // namespace c4 + + +C4_SUPPRESS_WARNING_MSVC_POP +C4_SUPPRESS_WARNING_GCC_CLANG_POP + + +#endif /* _C4_YML_TREE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/node.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_NODE_HPP_ +#define _C4_YML_NODE_HPP_ + +/** @file node.hpp + * @see NodeRef */ + +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/base64.hpp +//#include "c4/base64.hpp" +#if !defined(C4_BASE64_HPP_) && !defined(_C4_BASE64_HPP_) +#error "amalgamate: 
file c4/base64.hpp must have been included at this point" +#endif /* C4_BASE64_HPP_ */ + + +#ifdef __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" +#endif + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#endif + +namespace c4 { +namespace yml { + +template struct Key { K & k; }; +template<> struct Key { fmt::const_base64_wrapper wrapper; }; +template<> struct Key { fmt::base64_wrapper wrapper; }; + +template C4_ALWAYS_INLINE Key key(K & k) { return Key{k}; } +C4_ALWAYS_INLINE Key key(fmt::const_base64_wrapper w) { return {w}; } +C4_ALWAYS_INLINE Key key(fmt::base64_wrapper w) { return {w}; } + +template void write(NodeRef *n, T const& v); + +template +typename std::enable_if< ! std::is_floating_point::value, bool>::type +read(NodeRef const& n, T *v); + +template +typename std::enable_if< std::is_floating_point::value, bool>::type +read(NodeRef const& n, T *v); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** a reference to a node in an existing yaml tree, offering a more + * convenient API than the index-based API used in the tree. */ +class RYML_EXPORT NodeRef +{ +private: + + // require valid: a helper macro, undefined at the end + #define _C4RV() RYML_ASSERT(valid() && !is_seed()) + + Tree *C4_RESTRICT m_tree; + size_t m_id; + + /** This member is used to enable lazy operator[] writing. When a child + * with a key or index is not found, m_id is set to the id of the parent + * and the asked-for key or index are stored in this member until a write + * does happen. Then it is given as key or index for creating the child. 
+ * When a key is used, the csubstr stores it (so the csubstr's string is + * non-null and the csubstr's size is different from NONE). When an index is + * used instead, the csubstr's string is set to null, and only the csubstr's + * size is set to a value different from NONE. Otherwise, when operator[] + * does find the child then this member is empty: the string is null and + * the size is NONE. */ + csubstr m_seed; + +public: + + /** @name node construction */ + /** @{ */ + + NodeRef() : m_tree(nullptr), m_id(NONE), m_seed() { _clear_seed(); } + NodeRef(Tree &t) : m_tree(&t), m_id(t .root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t) : m_tree(t ), m_id(t->root_id()), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id) : m_tree(t), m_id(id), m_seed() { _clear_seed(); } + NodeRef(Tree *t, size_t id, size_t seed_pos) : m_tree(t), m_id(id), m_seed() { m_seed.str = nullptr; m_seed.len = seed_pos; } + NodeRef(Tree *t, size_t id, csubstr seed_key) : m_tree(t), m_id(id), m_seed(seed_key) {} + NodeRef(std::nullptr_t) : m_tree(nullptr), m_id(NONE), m_seed() {} + + NodeRef(NodeRef const&) = default; + NodeRef(NodeRef &&) = default; + + NodeRef& operator= (NodeRef const&) = default; + NodeRef& operator= (NodeRef &&) = default; + + /** @} */ + +public: + + inline Tree * tree() { return m_tree; } + inline Tree const* tree() const { return m_tree; } + + inline size_t id() const { return m_id; } + + inline NodeData * get() { return m_tree->get(m_id); } + inline NodeData const* get() const { return m_tree->get(m_id); } + + inline bool operator== (NodeRef const& that) const { _C4RV(); RYML_ASSERT(that.valid() && !that.is_seed()); RYML_ASSERT(that.m_tree == m_tree); return m_id == that.m_id; } + inline bool operator!= (NodeRef const& that) const { return ! this->operator==(that); } + + inline bool operator== (std::nullptr_t) const { return m_tree == nullptr || m_id == NONE || is_seed(); } + inline bool operator!= (std::nullptr_t) const { return ! 
this->operator== (nullptr); } + + inline bool operator== (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) == val; } + inline bool operator!= (csubstr val) const { _C4RV(); RYML_ASSERT(has_val()); return m_tree->val(m_id) != val; } + + //inline operator bool () const { return m_tree == nullptr || m_id == NONE || is_seed(); } + +public: + + inline bool valid() const { return m_tree != nullptr && m_id != NONE; } + inline bool is_seed() const { return m_seed.str != nullptr || m_seed.len != NONE; } + + inline void _clear_seed() { /*do this manually or an assert is triggered*/ m_seed.str = nullptr; m_seed.len = NONE; } + +public: + + /** @name node property getters */ + /** @{ */ + + inline NodeType type() const { _C4RV(); return m_tree->type(m_id); } + inline const char* type_str() const { _C4RV(); RYML_ASSERT(valid() && ! is_seed()); return m_tree->type_str(m_id); } + + inline csubstr key() const { _C4RV(); return m_tree->key(m_id); } + inline csubstr key_tag() const { _C4RV(); return m_tree->key_tag(m_id); } + inline csubstr key_ref() const { _C4RV(); return m_tree->key_ref(m_id); } + inline csubstr key_anchor() const { _C4RV(); return m_tree->key_anchor(m_id); } + inline NodeScalar keysc() const { _C4RV(); return m_tree->keysc(m_id); } + + inline csubstr val() const { _C4RV(); return m_tree->val(m_id); } + inline csubstr val_tag() const { _C4RV(); return m_tree->val_tag(m_id); } + inline csubstr val_ref() const { _C4RV(); return m_tree->val_ref(m_id); } + inline csubstr val_anchor() const { _C4RV(); return m_tree->val_anchor(m_id); } + inline NodeScalar valsc() const { _C4RV(); return m_tree->valsc(m_id); } + + inline bool key_is_null() const { _C4RV(); return m_tree->key_is_null(m_id); } + inline bool val_is_null() const { _C4RV(); return m_tree->val_is_null(m_id); } + + /** decode the base64-encoded key deserialize and assign the + * decoded blob to the given buffer/ + * @return the size of base64-decoded blob */ + size_t 
deserialize_key(fmt::base64_wrapper v) const; + /** decode the base64-encoded val and assign the + * decoded blob to the given buffer. + * @return the size of the base64-decoded blob */ + size_t deserialize_val(fmt::base64_wrapper v) const; + + /** @} */ + +public: + + /** @name node property predicates */ + /** @{ */ + + C4_ALWAYS_INLINE bool is_stream() const { _C4RV(); return m_tree->is_stream(m_id); } + C4_ALWAYS_INLINE bool is_doc() const { _C4RV(); return m_tree->is_doc(m_id); } + C4_ALWAYS_INLINE bool is_container() const { _C4RV(); return m_tree->is_container(m_id); } + C4_ALWAYS_INLINE bool is_map() const { _C4RV(); return m_tree->is_map(m_id); } + C4_ALWAYS_INLINE bool is_seq() const { _C4RV(); return m_tree->is_seq(m_id); } + C4_ALWAYS_INLINE bool has_val() const { _C4RV(); return m_tree->has_val(m_id); } + C4_ALWAYS_INLINE bool has_key() const { _C4RV(); return m_tree->has_key(m_id); } + C4_ALWAYS_INLINE bool is_val() const { _C4RV(); return m_tree->is_val(m_id); } + C4_ALWAYS_INLINE bool is_keyval() const { _C4RV(); return m_tree->is_keyval(m_id); } + C4_ALWAYS_INLINE bool has_key_tag() const { _C4RV(); return m_tree->has_key_tag(m_id); } + C4_ALWAYS_INLINE bool has_val_tag() const { _C4RV(); return m_tree->has_val_tag(m_id); } + C4_ALWAYS_INLINE bool has_key_anchor() const { _C4RV(); return m_tree->has_key_anchor(m_id); } + C4_ALWAYS_INLINE bool is_key_anchor() const { _C4RV(); return m_tree->is_key_anchor(m_id); } + C4_ALWAYS_INLINE bool has_val_anchor() const { _C4RV(); return m_tree->has_val_anchor(m_id); } + C4_ALWAYS_INLINE bool is_val_anchor() const { _C4RV(); return m_tree->is_val_anchor(m_id); } + C4_ALWAYS_INLINE bool has_anchor() const { _C4RV(); return m_tree->has_anchor(m_id); } + C4_ALWAYS_INLINE bool is_anchor() const { _C4RV(); return m_tree->is_anchor(m_id); } + C4_ALWAYS_INLINE bool is_key_ref() const { _C4RV(); return m_tree->is_key_ref(m_id); } + C4_ALWAYS_INLINE bool is_val_ref() const { _C4RV(); return 
m_tree->is_val_ref(m_id); } + C4_ALWAYS_INLINE bool is_ref() const { _C4RV(); return m_tree->is_ref(m_id); } + C4_ALWAYS_INLINE bool is_anchor_or_ref() const { _C4RV(); return m_tree->is_anchor_or_ref(m_id); } + C4_ALWAYS_INLINE bool is_key_quoted() const { _C4RV(); return m_tree->is_key_quoted(m_id); } + C4_ALWAYS_INLINE bool is_val_quoted() const { _C4RV(); return m_tree->is_val_quoted(m_id); } + C4_ALWAYS_INLINE bool is_quoted() const { _C4RV(); return m_tree->is_quoted(m_id); } + + C4_ALWAYS_INLINE bool parent_is_seq() const { _C4RV(); return m_tree->parent_is_seq(m_id); } + C4_ALWAYS_INLINE bool parent_is_map() const { _C4RV(); return m_tree->parent_is_map(m_id); } + + /** true when name and value are empty, and has no children */ + C4_ALWAYS_INLINE bool empty() const { _C4RV(); return m_tree->empty(m_id); } + + /** @} */ + +public: + + /** @name hierarchy predicates */ + /** @{ */ + + inline bool is_root() const { _C4RV(); return m_tree->is_root(m_id); } + inline bool has_parent() const { _C4RV(); return m_tree->has_parent(m_id); } + + inline bool has_child(NodeRef const& ch) const { _C4RV(); return m_tree->has_child(m_id, ch.m_id); } + inline bool has_child(csubstr name) const { _C4RV(); return m_tree->has_child(m_id, name); } + inline bool has_children() const { _C4RV(); return m_tree->has_children(m_id); } + + inline bool has_sibling(NodeRef const& n) const { _C4RV(); return m_tree->has_sibling(m_id, n.m_id); } + inline bool has_sibling(csubstr name) const { _C4RV(); return m_tree->has_sibling(m_id, name); } + /** counts with this */ + inline bool has_siblings() const { _C4RV(); return m_tree->has_siblings(m_id); } + /** does not count with this */ + inline bool has_other_siblings() const { _C4RV(); return m_tree->has_other_siblings(m_id); } + + /** @} */ + +public: + + /** @name hierarchy getters */ + /** @{ */ + + NodeRef parent() { _C4RV(); return {m_tree, m_tree->parent(m_id)}; } + NodeRef const parent() const { _C4RV(); return {m_tree, 
m_tree->parent(m_id)}; } + + NodeRef prev_sibling() { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; } + NodeRef const prev_sibling() const { _C4RV(); return {m_tree, m_tree->prev_sibling(m_id)}; } + + NodeRef next_sibling() { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; } + NodeRef const next_sibling() const { _C4RV(); return {m_tree, m_tree->next_sibling(m_id)}; } + + /** O(#num_children) */ + size_t num_children() const { _C4RV(); return m_tree->num_children(m_id); } + size_t child_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_id, n.m_id); } + NodeRef first_child() { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; } + NodeRef const first_child() const { _C4RV(); return {m_tree, m_tree->first_child(m_id)}; } + NodeRef last_child () { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; } + NodeRef const last_child () const { _C4RV(); return {m_tree, m_tree->last_child (m_id)}; } + NodeRef child(size_t pos) { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; } + NodeRef const child(size_t pos) const { _C4RV(); return {m_tree, m_tree->child(m_id, pos)}; } + NodeRef find_child(csubstr name) { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; } + NodeRef const find_child(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_child(m_id, name)}; } + + /** O(#num_siblings) */ + size_t num_siblings() const { _C4RV(); return m_tree->num_siblings(m_id); } + size_t num_other_siblings() const { _C4RV(); return m_tree->num_other_siblings(m_id); } + size_t sibling_pos(NodeRef const& n) const { _C4RV(); return m_tree->child_pos(m_tree->parent(m_id), n.m_id); } + NodeRef first_sibling() { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; } + NodeRef const first_sibling() const { _C4RV(); return {m_tree, m_tree->first_sibling(m_id)}; } + NodeRef last_sibling () { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; } + NodeRef const last_sibling () const { _C4RV(); return {m_tree, m_tree->last_sibling(m_id)}; } + 
NodeRef sibling(size_t pos) { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; } + NodeRef const sibling(size_t pos) const { _C4RV(); return {m_tree, m_tree->sibling(m_id, pos)}; } + NodeRef find_sibling(csubstr name) { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; } + NodeRef const find_sibling(csubstr name) const { _C4RV(); return {m_tree, m_tree->find_sibling(m_id, name)}; } + + NodeRef doc(size_t num) { _C4RV(); return {m_tree, m_tree->doc(num)}; } + NodeRef const doc(size_t num) const { _C4RV(); return {m_tree, m_tree->doc(num)}; } + + /** @} */ + +public: + + /** @name node modifiers */ + /** @{ */ + + void change_type(NodeType t) { _C4RV(); m_tree->change_type(m_id, t); } + void set_type(NodeType t) { _C4RV(); m_tree->_set_flags(m_id, t); } + void set_key(csubstr key) { _C4RV(); m_tree->_set_key(m_id, key); } + void set_val(csubstr val) { _C4RV(); m_tree->_set_val(m_id, val); } + void set_key_tag(csubstr key_tag) { _C4RV(); m_tree->set_key_tag(m_id, key_tag); } + void set_val_tag(csubstr val_tag) { _C4RV(); m_tree->set_val_tag(m_id, val_tag); } + void set_key_anchor(csubstr key_anchor) { _C4RV(); m_tree->set_key_anchor(m_id, key_anchor); } + void set_val_anchor(csubstr val_anchor) { _C4RV(); m_tree->set_val_anchor(m_id, val_anchor); } + void set_key_ref(csubstr key_ref) { _C4RV(); m_tree->set_key_ref(m_id, key_ref); } + void set_val_ref(csubstr val_ref) { _C4RV(); m_tree->set_val_ref(m_id, val_ref); } + + template + size_t set_key_serialized(T const& C4_RESTRICT k) + { + _C4RV(); + csubstr s = m_tree->to_arena(k); + m_tree->_set_key(m_id, s); + return s.len; + } + template + size_t set_val_serialized(T const& C4_RESTRICT v) + { + _C4RV(); + csubstr s = m_tree->to_arena(v); + m_tree->_set_val(m_id, s); + return s.len; + } + + /** encode a blob as base64, then assign the result to the node's key + * @return the size of base64-encoded blob */ + size_t set_key_serialized(fmt::const_base64_wrapper w); + /** encode a blob as base64, then assign 
the result to the node's val + * @return the size of base64-encoded blob */ + size_t set_val_serialized(fmt::const_base64_wrapper w); + +public: + + inline void clear() + { + if(is_seed()) + return; + m_tree->remove_children(m_id); + m_tree->_clear(m_id); + } + + inline void clear_key() + { + if(is_seed()) + return; + m_tree->_clear_key(m_id); + } + + inline void clear_val() + { + if(is_seed()) + return; + m_tree->_clear_val(m_id); + } + + inline void clear_children() + { + if(is_seed()) + return; + m_tree->remove_children(m_id); + } + + /** @} */ + +public: + + /** hierarchy getters */ + /** @{ */ + + /** O(num_children) */ + NodeRef operator[] (csubstr k) + { + RYML_ASSERT( ! is_seed()); + RYML_ASSERT(valid()); + size_t ch = m_tree->find_child(m_id, k); + NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, k); + return r; + } + + /** O(num_children) */ + NodeRef const operator[] (csubstr k) const + { + RYML_ASSERT( ! is_seed()); + RYML_ASSERT(valid()); + size_t ch = m_tree->find_child(m_id, k); + RYML_ASSERT(ch != NONE); + NodeRef const r(m_tree, ch); + return r; + } + + /** O(num_children) */ + NodeRef operator[] (size_t pos) + { + RYML_ASSERT( ! is_seed()); + RYML_ASSERT(valid()); + size_t ch = m_tree->child(m_id, pos); + NodeRef r = ch != NONE ? NodeRef(m_tree, ch) : NodeRef(m_tree, m_id, pos); + return r; + } + + /** O(num_children) */ + NodeRef const operator[] (size_t pos) const + { + RYML_ASSERT( ! 
is_seed()); + RYML_ASSERT(valid()); + size_t ch = m_tree->child(m_id, pos); + RYML_ASSERT(ch != NONE); + NodeRef const r(m_tree, ch); + return r; + } + + /** @} */ + +public: + + /** node modification */ + /** @{ */ + + void create() { _apply_seed(); } + + inline void operator= (NodeType_e t) + { + _apply_seed(); + m_tree->_add_flags(m_id, t); + } + + inline void operator|= (NodeType_e t) + { + _apply_seed(); + m_tree->_add_flags(m_id, t); + } + + inline void operator= (NodeInit const& v) + { + _apply_seed(); + _apply(v); + } + + inline void operator= (NodeScalar const& v) + { + _apply_seed(); + _apply(v); + } + + inline void operator= (csubstr v) + { + _apply_seed(); + _apply(v); + } + + template + inline void operator= (const char (&v)[N]) + { + _apply_seed(); + csubstr sv; + sv.assign(v); + _apply(sv); + } + + /** @} */ + +public: + + /** serialize a variable to the arena */ + template + inline csubstr to_arena(T const& C4_RESTRICT s) const + { + _C4RV(); + return m_tree->to_arena(s); + } + + /** serialize a variable, then assign the result to the node's val */ + inline NodeRef& operator<< (csubstr s) + { + // this overload is needed to prevent ambiguity (there's also + // operator<< for writing a substr to a stream) + _apply_seed(); + write(this, s); + RYML_ASSERT(val() == s); + return *this; + } + + template + inline NodeRef& operator<< (T const& C4_RESTRICT v) + { + _apply_seed(); + write(this, v); + return *this; + } + + template + inline NodeRef const& operator>> (T &v) const + { + RYML_ASSERT( ! is_seed()); + RYML_ASSERT(valid()); + RYML_ASSERT(get() != nullptr); + if( ! 
read(*this, &v)) + { + c4::yml::error("could not deserialize value"); + } + return *this; + } + +public: + + /** serialize a variable, then assign the result to the node's key */ + template + inline NodeRef& operator<< (Key const& C4_RESTRICT v) + { + _apply_seed(); + set_key_serialized(v.k); + return *this; + } + + /** serialize a variable, then assign the result to the node's key */ + template + inline NodeRef& operator<< (Key const& C4_RESTRICT v) + { + _apply_seed(); + set_key_serialized(v.k); + return *this; + } + + /** deserialize the node's key to the given variable */ + template + inline NodeRef const& operator>> (Key v) const + { + RYML_ASSERT( ! is_seed()); + RYML_ASSERT(valid()); + RYML_ASSERT(get() != nullptr); + from_chars(key(), &v.k); + return *this; + } + +public: + + NodeRef& operator<< (Key w) + { + set_key_serialized(w.wrapper); + return *this; + } + + NodeRef& operator<< (fmt::const_base64_wrapper w) + { + set_val_serialized(w); + return *this; + } + + NodeRef const& operator>> (Key w) const + { + deserialize_key(w.wrapper); + return *this; + } + + NodeRef const& operator>> (fmt::base64_wrapper w) const + { + deserialize_val(w); + return *this; + } + +public: + + template + void get_if(csubstr name, T *var) const + { + auto ch = find_child(name); + if(ch.valid()) + { + ch >> *var; + } + } + + template + void get_if(csubstr name, T *var, T fallback) const + { + auto ch = find_child(name); + if(ch.valid()) + { + ch >> *var; + } + else + { + *var = fallback; + } + } + +private: + + void _apply_seed() + { + if(m_seed.str) // we have a seed key: use it to create the new child + { + //RYML_ASSERT(i.key.scalar.empty() || m_key == i.key.scalar || m_key.empty()); + m_id = m_tree->append_child(m_id); + m_tree->_set_key(m_id, m_seed); + m_seed.str = nullptr; + m_seed.len = NONE; + } + else if(m_seed.len != NONE) // we have a seed index: create a child at that position + { + RYML_ASSERT(m_tree->num_children(m_id) == m_seed.len); + m_id = 
m_tree->append_child(m_id); + m_seed.str = nullptr; + m_seed.len = NONE; + } + else + { + RYML_ASSERT(valid()); + } + } + + inline void _apply(csubstr v) + { + m_tree->_set_val(m_id, v); + } + + inline void _apply(NodeScalar const& v) + { + m_tree->_set_val(m_id, v); + } + + inline void _apply(NodeInit const& i) + { + m_tree->_set(m_id, i); + } + +public: + + inline NodeRef insert_child(NodeRef after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); + return r; + } + + inline NodeRef insert_child(NodeInit const& i, NodeRef after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_child(m_id, after.m_id)); + r._apply(i); + return r; + } + + inline NodeRef prepend_child() + { + _C4RV(); + NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); + return r; + } + + inline NodeRef prepend_child(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->insert_child(m_id, NONE)); + r._apply(i); + return r; + } + + inline NodeRef append_child() + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_child(m_id)); + return r; + } + + inline NodeRef append_child(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_child(m_id)); + r._apply(i); + return r; + } + +public: + + inline NodeRef insert_sibling(NodeRef const after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); + return r; + } + + inline NodeRef insert_sibling(NodeInit const& i, NodeRef const after) + { + _C4RV(); + RYML_ASSERT(after.m_tree == m_tree); + NodeRef r(m_tree, m_tree->insert_sibling(m_id, after.m_id)); + r._apply(i); + return r; + } + + inline NodeRef prepend_sibling() + { + _C4RV(); + NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); + return r; + } + + inline NodeRef prepend_sibling(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->prepend_sibling(m_id)); + r._apply(i); + return r; + } + + inline NodeRef 
append_sibling() + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_sibling(m_id)); + return r; + } + + inline NodeRef append_sibling(NodeInit const& i) + { + _C4RV(); + NodeRef r(m_tree, m_tree->append_sibling(m_id)); + r._apply(i); + return r; + } + +public: + + inline void remove_child(NodeRef & child) + { + _C4RV(); + RYML_ASSERT(has_child(child)); + RYML_ASSERT(child.parent().id() == id()); + m_tree->remove(child.id()); + child.clear(); + } + + //! remove the nth child of this node + inline void remove_child(size_t pos) + { + _C4RV(); + RYML_ASSERT(pos >= 0 && pos < num_children()); + size_t child = m_tree->child(m_id, pos); + RYML_ASSERT(child != NONE); + m_tree->remove(child); + } + + //! remove a child by name + inline void remove_child(csubstr key) + { + _C4RV(); + size_t child = m_tree->find_child(m_id, key); + RYML_ASSERT(child != NONE); + m_tree->remove(child); + } + +public: + + /** change the node's position within its parent */ + inline void move(NodeRef const after) + { + _C4RV(); + m_tree->move(m_id, after.m_id); + } + + /** move the node to a different parent, which may belong to a different + * tree. When this is the case, then this node's tree pointer is reset to + * the tree of the parent node. 
*/ + inline void move(NodeRef const parent, NodeRef const after) + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree); + if(parent.m_tree == m_tree) + { + m_tree->move(m_id, parent.m_id, after.m_id); + } + else + { + parent.m_tree->move(m_tree, m_id, parent.m_id, after.m_id); + m_tree = parent.m_tree; + } + } + + inline NodeRef duplicate(NodeRef const parent, NodeRef const after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree); + if(parent.m_tree == m_tree) + { + size_t dup = m_tree->duplicate(m_id, parent.m_id, after.m_id); + NodeRef r(m_tree, dup); + return r; + } + else + { + size_t dup = parent.m_tree->duplicate(m_tree, m_id, parent.m_id, after.m_id); + NodeRef r(parent.m_tree, dup); + return r; + } + } + + inline void duplicate_children(NodeRef const parent, NodeRef const after) const + { + _C4RV(); + RYML_ASSERT(parent.m_tree == after.m_tree); + if(parent.m_tree == m_tree) + { + m_tree->duplicate_children(m_id, parent.m_id, after.m_id); + } + else + { + parent.m_tree->duplicate_children(m_tree, m_id, parent.m_id, after.m_id); + } + } + +private: + + template + struct child_iterator + { + Tree * m_tree; + size_t m_child_id; + + using value_type = NodeRef; + + child_iterator(Tree * t, size_t id) : m_tree(t), m_child_id(id) {} + + child_iterator& operator++ () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->next_sibling(m_child_id); return *this; } + child_iterator& operator-- () { RYML_ASSERT(m_child_id != NONE); m_child_id = m_tree->prev_sibling(m_child_id); return *this; } + + Nd operator* () const { return Nd(m_tree, m_child_id); } + Nd operator-> () const { return Nd(m_tree, m_child_id); } + + bool operator!= (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id != that.m_child_id; } + bool operator== (child_iterator that) const { RYML_ASSERT(m_tree == that.m_tree); return m_child_id == that.m_child_id; } + }; + +public: + + using iterator = child_iterator< NodeRef>; + using const_iterator = 
child_iterator; + + inline iterator begin() { return iterator(m_tree, m_tree->first_child(m_id)); } + inline iterator end () { return iterator(m_tree, NONE); } + + inline const_iterator begin() const { return const_iterator(m_tree, m_tree->first_child(m_id)); } + inline const_iterator end () const { return const_iterator(m_tree, NONE); } + +private: + + template + struct children_view_ + { + using n_iterator = child_iterator; + + n_iterator b, e; + + inline children_view_(n_iterator const& b_, n_iterator const& e_) : b(b_), e(e_) {} + + inline n_iterator begin() const { return b; } + inline n_iterator end () const { return e; } + }; + +public: + + using children_view = children_view_< NodeRef>; + using const_children_view = children_view_; + + children_view children() { return children_view(begin(), end()); } + const_children_view children() const { return const_children_view(begin(), end()); } + + #if defined(__clang__) + # pragma clang diagnostic push + # pragma clang diagnostic ignored "-Wnull-dereference" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # if __GNUC__ >= 6 + # pragma GCC diagnostic ignored "-Wnull-dereference" + # endif + #endif + + children_view siblings() { if(is_root()) { return children_view(end(), end()); } else { size_t p = get()->m_parent; return children_view(iterator(m_tree, m_tree->get(p)->m_first_child), iterator(m_tree, NONE)); } } + const_children_view siblings() const { if(is_root()) { return const_children_view(end(), end()); } else { size_t p = get()->m_parent; return const_children_view(const_iterator(m_tree, m_tree->get(p)->m_first_child), const_iterator(m_tree, NONE)); } } + + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + +public: + + /** visit every child node calling fn(node) */ + template bool visit(Visitor fn, size_t indentation_level=0, bool skip_root=true); + /** visit every child node calling fn(node) */ + template bool visit(Visitor 
fn, size_t indentation_level=0, bool skip_root=true) const; + + /** visit every child node calling fn(node, level) */ + template bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true); + /** visit every child node calling fn(node, level) */ + template bool visit_stacked(Visitor fn, size_t indentation_level=0, bool skip_root=true) const; + +#undef _C4RV +}; + +//----------------------------------------------------------------------------- +template +inline void write(NodeRef *n, T const& v) +{ + n->set_val_serialized(v); +} + +template +typename std::enable_if< ! std::is_floating_point::value, bool>::type +inline read(NodeRef const& n, T *v) +{ + return from_chars(n.val(), v); +} + +template +typename std::enable_if< std::is_floating_point::value, bool>::type +inline read(NodeRef const& n, T *v) +{ + return from_chars_float(n.val(), v); +} + + +//----------------------------------------------------------------------------- +template +bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) +{ + return const_cast(this)->visit(fn, indentation_level, skip_root); +} + +template +bool NodeRef::visit(Visitor fn, size_t indentation_level, bool skip_root) const +{ + size_t increment = 0; + if( ! (is_root() && skip_root)) + { + if(fn(this, indentation_level)) + { + return true; + } + ++increment; + } + if(has_children()) + { + for(auto ch : children()) + { + if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root + { + return true; + } + } + } + return false; +} + + +template +bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root) +{ + return const_cast< NodeRef const* >(this)->visit_stacked(fn, indentation_level, skip_root); +} + +template +bool NodeRef::visit_stacked(Visitor fn, size_t indentation_level, bool skip_root) const +{ + size_t increment = 0; + if( ! 
(is_root() && skip_root)) + { + if(fn(this, indentation_level)) + { + return true; + } + ++increment; + } + if(has_children()) + { + fn.push(this, indentation_level); + for(auto ch : children()) + { + if(ch.visit(fn, indentation_level + increment)) // no need to forward skip_root as it won't be root + { + fn.pop(this, indentation_level); + return true; + } + } + fn.pop(this, indentation_level); + } + return false; +} + +} // namespace yml +} // namespace c4 + + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#ifdef __GNUC__ +# pragma GCC diagnostic pop +#endif + +#endif /* _C4_YML_NODE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/writer.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_WRITER_HPP_ +#define _C4_YML_WRITER_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "./common.hpp" +#endif + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + +//included above: +//#include // fwrite(), fputc() +//included above: +//#include // memcpy() + + +namespace c4 { +namespace yml { + + +/** Repeat-Character: a character to be written a number of times. 
*/ +struct RepC +{ + char c; + size_t num_times; +}; +inline RepC indent_to(size_t num_levels) +{ + return {' ', size_t(2) * num_levels}; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A writer that outputs to a file. Defaults to stdout. */ +struct WriterFile +{ + FILE * m_file; + size_t m_pos; + + WriterFile(FILE *f = nullptr) : m_file(f ? f : stdout), m_pos(0) {} + + inline substr _get(bool /*error_on_excess*/) + { + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template + inline void _do_write(const char (&a)[N]) + { + fwrite(a, sizeof(char), N - 1, m_file); + m_pos += N - 1; + } + + inline void _do_write(csubstr sp) + { + #if defined(__clang__) + # pragma clang diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #endif + if(sp.empty()) return; + fwrite(sp.str, sizeof(csubstr::char_type), sp.len, m_file); + m_pos += sp.len; + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + } + + inline void _do_write(const char c) + { + fputc(c, m_file); + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + fputc(rc.c, m_file); + } + m_pos += rc.num_times; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** A writer that outputs to an STL-like ostream. 
*/ +template +struct WriterOStream +{ + OStream& m_stream; + size_t m_pos; + + WriterOStream(OStream &s) : m_stream(s), m_pos(0) {} + + inline substr _get(bool /*error_on_excess*/) + { + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template + inline void _do_write(const char (&a)[N]) + { + m_stream.write(a, N - 1); + m_pos += N - 1; + } + + inline void _do_write(csubstr sp) + { + #if defined(__clang__) + # pragma clang diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #elif defined(__GNUC__) + # pragma GCC diagnostic push + # pragma GCC diagnostic ignored "-Wsign-conversion" + #endif + if(sp.empty()) return; + m_stream.write(sp.str, sp.len); + m_pos += sp.len; + #if defined(__clang__) + # pragma clang diagnostic pop + #elif defined(__GNUC__) + # pragma GCC diagnostic pop + #endif + } + + inline void _do_write(const char c) + { + m_stream.put(c); + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + m_stream.put(rc.c); + } + m_pos += rc.num_times; + } +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +/** a writer to a substr */ +struct WriterBuf +{ + substr m_buf; + size_t m_pos; + + WriterBuf(substr sp) : m_buf(sp), m_pos(0) {} + + inline substr _get(bool error_on_excess) + { + if(m_pos <= m_buf.len) + { + return m_buf.first(m_pos); + } + if(error_on_excess) + { + c4::yml::error("not enough space in the given buffer"); + } + substr sp; + sp.str = nullptr; + sp.len = m_pos; + return sp; + } + + template + inline void _do_write(const char (&a)[N]) + { + RYML_ASSERT( ! m_buf.overlaps(a)); + if(m_pos + N-1 <= m_buf.len) + { + memcpy(&(m_buf[m_pos]), a, N-1); + } + m_pos += N-1; + } + + inline void _do_write(csubstr sp) + { + if(sp.empty()) return; + RYML_ASSERT( ! 
sp.overlaps(m_buf)); + if(m_pos + sp.len <= m_buf.len) + { + memcpy(&(m_buf[m_pos]), sp.str, sp.len); + } + m_pos += sp.len; + } + + inline void _do_write(const char c) + { + if(m_pos + 1 <= m_buf.len) + { + m_buf[m_pos] = c; + } + ++m_pos; + } + + inline void _do_write(RepC const rc) + { + if(m_pos + rc.num_times <= m_buf.len) + { + for(size_t i = 0; i < rc.num_times; ++i) + { + m_buf[m_pos + i] = rc.c; + } + } + m_pos += rc.num_times; + } +}; + + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_WRITER_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/writer.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/parser_dbg.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_DETAIL_PARSER_DBG_HPP_ +#define _C4_YML_DETAIL_PARSER_DBG_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "../common.hpp" +#endif +//included above: +//#include + +//----------------------------------------------------------------------------- +// some debugging scaffolds + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4068/*unknown pragma*/) +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +//#pragma GCC diagnostic ignored "-Wpragma-system-header-outside-header" +#pragma GCC system_header + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Werror" +#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments" + +// some debugging scaffolds +#ifdef RYML_DBG +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error 
"amalgamate: file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ + +namespace c4 { +inline void _dbg_dumper(csubstr s) { fwrite(s.str, 1, s.len, stdout); }; +template +void _dbg_printf(c4::csubstr fmt, Args&& ...args) +{ + static char writebuf[256]; + auto results = c4::format_dump_resume<&_dbg_dumper>(writebuf, fmt, std::forward(args)...); + // resume writing if the results failed to fit the buffer + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + { + results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) + { + results = format_dump_resume<&_dbg_dumper>(results, writebuf, fmt, std::forward(args)...); + } + } +} +} // namespace c4 + +# define _c4dbgt(fmt, ...) this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, ## __VA_ARGS__) +# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) +# define _c4dbgq(msg) _dbg_printf(msg "\n") +# define _c4err(fmt, ...) \ + do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ + this->_err("ERROR:\n" "{}:{}: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); } while(0) +#else +# define _c4dbgt(fmt, ...) +# define _c4dbgpf(fmt, ...) +# define _c4dbgp(msg) +# define _c4dbgq(msg) +# define _c4err(fmt, ...) 
\ + do { if(c4::is_debugger_attached()) { C4_DEBUG_BREAK(); } \ + this->_err("ERROR: " fmt, ## __VA_ARGS__); } while(0) +#endif + +#define _c4prsp(sp) sp +#define _c4presc(s) __c4presc(s.str, s.len) +inline c4::csubstr _c4prc(const char &C4_RESTRICT c) +{ + switch(c) + { + case '\n': return c4::csubstr("\\n"); + case '\t': return c4::csubstr("\\t"); + case '\0': return c4::csubstr("\\0"); + case '\r': return c4::csubstr("\\r"); + case '\f': return c4::csubstr("\\f"); + case '\b': return c4::csubstr("\\b"); + case '\v': return c4::csubstr("\\v"); + case '\a': return c4::csubstr("\\a"); + default: return c4::csubstr(&c, 1); + } +} +inline void __c4presc(const char *s, size_t len) +{ + size_t prev = 0; + for(size_t i = 0; i < len; ++i) + { + switch(s[i]) + { + case '\n' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('n'); putchar('\n'); prev = i+1; break; + case '\t' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('t'); prev = i+1; break; + case '\0' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('0'); prev = i+1; break; + case '\r' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('r'); prev = i+1; break; + case '\f' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('f'); prev = i+1; break; + case '\b' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('b'); prev = i+1; break; + case '\v' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('v'); prev = i+1; break; + case '\a' : fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('a'); prev = i+1; break; + case '\x1b': fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('e'); prev = i+1; break; + case -0x3e/*0xc2u*/: + if(i+1 < len) + { + if(s[i+1] == -0x60/*0xa0u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('_'); prev = i+2; ++i; + } + else if(s[i+1] == -0x7b/*0x85u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('N'); prev = i+2; ++i; + } + break; + } + case -0x1e/*0xe2u*/: + if(i+2 < len 
&& s[i+1] == -0x80/*0x80u*/) + { + if(s[i+2] == -0x58/*0xa8u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('L'); prev = i+3; i += 2; + } + else if(s[i+2] == -0x57/*0xa9u*/) + { + fwrite(s+prev, 1, i-prev, stdout); putchar('\\'); putchar('P'); prev = i+3; i += 2; + } + break; + } + } + } + fwrite(s + prev, 1, len - prev, stdout); +} + +#pragma clang diagnostic pop +#pragma GCC diagnostic pop + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + + +#endif /* _C4_YML_DETAIL_PARSER_DBG_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp) + +#define C4_YML_EMIT_DEF_HPP_ + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/emit.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_EMIT_HPP_ +#define _C4_YML_EMIT_HPP_ + +#ifndef _C4_YML_WRITER_HPP_ +#include "./writer.hpp" +#endif + +#ifndef _C4_YML_TREE_HPP_ +#include "./tree.hpp" +#endif + +#ifndef _C4_YML_NODE_HPP_ +#include "./node.hpp" +#endif + +namespace c4 { +namespace yml { + +template class Emitter; + +template +using EmitterOStream = Emitter>; +using EmitterFile = Emitter; +using EmitterBuf = Emitter; + +typedef enum { + EMIT_YAML = 0, + EMIT_JSON = 1 +} EmitType_e; + + +/** mark a tree or node to be emitted as json */ +struct as_json +{ + Tree const* tree; + size_t node; + as_json(Tree const& t) : tree(&t), node(t.empty() ? 
NONE : t.root_id()) {} + as_json(Tree const& t, size_t id) : tree(&t), node(id) {} + as_json(NodeRef const& n) : tree(n.tree()), node(n.id()) {} +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +class Emitter : public Writer +{ +public: + + using Writer::Writer; + + /** emit! + * + * When writing to a buffer, returns a substr of the emitted YAML. + * If the given buffer has insufficient space, the returned span will + * be null and its size will be the needed space. No writes are done + * after the end of the buffer. + * + * When writing to a file, the returned substr will be null, but its + * length will be set to the number of bytes written. */ + substr emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess); + /** emit starting at the root node */ + substr emit(EmitType_e type, Tree const& t, bool error_on_excess=true); + /** emit the given node */ + substr emit(EmitType_e type, NodeRef const& n, bool error_on_excess=true); + +private: + + Tree const* C4_RESTRICT m_tree; + + void _emit_yaml(size_t id); + void _do_visit_flow_sl(size_t id, size_t ilevel=0); + void _do_visit_flow_ml(size_t id, size_t ilevel=0, size_t do_indent=1); + void _do_visit_block(size_t id, size_t ilevel=0, size_t do_indent=1); + void _do_visit_block_container(size_t id, size_t next_level, size_t do_indent); + void _do_visit_json(size_t id); + +private: + + void _write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t level); + void _write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags); + + void _write_doc(size_t id); + void _write_scalar(csubstr s, bool was_quoted); + void _write_scalar_json(csubstr s, bool as_key, bool was_quoted); + void _write_scalar_literal(csubstr s, size_t level, bool as_key, bool explicit_indentation=false); + void 
_write_scalar_folded(csubstr s, size_t level, bool as_key); + void _write_scalar_squo(csubstr s, size_t level); + void _write_scalar_dquo(csubstr s, size_t level); + void _write_scalar_plain(csubstr s, size_t level); + + void _write_tag(csubstr tag) + { + if(!tag.begins_with('!')) + this->Writer::_do_write('!'); + this->Writer::_do_write(tag); + } + + enum : type_bits { + _keysc = (KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | ~(VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _valsc = ~(KEY|KEYREF|KEYANCH|KEYQUO|_WIP_KEY_STYLE) | (VAL|VALREF|VALANCH|VALQUO|_WIP_VAL_STYLE), + _keysc_json = (KEY) | ~(VAL), + _valsc_json = ~(KEY) | (VAL), + }; + + C4_ALWAYS_INLINE void _writek(size_t id, size_t level) { _write(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~_valsc, level); } + C4_ALWAYS_INLINE void _writev(size_t id, size_t level) { _write(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~_keysc, level); } + + C4_ALWAYS_INLINE void _writek_json(size_t id) { _write_json(m_tree->keysc(id), m_tree->_p(id)->m_type.type & ~(VAL)); } + C4_ALWAYS_INLINE void _writev_json(size_t id) { _write_json(m_tree->valsc(id), m_tree->_p(id)->m_type.type & ~(KEY)); } + +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit(Tree const& t, size_t id, FILE *f) +{ + EmitterFile em(f); + return em.emit(EMIT_YAML, t, id, /*error_on_excess*/true).len; +} +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. */ +inline size_t emit_json(Tree const& t, size_t id, FILE *f) +{ + EmitterFile em(f); + return em.emit(EMIT_JSON, t, id, /*error_on_excess*/true).len; +} + + +/** emit YAML to the given file. 
A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit(Tree const& t, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit(EMIT_YAML, t, /*error_on_excess*/true).len; +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_json(Tree const& t, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit(EMIT_JSON, t, /*error_on_excess*/true).len; +} + + +/** emit YAML to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit(NodeRef const& r, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit(EMIT_YAML, r, /*error_on_excess*/true).len; +} + +/** emit JSON to the given file. A null file defaults to stdout. + * Return the number of bytes written. + * @overload */ +inline size_t emit_json(NodeRef const& r, FILE *f=nullptr) +{ + EmitterFile em(f); + return em.emit(EMIT_JSON, r, /*error_on_excess*/true).len; +} + + +//----------------------------------------------------------------------------- + +/** emit YAML to an STL-like ostream */ +template +inline OStream& operator<< (OStream& s, Tree const& t) +{ + EmitterOStream em(s); + em.emit(EMIT_YAML, t); + return s; +} + +/** emit YAML to an STL-like ostream + * @overload */ +template +inline OStream& operator<< (OStream& s, NodeRef const& n) +{ + EmitterOStream em(s); + em.emit(EMIT_YAML, n); + return s; +} + +/** emit json to an STL-like stream */ +template +inline OStream& operator<< (OStream& s, as_json const& j) +{ + EmitterOStream em(s); + em.emit(EMIT_JSON, *j.tree, j.node, true); + return s; +} + + +//----------------------------------------------------------------------------- + + +/** emit YAML to the given buffer. Return a substr trimmed to the emitted YAML. + * @param error_on_excess Raise an error if the space in the buffer is insufficient. 
+ * @overload */
+inline substr emit(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
+{
+    return EmitterBuf(buf).emit(EMIT_YAML, t, id, error_on_excess);
+}
+
+/** Write node @p id of @p t as JSON into @p buf; the result is the part
+ * of the buffer holding the emitted JSON.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @overload */
+inline substr emit_json(Tree const& t, size_t id, substr buf, bool error_on_excess=true)
+{
+    return EmitterBuf(buf).emit(EMIT_JSON, t, id, error_on_excess);
+}
+
+
+/** Write the whole tree @p t as YAML into @p buf; the result is the part
+ * of the buffer holding the emitted YAML.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @overload */
+inline substr emit(Tree const& t, substr buf, bool error_on_excess=true)
+{
+    return EmitterBuf(buf).emit(EMIT_YAML, t, error_on_excess);
+}
+
+/** Write the whole tree @p t as JSON into @p buf; the result is the part
+ * of the buffer holding the emitted JSON.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @overload */
+inline substr emit_json(Tree const& t, substr buf, bool error_on_excess=true)
+{
+    return EmitterBuf(buf).emit(EMIT_JSON, t, error_on_excess);
+}
+
+
+/** Write node @p r as YAML into @p buf; the result is the part of the
+ * buffer holding the emitted YAML.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @overload
+ */
+inline substr emit(NodeRef const& r, substr buf, bool error_on_excess=true)
+{
+    return EmitterBuf(buf).emit(EMIT_YAML, r, error_on_excess);
+}
+
+/** emit JSON to the given buffer. Return a substr trimmed to the emitted JSON.
+ * @param error_on_excess Raise an error if the space in the buffer is insufficient.
+ * @overload + */ +inline substr emit_json(NodeRef const& r, substr buf, bool error_on_excess=true) +{ + EmitterBuf em(buf); + return em.emit(EMIT_JSON, r, error_on_excess); +} + + +//----------------------------------------------------------------------------- + +/** emit+resize: emit YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template +substr emitrs(Tree const& t, size_t id, CharOwningContainer * cont) +{ + substr buf = to_substr(*cont); + substr ret = emit(t, id, buf, /*error_on_excess*/false); + if(ret.str == nullptr && ret.len > 0) + { + cont->resize(ret.len); + buf = to_substr(*cont); + ret = emit(t, id, buf, /*error_on_excess*/true); + } + return ret; +} + +/** emit+resize: emit JSON to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted JSON. */ +template +substr emitrs_json(Tree const& t, size_t id, CharOwningContainer * cont) +{ + substr buf = to_substr(*cont); + substr ret = emit_json(t, id, buf, /*error_on_excess*/false); + if(ret.str == nullptr && ret.len > 0) + { + cont->resize(ret.len); + buf = to_substr(*cont); + ret = emit_json(t, id, buf, /*error_on_excess*/true); + } + return ret; +} + + +/** emit+resize: emit YAML to the given std::string/std::vector-like + * container, resizing it as needed to fit the emitted YAML. */ +template +CharOwningContainer emitrs(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs(t, id, &c); + return c; +} + +/** emit+resize: emit JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template +CharOwningContainer emitrs_json(Tree const& t, size_t id) +{ + CharOwningContainer c; + emitrs_json(t, id, &c); + return c; +} + + +/** emit+resize: YAML to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted YAML. 
*/
+template<class CharOwningContainer>
+substr emitrs(Tree const& t, CharOwningContainer * cont)
+{
+    // an empty tree has no root to emit: return an empty span
+    if(t.empty())
+        return {};
+    return emitrs(t, t.root_id(), cont);
+}
+
+/** emit+resize: JSON to the given std::string/std::vector-like container,
+ * resizing it as needed to fit the emitted JSON. */
+template<class CharOwningContainer>
+substr emitrs_json(Tree const& t, CharOwningContainer * cont)
+{
+    // an empty tree has no root to emit: return an empty span
+    if(t.empty())
+        return {};
+    return emitrs_json(t, t.root_id(), cont);
+}
+
+
+/** emit+resize: YAML to the given std::string/std::vector-like container,
+ * resizing it as needed to fit the emitted YAML.
+ * For an empty tree the default-constructed container is returned. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs(Tree const& t)
+{
+    CharOwningContainer c;
+    if(t.empty())
+        return c;
+    emitrs(t, t.root_id(), &c);
+    return c;
+}
+
+/** emit+resize: JSON to the given std::string/std::vector-like container,
+ * resizing it as needed to fit the emitted JSON.
+ * For an empty tree the default-constructed container is returned. */
+template<class CharOwningContainer>
+CharOwningContainer emitrs_json(Tree const& t)
+{
+    CharOwningContainer c;
+    if(t.empty())
+        return c;
+    emitrs_json(t, t.root_id(), &c);
+    return c;
+}
+
+
+/** emit+resize: YAML to the given std::string/std::vector-like container,
+ * resizing it as needed to fit the emitted YAML.
+ * The node's validity is checked through the tree's callbacks first. */
+template<class CharOwningContainer>
+substr emitrs(NodeRef const& n, CharOwningContainer * cont)
+{
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
+    return emitrs(*n.tree(), n.id(), cont);
+}
+
+/** emit+resize: JSON to the given std::string/std::vector-like container,
+ * resizing it as needed to fit the emitted JSON.
+ * The node's validity is checked through the tree's callbacks first. */
+template<class CharOwningContainer>
+substr emitrs_json(NodeRef const& n, CharOwningContainer * cont)
+{
+    _RYML_CB_CHECK(n.tree()->callbacks(), n.valid());
+    return emitrs_json(*n.tree(), n.id(), cont);
+}
+
+
+/** emit+resize: YAML to the given std::string/std::vector-like container,
+ * resizing it as needed to fit the emitted YAML.
*/ +template +CharOwningContainer emitrs(NodeRef const& n) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + CharOwningContainer c; + emitrs(*n.tree(), n.id(), &c); + return c; +} + +/** emit+resize: JSON to the given std::string/std::vector-like container, + * resizing it as needed to fit the emitted JSON. */ +template +CharOwningContainer emitrs_json(NodeRef const& n) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + CharOwningContainer c; + emitrs_json(*n.tree(), n.id(), &c); + return c; +} + +} // namespace yml +} // namespace c4 + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp +//#include "c4/yml/emit.def.hpp" +#if !defined(C4_YML_EMIT_DEF_HPP_) && !defined(_C4_YML_EMIT_DEF_HPP_) +#error "amalgamate: file c4/yml/emit.def.hpp must have been included at this point" +#endif /* C4_YML_EMIT_DEF_HPP_ */ + + +#endif /* _C4_YML_EMIT_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/emit.def.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_EMIT_DEF_HPP_ +#define _C4_YML_EMIT_DEF_HPP_ + +#ifndef _C4_YML_EMIT_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp +//#include "c4/yml/emit.hpp" +#if !defined(C4_YML_EMIT_HPP_) && !defined(_C4_YML_EMIT_HPP_) +#error "amalgamate: file c4/yml/emit.hpp must have been included at this point" +#endif /* C4_YML_EMIT_HPP_ */ + +#endif + +namespace c4 { +namespace yml { + +template +substr Emitter::emit(EmitType_e type, Tree const& t, size_t id, bool error_on_excess) +{ + if(t.empty()) + { + 
_RYML_CB_ASSERT(t.callbacks(), id == NONE); + return {}; + } + _RYML_CB_CHECK(t.callbacks(), id < t.size()); + m_tree = &t; + if(type == EMIT_YAML) + _emit_yaml(id); + else if(type == EMIT_JSON) + _do_visit_json(id); + else + _RYML_CB_ERR(m_tree->callbacks(), "unknown emit type"); + return this->Writer::_get(error_on_excess); +} + +template +substr Emitter::emit(EmitType_e type, Tree const& t, bool error_on_excess) +{ + if(t.empty()) + return {}; + return emit(type, t, t.root_id(), error_on_excess); +} + +template +substr Emitter::emit(EmitType_e type, NodeRef const& n, bool error_on_excess) +{ + _RYML_CB_CHECK(n.tree()->callbacks(), n.valid()); + return emit(type, *n.tree(), n.id(), error_on_excess); +} + + +//----------------------------------------------------------------------------- + +template +void Emitter::_emit_yaml(size_t id) +{ + // save branches in the visitor by doing the initial stream/doc + // logic here, sparing the need to check stream/val/keyval inside + // the visitor functions + auto dispatch = [this](size_t node){ + NodeType ty = m_tree->type(node); + if(ty.marked_flow_sl()) + _do_visit_flow_sl(node, 0); + else if(ty.marked_flow_ml()) + _do_visit_flow_ml(node, 0); + else + { + _do_visit_block(node, 0); + } + }; + if(!m_tree->is_root(id)) + { + if(m_tree->is_container(id) && !m_tree->type(id).marked_flow()) + { + size_t ilevel = 0; + if(m_tree->has_key(id)) + { + this->Writer::_do_write(m_tree->key(id)); + this->Writer::_do_write(":\n"); + ++ilevel; + } + _do_visit_block_container(id, ilevel, ilevel); + return; + } + } + + auto *btd = m_tree->tag_directives().b; + auto *etd = m_tree->tag_directives().e; + auto write_tag_directives = [&btd, etd, this](size_t next_node){ + auto end = btd; + while(end < etd) + { + if(end->next_node_id > next_node) + break; + ++end; + } + for( ; btd != end; ++btd) + { + if(next_node != m_tree->first_child(m_tree->parent(next_node))) + this->Writer::_do_write("...\n"); + this->Writer::_do_write("%TAG "); + 
this->Writer::_do_write(btd->handle); + this->Writer::_do_write(' '); + this->Writer::_do_write(btd->prefix); + this->Writer::_do_write('\n'); + } + }; + if(m_tree->is_stream(id)) + { + if(m_tree->first_child(id) != NONE) + write_tag_directives(m_tree->first_child(id)); + for(size_t child = m_tree->first_child(id); child != NONE; child = m_tree->next_sibling(child)) + { + dispatch(child); + if(m_tree->next_sibling(child) != NONE) + write_tag_directives(m_tree->next_sibling(child)); + } + } + else if(m_tree->is_container(id)) + { + dispatch(id); + } + else if(m_tree->is_doc(id)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_container(id)); // checked above + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_val(id)); // so it must be a val + _write_doc(id); + } + else if(m_tree->is_keyval(id)) + { + _writek(id, 0); + this->Writer::_do_write(": "); + _writev(id, 0); + if(!m_tree->type(id).marked_flow()) + this->Writer::_do_write('\n'); + } + else if(m_tree->is_val(id)) + { + //this->Writer::_do_write("- "); + _writev(id, 0); + if(!m_tree->type(id).marked_flow()) + this->Writer::_do_write('\n'); + } + else if(m_tree->type(id) == NOTYPE) + { + ; + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "unknown type"); + } +} + +template +void Emitter::_write_doc(size_t id) +{ + RYML_ASSERT(m_tree->is_doc(id)); + if(!m_tree->is_root(id)) + { + RYML_ASSERT(m_tree->is_stream(m_tree->parent(id))); + this->Writer::_do_write("---"); + } + if(!m_tree->has_val(id)) // this is more frequent + { + if(m_tree->has_val_tag(id)) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(id)); + } + if(m_tree->has_val_anchor(id)) + { + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(id)); + } + } + else // docval + { + RYML_ASSERT(m_tree->has_val(id)); + RYML_ASSERT(!m_tree->has_key(id)); + if(!m_tree->is_root(id)) + this->Writer::_do_write(' '); + _writev(id, 0); + 
} + this->Writer::_do_write('\n'); +} + +template +void Emitter::_do_visit_flow_sl(size_t node, size_t ilevel) +{ + RYML_ASSERT(!m_tree->is_stream(node)); + RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); + RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + + if(m_tree->is_doc(node)) + { + _write_doc(node); + if(!m_tree->has_children(node)) + return; + } + else if(m_tree->is_container(node)) + { + RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + + bool spc = false; // write a space + + if(m_tree->has_key(node)) + { + _writek(node, ilevel); + this->Writer::_do_write(':'); + spc = true; + } + + if(m_tree->has_val_tag(node)) + { + if(spc) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(node)); + spc = true; + } + + if(m_tree->has_val_anchor(node)) + { + if(spc) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(node)); + spc = true; + } + + if(spc) + this->Writer::_do_write(' '); + + if(m_tree->is_map(node)) + { + this->Writer::_do_write('{'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_seq(node)); + this->Writer::_do_write('['); + } + } // container + + for(size_t child = m_tree->first_child(node), count = 0; child != NONE; child = m_tree->next_sibling(child)) + { + if(count++) + this->Writer::_do_write(','); + if(m_tree->is_keyval(child)) + { + _writek(child, ilevel); + this->Writer::_do_write(": "); + _writev(child, ilevel); + } + else if(m_tree->is_val(child)) + { + _writev(child, ilevel); + } + else + { + // with single-line flow, we can never go back to block + _do_visit_flow_sl(child, ilevel + 1); + } + } + + if(m_tree->is_map(node)) + { + this->Writer::_do_write('}'); + } + else if(m_tree->is_seq(node)) + { + this->Writer::_do_write(']'); + } +} + +template +void Emitter::_do_visit_flow_ml(size_t id, size_t ilevel, size_t do_indent) +{ + C4_UNUSED(id); + C4_UNUSED(ilevel); + 
C4_UNUSED(do_indent); + RYML_CHECK(false/*not implemented*/); +} + +template +void Emitter::_do_visit_block_container(size_t node, size_t next_level, size_t do_indent) +{ + RepC ind = indent_to(do_indent * next_level); + + if(m_tree->is_seq(node)) + { + for(size_t child = m_tree->first_child(node); child != NONE; child = m_tree->next_sibling(child)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->has_key(child)); + if(m_tree->is_val(child)) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _writev(child, next_level); + this->Writer::_do_write('\n'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(child)); + NodeType ty = m_tree->type(child); + if(ty.marked_flow_sl()) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _do_visit_flow_sl(child, 0u); + this->Writer::_do_write('\n'); + } + else if(ty.marked_flow_ml()) + { + this->Writer::_do_write(ind); + this->Writer::_do_write("- "); + _do_visit_flow_ml(child, next_level, do_indent); + this->Writer::_do_write('\n'); + } + else + { + _do_visit_block(child, next_level, do_indent); + } + } + do_indent = true; + ind = indent_to(do_indent * next_level); + } + } + else // map + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node)); + for(size_t ich = m_tree->first_child(node); ich != NONE; ich = m_tree->next_sibling(ich)) + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_key(ich)); + if(m_tree->is_keyval(ich)) + { + this->Writer::_do_write(ind); + _writek(ich, next_level); + this->Writer::_do_write(": "); + _writev(ich, next_level); + this->Writer::_do_write('\n'); + } + else + { + _RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(ich)); + NodeType ty = m_tree->type(ich); + if(ty.marked_flow_sl()) + { + this->Writer::_do_write(ind); + _do_visit_flow_sl(ich, 0u); + this->Writer::_do_write('\n'); + } + else if(ty.marked_flow_ml()) + { + this->Writer::_do_write(ind); + _do_visit_flow_ml(ich, 0u); + this->Writer::_do_write('\n'); + } + 
else + { + _do_visit_block(ich, next_level, do_indent); + } + } + do_indent = true; + ind = indent_to(do_indent * next_level); + } + } +} + +template +void Emitter::_do_visit_block(size_t node, size_t ilevel, size_t do_indent) +{ + RYML_ASSERT(!m_tree->is_stream(node)); + RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node)); + RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node))); + RepC ind = indent_to(do_indent * ilevel); + + if(m_tree->is_doc(node)) + { + _write_doc(node); + if(!m_tree->has_children(node)) + return; + } + else if(m_tree->is_container(node)) + { + RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node)); + + bool spc = false; // write a space + bool nl = false; // write a newline + + if(m_tree->has_key(node)) + { + this->Writer::_do_write(ind); + _writek(node, ilevel); + this->Writer::_do_write(':'); + spc = true; + } + else if(!m_tree->is_root(node)) + { + this->Writer::_do_write(ind); + this->Writer::_do_write('-'); + spc = true; + } + + if(m_tree->has_val_tag(node)) + { + if(spc) + this->Writer::_do_write(' '); + _write_tag(m_tree->val_tag(node)); + spc = true; + nl = true; + } + + if(m_tree->has_val_anchor(node)) + { + if(spc) + this->Writer::_do_write(' '); + this->Writer::_do_write('&'); + this->Writer::_do_write(m_tree->val_anchor(node)); + spc = true; + nl = true; + } + + if(m_tree->has_children(node)) + { + if(m_tree->has_key(node)) + nl = true; + else + if(!m_tree->is_root(node) && !nl) + spc = true; + } + else + { + if(m_tree->is_seq(node)) + this->Writer::_do_write(" []\n"); + else if(m_tree->is_map(node)) + this->Writer::_do_write(" {}\n"); + return; + } + + if(spc && !nl) + this->Writer::_do_write(' '); + + do_indent = 0; + if(nl) + { + this->Writer::_do_write('\n'); + do_indent = 1; + } + } // container + + size_t next_level = ilevel + 1; + if(m_tree->is_root(node) || m_tree->is_doc(node)) + next_level = ilevel; // do not indent at top level + + 
_do_visit_block_container(node, next_level, do_indent); +} + +template +void Emitter::_do_visit_json(size_t id) +{ + _RYML_CB_CHECK(m_tree->callbacks(), !m_tree->is_stream(id)); // JSON does not have streams + if(m_tree->is_keyval(id)) + { + _writek_json(id); + this->Writer::_do_write(": "); + _writev_json(id); + } + else if(m_tree->is_val(id)) + { + _writev_json(id); + } + else if(m_tree->is_container(id)) + { + if(m_tree->has_key(id)) + { + _writek_json(id); + this->Writer::_do_write(": "); + } + if(m_tree->is_seq(id)) + this->Writer::_do_write('['); + else if(m_tree->is_map(id)) + this->Writer::_do_write('{'); + } // container + + for(size_t ich = m_tree->first_child(id); ich != NONE; ich = m_tree->next_sibling(ich)) + { + if(ich != m_tree->first_child(id)) + this->Writer::_do_write(','); + _do_visit_json(ich); + } + + if(m_tree->is_seq(id)) + this->Writer::_do_write(']'); + else if(m_tree->is_map(id)) + this->Writer::_do_write('}'); +} + +template +void Emitter::_write(NodeScalar const& C4_RESTRICT sc, NodeType flags, size_t ilevel) +{ + if( ! sc.tag.empty()) + { + _write_tag(sc.tag); + this->Writer::_do_write(' '); + } + if(flags.has_anchor()) + { + RYML_ASSERT(flags.is_ref() != flags.has_anchor()); + RYML_ASSERT( ! 
sc.anchor.empty()); + this->Writer::_do_write('&'); + this->Writer::_do_write(sc.anchor); + this->Writer::_do_write(' '); + } + else if(flags.is_ref()) + { + if(sc.anchor != "<<") + this->Writer::_do_write('*'); + this->Writer::_do_write(sc.anchor); + return; + } + + // ensure the style flags only have one of KEY or VAL + _RYML_CB_ASSERT(m_tree->callbacks(), ((flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE)) == 0) || (((flags&_WIP_KEY_STYLE) == 0) != ((flags&_WIP_VAL_STYLE) == 0))); + + auto style_marks = flags & (_WIP_KEY_STYLE|_WIP_VAL_STYLE); + if(style_marks & (_WIP_KEY_LITERAL|_WIP_VAL_LITERAL)) + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key()); + } + else if(style_marks & (_WIP_KEY_FOLDED|_WIP_VAL_FOLDED)) + { + _write_scalar_folded(sc.scalar, ilevel, flags.has_key()); + } + else if(style_marks & (_WIP_KEY_SQUO|_WIP_VAL_SQUO)) + { + _write_scalar_squo(sc.scalar, ilevel); + } + else if(style_marks & (_WIP_KEY_DQUO|_WIP_VAL_DQUO)) + { + _write_scalar_dquo(sc.scalar, ilevel); + } + else if(style_marks & (_WIP_KEY_PLAIN|_WIP_VAL_PLAIN)) + { + _write_scalar_plain(sc.scalar, ilevel); + } + else if(!style_marks) + { + size_t first_non_nl = sc.scalar.first_not_of('\n'); + bool all_newlines = first_non_nl == npos; + bool has_leading_ws = (!all_newlines) && sc.scalar.sub(first_non_nl).begins_with_any(" \t"); + bool do_literal = ((!sc.scalar.empty() && all_newlines) || (has_leading_ws && !sc.scalar.trim(' ').empty())); + if(do_literal) + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); + } + else + { + for(size_t i = 0; i < sc.scalar.len; ++i) + { + if(sc.scalar.str[i] == '\n') + { + _write_scalar_literal(sc.scalar, ilevel, flags.has_key(), /*explicit_indentation*/has_leading_ws); + goto wrote_special; + } + // todo: check for escaped characters requiring double quotes + } + _write_scalar(sc.scalar, flags.is_quoted()); + wrote_special: + ; + } + } + else + { + _RYML_CB_ERR(m_tree->callbacks(), "not 
implemented"); + } +} +template +void Emitter::_write_json(NodeScalar const& C4_RESTRICT sc, NodeType flags) +{ + if(C4_UNLIKELY( ! sc.tag.empty())) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have tags"); + if(C4_UNLIKELY(flags.has_anchor())) + _RYML_CB_ERR(m_tree->callbacks(), "JSON does not have anchors"); + _write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted()); +} + +#define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(' '); this->Writer::_do_write(' '); } + +template +void Emitter::_write_scalar_literal(csubstr s, size_t ilevel, bool explicit_key, bool explicit_indentation) +{ + if(explicit_key) + this->Writer::_do_write("? "); + csubstr trimmed = s.trimr("\n\r"); + size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r'); + // + if(!explicit_indentation) + this->Writer::_do_write('|'); + else + this->Writer::_do_write("|2"); + // + if(numnewlines_at_end > 1 || (trimmed.len == 0 && s.len > 0)/*only newlines*/) + this->Writer::_do_write("+\n"); + else if(numnewlines_at_end == 1) + this->Writer::_do_write('\n'); + else + this->Writer::_do_write("-\n"); + // + if(trimmed.len) + { + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < trimmed.len; ++i) + { + if(trimmed[i] != '\n') + continue; + // write everything up to this point + csubstr since_pos = trimmed.range(pos, i+1); // include the newline + _rymlindent_nextline() + this->Writer::_do_write(since_pos); + pos = i+1; // already written + } + if(pos < trimmed.len) + { + _rymlindent_nextline() + this->Writer::_do_write(trimmed.sub(pos)); + } + if(numnewlines_at_end) + { + this->Writer::_do_write('\n'); + --numnewlines_at_end; + } + } + for(size_t i = 0; i < numnewlines_at_end; ++i) + { + _rymlindent_nextline() + if(i+1 < numnewlines_at_end || explicit_key) + this->Writer::_do_write('\n'); + } + if(explicit_key && !numnewlines_at_end) + this->Writer::_do_write('\n'); +} + +template 
+void Emitter::_write_scalar_folded(csubstr s, size_t ilevel, bool explicit_key) +{ + if(explicit_key) + { + this->Writer::_do_write("? "); + } + RYML_ASSERT(s.find("\r") == csubstr::npos); + csubstr trimmed = s.trimr('\n'); + size_t numnewlines_at_end = s.len - trimmed.len; + if(numnewlines_at_end == 0) + { + this->Writer::_do_write(">-\n"); + } + else if(numnewlines_at_end == 1) + { + this->Writer::_do_write(">\n"); + } + else if(numnewlines_at_end > 1) + { + this->Writer::_do_write(">+\n"); + } + if(trimmed.len) + { + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < trimmed.len; ++i) + { + if(trimmed[i] != '\n') + continue; + // write everything up to this point + csubstr since_pos = trimmed.range(pos, i+1); // include the newline + pos = i+1; // because of the newline + _rymlindent_nextline() + this->Writer::_do_write(since_pos); + this->Writer::_do_write('\n'); // write the newline twice + } + if(pos < trimmed.len) + { + _rymlindent_nextline() + this->Writer::_do_write(trimmed.sub(pos)); + } + if(numnewlines_at_end) + { + this->Writer::_do_write('\n'); + --numnewlines_at_end; + } + } + for(size_t i = 0; i < numnewlines_at_end; ++i) + { + _rymlindent_nextline() + if(i+1 < numnewlines_at_end || explicit_key) + this->Writer::_do_write('\n'); + } + if(explicit_key && !numnewlines_at_end) + this->Writer::_do_write('\n'); +} + +template +void Emitter::_write_scalar_squo(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + this->Writer::_do_write('\''); + for(size_t i = 0; i < s.len; ++i) + { + if(s[i] == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this char + this->Writer::_do_write('\n'); // write the character again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + } + else if(s[i] == '\'') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // 
write everything up to (including) this char + this->Writer::_do_write('\''); // write the character again + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + this->Writer::_do_write(s.sub(pos)); + this->Writer::_do_write('\''); +} + +template +void Emitter::_write_scalar_dquo(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s.str[i]; + if(curr == '"' || curr == '\\') + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write('\\'); // write the escape + this->Writer::_do_write(curr); // write the char + pos = i+1; + } + else if(s[i] == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this newline + this->Writer::_do_write('\n'); // write the newline again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + if(i+1 < s.len) // escape leading whitespace after the newline + { + const char next = s.str[i+1]; + if(next == ' ' || next == '\t') + this->Writer::_do_write('\\'); + } + } + else if(curr == ' ' || curr == '\t') + { + // escape trailing whitespace before a newline + size_t next = s.first_not_of(" \t\r", i); + if(next != npos && s[next] == '\n') + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); // write everything up to (excluding) this char + this->Writer::_do_write('\\'); // escape the whitespace + pos = i; + } + } + } + // write missing characters at the end of the string + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } + this->Writer::_do_write('"'); +} + +template +void Emitter::_write_scalar_plain(csubstr s, size_t ilevel) +{ + size_t pos = 0; // tracks the last character that was already written + for(size_t i = 0; i < s.len; ++i) + { + 
const char curr = s.str[i]; + if(curr == '\n') + { + csubstr sub = s.range(pos, i+1); + this->Writer::_do_write(sub); // write everything up to (including) this newline + this->Writer::_do_write('\n'); // write the newline again + if(i + 1 < s.len) + _rymlindent_nextline() // indent the next line + pos = i+1; + } + } + // write missing characters at the end of the string + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } +} + +#undef _rymlindent_nextline + +template +void Emitter::_write_scalar(csubstr s, bool was_quoted) +{ + // this block of code needed to be moved to before the needs_quotes + // assignment to work around a g++ optimizer bug where (s.str != nullptr) + // was evaluated as true even if s.str was actually a nullptr (!!!) + if(s.len == size_t(0)) + { + if(was_quoted) + this->Writer::_do_write("''"); + return; + } + + const bool needs_quotes = ( + was_quoted + || + ( + ( ! s.is_number()) + && + ( + // has leading whitespace + s.begins_with_any(" \n\t\r") + || + // looks like reference or anchor or would be treated as a directive + s.begins_with_any("*&%") + || + s.begins_with("<<") + || + // has trailing whitespace + s.ends_with_any(" \n\t\r") + || + // has special chars + (s.first_of("#:-?,\n{}[]'\"") != npos) + ) + ) + ); + + if( ! 
needs_quotes) + { + this->Writer::_do_write(s); + } + else + { + const bool has_dquotes = s.first_of( '"') != npos; + const bool has_squotes = s.first_of('\'') != npos; + if(!has_squotes && has_dquotes) + { + this->Writer::_do_write('\''); + this->Writer::_do_write(s); + this->Writer::_do_write('\''); + } + else if(has_squotes && !has_dquotes) + { + RYML_ASSERT(s.count('\n') == 0); + this->Writer::_do_write('"'); + this->Writer::_do_write(s); + this->Writer::_do_write('"'); + } + else + { + _write_scalar_squo(s, /*FIXME FIXME FIXME*/0); + } + } +} +template +void Emitter::_write_scalar_json(csubstr s, bool as_key, bool was_quoted) +{ + if(was_quoted) + { + this->Writer::_do_write('"'); + this->Writer::_do_write(s); + this->Writer::_do_write('"'); + } + // json only allows strings as keys + else if(!as_key && (s.is_number() || s == "true" || s == "null" || s == "false")) + { + this->Writer::_do_write(s); + } + else + { + size_t pos = 0; + this->Writer::_do_write('"'); + for(size_t i = 0; i < s.len; ++i) + { + switch (s[i]) + { + case '"': + case '\n': { + if(i > 0) + { + csubstr sub = s.range(pos, i); + this->Writer::_do_write(sub); + } + pos = i + 1; + switch (s[i]) { + case '"': + this->Writer::_do_write("\\\""); + break; + case '\n': + this->Writer::_do_write("\\n"); + break; + } + break; + } + } + } + if(pos < s.len) + { + csubstr sub = s.sub(pos); + this->Writer::_do_write(sub); + } + this->Writer::_do_write('"'); + } +} + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_EMIT_DEF_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/emit.def.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/stack.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//-------------------------------------------------------------------------------- 
+//******************************************************************************** + +#ifndef _C4_YML_DETAIL_STACK_HPP_ +#define _C4_YML_DETAIL_STACK_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +//included above: +//#include "../common.hpp" +#endif + +#ifdef RYML_DBG +//included above: +//# include +#endif + +//included above: +//#include + +namespace c4 { +namespace yml { +namespace detail { + +/** A lightweight contiguous stack with SSO. This avoids a dependency on std. */ +template +class stack +{ + static_assert(std::is_trivially_copyable::value, "T must be trivially copyable"); + static_assert(std::is_trivially_destructible::value, "T must be trivially destructible"); + + enum : size_t { sso_size = N }; + +public: + + T m_buf[N]; + T * m_stack; + size_t m_size; + size_t m_capacity; + Callbacks m_callbacks; + +public: + + constexpr static bool is_contiguous() { return true; } + + stack(Callbacks const& cb) + : m_buf() + , m_stack(m_buf) + , m_size(0) + , m_capacity(N) + , m_callbacks(cb) {} + stack() : stack(get_callbacks()) {} + ~stack() + { + _free(); + } + + stack(stack const& that) noexcept : stack(that.m_callbacks) + { + resize(that.m_size); + _cp(&that); + } + + stack(stack &&that) noexcept : stack(that.m_callbacks) + { + _mv(&that); + } + + stack& operator= (stack const& that) noexcept + { + _cb(that.m_callbacks); + resize(that.m_size); + _cp(&that); + return *this; + } + + stack& operator= (stack &&that) noexcept + { + _cb(that.m_callbacks); + _mv(&that); + return *this; + } + +public: + + size_t size() const { return m_size; } + size_t empty() const { return m_size == 0; } + size_t capacity() const { return m_capacity; } + + void clear() + { + m_size = 0; + } + + void resize(size_t sz) + { + reserve(sz); + m_size = sz; + } + + void reserve(size_t sz); + + void push(T const& C4_RESTRICT n) + { + RYML_ASSERT((const char*)&n + sizeof(T) < (const char*)m_stack || &n > m_stack + m_capacity); + if(m_size == m_capacity) + { + size_t cap = m_capacity == 0 ? 
N : 2 * m_capacity; + reserve(cap); + } + m_stack[m_size] = n; + ++m_size; + } + + void push_top() + { + RYML_ASSERT(m_size > 0); + if(m_size == m_capacity) + { + size_t cap = m_capacity == 0 ? N : 2 * m_capacity; + reserve(cap); + } + m_stack[m_size] = m_stack[m_size - 1]; + ++m_size; + } + + T const& C4_RESTRICT pop() + { + RYML_ASSERT(m_size > 0); + --m_size; + return m_stack[m_size]; + } + + C4_ALWAYS_INLINE T const& C4_RESTRICT top() const { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top() { RYML_ASSERT(m_size > 0); return m_stack[m_size - 1]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom() const { RYML_ASSERT(m_size > 0); return m_stack[0]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom() { RYML_ASSERT(m_size > 0); return m_stack[0]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT top(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT top(size_t i) { RYML_ASSERT(i < m_size); return m_stack[m_size - 1 - i]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT bottom(size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT bottom(size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + + C4_ALWAYS_INLINE T const& C4_RESTRICT operator[](size_t i) const { RYML_ASSERT(i < m_size); return m_stack[i]; } + C4_ALWAYS_INLINE T & C4_RESTRICT operator[](size_t i) { RYML_ASSERT(i < m_size); return m_stack[i]; } + +public: + + using iterator = T *; + using const_iterator = T const *; + + iterator begin() { return m_stack; } + iterator end () { return m_stack + m_size; } + + const_iterator begin() const { return (const_iterator)m_stack; } + const_iterator end () const { return (const_iterator)m_stack + m_size; } + +public: + void _free(); + void _cp(stack const* C4_RESTRICT that); + void _mv(stack * that); + void _cb(Callbacks const& cb); +}; + + +//----------------------------------------------------------------------------- 
+//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +template +void stack::reserve(size_t sz) +{ + if(sz <= m_size) + return; + if(sz <= N) + { + m_stack = m_buf; + m_capacity = N; + return; + } + T *buf = (T*) m_callbacks.m_allocate(sz * sizeof(T), m_stack, m_callbacks.m_user_data); + memcpy(buf, m_stack, m_size * sizeof(T)); + if(m_stack != m_buf) + { + m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + } + m_stack = buf; + m_capacity = sz; +} + + +//----------------------------------------------------------------------------- + +template +void stack::_free() +{ + RYML_ASSERT(m_stack != nullptr); // this structure cannot be memset() to zero + if(m_stack != m_buf) + { + m_callbacks.m_free(m_stack, m_capacity * sizeof(T), m_callbacks.m_user_data); + m_stack = m_buf; + m_size = N; + m_capacity = N; + } + else + { + RYML_ASSERT(m_capacity == N); + } +} + + +//----------------------------------------------------------------------------- + +template +void stack::_cp(stack const* C4_RESTRICT that) +{ + if(that->m_stack != that->m_buf) + { + RYML_ASSERT(that->m_capacity > N); + RYML_ASSERT(that->m_size <= that->m_capacity); + } + else + { + RYML_ASSERT(that->m_capacity <= N); + RYML_ASSERT(that->m_size <= that->m_capacity); + } + memcpy(m_stack, that->m_stack, that->m_size * sizeof(T)); + m_size = that->m_size; + m_capacity = that->m_size < N ? 
N : that->m_size; + m_callbacks = that->m_callbacks; +} + + +//----------------------------------------------------------------------------- + +template +void stack::_mv(stack * that) +{ + if(that->m_stack != that->m_buf) + { + RYML_ASSERT(that->m_capacity > N); + RYML_ASSERT(that->m_size <= that->m_capacity); + m_stack = that->m_stack; + } + else + { + RYML_ASSERT(that->m_capacity <= N); + RYML_ASSERT(that->m_size <= that->m_capacity); + memcpy(m_buf, that->m_buf, that->m_size * sizeof(T)); + m_stack = m_buf; + } + m_size = that->m_size; + m_capacity = that->m_capacity; + m_callbacks = that->m_callbacks; + // make sure no deallocation happens on destruction + RYML_ASSERT(that->m_stack != m_buf); + that->m_stack = that->m_buf; + that->m_capacity = N; + that->m_size = 0; +} + + +//----------------------------------------------------------------------------- + +template +void stack::_cb(Callbacks const& cb) +{ + if(cb != m_callbacks) + { + _free(); + m_callbacks = cb; + } +} + +} // namespace detail +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_DETAIL_STACK_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parse.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PARSE_HPP_ +#define _C4_YML_PARSE_HPP_ + +#ifndef _C4_YML_TREE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +#endif 
+ +#ifndef _C4_YML_NODE_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +#endif + +#ifndef _C4_YML_DETAIL_STACK_HPP_ +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/stack.hpp +//#include "c4/yml/detail/stack.hpp" +#if !defined(C4_YML_DETAIL_STACK_HPP_) && !defined(_C4_YML_DETAIL_STACK_HPP_) +#error "amalgamate: file c4/yml/detail/stack.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_STACK_HPP_ */ + +#endif + +//included above: +//#include + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4251/*needs to have dll-interface to be used by clients of struct*/) +#endif + +namespace c4 { +namespace yml { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +class RYML_EXPORT Parser +{ +public: + + /** @name construction and assignment */ + /** @{ */ + + Parser() : Parser(get_callbacks()) {} + Parser(Callbacks const& cb); + ~Parser(); + + Parser(Parser &&); + Parser(Parser const&); + Parser& operator=(Parser &&); + Parser& operator=(Parser const&); + + /** @} */ + +public: + + /** @name modifiers */ + /** @{ */ + + /** Reserve a certain capacity for the parsing stack. + * This should be larger than the expected depth of the parsed + * YAML tree. + * + * The parsing stack is the only (potential) heap memory used by + * the parser. + * + * If the requested capacity is below the default + * stack size of 16, the memory is used directly in the parser + * object; otherwise it will be allocated from the heap. 
+ * + * @note this reserves memory only for the parser itself; all the + * allocations for the parsed tree will go through the tree's + * allocator. + * + * @note the tree and the arena can (and should) also be reserved. */ + void reserve_stack(size_t capacity) + { + m_stack.reserve(capacity); + } + + /** Reserve a certain capacity for the array used to track node + * locations in the source buffer. */ + void reserve_locations(size_t num_source_lines) + { + _resize_locations(num_source_lines); + } + + /** Reserve a certain capacity for the character arena used to + * filter scalars. */ + void reserve_filter_arena(size_t num_characters) + { + _resize_filter_arena(num_characters); + } + + /** @} */ + +public: + + /** @name getters and modifiers */ + /** @{ */ + + /** Get the current callbacks in the parser. */ + Callbacks callbacks() const { return m_stack.m_callbacks; } + + /** Get the name of the latest file parsed by this object. */ + csubstr filename() const { return m_file; } + + /** Get the latest YAML buffer parsed by this object. */ + csubstr source() const { return m_buf; } + + size_t stack_capacity() const { return m_stack.capacity(); } + size_t locations_capacity() const { return m_newline_offsets_capacity; } + size_t filter_arena_capacity() const { return m_filter_arena.len; } + + /** @} */ + +public: + + /** @name parse_in_place */ + /** @{ */ + + /** Create a new tree and parse into its root. + * The tree is created with the callbacks currently in the parser. */ + Tree parse_in_place(csubstr filename, substr src) + { + Tree t(callbacks()); + t.reserve(_estimate_capacity(src)); + this->parse_in_place(filename, src, &t, t.root_id()); + return t; + } + + /** Parse into an existing tree, starting at its root node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. 
*/ + void parse_in_place(csubstr filename, substr src, Tree *t) + { + this->parse_in_place(filename, src, t, t->root_id()); + } + + /** Parse into an existing node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, Tree *t, size_t node_id); + // ^^^^^^^^^^^^^ this is the workhorse overload; everything else is syntactic candy + + /** Parse into an existing node. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_place(csubstr filename, substr src, NodeRef node) + { + this->parse_in_place(filename, src, node.tree(), node.id()); + } + + RYML_DEPRECATED("use parse_in_place() instead") Tree parse(csubstr filename, substr src) { return parse_in_place(filename, src); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t) { parse_in_place(filename, src, t); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, Tree *t, size_t node_id) { parse_in_place(filename, src, t, node_id); } + RYML_DEPRECATED("use parse_in_place() instead") void parse(csubstr filename, substr src, NodeRef node) { parse_in_place(filename, src, node); } + + /** @} */ + +public: + + /** @name parse_in_arena: copy the YAML source buffer to the + * tree's arena, then parse the copy in situ + * + * @note overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental + * copy of the source buffer to the tree's arena, because substr + * is implicitly convertible to csubstr. If you really intend to parse + * a mutable buffer in the tree's arena, convert it first to immutable + * by assigning the substr to a csubstr prior to calling parse_in_arena(). 
+ * This is not needed for parse_in_place() because csubstr is not + * implicitly convertible to substr. */ + /** @{ */ + + // READ THE NOTE ABOVE! + #define RYML_DONT_PARSE_SUBSTR_IN_ARENA "Do not pass a (mutable) substr to parse_in_arena(); if you have a substr, it should be parsed in place. Consider using parse_in_place() instead, or convert the buffer to csubstr prior to calling. This function is deliberately left undefined and will cause a compiler error." + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr csrc); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, Tree *t, size_t node_id); + RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr csrc, NodeRef node); + + /** Create a new tree and parse into its root. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + Tree parse_in_arena(csubstr filename, csubstr csrc) + { + Tree t(callbacks()); + substr src = t.copy_to_arena(csrc); + t.reserve(_estimate_capacity(csrc)); + this->parse_in_place(filename, src, &t, t.root_id()); + return t; + } + + /** Parse into an existing tree, starting at its root node. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, Tree *t) + { + substr src = t->copy_to_arena(csrc); + this->parse_in_place(filename, src, t, t->root_id()); + } + + /** Parse into a specific node in an existing tree. 
+ * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, Tree *t, size_t node_id) + { + substr src = t->copy_to_arena(csrc); + this->parse_in_place(filename, src, t, node_id); + } + + /** Parse into a specific node in an existing tree. + * The immutable YAML source is first copied to the tree's arena, + * and parsed from there. + * The callbacks in the tree are kept, and used to allocate + * the tree members, if any allocation is required. */ + void parse_in_arena(csubstr filename, csubstr csrc, NodeRef node) + { + substr src = node.tree()->copy_to_arena(csrc); + this->parse_in_place(filename, src, node.tree(), node.id()); + } + + RYML_DEPRECATED("use parse_in_arena() instead") Tree parse(csubstr filename, csubstr csrc) { return parse_in_arena(filename, csrc); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t) { parse_in_arena(filename, csrc, t); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, Tree *t, size_t node_id) { parse_in_arena(filename, csrc, t, node_id); } + RYML_DEPRECATED("use parse_in_arena() instead") void parse(csubstr filename, csubstr csrc, NodeRef node) { parse_in_arena(filename, csrc, node); } + + /** @} */ + +public: + + /** @name locations */ + /** @{ */ + + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(Tree const& tree, size_t node_id) const; + /** Get the location of a node of the last tree to be parsed by this parser. */ + Location location(NodeRef node) const; + /** Get the string starting at a particular location, to the end + * of the parsed source buffer. */ + csubstr location_contents(Location const& loc) const; + /** Given a pointer to a buffer position, get the location. 
@p val + * must be pointing to somewhere in the source buffer that was + * last parsed by this object. */ + Location val_location(const char *val) const; + + /** @} */ + +private: + + typedef enum { + BLOCK_LITERAL, //!< keep newlines (|) + BLOCK_FOLD //!< replace newline with single space (>) + } BlockStyle_e; + + typedef enum { + CHOMP_CLIP, //!< single newline at end (default) + CHOMP_STRIP, //!< no newline at end (-) + CHOMP_KEEP //!< all newlines from end (+) + } BlockChomp_e; + +private: + + using flag_t = int; + + static size_t _estimate_capacity(csubstr src) { size_t c = _count_nlines(src); c = c >= 16 ? c : 16; return c; } + + void _reset(); + + bool _finished_file() const; + bool _finished_line() const; + + csubstr _peek_next_line(size_t pos=npos) const; + bool _advance_to_peeked(); + void _scan_line(); + + csubstr _slurp_doc_scalar(); + + /** + * @param [out] quoted + * Will only be written to if this method returns true. + * Will be set to true if the scanned scalar was quoted, by '', "", > or |. 
+ */ + bool _scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted); + + csubstr _scan_comment(); + csubstr _scan_squot_scalar(); + csubstr _scan_dquot_scalar(); + csubstr _scan_block(); + substr _scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation); + substr _scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line); + substr _scan_complex_key(csubstr currscalar, csubstr peeked_line); + csubstr _scan_to_next_nonempty_line(size_t indentation); + csubstr _extend_scanned_scalar(csubstr currscalar); + + csubstr _filter_squot_scalar(const substr s); + csubstr _filter_dquot_scalar(substr s); + csubstr _filter_plain_scalar(substr s, size_t indentation); + csubstr _filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation); + template + bool _filter_nl(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos, size_t indentation); + template + void _filter_ws(substr scalar, size_t *C4_RESTRICT pos, size_t *C4_RESTRICT filter_arena_pos); + bool _apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp); + + void _handle_finished_file(); + void _handle_line(); + + bool _handle_indentation(); + + bool _handle_unk(); + bool _handle_map_flow(); + bool _handle_map_blck(); + bool _handle_seq_flow(); + bool _handle_seq_blck(); + bool _handle_top(); + bool _handle_types(); + bool _handle_key_anchors_and_refs(); + bool _handle_val_anchors_and_refs(); + void _move_val_tag_to_key_tag(); + void _move_key_tag_to_val_tag(); + void _move_key_tag2_to_key_tag(); + void _move_val_anchor_to_key_anchor(); + void _move_key_anchor_to_val_anchor(); + + void _push_level(bool explicit_flow_chars = false); + void _pop_level(); + + void _start_unk(bool as_child=true); + + void _start_map(bool as_child=true); + void _start_map_unk(bool as_child); + void _stop_map(); + + void _start_seq(bool as_child=true); + void _stop_seq(); + + void _start_seqimap(); + void _stop_seqimap(); + + void 
_start_doc(bool as_child=true); + void _stop_doc(); + void _start_new_doc(csubstr rem); + void _end_stream(); + + NodeData* _append_val(csubstr val, flag_t quoted=false); + NodeData* _append_key_val(csubstr val, flag_t val_quoted=false); + bool _rval_dash_start_or_continue_seq(); + + void _store_scalar(csubstr s, flag_t is_quoted); + csubstr _consume_scalar(); + void _move_scalar_from_top(); + + inline NodeData* _append_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_val({str, size_t(0)}); } + inline NodeData* _append_key_val_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); return _append_key_val({str, size_t(0)}); } + inline void _store_scalar_null(const char *str) { _RYML_CB_ASSERT(m_stack.m_callbacks, str >= m_buf.begin() && str <= m_buf.end()); _store_scalar({str, size_t(0)}, false); } + + void _set_indentation(size_t behind); + void _save_indentation(size_t behind=0); + bool _maybe_set_indentation_from_anchor_or_tag(); + + void _write_key_anchor(size_t node_id); + void _write_val_anchor(size_t node_id); + + void _handle_directive(csubstr directive); + + void _skipchars(char c); + template + void _skipchars(const char (&chars)[N]); + +private: + + static size_t _count_nlines(csubstr src); + +private: + + typedef enum : flag_t { + RTOP = 0x01 << 0, ///< reading at top level + RUNK = 0x01 << 1, ///< reading an unknown: must determine whether scalar, map or seq + RMAP = 0x01 << 2, ///< reading a map + RSEQ = 0x01 << 3, ///< reading a seq + FLOW = 0x01 << 4, ///< reading is inside explicit flow chars: [] or {} + QMRK = 0x01 << 5, ///< reading an explicit key (`? 
key`) + RKEY = 0x01 << 6, ///< reading a scalar as key + RVAL = 0x01 << 7, ///< reading a scalar as val + RNXT = 0x01 << 8, ///< read next val or keyval + SSCL = 0x01 << 9, ///< there's a stored scalar + QSCL = 0x01 << 10, ///< stored scalar was quoted + RSET = 0x01 << 11, ///< the (implicit) map being read is a !!set. @see https://yaml.org/type/set.html + NDOC = 0x01 << 12, ///< no document mode. a document has ended and another has not started yet. + //! reading an implicit map nested in an explicit seq. + //! eg, {key: [key2: value2, key3: value3]} + //! is parsed as {key: [{key2: value2}, {key3: value3}]} + RSEQIMAP = 0x01 << 13, + } State_e; + + struct LineContents + { + csubstr full; ///< the full line, including newlines on the right + csubstr stripped; ///< the stripped line, excluding newlines on the right + csubstr rem; ///< the stripped line remainder; initially starts at the first non-space character + size_t indentation; ///< the number of spaces on the beginning of the line + + LineContents() : full(), stripped(), rem(), indentation() {} + + void reset_with_next_line(csubstr buf, size_t pos); + + void reset(csubstr full_, csubstr stripped_) + { + full = full_; + stripped = stripped_; + rem = stripped_; + // find the first column where the character is not a space + indentation = full.first_not_of(' '); + } + + size_t current_col() const + { + return current_col(rem); + } + + size_t current_col(csubstr s) const + { + RYML_ASSERT(s.str >= full.str); + RYML_ASSERT(full.is_super(s)); + size_t col = static_cast(s.str - full.str); + return col; + } + }; + + struct State + { + flag_t flags; + size_t level; + size_t node_id; // don't hold a pointer to the node as it will be relocated during tree resizes + csubstr scalar; + size_t scalar_col; // the column where the scalar (or its quotes) begin + + Location pos; + LineContents line_contents; + size_t indref; + + State() : flags(), level(), node_id(), scalar(), scalar_col(), pos(), line_contents(), indref() {} 
+ + void reset(const char *file, size_t node_id_) + { + flags = RUNK|RTOP; + level = 0; + pos.name = to_csubstr(file); + pos.offset = 0; + pos.line = 1; + pos.col = 1; + node_id = node_id_; + scalar_col = 0; + scalar.clear(); + indref = 0; + } + }; + + void _line_progressed(size_t ahead); + void _line_ended(); + void _line_ended_undo(); + + void _prepare_pop() + { + RYML_ASSERT(m_stack.size() > 1); + State const& curr = m_stack.top(); + State & next = m_stack.top(1); + next.pos = curr.pos; + next.line_contents = curr.line_contents; + next.scalar = curr.scalar; + } + + inline bool _at_line_begin() const + { + return m_state->line_contents.rem.begin() == m_state->line_contents.full.begin(); + } + inline bool _at_line_end() const + { + csubstr r = m_state->line_contents.rem; + return r.empty() || r.begins_with(' ', r.len); + } + inline bool _token_is_from_this_line(csubstr token) const + { + return token.is_sub(m_state->line_contents.full); + } + + inline NodeData * node(State const* s) const { return m_tree->get(s->node_id); } + inline NodeData * node(State const& s) const { return m_tree->get(s .node_id); } + inline NodeData * node(size_t node_id) const { return m_tree->get( node_id); } + + inline bool has_all(flag_t f) const { return (m_state->flags & f) == f; } + inline bool has_any(flag_t f) const { return (m_state->flags & f) != 0; } + inline bool has_none(flag_t f) const { return (m_state->flags & f) == 0; } + + static inline bool has_all(flag_t f, State const* s) { return (s->flags & f) == f; } + static inline bool has_any(flag_t f, State const* s) { return (s->flags & f) != 0; } + static inline bool has_none(flag_t f, State const* s) { return (s->flags & f) == 0; } + + inline void set_flags(flag_t f) { set_flags(f, m_state); } + inline void add_flags(flag_t on) { add_flags(on, m_state); } + inline void addrem_flags(flag_t on, flag_t off) { addrem_flags(on, off, m_state); } + inline void rem_flags(flag_t off) { rem_flags(off, m_state); } + + void 
set_flags(flag_t f, State * s); + void add_flags(flag_t on, State * s); + void addrem_flags(flag_t on, flag_t off, State * s); + void rem_flags(flag_t off, State * s); + + void _resize_filter_arena(size_t num_characters); + void _grow_filter_arena(size_t num_characters); + substr _finish_filter_arena(substr dst, size_t pos); + + void _prepare_locations() const; // only changes mutable members + void _resize_locations(size_t sz) const; // only changes mutable members + void _mark_locations_dirty(); + bool _locations_dirty() const; + +private: + + void _free(); + void _clr(); + void _cp(Parser const* that); + void _mv(Parser *that); + +#ifdef RYML_DBG + template void _dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const; +#endif + template void _err(csubstr fmt, Args const& C4_RESTRICT ...args) const; + template void _fmt_msg(DumpFn &&dumpfn) const; + static csubstr _prfl(substr buf, flag_t v); + +private: + + csubstr m_file; + substr m_buf; + + size_t m_root_id; + Tree * m_tree; + + detail::stack m_stack; + State * m_state; + + size_t m_key_tag_indentation; + size_t m_key_tag2_indentation; + csubstr m_key_tag; + csubstr m_key_tag2; + size_t m_val_tag_indentation; + csubstr m_val_tag; + + bool m_key_anchor_was_before; + size_t m_key_anchor_indentation; + csubstr m_key_anchor; + size_t m_val_anchor_indentation; + csubstr m_val_anchor; + + substr m_filter_arena; + + mutable size_t *m_newline_offsets; + mutable size_t m_newline_offsets_size; + mutable size_t m_newline_offsets_capacity; + mutable csubstr m_newline_offsets_buf; +}; + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +/** @name parse_in_place + * + * @desc parse a mutable YAML source buffer. 
+ * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser object after it has parsed the + * code. If you need access to any of these properties, use + * Parser::parse_in_place() */ +/** @{ */ + +inline Tree parse_in_place( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } //!< parse in-situ a modifiable YAML source buffer. +inline Tree parse_in_place(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } //!< parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. +inline void parse_in_place( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. 
+inline void parse_in_place( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer +inline void parse_in_place(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } //!< reusing the YAML tree, parse in-situ a modifiable YAML source buffer, providing a filename for error messages. + +RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse( substr yaml ) { Parser np; return np.parse_in_place({} , yaml); } +RYML_DEPRECATED("use parse_in_place() instead") inline Tree parse(csubstr filename, substr yaml ) { Parser np; return np.parse_in_place(filename, yaml); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t ) { Parser np; np.parse_in_place({} , yaml, t); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t ) { Parser np; np.parse_in_place(filename, yaml, t); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place({} , yaml, t, node_id); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_place(filename, yaml, t, node_id); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse( substr yaml, NodeRef node ) { Parser np; np.parse_in_place({} , yaml, node); } +RYML_DEPRECATED("use parse_in_place() instead") inline void parse(csubstr filename, substr yaml, NodeRef node ) { Parser np; np.parse_in_place(filename, yaml, node); } + +/** @} */ + + +//----------------------------------------------------------------------------- + +/** @name parse_in_arena + * @desc parse a read-only YAML source buffer, copying it first to the tree's arena. 
+ * + * @note These freestanding functions use a temporary parser object, + * and are convenience functions to easily parse YAML without the need + * to instantiate a separate parser. Note that some properties + * (notably node locations in the original source code) are only + * available through the parser object after it has parsed the + * code. If you need access to any of these properties, use + * Parser::parse_in_arena(). + * + * @note overloads receiving a substr YAML buffer are intentionally + * left undefined, such that calling parse_in_arena() with a substr + * will cause a linker error. This is to prevent an accidental + * copy of the source buffer to the tree's arena, because substr + * is implicitly convertible to csubstr. If you really intend to parse + * a mutable buffer in the tree's arena, convert it first to immutable + * by assigning the substr to a csubstr prior to calling parse_in_arena(). + * This is not needed for parse_in_place() because csubstr is not + * implicitly convertible to substr. */ +/** @{ */ + +/* READ THE NOTE ABOVE! 
*/ +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena( substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) Tree parse_in_arena(csubstr filename, substr yaml ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, Tree *t, size_t node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, Tree *t, size_t node_id); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena( substr yaml, NodeRef node ); +RYML_DEPRECATED(RYML_DONT_PARSE_SUBSTR_IN_ARENA) void parse_in_arena(csubstr filename, substr yaml, NodeRef node ); + +inline Tree parse_in_arena( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline Tree parse_in_arena(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. 
+inline void parse_in_arena( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +inline void parse_in_arena( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +inline void parse_in_arena(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. + +RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse( csubstr yaml ) { Parser np; return np.parse_in_arena({} , yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline Tree parse(csubstr filename, csubstr yaml ) { Parser np; return np.parse_in_arena(filename, yaml); } //!< parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena({} , yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. 
+RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t ) { Parser np; np.parse_in_arena(filename, yaml, t); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena({} , yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, Tree *t, size_t node_id) { Parser np; np.parse_in_arena(filename, yaml, t, node_id); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse( csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena({} , yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena. +RYML_DEPRECATED("use parse_in_arena() instead") inline void parse(csubstr filename, csubstr yaml, NodeRef node ) { Parser np; np.parse_in_arena(filename, yaml, node); } //!< reusing the YAML tree, parse a read-only YAML source buffer, copying it first to the tree's source arena, providing a filename for error messages. 
+ +/** @} */ + +} // namespace yml +} // namespace c4 + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* _C4_YML_PARSE_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/map.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_STD_MAP_HPP_ +#define _C4_YML_STD_MAP_HPP_ + +/** @file map.hpp write/read std::map to/from a YAML tree. */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +#include + +namespace c4 { +namespace yml { + +// std::map requires child nodes in the data +// tree hierarchy (a MAP node in ryml parlance). +// So it should be serialized via write()/read(). 
+ +template +void write(c4::yml::NodeRef *n, std::map const& m) +{ + *n |= c4::yml::MAP; + for(auto const& p : m) + { + auto ch = n->append_child(); + ch << c4::yml::key(p.first); + ch << p.second; + } +} + +template +bool read(c4::yml::NodeRef const& n, std::map * m) +{ + K k{}; + V v; + for(auto const ch : n) + { + ch >> c4::yml::key(k); + ch >> v; + m->emplace(std::make_pair(std::move(k), std::move(v))); + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_MAP_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/string.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_YML_STD_STRING_HPP_ +#define C4_YML_STD_STRING_HPP_ + +/** @file string.hpp substring conversions for/from std::string */ + +// everything we need is implemented here: +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/std/string.hpp +//#include +#if !defined(C4_STD_STRING_HPP_) && !defined(_C4_STD_STRING_HPP_) +#error "amalgamate: file c4/std/string.hpp must have been included at this point" +#endif /* C4_STD_STRING_HPP_ */ + + +#endif // C4_YML_STD_STRING_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/vector.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp +//-------------------------------------------------------------------------------- 
+//******************************************************************************** + +#ifndef _C4_YML_STD_VECTOR_HPP_ +#define _C4_YML_STD_VECTOR_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/std/vector.hpp +//#include +#if !defined(C4_STD_VECTOR_HPP_) && !defined(_C4_STD_VECTOR_HPP_) +#error "amalgamate: file c4/std/vector.hpp must have been included at this point" +#endif /* C4_STD_VECTOR_HPP_ */ + +//included above: +//#include + +namespace c4 { +namespace yml { + +// vector is a sequence-like type, and it requires child nodes +// in the data tree hierarchy (a SEQ node in ryml parlance). +// So it should be serialized via write()/read(). + +template +void write(c4::yml::NodeRef *n, std::vector const& vec) +{ + *n |= c4::yml::SEQ; + for(auto const& v : vec) + { + n->append_child() << v; + } +} + +template +bool read(c4::yml::NodeRef const& n, std::vector *vec) +{ + vec->resize(n.num_children()); + size_t pos = 0; + for(auto const ch : n) + { + ch >> (*vec)[pos++]; + } + return true; +} + +} // namespace yml +} // namespace c4 + +#endif // _C4_YML_STD_VECTOR_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/std/std.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_STD_STD_HPP_ +#define 
_C4_YML_STD_STD_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/string.hpp +//#include "c4/yml/std/string.hpp" +#if !defined(C4_YML_STD_STRING_HPP_) && !defined(_C4_YML_STD_STRING_HPP_) +#error "amalgamate: file c4/yml/std/string.hpp must have been included at this point" +#endif /* C4_YML_STD_STRING_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/vector.hpp +//#include "c4/yml/std/vector.hpp" +#if !defined(C4_YML_STD_VECTOR_HPP_) && !defined(_C4_YML_STD_VECTOR_HPP_) +#error "amalgamate: file c4/yml/std/vector.hpp must have been included at this point" +#endif /* C4_YML_STD_VECTOR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/std/map.hpp +//#include "c4/yml/std/map.hpp" +#if !defined(C4_YML_STD_MAP_HPP_) && !defined(_C4_YML_STD_MAP_HPP_) +#error "amalgamate: file c4/yml/std/map.hpp must have been included at this point" +#endif /* C4_YML_STD_MAP_HPP_ */ + + +#endif // _C4_YML_STD_STD_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/std/std.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/common.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/common.hpp +//#include "c4/yml/common.hpp" +#if !defined(C4_YML_COMMON_HPP_) && !defined(_C4_YML_COMMON_HPP_) +#error "amalgamate: file c4/yml/common.hpp must have been included at this point" +#endif /* C4_YML_COMMON_HPP_ */ + + +#ifndef RYML_NO_DEFAULT_CALLBACKS +//included above: +//# include +//included 
above: +//# include +#endif // RYML_NO_DEFAULT_CALLBACKS + +namespace c4 { +namespace yml { + +namespace { +thread_local Callbacks s_default_callbacks; +} // anon namespace + +#ifndef RYML_NO_DEFAULT_CALLBACKS +void report_error_impl(const char* msg, size_t length, Location loc, FILE *f) +{ + if(!f) + f = stderr; + if(loc) + { + if(!loc.name.empty()) + { + fwrite(loc.name.str, 1, loc.name.len, f); + fputc(':', f); + } + fprintf(f, "%zu:", loc.line); + if(loc.col) + fprintf(f, "%zu:", loc.col); + if(loc.offset) + fprintf(f, " (%zuB):", loc.offset); + } + fprintf(f, "%.*s\n", (int)length, msg); + fflush(f); +} + +void error_impl(const char* msg, size_t length, Location loc, void * /*user_data*/) +{ + report_error_impl(msg, length, loc, nullptr); + ::abort(); +} + +void* allocate_impl(size_t length, void * /*hint*/, void * /*user_data*/) +{ + void *mem = ::malloc(length); + if(mem == nullptr) + { + const char msg[] = "could not allocate memory"; + error_impl(msg, sizeof(msg)-1, {}, nullptr); + } + return mem; +} + +void free_impl(void *mem, size_t /*length*/, void * /*user_data*/) +{ + ::free(mem); +} +#endif // RYML_NO_DEFAULT_CALLBACKS + + + +Callbacks::Callbacks() + : + m_user_data(nullptr), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(allocate_impl), + m_free(free_impl), + m_error(error_impl) + #else + m_allocate(nullptr), + m_free(nullptr), + m_error(nullptr) + #endif +{ +} + +Callbacks::Callbacks(void *user_data, pfn_allocate alloc_, pfn_free free_, pfn_error error_) + : + m_user_data(user_data), + #ifndef RYML_NO_DEFAULT_CALLBACKS + m_allocate(alloc_ ? alloc_ : allocate_impl), + m_free(free_ ? free_ : free_impl), + m_error(error_ ? 
error_ : error_impl) + #else + m_allocate(alloc_), + m_free(free_), + m_error(error_) + #endif +{ + C4_CHECK(m_allocate); + C4_CHECK(m_free); + C4_CHECK(m_error); +} + + +void set_callbacks(Callbacks const& c) +{ + s_default_callbacks = c; +} + +Callbacks const& get_callbacks() +{ + return s_default_callbacks; +} + +void reset_callbacks() +{ + set_callbacks(Callbacks()); +} + +void error(const char *msg, size_t msg_len, Location loc) +{ + s_default_callbacks.m_error(msg, msg_len, loc, s_default_callbacks.m_user_data); +} + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/common.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/tree.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && 
/** Get the short `!!name` spelling of a tag enumerator.
 *
 * @param tag the tag enumerator to convert
 * @return the literal listed for @p tag below (e.g. "!!map" for TAG_MAP);
 *         an empty string for TAG_NONE and for any value that is not one
 *         of the listed enumerators */
csubstr from_tag(YamlTag_e tag)
{
    switch(tag)
    {
    case TAG_MAP:
        return {"!!map"};
    case TAG_OMAP:
        return {"!!omap"};
    case TAG_PAIRS:
        return {"!!pairs"};
    case TAG_SET:
        return {"!!set"};
    case TAG_SEQ:
        return {"!!seq"};
    case TAG_BINARY:
        return {"!!binary"};
    case TAG_BOOL:
        return {"!!bool"};
    case TAG_FLOAT:
        return {"!!float"};
    case TAG_INT:
        return {"!!int"};
    case TAG_MERGE:
        return {"!!merge"};
    case TAG_NULL:
        return {"!!null"};
    case TAG_STR:
        return {"!!str"};
    case TAG_TIMESTAMP:
        return {"!!timestamp"};
    case TAG_VALUE:
        return {"!!value"};
    case TAG_YAML:
        return {"!!yaml"};
    case TAG_NONE:
        return {""};
    }
    // reached only when tag holds a value outside the enumerators above
    return {""};
}
NodeRef(const_cast(this), id); +} + +NodeRef Tree::operator[] (csubstr key) +{ + return rootref()[key]; +} +NodeRef const Tree::operator[] (csubstr key) const +{ + return rootref()[key]; +} + +NodeRef Tree::operator[] (size_t i) +{ + return rootref()[i]; +} +NodeRef const Tree::operator[] (size_t i) const +{ + return rootref()[i]; +} + +NodeRef Tree::docref(size_t i) +{ + return ref(doc(i)); +} +NodeRef const Tree::docref(size_t i) const +{ + return ref(doc(i)); +} + + +//----------------------------------------------------------------------------- +Tree::Tree(Callbacks const& cb) + : m_buf(nullptr) + , m_cap(0) + , m_size(0) + , m_free_head(NONE) + , m_free_tail(NONE) + , m_arena() + , m_arena_pos(0) + , m_callbacks(cb) +{ +} + +Tree::Tree(size_t node_capacity, size_t arena_capacity, Callbacks const& cb) + : Tree(cb) +{ + reserve(node_capacity); + reserve_arena(arena_capacity); +} + +Tree::~Tree() +{ + _free(); +} + + +Tree::Tree(Tree const& that) noexcept : Tree(that.m_callbacks) +{ + _copy(that); +} + +Tree& Tree::operator= (Tree const& that) noexcept +{ + _free(); + m_callbacks = that.m_callbacks; + _copy(that); + return *this; +} + +Tree::Tree(Tree && that) noexcept : Tree(that.m_callbacks) +{ + _move(that); +} + +Tree& Tree::operator= (Tree && that) noexcept +{ + _free(); + m_callbacks = that.m_callbacks; + _move(that); + return *this; +} + +void Tree::_free() +{ + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap > 0); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + } + if(m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, m_arena.len > 0); + _RYML_CB_FREE(m_callbacks, m_arena.str, char, m_arena.len); + } + _clear(); +} + + +C4_SUPPRESS_WARNING_GCC_PUSH +#if defined(__GNUC__) && __GNUC__>= 8 + C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wclass-memaccess") // error: ‘void* memset(void*, int, size_t)’ clearing an object of type ‘class c4::yml::Tree’ with no trivial copy-assignment; use assignment or value-initialization instead +#endif + +void Tree::_clear() +{ 
+ m_buf = nullptr; + m_cap = 0; + m_size = 0; + m_free_head = 0; + m_free_tail = 0; + m_arena = {}; + m_arena_pos = 0; + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_copy(Tree const& that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, that.m_cap, that.m_buf); + memcpy(m_buf, that.m_buf, that.m_cap * sizeof(NodeData)); + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena_pos = that.m_arena_pos; + m_arena = that.m_arena; + if(that.m_arena.str) + { + _RYML_CB_ASSERT(m_callbacks, that.m_arena.len > 0); + substr arena; + arena.str = _RYML_CB_ALLOC_HINT(m_callbacks, char, that.m_arena.len, that.m_arena.str); + arena.len = that.m_arena.len; + _relocate(arena); // does a memcpy of the arena and updates nodes using the old arena + m_arena = arena; + } + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; +} + +void Tree::_move(Tree & that) +{ + _RYML_CB_ASSERT(m_callbacks, m_buf == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.str == nullptr); + _RYML_CB_ASSERT(m_callbacks, m_arena.len == 0); + m_buf = that.m_buf; + m_cap = that.m_cap; + m_size = that.m_size; + m_free_head = that.m_free_head; + m_free_tail = that.m_free_tail; + m_arena = that.m_arena; + m_arena_pos = that.m_arena_pos; + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = that.m_tag_directives[i]; + that._clear(); +} + +void Tree::_relocate(substr next_arena) +{ + _RYML_CB_ASSERT(m_callbacks, next_arena.not_empty()); + _RYML_CB_ASSERT(m_callbacks, next_arena.len >= m_arena.len); + memcpy(next_arena.str, m_arena.str, m_arena_pos); + for(NodeData *C4_RESTRICT n = m_buf, *e = m_buf + m_cap; n != e; ++n) + { + if(in_arena(n->m_key.scalar)) + 
n->m_key.scalar = _relocated(n->m_key.scalar, next_arena); + if(in_arena(n->m_key.tag)) + n->m_key.tag = _relocated(n->m_key.tag, next_arena); + if(in_arena(n->m_key.anchor)) + n->m_key.anchor = _relocated(n->m_key.anchor, next_arena); + if(in_arena(n->m_val.scalar)) + n->m_val.scalar = _relocated(n->m_val.scalar, next_arena); + if(in_arena(n->m_val.tag)) + n->m_val.tag = _relocated(n->m_val.tag, next_arena); + if(in_arena(n->m_val.anchor)) + n->m_val.anchor = _relocated(n->m_val.anchor, next_arena); + } + for(TagDirective &C4_RESTRICT td : m_tag_directives) + { + if(in_arena(td.prefix)) + td.prefix = _relocated(td.prefix, next_arena); + if(in_arena(td.handle)) + td.handle = _relocated(td.handle, next_arena); + } +} + + +//----------------------------------------------------------------------------- +void Tree::reserve(size_t cap) +{ + if(cap > m_cap) + { + NodeData *buf = _RYML_CB_ALLOC_HINT(m_callbacks, NodeData, cap, m_buf); + if(m_buf) + { + memcpy(buf, m_buf, m_cap * sizeof(NodeData)); + _RYML_CB_FREE(m_callbacks, m_buf, NodeData, m_cap); + } + size_t first = m_cap, del = cap - m_cap; + m_cap = cap; + m_buf = buf; + _clear_range(first, del); + if(m_free_head != NONE) + { + _RYML_CB_ASSERT(m_callbacks, m_buf != nullptr); + _RYML_CB_ASSERT(m_callbacks, m_free_tail != NONE); + m_buf[m_free_tail].m_next_sibling = first; + m_buf[first].m_prev_sibling = m_free_tail; + m_free_tail = cap-1; + } + else + { + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE); + m_free_head = first; + m_free_tail = cap-1; + } + _RYML_CB_ASSERT(m_callbacks, m_free_head == NONE || (m_free_head >= 0 && m_free_head < cap)); + _RYML_CB_ASSERT(m_callbacks, m_free_tail == NONE || (m_free_tail >= 0 && m_free_tail < cap)); + + if( ! 
m_size) + _claim_root(); + } +} + + +//----------------------------------------------------------------------------- +void Tree::clear() +{ + _clear_range(0, m_cap); + m_size = 0; + if(m_buf) + { + _RYML_CB_ASSERT(m_callbacks, m_cap >= 0); + m_free_head = 0; + m_free_tail = m_cap-1; + _claim_root(); + } + else + { + m_free_head = NONE; + m_free_tail = NONE; + } + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + m_tag_directives[i] = {}; +} + +void Tree::_claim_root() +{ + size_t r = _claim(); + _RYML_CB_ASSERT(m_callbacks, r == 0); + _set_hierarchy(r, NONE, NONE); +} + + +//----------------------------------------------------------------------------- +void Tree::_clear_range(size_t first, size_t num) +{ + if(num == 0) + return; // prevent overflow when subtracting + _RYML_CB_ASSERT(m_callbacks, first >= 0 && first + num <= m_cap); + memset(m_buf + first, 0, num * sizeof(NodeData)); // TODO we should not need this + for(size_t i = first, e = first + num; i < e; ++i) + { + _clear(i); + NodeData *n = m_buf + i; + n->m_prev_sibling = i - 1; + n->m_next_sibling = i + 1; + } + m_buf[first + num - 1].m_next_sibling = NONE; +} + +C4_SUPPRESS_WARNING_GCC_POP + + +//----------------------------------------------------------------------------- +void Tree::_release(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + _rem_hierarchy(i); + _free_list_add(i); + _clear(i); + + --m_size; +} + +//----------------------------------------------------------------------------- +// add to the front of the free list +void Tree::_free_list_add(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + NodeData &C4_RESTRICT w = m_buf[i]; + + w.m_parent = NONE; + w.m_next_sibling = m_free_head; + w.m_prev_sibling = NONE; + if(m_free_head != NONE) + m_buf[m_free_head].m_prev_sibling = i; + m_free_head = i; + if(m_free_tail == NONE) + m_free_tail = m_free_head; +} + +void Tree::_free_list_rem(size_t i) +{ + if(m_free_head == i) + m_free_head = 
_p(i)->m_next_sibling; + _rem_hierarchy(i); +} + +//----------------------------------------------------------------------------- +size_t Tree::_claim() +{ + if(m_free_head == NONE || m_buf == nullptr) + { + size_t sz = 2 * m_cap; + sz = sz ? sz : 16; + reserve(sz); + _RYML_CB_ASSERT(m_callbacks, m_free_head != NONE); + } + + _RYML_CB_ASSERT(m_callbacks, m_size < m_cap); + _RYML_CB_ASSERT(m_callbacks, m_free_head >= 0 && m_free_head < m_cap); + + size_t ichild = m_free_head; + NodeData *child = m_buf + ichild; + + ++m_size; + m_free_head = child->m_next_sibling; + if(m_free_head == NONE) + { + m_free_tail = NONE; + _RYML_CB_ASSERT(m_callbacks, m_size == m_cap); + } + + _clear(ichild); + + return ichild; +} + +//----------------------------------------------------------------------------- + +C4_SUPPRESS_WARNING_GCC_PUSH +C4_SUPPRESS_WARNING_CLANG_PUSH +C4_SUPPRESS_WARNING_CLANG("-Wnull-dereference") +#if defined(__GNUC__) && (__GNUC__ >= 6) +C4_SUPPRESS_WARNING_GCC("-Wnull-dereference") +#endif + +void Tree::_set_hierarchy(size_t ichild, size_t iparent, size_t iprev_sibling) +{ + _RYML_CB_ASSERT(m_callbacks, iparent == NONE || (iparent >= 0 && iparent < m_cap)); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE || (iprev_sibling >= 0 && iprev_sibling < m_cap)); + + NodeData *C4_RESTRICT child = get(ichild); + + child->m_parent = iparent; + child->m_prev_sibling = NONE; + child->m_next_sibling = NONE; + + if(iparent == NONE) + { + _RYML_CB_ASSERT(m_callbacks, ichild == 0); + _RYML_CB_ASSERT(m_callbacks, iprev_sibling == NONE); + } + + if(iparent == NONE) + return; + + size_t inext_sibling = iprev_sibling != NONE ? 
next_sibling(iprev_sibling) : first_child(iparent); + NodeData *C4_RESTRICT parent = get(iparent); + NodeData *C4_RESTRICT psib = get(iprev_sibling); + NodeData *C4_RESTRICT nsib = get(inext_sibling); + + if(psib) + { + _RYML_CB_ASSERT(m_callbacks, next_sibling(iprev_sibling) == id(nsib)); + child->m_prev_sibling = id(psib); + psib->m_next_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, psib->m_prev_sibling != psib->m_next_sibling || psib->m_prev_sibling == NONE); + } + + if(nsib) + { + _RYML_CB_ASSERT(m_callbacks, prev_sibling(inext_sibling) == id(psib)); + child->m_next_sibling = id(nsib); + nsib->m_prev_sibling = id(child); + _RYML_CB_ASSERT(m_callbacks, nsib->m_prev_sibling != nsib->m_next_sibling || nsib->m_prev_sibling == NONE); + } + + if(parent->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, parent->m_last_child == NONE); + parent->m_first_child = id(child); + parent->m_last_child = id(child); + } + else + { + if(child->m_next_sibling == parent->m_first_child) + parent->m_first_child = id(child); + + if(child->m_prev_sibling == parent->m_last_child) + parent->m_last_child = id(child); + } +} + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_CLANG_POP + + +//----------------------------------------------------------------------------- +void Tree::_rem_hierarchy(size_t i) +{ + _RYML_CB_ASSERT(m_callbacks, i >= 0 && i < m_cap); + + NodeData &C4_RESTRICT w = m_buf[i]; + + // remove from the parent + if(w.m_parent != NONE) + { + NodeData &C4_RESTRICT p = m_buf[w.m_parent]; + if(p.m_first_child == i) + { + p.m_first_child = w.m_next_sibling; + } + if(p.m_last_child == i) + { + p.m_last_child = w.m_prev_sibling; + } + } + + // remove from the used list + if(w.m_prev_sibling != NONE) + { + NodeData *C4_RESTRICT prev = get(w.m_prev_sibling); + prev->m_next_sibling = w.m_next_sibling; + } + if(w.m_next_sibling != NONE) + { + NodeData *C4_RESTRICT next = get(w.m_next_sibling); + next->m_prev_sibling = w.m_prev_sibling; + } +} + 
+//----------------------------------------------------------------------------- +void Tree::reorder() +{ + size_t r = root_id(); + _do_reorder(&r, 0); +} + +//----------------------------------------------------------------------------- +size_t Tree::_do_reorder(size_t *node, size_t count) +{ + // swap this node if it's not in place + if(*node != count) + { + _swap(*node, count); + *node = count; + } + ++count; // bump the count from this node + + // now descend in the hierarchy + for(size_t i = first_child(*node); i != NONE; i = next_sibling(i)) + { + // this child may have been relocated to a different index, + // so get an updated version + count = _do_reorder(&i, count); + } + return count; +} + +//----------------------------------------------------------------------------- +void Tree::_swap(size_t n_, size_t m_) +{ + _RYML_CB_ASSERT(m_callbacks, (parent(n_) != NONE) || type(n_) == NOTYPE); + _RYML_CB_ASSERT(m_callbacks, (parent(m_) != NONE) || type(m_) == NOTYPE); + NodeType tn = type(n_); + NodeType tm = type(m_); + if(tn != NOTYPE && tm != NOTYPE) + { + _swap_props(n_, m_); + _swap_hierarchy(n_, m_); + } + else if(tn == NOTYPE && tm != NOTYPE) + { + _copy_props(n_, m_); + _free_list_rem(n_); + _copy_hierarchy(n_, m_); + _clear(m_); + _free_list_add(m_); + } + else if(tn != NOTYPE && tm == NOTYPE) + { + _copy_props(m_, n_); + _free_list_rem(m_); + _copy_hierarchy(m_, n_); + _clear(n_); + _free_list_add(n_); + } + else + { + C4_NEVER_REACH(); + } +} + +//----------------------------------------------------------------------------- +void Tree::_swap_hierarchy(size_t ia, size_t ib) +{ + if(ia == ib) return; + + for(size_t i = first_child(ia); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ib; + } + + for(size_t i = first_child(ib); i != NONE; i = next_sibling(i)) + { + if(i == ib || i == ia) + continue; + _p(i)->m_parent = ia; + } + + auto & C4_RESTRICT a = *_p(ia); + auto & C4_RESTRICT b = *_p(ib); + auto & 
C4_RESTRICT pa = *_p(a.m_parent); + auto & C4_RESTRICT pb = *_p(b.m_parent); + + if(&pa == &pb) + { + if((pa.m_first_child == ib && pa.m_last_child == ia) + || + (pa.m_first_child == ia && pa.m_last_child == ib)) + { + std::swap(pa.m_first_child, pa.m_last_child); + } + else + { + bool changed = false; + if(pa.m_first_child == ia) + { + pa.m_first_child = ib; + changed = true; + } + if(pa.m_last_child == ia) + { + pa.m_last_child = ib; + changed = true; + } + if(pb.m_first_child == ib && !changed) + { + pb.m_first_child = ia; + } + if(pb.m_last_child == ib && !changed) + { + pb.m_last_child = ia; + } + } + } + else + { + if(pa.m_first_child == ia) + pa.m_first_child = ib; + if(pa.m_last_child == ia) + pa.m_last_child = ib; + if(pb.m_first_child == ib) + pb.m_first_child = ia; + if(pb.m_last_child == ib) + pb.m_last_child = ia; + } + std::swap(a.m_first_child , b.m_first_child); + std::swap(a.m_last_child , b.m_last_child); + + if(a.m_prev_sibling != ib && b.m_prev_sibling != ia && + a.m_next_sibling != ib && b.m_next_sibling != ia) + { + if(a.m_prev_sibling != NONE && a.m_prev_sibling != ib) + _p(a.m_prev_sibling)->m_next_sibling = ib; + if(a.m_next_sibling != NONE && a.m_next_sibling != ib) + _p(a.m_next_sibling)->m_prev_sibling = ib; + if(b.m_prev_sibling != NONE && b.m_prev_sibling != ia) + _p(b.m_prev_sibling)->m_next_sibling = ia; + if(b.m_next_sibling != NONE && b.m_next_sibling != ia) + _p(b.m_next_sibling)->m_prev_sibling = ia; + std::swap(a.m_prev_sibling, b.m_prev_sibling); + std::swap(a.m_next_sibling, b.m_next_sibling); + } + else + { + if(a.m_next_sibling == ib) // n will go after m + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling == ia); + if(a.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ib); + _p(a.m_prev_sibling)->m_next_sibling = ib; + } + if(b.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ia); + _p(b.m_next_sibling)->m_prev_sibling = ia; + } + size_t ns = b.m_next_sibling; + 
b.m_prev_sibling = a.m_prev_sibling; + b.m_next_sibling = ia; + a.m_prev_sibling = ib; + a.m_next_sibling = ns; + } + else if(a.m_prev_sibling == ib) // m will go after n + { + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling == ia); + if(b.m_prev_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ia); + _p(b.m_prev_sibling)->m_next_sibling = ia; + } + if(a.m_next_sibling != NONE) + { + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ib); + _p(a.m_next_sibling)->m_prev_sibling = ib; + } + size_t ns = b.m_prev_sibling; + a.m_prev_sibling = b.m_prev_sibling; + a.m_next_sibling = ib; + b.m_prev_sibling = ia; + b.m_next_sibling = ns; + } + else + { + C4_NEVER_REACH(); + } + } + _RYML_CB_ASSERT(m_callbacks, a.m_next_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, a.m_prev_sibling != ia); + _RYML_CB_ASSERT(m_callbacks, b.m_next_sibling != ib); + _RYML_CB_ASSERT(m_callbacks, b.m_prev_sibling != ib); + + if(a.m_parent != ib && b.m_parent != ia) + { + std::swap(a.m_parent, b.m_parent); + } + else + { + if(a.m_parent == ib && b.m_parent != ia) + { + a.m_parent = b.m_parent; + b.m_parent = ia; + } + else if(a.m_parent != ib && b.m_parent == ia) + { + b.m_parent = a.m_parent; + a.m_parent = ib; + } + else + { + C4_NEVER_REACH(); + } + } +} + +//----------------------------------------------------------------------------- +void Tree::_copy_hierarchy(size_t dst_, size_t src_) +{ + auto const& C4_RESTRICT src = *_p(src_); + auto & C4_RESTRICT dst = *_p(dst_); + auto & C4_RESTRICT prt = *_p(src.m_parent); + for(size_t i = src.m_first_child; i != NONE; i = next_sibling(i)) + { + _p(i)->m_parent = dst_; + } + if(src.m_prev_sibling != NONE) + { + _p(src.m_prev_sibling)->m_next_sibling = dst_; + } + if(src.m_next_sibling != NONE) + { + _p(src.m_next_sibling)->m_prev_sibling = dst_; + } + if(prt.m_first_child == src_) + { + prt.m_first_child = dst_; + } + if(prt.m_last_child == src_) + { + prt.m_last_child = dst_; + } + dst.m_parent = src.m_parent; + dst.m_first_child 
= src.m_first_child; + dst.m_last_child = src.m_last_child; + dst.m_prev_sibling = src.m_prev_sibling; + dst.m_next_sibling = src.m_next_sibling; +} + +//----------------------------------------------------------------------------- +void Tree::_swap_props(size_t n_, size_t m_) +{ + NodeData &C4_RESTRICT n = *_p(n_); + NodeData &C4_RESTRICT m = *_p(m_); + std::swap(n.m_type, m.m_type); + std::swap(n.m_key, m.m_key); + std::swap(n.m_val, m.m_val); +} + +//----------------------------------------------------------------------------- +void Tree::move(size_t node, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, ! is_root(node)); + _RYML_CB_ASSERT(m_callbacks, has_sibling(node, after) && has_sibling(after, node)); + + _rem_hierarchy(node); + _set_hierarchy(node, parent(node), after); +} + +//----------------------------------------------------------------------------- + +void Tree::move(size_t node, size_t new_parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + _RYML_CB_ASSERT(m_callbacks, ! 
is_root(node)); + + _rem_hierarchy(node); + _set_hierarchy(node, new_parent, after); +} + +size_t Tree::move(Tree *src, size_t node, size_t new_parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, new_parent != NONE); + + size_t dup = duplicate(src, node, new_parent, after); + src->remove(node); + return dup; +} + +void Tree::set_root_as_stream() +{ + size_t root = root_id(); + if(is_stream(root)) + return; + // don't use _add_flags() because it's checked and will fail + if(!has_children(root)) + { + if(is_val(root)) + { + _p(root)->m_type.add(SEQ); + size_t next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _p(next_doc)->m_type.add(DOC); + _p(next_doc)->m_type.rem(SEQ); + } + _p(root)->m_type = STREAM; + return; + } + _RYML_CB_ASSERT(m_callbacks, !has_key(root)); + size_t next_doc = append_child(root); + _copy_props_wo_key(next_doc, root); + _add_flags(next_doc, DOC); + for(size_t prev = NONE, ch = first_child(root), next = next_sibling(ch); ch != NONE; ) + { + if(ch == next_doc) + break; + move(ch, next_doc, prev); + prev = ch; + ch = next; + next = next_sibling(next); + } + _p(root)->m_type = STREAM; +} + + +//----------------------------------------------------------------------------- +void Tree::remove_children(size_t node) +{ + _RYML_CB_ASSERT(m_callbacks, get(node) != nullptr); + size_t ich = get(node)->m_first_child; + while(ich != NONE) + { + remove_children(ich); + _RYML_CB_ASSERT(m_callbacks, get(ich) != nullptr); + size_t next = get(ich)->m_next_sibling; + _release(ich); + if(ich == get(node)->m_last_child) + break; + ich = next; + } +} + +bool Tree::change_type(size_t node, NodeType type) +{ + _RYML_CB_ASSERT(m_callbacks, type.is_val() || type.is_map() || type.is_seq()); + _RYML_CB_ASSERT(m_callbacks, type.is_val() + type.is_map() + type.is_seq() == 1); + _RYML_CB_ASSERT(m_callbacks, type.has_key() == has_key(node) || (has_key(node) && !type.has_key())); + NodeData *d = _p(node); + 
if(type.is_map() && is_map(node)) + return false; + else if(type.is_seq() && is_seq(node)) + return false; + else if(type.is_val() && is_val(node)) + return false; + d->m_type = (d->m_type & (~(MAP|SEQ|VAL))) | type; + remove_children(node); + return true; +} + + +//----------------------------------------------------------------------------- +size_t Tree::duplicate(size_t node, size_t parent, size_t after) +{ + return duplicate(this, node, parent, after); +} + +size_t Tree::duplicate(Tree const* src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, ! src->is_root(node)); + + size_t copy = _claim(); + + _copy_props(copy, src, node); + _set_hierarchy(copy, parent, after); + duplicate_children(src, node, copy, NONE); + + return copy; +} + +//----------------------------------------------------------------------------- +size_t Tree::duplicate_children(size_t node, size_t parent, size_t after) +{ + return duplicate_children(this, node, parent, after); +} + +size_t Tree::duplicate_children(Tree const* src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + size_t prev = after; + for(size_t i = src->first_child(node); i != NONE; i = src->next_sibling(i)) + { + prev = duplicate(src, i, parent, prev); + } + + return prev; +} + +//----------------------------------------------------------------------------- +void Tree::duplicate_contents(size_t node, size_t where) +{ + duplicate_contents(this, node, where); +} + +void Tree::duplicate_contents(Tree const *src, size_t node, size_t where) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + _RYML_CB_ASSERT(m_callbacks, node != NONE); + 
_RYML_CB_ASSERT(m_callbacks, where != NONE); + _copy_props_wo_key(where, src, node); + duplicate_children(src, node, where, last_child(where)); +} + +//----------------------------------------------------------------------------- +size_t Tree::duplicate_children_no_rep(size_t node, size_t parent, size_t after) +{ + return duplicate_children_no_rep(this, node, parent, after); +} + +size_t Tree::duplicate_children_no_rep(Tree const *src, size_t node, size_t parent, size_t after) +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, parent != NONE); + _RYML_CB_ASSERT(m_callbacks, after == NONE || has_child(parent, after)); + + // don't loop using pointers as there may be a relocation + + // find the position where "after" is + size_t after_pos = NONE; + if(after != NONE) + { + for(size_t i = first_child(parent), icount = 0; i != NONE; ++icount, i = next_sibling(i)) + { + if(i == after) + { + after_pos = icount; + break; + } + } + _RYML_CB_ASSERT(m_callbacks, after_pos != NONE); + } + + // for each child to be duplicated... + size_t prev = after; + for(size_t i = src->first_child(node), icount = 0; i != NONE; ++icount, i = src->next_sibling(i)) + { + if(is_seq(parent)) + { + prev = duplicate(i, parent, prev); + } + else + { + _RYML_CB_ASSERT(m_callbacks, is_map(parent)); + // does the parent already have a node with key equal to that of the current duplicate? + size_t rep = NONE, rep_pos = NONE; + for(size_t j = first_child(parent), jcount = 0; j != NONE; ++jcount, j = next_sibling(j)) + { + if(key(j) == key(i)) + { + rep = j; + rep_pos = jcount; + break; + } + } + if(rep == NONE) // there is no repetition; just duplicate + { + prev = duplicate(src, i, parent, prev); + } + else // yes, there is a repetition + { + if(after_pos != NONE && rep_pos < after_pos) + { + // rep is located before the node which will be inserted, + // and will be overridden by the duplicate. So replace it. 
+ remove(rep); + prev = duplicate(src, i, parent, prev); + } + else if(after_pos == NONE || rep_pos >= after_pos) + { + // rep is located after the node which will be inserted + // and overrides it. So move the rep into this node's place. + if(rep != prev) + { + move(rep, prev); + prev = rep; + } + } + } // there's a repetition + } + } + + return prev; +} + + +//----------------------------------------------------------------------------- + +void Tree::merge_with(Tree const *src, size_t src_node, size_t dst_node) +{ + _RYML_CB_ASSERT(m_callbacks, src != nullptr); + if(src_node == NONE) + src_node = src->root_id(); + if(dst_node == NONE) + dst_node = root_id(); + _RYML_CB_ASSERT(m_callbacks, src->has_val(src_node) || src->is_seq(src_node) || src->is_map(src_node)); + + if(src->has_val(src_node)) + { + if( ! has_val(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + } + if(src->is_keyval(src_node)) + _copy_props(dst_node, src, src_node); + else if(src->is_val(src_node)) + _copy_props_wo_key(dst_node, src, src_node); + else + C4_NEVER_REACH(); + } + else if(src->is_seq(src_node)) + { + if( ! is_seq(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_seq(dst_node, src->key(src_node)); + else + to_seq(dst_node); + } + for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + size_t dch = append_child(dst_node); + _copy_props_wo_key(dch, src, sch); + merge_with(src, sch, dch); + } + } + else if(src->is_map(src_node)) + { + if( ! 
is_map(dst_node)) + { + if(has_children(dst_node)) + remove_children(dst_node); + _clear_type(dst_node); + if(src->has_key(src_node)) + to_map(dst_node, src->key(src_node)); + else + to_map(dst_node); + } + for(size_t sch = src->first_child(src_node); sch != NONE; sch = src->next_sibling(sch)) + { + size_t dch = find_child(dst_node, src->key(sch)); + if(dch == NONE) + { + dch = append_child(dst_node); + _copy_props(dch, src, sch); + } + merge_with(src, sch, dch); + } + } + else + { + C4_NEVER_REACH(); + } +} + + +//----------------------------------------------------------------------------- + +namespace detail { +/** @todo make this part of the public API, refactoring as appropriate + * to be able to use the same resolver to handle multiple trees (one + * at a time) */ +struct ReferenceResolver +{ + struct refdata + { + NodeType type; + size_t node; + size_t prev_anchor; + size_t target; + size_t parent_ref; + size_t parent_ref_sibling; + }; + + Tree *t; + /** from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. 
+ * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + stack refs; + + ReferenceResolver(Tree *t_) : t(t_), refs(t_->callbacks()) + { + resolve(); + } + + void store_anchors_and_refs() + { + // minimize (re-)allocations by counting first + size_t num_anchors_and_refs = count_anchors_and_refs(t->root_id()); + if(!num_anchors_and_refs) + return; + refs.reserve(num_anchors_and_refs); + + // now descend through the hierarchy + _store_anchors_and_refs(t->root_id()); + + // finally connect the reference list + size_t prev_anchor = npos; + size_t count = 0; + for(auto &rd : refs) + { + rd.prev_anchor = prev_anchor; + if(rd.type.is_anchor()) + prev_anchor = count; + ++count; + } + } + + size_t count_anchors_and_refs(size_t n) + { + size_t c = 0; + c += t->has_key_anchor(n); + c += t->has_val_anchor(n); + c += t->is_key_ref(n); + c += t->is_val_ref(n); + for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + c += count_anchors_and_refs(ch); + return c; + } + + void _store_anchors_and_refs(size_t n) + { + if(t->is_key_ref(n) || t->is_val_ref(n) || (t->has_key(n) && t->key(n) == "<<")) + { + if(t->is_seq(n)) + { + // for merging multiple inheritance targets + // <<: [ *CENTER, *BIG ] + for(size_t ich = t->first_child(n); ich != NONE; ich = t->next_sibling(ich)) + { + RYML_ASSERT(t->num_children(ich) == 0); + refs.push({VALREF, ich, npos, npos, n, t->next_sibling(n)}); + } + return; + } + if(t->is_key_ref(n) && t->key(n) != "<<") // insert key refs BEFORE inserting val refs + { + RYML_CHECK((!t->has_key(n)) || t->key(n).ends_with(t->key_ref(n))); + refs.push({KEYREF, n, npos, npos, NONE, NONE}); + } + if(t->is_val_ref(n)) + { + RYML_CHECK((!t->has_val(n)) || t->val(n).ends_with(t->val_ref(n))); + refs.push({VALREF, n, npos, npos, NONE, NONE}); + } + } + if(t->has_key_anchor(n)) + { + RYML_CHECK(t->has_key(n)); + refs.push({KEYANCH, n, npos, npos, NONE, NONE}); + } + if(t->has_val_anchor(n)) + { + RYML_CHECK(t->has_val(n) || t->is_container(n)); + 
refs.push({VALANCH, n, npos, npos, NONE, NONE}); + } + for(size_t ch = t->first_child(n); ch != NONE; ch = t->next_sibling(ch)) + { + _store_anchors_and_refs(ch); + } + } + + size_t lookup_(refdata *C4_RESTRICT ra) + { + RYML_ASSERT(ra->type.is_key_ref() || ra->type.is_val_ref()); + RYML_ASSERT(ra->type.is_key_ref() != ra->type.is_val_ref()); + csubstr refname; + if(ra->type.is_val_ref()) + { + refname = t->val_ref(ra->node); + } + else + { + RYML_ASSERT(ra->type.is_key_ref()); + refname = t->key_ref(ra->node); + } + while(ra->prev_anchor != npos) + { + ra = &refs[ra->prev_anchor]; + if(t->has_anchor(ra->node, refname)) + return ra->node; + } + + #ifndef RYML_ERRMSG_SIZE + #define RYML_ERRMSG_SIZE 1024 + #endif + + char errmsg[RYML_ERRMSG_SIZE]; + snprintf(errmsg, RYML_ERRMSG_SIZE, "anchor does not exist: '%.*s'", + static_cast(refname.size()), refname.data()); + c4::yml::error(errmsg); + return NONE; + } + + void resolve() + { + store_anchors_and_refs(); + if(refs.empty()) + return; + + /* from the specs: "an alias node refers to the most recent + * node in the serialization having the specified anchor". So + * we need to start looking upward from ref nodes. + * + * @see http://yaml.org/spec/1.2/spec.html#id2765878 */ + for(size_t i = 0, e = refs.size(); i < e; ++i) + { + auto &C4_RESTRICT rd = refs.top(i); + if( ! rd.type.is_ref()) + continue; + rd.target = lookup_(&rd); + } + } + +}; // ReferenceResolver +} // namespace detail + +void Tree::resolve() +{ + if(m_size == 0) + return; + + detail::ReferenceResolver rr(this); + + // insert the resolved references + size_t prev_parent_ref = NONE; + size_t prev_parent_ref_after = NONE; + for(auto const& C4_RESTRICT rd : rr.refs) + { + if( ! 
rd.type.is_ref()) + continue; + if(rd.parent_ref != NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_seq(rd.parent_ref)); + size_t after, p = parent(rd.parent_ref); + if(prev_parent_ref != rd.parent_ref) + { + after = rd.parent_ref;//prev_sibling(rd.parent_ref_sibling); + prev_parent_ref_after = after; + } + else + { + after = prev_parent_ref_after; + } + prev_parent_ref = rd.parent_ref; + prev_parent_ref_after = duplicate_children_no_rep(rd.target, p, after); + remove(rd.node); + } + else + { + if(has_key(rd.node) && is_key_ref(rd.node) && key(rd.node) == "<<") + { + _RYML_CB_ASSERT(m_callbacks, is_keyval(rd.node)); + size_t p = parent(rd.node); + size_t after = prev_sibling(rd.node); + duplicate_children_no_rep(rd.target, p, after); + remove(rd.node); + } + else if(rd.type.is_key_ref()) + { + _RYML_CB_ASSERT(m_callbacks, is_key_ref(rd.node)); + _RYML_CB_ASSERT(m_callbacks, has_key_anchor(rd.target) || has_val_anchor(rd.target)); + if(has_val_anchor(rd.target) && val_anchor(rd.target) == key_ref(rd.node)) + { + _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); + _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); + _p(rd.node)->m_key.scalar = val(rd.target); + _add_flags(rd.node, KEY); + } + else + { + _RYML_CB_CHECK(m_callbacks, key_anchor(rd.target) == key_ref(rd.node)); + _p(rd.node)->m_key.scalar = key(rd.target); + _add_flags(rd.node, VAL); + } + } + else + { + _RYML_CB_ASSERT(m_callbacks, rd.type.is_val_ref()); + if(has_key_anchor(rd.target) && key_anchor(rd.target) == val_ref(rd.node)) + { + _RYML_CB_CHECK(m_callbacks, !is_container(rd.target)); + _RYML_CB_CHECK(m_callbacks, has_val(rd.target)); + _p(rd.node)->m_val.scalar = key(rd.target); + _add_flags(rd.node, VAL); + } + else + { + duplicate_contents(rd.target, rd.node); + } + } + } + } + + // clear anchors and refs + for(auto const& C4_RESTRICT ar : rr.refs) + { + rem_anchor_ref(ar.node); + if(ar.parent_ref != NONE) + if(type(ar.parent_ref) != NOTYPE) + remove(ar.parent_ref); + } + +} + 
+//----------------------------------------------------------------------------- + +size_t Tree::num_children(size_t node) const +{ + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + ++count; + } + return count; +} + +size_t Tree::child(size_t node, size_t pos) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(count++ == pos) + return i; + } + return NONE; +} + +size_t Tree::child_pos(size_t node, size_t ch) const +{ + size_t count = 0; + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(i == ch) + return count; + ++count; + } + return npos; +} + +#if defined(__clang__) +# pragma clang diagnostic push +# pragma GCC diagnostic ignored "-Wnull-dereference" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# if __GNUC__ >= 6 +# pragma GCC diagnostic ignored "-Wnull-dereference" +# endif +#endif + +size_t Tree::find_child(size_t node, csubstr const& name) const +{ + _RYML_CB_ASSERT(m_callbacks, node != NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(node)); + if(get(node)->m_first_child == NONE) + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child == NONE); + return NONE; + } + else + { + _RYML_CB_ASSERT(m_callbacks, _p(node)->m_last_child != NONE); + } + for(size_t i = first_child(node); i != NONE; i = next_sibling(i)) + { + if(_p(i)->m_key.scalar == name) + { + return i; + } + } + return NONE; +} + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + +//----------------------------------------------------------------------------- + +void Tree::to_val(size_t node, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! 
parent_is_map(node)); + _set_flags(node, VAL|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val = val; +} + +void Tree::to_keyval(size_t node, csubstr key, csubstr val, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEYVAL|more_flags); + _p(node)->m_key = key; + _p(node)->m_val = val; +} + +void Tree::to_map(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || ! parent_is_map(node)); // parent must not have children with keys + _set_flags(node, MAP|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_map(size_t node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|MAP|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_seq(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_seq(node)); + _set_flags(node, SEQ|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_seq(size_t node, csubstr key, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _RYML_CB_ASSERT(m_callbacks, parent(node) == NONE || parent_is_map(node)); + _set_flags(node, KEY|SEQ|more_flags); + _p(node)->m_key = key; + _p(node)->m_val.clear(); +} + +void Tree::to_doc(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! has_children(node)); + _set_flags(node, DOC|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + +void Tree::to_stream(size_t node, type_bits more_flags) +{ + _RYML_CB_ASSERT(m_callbacks, ! 
has_children(node)); + _set_flags(node, STREAM|more_flags); + _p(node)->m_key.clear(); + _p(node)->m_val.clear(); +} + + +//----------------------------------------------------------------------------- +size_t Tree::num_tag_directives() const +{ + // this assumes we have a very small number of tag directives + for(size_t i = 0; i < RYML_MAX_TAG_DIRECTIVES; ++i) + if(m_tag_directives[i].handle.empty()) + return i; + return RYML_MAX_TAG_DIRECTIVES; +} + +void Tree::clear_tag_directives() +{ + for(TagDirective &td : m_tag_directives) + td = {}; +} + +size_t Tree::add_tag_directive(TagDirective const& td) +{ + _RYML_CB_CHECK(m_callbacks, !td.handle.empty()); + _RYML_CB_CHECK(m_callbacks, !td.prefix.empty()); + _RYML_CB_ASSERT(m_callbacks, td.handle.begins_with('!')); + _RYML_CB_ASSERT(m_callbacks, td.handle.ends_with('!')); + // https://yaml.org/spec/1.2.2/#rule-ns-word-char + _RYML_CB_ASSERT(m_callbacks, td.handle == '!' || td.handle == "!!" || td.handle.trim('!').first_not_of("01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-") == npos); + size_t pos = num_tag_directives(); + _RYML_CB_CHECK(m_callbacks, pos < RYML_MAX_TAG_DIRECTIVES); + m_tag_directives[pos] = td; + return pos; +} + +size_t Tree::resolve_tag(substr output, csubstr tag, size_t node_id) const +{ + // lookup from the end. We want to find the first directive that + // matches the tag and has a target node id leq than the given + // node_id. 
+ for(size_t i = RYML_MAX_TAG_DIRECTIVES-1; i != (size_t)-1; --i) + { + auto const& td = m_tag_directives[i]; + if(td.handle.empty()) + continue; + if(tag.begins_with(td.handle) && td.next_node_id <= node_id) + { + _RYML_CB_ASSERT(m_callbacks, tag.len >= td.handle.len); + csubstr rest = tag.sub(td.handle.len); + size_t len = 1u + td.prefix.len + rest.len + 1u; + size_t numpc = rest.count('%'); + if(numpc == 0) + { + if(len <= output.len) + { + output.str[0] = '<'; + memcpy(1u + output.str, td.prefix.str, td.prefix.len); + memcpy(1u + output.str + td.prefix.len, rest.str, rest.len); + output.str[1u + td.prefix.len + rest.len] = '>'; + } + } + else + { + // need to decode URI % sequences + size_t pos = rest.find('%'); + _RYML_CB_ASSERT(m_callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(m_callbacks, pos+1 < next); + _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); + size_t delta = next - (pos+1); + len -= delta; + pos = rest.find('%', pos+1); + } while(pos != npos); + if(len <= output.len) + { + size_t prev = 0, wpos = 0; + auto appendstr = [&](csubstr s) { memcpy(output.str + wpos, s.str, s.len); wpos += s.len; }; + auto appendchar = [&](char c) { output.str[wpos++] = c; }; + appendchar('<'); + appendstr(td.prefix); + pos = rest.find('%'); + _RYML_CB_ASSERT(m_callbacks, pos != npos); + do { + size_t next = rest.first_not_of("0123456789abcdefABCDEF", pos+1); + if(next == npos) + next = rest.len; + _RYML_CB_CHECK(m_callbacks, pos+1 < next); + _RYML_CB_CHECK(m_callbacks, pos+1 + 2 <= next); + uint8_t val; + if(C4_UNLIKELY(!read_hex(rest.range(pos+1, next), &val) || val > 127)) + _RYML_CB_ERR(m_callbacks, "invalid URI character"); + appendstr(rest.range(prev, pos)); + appendchar((char)val); + prev = next; + pos = rest.find('%', pos+1); + } while(pos != npos); + _RYML_CB_ASSERT(m_callbacks, pos == npos); + _RYML_CB_ASSERT(m_callbacks, prev > 0); + 
_RYML_CB_ASSERT(m_callbacks, rest.len >= prev); + appendstr(rest.sub(prev)); + appendchar('>'); + _RYML_CB_ASSERT(m_callbacks, wpos == len); + } + } + return len; + } + } + return 0; // return 0 to signal that the tag is local and cannot be resolved +} + +namespace { +csubstr _transform_tag(Tree *t, csubstr tag, size_t node) +{ + size_t required_size = t->resolve_tag(substr{}, tag, node); + if(!required_size) + return tag; + const char *prev_arena = t->arena().str; + substr buf = t->alloc_arena(required_size); + _RYML_CB_ASSERT(t->m_callbacks, t->arena().str == prev_arena); + size_t actual_size = t->resolve_tag(buf, tag, node); + _RYML_CB_ASSERT(t->m_callbacks, actual_size <= required_size); + return buf.first(actual_size); +} +void _resolve_tags(Tree *t, size_t node) +{ + for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + { + if(t->has_key(child) && t->has_key_tag(child)) + t->set_key_tag(child, _transform_tag(t, t->key_tag(child), child)); + if(t->has_val(child) && t->has_val_tag(child)) + t->set_val_tag(child, _transform_tag(t, t->val_tag(child), child)); + _resolve_tags(t, child); + } +} +size_t _count_resolved_tags_size(Tree const* t, size_t node) +{ + size_t sz = 0; + for(size_t child = t->first_child(node); child != NONE; child = t->next_sibling(child)) + { + if(t->has_key(child) && t->has_key_tag(child)) + sz += t->resolve_tag(substr{}, t->key_tag(child), child); + if(t->has_val(child) && t->has_val_tag(child)) + sz += t->resolve_tag(substr{}, t->val_tag(child), child); + sz += _count_resolved_tags_size(t, child); + } + return sz; +} +} // namespace + +void Tree::resolve_tags() +{ + if(empty()) + return; + if(num_tag_directives() == 0) + return; + size_t needed_size = _count_resolved_tags_size(this, root_id()); + if(needed_size) + reserve_arena(arena_pos() + needed_size); + _resolve_tags(this, root_id()); +} + + +//----------------------------------------------------------------------------- + +csubstr 
Tree::lookup_result::resolved() const +{ + csubstr p = path.first(path_pos); + if(p.ends_with('.')) + p = p.first(p.len-1); + return p; +} + +csubstr Tree::lookup_result::unresolved() const +{ + return path.sub(path_pos); +} + +void Tree::_advance(lookup_result *r, size_t more) const +{ + r->path_pos += more; + if(r->path.sub(r->path_pos).begins_with('.')) + ++r->path_pos; +} + +Tree::lookup_result Tree::lookup_path(csubstr path, size_t start) const +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + if(path.empty()) + return r; + _lookup_path(&r); + if(r.target == NONE && r.closest == start) + r.closest = NONE; + return r; +} + +size_t Tree::lookup_path_or_modify(csubstr default_value, csubstr path, size_t start) +{ + size_t target = _lookup_path_or_create(path, start); + if(parent_is_map(target)) + to_keyval(target, key(target), default_value); + else + to_val(target, default_value); + return target; +} + +size_t Tree::lookup_path_or_modify(Tree const *src, size_t src_node, csubstr path, size_t start) +{ + size_t target = _lookup_path_or_create(path, start); + merge_with(src, src_node, target); + return target; +} + +size_t Tree::_lookup_path_or_create(csubstr path, size_t start) +{ + if(start == NONE) + start = root_id(); + lookup_result r(path, start); + _lookup_path(&r); + if(r.target != NONE) + { + C4_ASSERT(r.unresolved().empty()); + return r.target; + } + _lookup_path_modify(&r); + return r.target; +} + +void Tree::_lookup_path(lookup_result *r) const +{ + C4_ASSERT( ! r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + size_t node; + do + { + node = _next_node(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +void Tree::_lookup_path_modify(lookup_result *r) +{ + C4_ASSERT( ! 
r->unresolved().empty()); + _lookup_path_token parent{"", type(r->closest)}; + size_t node; + do + { + node = _next_node_modify(r, &parent); + if(node != NONE) + r->closest = node; + if(r->unresolved().empty()) + { + r->target = node; + return; + } + } while(node != NONE); +} + +size_t Tree::_next_node(lookup_result * r, _lookup_path_token *parent) const +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + size_t node = NONE; + csubstr prev = token.value; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = find_child(r->closest, token.value); + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + size_t idx = 0; + _RYML_CB_CHECK(m_callbacks, from_chars(token.value, &idx)); + node = child(r->closest, idx); + } + else + { + C4_NEVER_REACH(); + } + + if(node != NONE) + { + *parent = token; + } + else + { + csubstr p = r->path.sub(r->path_pos > 0 ? r->path_pos - 1 : r->path_pos); + r->path_pos -= prev.len; + if(p.begins_with('.')) + r->path_pos -= 1u; + } + + return node; +} + +size_t Tree::_next_node_modify(lookup_result * r, _lookup_path_token *parent) +{ + _lookup_path_token token = _next_token(r, *parent); + if( ! token) + return NONE; + + size_t node = NONE; + if(token.type == MAP || token.type == SEQ) + { + _RYML_CB_ASSERT(m_callbacks, !token.value.begins_with('[')); + //_RYML_CB_ASSERT(m_callbacks, is_container(r->closest) || r->closest == NONE); + if( ! 
is_container(r->closest)) + { + if(has_key(r->closest)) + to_map(r->closest, key(r->closest)); + else + to_map(r->closest); + } + else + { + if(is_map(r->closest)) + node = find_child(r->closest, token.value); + else + { + size_t pos = NONE; + _RYML_CB_CHECK(m_callbacks, c4::atox(token.value, &pos)); + _RYML_CB_ASSERT(m_callbacks, pos != NONE); + node = child(r->closest, pos); + } + } + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, is_map(r->closest)); + node = append_child(r->closest); + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_type.add(KEY); + } + } + else if(token.type == KEYVAL) + { + _RYML_CB_ASSERT(m_callbacks, r->unresolved().empty()); + if(is_map(r->closest)) + { + node = find_child(r->closest, token.value); + if(node == NONE) + node = append_child(r->closest); + } + else + { + _RYML_CB_ASSERT(m_callbacks, !is_seq(r->closest)); + _add_flags(r->closest, MAP); + node = append_child(r->closest); + } + NodeData *n = _p(node); + n->m_key.scalar = token.value; + n->m_val.scalar = ""; + n->m_type.add(KEYVAL); + } + else if(token.type == KEY) + { + _RYML_CB_ASSERT(m_callbacks, token.value.begins_with('[') && token.value.ends_with(']')); + token.value = token.value.offs(1, 1).trim(' '); + size_t idx; + if( ! from_chars(token.value, &idx)) + return NONE; + if( ! 
is_container(r->closest)) + { + if(has_key(r->closest)) + { + csubstr k = key(r->closest); + _clear_type(r->closest); + to_seq(r->closest, k); + } + else + { + _clear_type(r->closest); + to_seq(r->closest); + } + } + _RYML_CB_ASSERT(m_callbacks, is_container(r->closest)); + node = child(r->closest, idx); + if(node == NONE) + { + _RYML_CB_ASSERT(m_callbacks, num_children(r->closest) <= idx); + for(size_t i = num_children(r->closest); i <= idx; ++i) + { + node = append_child(r->closest); + if(i < idx) + { + if(is_map(r->closest)) + to_keyval(node, /*"~"*/{}, /*"~"*/{}); + else if(is_seq(r->closest)) + to_val(node, /*"~"*/{}); + } + } + } + } + else + { + C4_NEVER_REACH(); + } + + _RYML_CB_ASSERT(m_callbacks, node != NONE); + *parent = token; + return node; +} + +/** types of tokens: + * - seeing "map." ---> "map"/MAP + * - finishing "scalar" ---> "scalar"/KEYVAL + * - seeing "seq[n]" ---> "seq"/SEQ (--> "[n]"/KEY) + * - seeing "[n]" ---> "[n]"/KEY + */ +Tree::_lookup_path_token Tree::_next_token(lookup_result *r, _lookup_path_token const& parent) const +{ + csubstr unres = r->unresolved(); + if(unres.empty()) + return {}; + + // is it an indexation like [0], [1], etc? + if(unres.begins_with('[')) + { + size_t pos = unres.find(']'); + if(pos == csubstr::npos) + return {}; + csubstr idx = unres.first(pos + 1); + _advance(r, pos + 1); + return {idx, KEY}; + } + + // no. so it must be a name + size_t pos = unres.first_of(".["); + if(pos == csubstr::npos) + { + _advance(r, unres.len); + NodeType t; + if(( ! parent) || parent.type.is_seq()) + return {unres, VAL}; + return {unres, KEYVAL}; + } + + // it's either a map or a seq + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '.' 
|| unres[pos] == '['); + if(unres[pos] == '.') + { + _RYML_CB_ASSERT(m_callbacks, pos != 0); + _advance(r, pos + 1); + return {unres.first(pos), MAP}; + } + + _RYML_CB_ASSERT(m_callbacks, unres[pos] == '['); + _advance(r, pos); + return {unres.first(pos), SEQ}; +} + + +} // namespace ryml +} // namespace c4 + + +C4_SUPPRESS_WARNING_GCC_POP +C4_SUPPRESS_WARNING_MSVC_POP + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/tree.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/parse.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +//#include "c4/yml/parse.hpp" +#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_) +#error "amalgamate: file c4/yml/parse.hpp must have been included at this point" +#endif /* C4_YML_PARSE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/error.hpp +//#include "c4/error.hpp" +#if !defined(C4_ERROR_HPP_) && !defined(_C4_ERROR_HPP_) +#error "amalgamate: file c4/error.hpp must have been included at this point" +#endif /* C4_ERROR_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/utf.hpp +//#include "c4/utf.hpp" +#if !defined(C4_UTF_HPP_) && !defined(_C4_UTF_HPP_) +#error "amalgamate: file c4/utf.hpp must have been included at this point" +#endif /* C4_UTF_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/dump.hpp +//#include +#if !defined(C4_DUMP_HPP_) && !defined(_C4_DUMP_HPP_) +#error "amalgamate: 
file c4/dump.hpp must have been included at this point" +#endif /* C4_DUMP_HPP_ */ + + +//included above: +//#include +//included above: +//#include +//included above: +//#include + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + +#ifdef RYML_DBG +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp +//#include "c4/yml/detail/print.hpp" +#if !defined(C4_YML_DETAIL_PRINT_HPP_) && !defined(_C4_YML_DETAIL_PRINT_HPP_) +#error "amalgamate: file c4/yml/detail/print.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ + +#endif + +#ifndef RYML_ERRMSG_SIZE + #define RYML_ERRMSG_SIZE 1024 +#endif + +//#define RYML_WITH_TAB_TOKENS +#ifdef RYML_WITH_TAB_TOKENS +#define _RYML_WITH_TAB_TOKENS(...) __VA_ARGS__ +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) with +#else +#define _RYML_WITH_TAB_TOKENS(...) +#define _RYML_WITH_OR_WITHOUT_TAB_TOKENS(with, without) without +#endif + + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#elif defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. +# pragma clang diagnostic ignored "-Wformat-nonliteral" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // to remove a warning on an assertion that a size_t >= 0. Later on, this size_t will turn into a template argument, and then it can become < 0. 
+# pragma GCC diagnostic ignored "-Wformat-nonliteral" +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wduplicated-branches" +# endif +#endif + +namespace c4 { +namespace yml { + +namespace { + +template +void _parse_dump(DumpFn dumpfn, c4::csubstr fmt, Args&& ...args) +{ + char writebuf[256]; + auto results = c4::format_dump_resume(dumpfn, writebuf, fmt, std::forward(args)...); + // resume writing if the results failed to fit the buffer + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) // bufsize will be that of the largest element serialized. Eg int(1), will require 1 byte. + { + results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward(args)...); + if(C4_UNLIKELY(results.bufsize > sizeof(writebuf))) + { + results = format_dump_resume(dumpfn, results, writebuf, fmt, std::forward(args)...); + } + } +} + +bool _is_scalar_next__runk(csubstr s) +{ + return !(s.begins_with(": ") || s.begins_with_any("#,:{}[]%&") || s.begins_with("? ") || s == "-" || s.begins_with("- ")); +} + +bool _is_scalar_next__rseq_rval(csubstr s) +{ + return !(s.begins_with_any("[{!&") || s.begins_with("? ") || s.begins_with("- ") || s == "-"); +} + +bool _is_scalar_next__rmap(csubstr s) +{ + return !(s.begins_with(": ") || s.begins_with_any("#,!&") || s.begins_with("? 
") _RYML_WITH_TAB_TOKENS(|| s.begins_with(":\t"))); +} + +bool _is_scalar_next__rmap_val(csubstr s) +{ + return !(s.begins_with("- ") || s.begins_with_any("{[") || s == "-"); +} + +bool _is_doc_sep(csubstr s) +{ + constexpr const csubstr dashes = "---"; + constexpr const csubstr ellipsis = "..."; + constexpr const csubstr whitesp = " \t"; + if(s.begins_with(dashes)) + return s == dashes || s.sub(3).begins_with_any(whitesp); + else if(s.begins_with(ellipsis)) + return s == ellipsis || s.sub(3).begins_with_any(whitesp); + return false; +} + +/** @p i is set to the first non whitespace character after the line + * @return the number of empty lines after the initial position */ +size_t count_following_newlines(csubstr r, size_t *C4_RESTRICT i, size_t indentation) +{ + RYML_ASSERT(r[*i] == '\n'); + size_t numnl_following = 0; + ++(*i); + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] == '\n') + { + ++numnl_following; + if(indentation) // skip the indentation after the newline + { + size_t stop = *i + indentation; + for( ; *i < r.len; ++(*i)) + { + if(r.str[*i] != ' ' && r.str[*i] != '\r') + break; + RYML_ASSERT(*i < stop); + } + C4_UNUSED(stop); + } + } + else if(r.str[*i] == ' ' || r.str[*i] == '\t' || r.str[*i] == '\r') // skip leading whitespace + ; + else + break; + } + return numnl_following; +} + +} // anon namespace + + +//----------------------------------------------------------------------------- + +Parser::~Parser() +{ + _free(); + _clr(); +} + +Parser::Parser(Callbacks const& cb) + : m_file() + , m_buf() + , m_root_id(NONE) + , m_tree() + , m_stack(cb) + , m_state() + , m_key_tag_indentation(0) + , m_key_tag2_indentation(0) + , m_key_tag() + , m_key_tag2() + , m_val_tag_indentation(0) + , m_val_tag() + , m_key_anchor_was_before(false) + , m_key_anchor_indentation(0) + , m_key_anchor() + , m_val_anchor_indentation(0) + , m_val_anchor() + , m_filter_arena() + , m_newline_offsets() + , m_newline_offsets_size(0) + , m_newline_offsets_capacity(0) + , 
m_newline_offsets_buf() +{ + m_stack.push(State{}); + m_state = &m_stack.top(); +} + +Parser::Parser(Parser &&that) + : m_file(that.m_file) + , m_buf(that.m_buf) + , m_root_id(that.m_root_id) + , m_tree(that.m_tree) + , m_stack(std::move(that.m_stack)) + , m_state(&m_stack.top()) + , m_key_tag_indentation(that.m_key_tag_indentation) + , m_key_tag2_indentation(that.m_key_tag2_indentation) + , m_key_tag(that.m_key_tag) + , m_key_tag2(that.m_key_tag2) + , m_val_tag_indentation(that.m_val_tag_indentation) + , m_val_tag(that.m_val_tag) + , m_key_anchor_was_before(that.m_key_anchor_was_before) + , m_key_anchor_indentation(that.m_key_anchor_indentation) + , m_key_anchor(that.m_key_anchor) + , m_val_anchor_indentation(that.m_val_anchor_indentation) + , m_val_anchor(that.m_val_anchor) + , m_filter_arena(that.m_filter_arena) + , m_newline_offsets(that.m_newline_offsets) + , m_newline_offsets_size(that.m_newline_offsets_size) + , m_newline_offsets_capacity(that.m_newline_offsets_capacity) + , m_newline_offsets_buf(that.m_newline_offsets_buf) +{ + that._clr(); +} + +Parser::Parser(Parser const& that) + : m_file(that.m_file) + , m_buf(that.m_buf) + , m_root_id(that.m_root_id) + , m_tree(that.m_tree) + , m_stack(that.m_stack) + , m_state(&m_stack.top()) + , m_key_tag_indentation(that.m_key_tag_indentation) + , m_key_tag2_indentation(that.m_key_tag2_indentation) + , m_key_tag(that.m_key_tag) + , m_key_tag2(that.m_key_tag2) + , m_val_tag_indentation(that.m_val_tag_indentation) + , m_val_tag(that.m_val_tag) + , m_key_anchor_was_before(that.m_key_anchor_was_before) + , m_key_anchor_indentation(that.m_key_anchor_indentation) + , m_key_anchor(that.m_key_anchor) + , m_val_anchor_indentation(that.m_val_anchor_indentation) + , m_val_anchor(that.m_val_anchor) + , m_filter_arena() + , m_newline_offsets() + , m_newline_offsets_size() + , m_newline_offsets_capacity() + , m_newline_offsets_buf() +{ + if(that.m_newline_offsets_capacity) + { + _resize_locations(that.m_newline_offsets_capacity); 
+ _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity == that.m_newline_offsets_capacity); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + } + if(that.m_filter_arena.len) + { + _resize_filter_arena(that.m_filter_arena.len); + } +} + +Parser& Parser::operator=(Parser &&that) +{ + _free(); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_root_id = (that.m_root_id); + m_tree = (that.m_tree); + m_stack = std::move(that.m_stack); + m_state = (&m_stack.top()); + m_key_tag_indentation = (that.m_key_tag_indentation); + m_key_tag2_indentation = (that.m_key_tag2_indentation); + m_key_tag = (that.m_key_tag); + m_key_tag2 = (that.m_key_tag2); + m_val_tag_indentation = (that.m_val_tag_indentation); + m_val_tag = (that.m_val_tag); + m_key_anchor_was_before = (that.m_key_anchor_was_before); + m_key_anchor_indentation = (that.m_key_anchor_indentation); + m_key_anchor = (that.m_key_anchor); + m_val_anchor_indentation = (that.m_val_anchor_indentation); + m_val_anchor = (that.m_val_anchor); + m_filter_arena = that.m_filter_arena; + m_newline_offsets = (that.m_newline_offsets); + m_newline_offsets_size = (that.m_newline_offsets_size); + m_newline_offsets_capacity = (that.m_newline_offsets_capacity); + m_newline_offsets_buf = (that.m_newline_offsets_buf); + that._clr(); + return *this; +} + +Parser& Parser::operator=(Parser const& that) +{ + _free(); + m_file = (that.m_file); + m_buf = (that.m_buf); + m_root_id = (that.m_root_id); + m_tree = (that.m_tree); + m_stack = that.m_stack; + m_state = &m_stack.top(); + m_key_tag_indentation = (that.m_key_tag_indentation); + m_key_tag2_indentation = (that.m_key_tag2_indentation); + m_key_tag = (that.m_key_tag); + m_key_tag2 = (that.m_key_tag2); + m_val_tag_indentation = (that.m_val_tag_indentation); + m_val_tag = (that.m_val_tag); + m_key_anchor_was_before = (that.m_key_anchor_was_before); + m_key_anchor_indentation = 
(that.m_key_anchor_indentation); + m_key_anchor = (that.m_key_anchor); + m_val_anchor_indentation = (that.m_val_anchor_indentation); + m_val_anchor = (that.m_val_anchor); + if(that.m_filter_arena.len > 0) + _resize_filter_arena(that.m_filter_arena.len); + if(that.m_newline_offsets_capacity > m_newline_offsets_capacity) + _resize_locations(that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_capacity); + _RYML_CB_CHECK(m_stack.m_callbacks, m_newline_offsets_capacity >= that.m_newline_offsets_size); + memcpy(m_newline_offsets, that.m_newline_offsets, that.m_newline_offsets_size * sizeof(size_t)); + m_newline_offsets_size = that.m_newline_offsets_size; + m_newline_offsets_buf = that.m_newline_offsets_buf; + return *this; +} + +void Parser::_clr() +{ + m_file = {}; + m_buf = {}; + m_root_id = {}; + m_tree = {}; + m_stack.clear(); + m_state = {}; + m_key_tag_indentation = {}; + m_key_tag2_indentation = {}; + m_key_tag = {}; + m_key_tag2 = {}; + m_val_tag_indentation = {}; + m_val_tag = {}; + m_key_anchor_was_before = {}; + m_key_anchor_indentation = {}; + m_key_anchor = {}; + m_val_anchor_indentation = {}; + m_val_anchor = {}; + m_filter_arena = {}; + m_newline_offsets = {}; + m_newline_offsets_size = {}; + m_newline_offsets_capacity = {}; + m_newline_offsets_buf = {}; +} + +void Parser::_free() +{ + if(m_newline_offsets) + { + _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = nullptr; + m_newline_offsets_size = 0u; + m_newline_offsets_capacity = 0u; + m_newline_offsets_buf = 0u; + } + if(m_filter_arena.len) + { + _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); + m_filter_arena = {}; + } + m_stack._free(); +} + + +//----------------------------------------------------------------------------- +void Parser::_reset() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() == 1); + m_stack.clear(); + 
m_stack.push({}); + m_state = &m_stack.top(); + m_state->reset(m_file.str, m_root_id); + + m_key_tag_indentation = 0; + m_key_tag2_indentation = 0; + m_key_tag.clear(); + m_key_tag2.clear(); + m_val_tag_indentation = 0; + m_val_tag.clear(); + m_key_anchor_was_before = false; + m_key_anchor_indentation = 0; + m_key_anchor.clear(); + m_val_anchor_indentation = 0; + m_val_anchor.clear(); + + _mark_locations_dirty(); +} + +//----------------------------------------------------------------------------- +template +void Parser::_fmt_msg(DumpFn &&dumpfn) const +{ + auto const& lc = m_state->line_contents; + csubstr contents = lc.stripped; + if(contents.len) + { + // print the yaml src line + size_t offs = 3u + to_chars(substr{}, m_state->pos.line) + to_chars(substr{}, m_state->pos.col); + if(m_file.len) + { + _parse_dump(dumpfn, "{}:", m_file); + offs += m_file.len + 1; + } + _parse_dump(dumpfn, "{}:{}: ", m_state->pos.line, m_state->pos.col); + csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); + csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); + _parse_dump(dumpfn, "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); + // highlight the remaining portion of the previous line + size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); + size_t lastcol = firstcol + lc.rem.len; + for(size_t i = 0; i < offs + firstcol; ++i) + dumpfn(" "); + dumpfn("^"); + for(size_t i = 1, e = (lc.rem.len < 80u ? 
lc.rem.len : 80u); i < e; ++i) + dumpfn("~"); + _parse_dump(dumpfn, "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); + } + else + { + dumpfn("\n"); + } + +#ifdef RYML_DBG + // next line: print the state flags + { + char flagbuf_[64]; + _parse_dump(dumpfn, "top state: {}\n", _prfl(flagbuf_, m_state->flags)); + } +#endif +} + + +//----------------------------------------------------------------------------- +template +void Parser::_err(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + char errmsg[RYML_ERRMSG_SIZE]; + detail::_SubstrWriter writer(errmsg); + auto dumpfn = [&writer](csubstr s){ writer.append(s); }; + _parse_dump(dumpfn, fmt, args...); + writer.append('\n'); + _fmt_msg(dumpfn); + size_t len = writer.pos < RYML_ERRMSG_SIZE ? writer.pos : RYML_ERRMSG_SIZE; + m_tree->m_callbacks.m_error(errmsg, len, m_state->pos, m_tree->m_callbacks.m_user_data); +} + +//----------------------------------------------------------------------------- +#ifdef RYML_DBG +template +void Parser::_dbg(csubstr fmt, Args const& C4_RESTRICT ...args) const +{ + auto dumpfn = [](csubstr s){ fwrite(s.str, 1, s.len, stdout); }; + _parse_dump(dumpfn, fmt, args...); + dumpfn("\n"); + _fmt_msg(dumpfn); +} +#endif + +//----------------------------------------------------------------------------- +bool Parser::_finished_file() const +{ + bool ret = m_state->pos.offset >= m_buf.len; + if(ret) + { + _c4dbgp("finished file!!!"); + } + return ret; +} + +//----------------------------------------------------------------------------- +bool Parser::_finished_line() const +{ + return m_state->line_contents.rem.empty(); +} + +//----------------------------------------------------------------------------- +void Parser::parse_in_place(csubstr file, substr buf, Tree *t, size_t node_id) +{ + m_file = file; + m_buf = buf; + m_root_id = node_id; + m_tree = t; + _reset(); + while( ! _finished_file()) + { + _scan_line(); + while( ! 
_finished_line()) + _handle_line(); + if(_finished_file()) + break; // it may have finished because of multiline blocks + _line_ended(); + } + _handle_finished_file(); +} + +//----------------------------------------------------------------------------- +/* End-of-input hook: only ends the stream. */ void Parser::_handle_finished_file() +{ + _end_stream(); +} + +//----------------------------------------------------------------------------- +/* Dispatches the unconsumed part of the current line on the state flags: RSEQ or RMAP (flow or block variant depending on FLOW), else RUNK. When the selected handler returns false, or no flag matches, the line is offered to _handle_top(). */ void Parser::_handle_line() +{ + _c4dbgq("\n-----------"); + _c4dbgt("handling line={}, offset={}B", m_state->pos.line, m_state->pos.offset); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! m_state->line_contents.rem.empty()); + if(has_any(RSEQ)) + { + if(has_any(FLOW)) + { + if(_handle_seq_flow()) + return; + } + else + { + if(_handle_seq_blck()) + return; + } + } + else if(has_any(RMAP)) + { + if(has_any(FLOW)) + { + if(_handle_map_flow()) + return; + } + else + { + if(_handle_map_blck()) + return; + } + } + else if(has_any(RUNK)) + { + if(_handle_unk()) + return; + } + + if(_handle_top()) + return; +} + + +//----------------------------------------------------------------------------- +/* Handler selected by _handle_line() while RUNK is set: inspects the next token to start a document, a seq or a map, or to store a scalar. Returns true when it consumed or handled input, false otherwise. */ bool Parser::_handle_unk() +{ + _c4dbgp("handle_unk"); + + csubstr rem = m_state->line_contents.rem; + const bool start_as_child = (node(m_state) == nullptr); + + if(C4_UNLIKELY(has_any(NDOC))) /* NDOC set: look for a '---' document start or a '...' stream end, otherwise open an implicit document */ + { + if(rem == "---" || rem.begins_with("--- ")) + { + _start_new_doc(rem); + return true; + } + auto trimmed = rem.triml(' '); + if(trimmed == "---" || trimmed.begins_with("--- ")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len >= trimmed.len); + _line_progressed(rem.len - trimmed.len); + _start_new_doc(trimmed); + _save_indentation(); + return true; + } + else if(trimmed.begins_with("...")) + { + _end_stream(); + } + else if(trimmed.first_of("#%") == csubstr::npos) // neither a doc nor a tag + { + _c4dbgpf("starting implicit doc to accomodate unexpected tokens: '{}'", rem); + size_t indref = m_state->indref; + _push_level(); + _start_doc(); + _set_indentation(indref); + } + 
_RYML_CB_ASSERT(m_stack.m_callbacks, !trimmed.empty()); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT|RSEQ|RMAP)); + if(m_state->indref > 0) /* consume the reference indentation when the line carries at least that many leading spaces */ + { + csubstr ws = rem.left_of(rem.first_not_of(' ')); + if(m_state->indref <= ws.len) + { + _c4dbgpf("skipping base indentation of {}", m_state->indref); + _line_progressed(m_state->indref); + rem = rem.sub(m_state->indref); + } + } + + if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { /* block seq entry: pending key anchor/tag are moved to the value side, a level is pushed, the seq is started and its indentation saved */ + _c4dbgpf("it's a seq (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_seq(start_as_child); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == '-') /* same as above, but the dash is the whole remainder so only 1 char is consumed */ + { + _c4dbgpf("it's a seq (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_seq(start_as_child); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) /* flow seq: the level is pushed as explicit flow and FLOW is set; no indentation is saved */ + { + _c4dbgpf("it's a seq, flow (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(/*explicit flow*/true); + _start_seq(start_as_child); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) /* flow map: starts out expecting a key (RKEY set, RVAL cleared) */ + { + _c4dbgpf("it's a map, flow (as_child={})", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(/*explicit flow*/true); + _start_map(start_as_child); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with("? 
")) + { + _c4dbgpf("it's a map (as_child={}) + this key is complex", start_as_child); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + addrem_flags(RKEY|QMRK, RVAL); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem.begins_with(": ") && !has_all(SSCL)) /* ':' with no stored scalar: start a map whose first key is stored as a null scalar */ + { + _c4dbgp("it's a map with an empty key"); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == ':' && !has_all(SSCL)) + { + _c4dbgp("it's a map with an empty key"); + _move_key_anchor_to_val_anchor(); + _move_key_tag_to_val_tag(); + _push_level(); + _start_map(start_as_child); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(!rem.begins_with('*') && _handle_key_anchors_and_refs()) + { + return true; + } + else if(has_all(SSCL)) /* a scalar is already stored: the token that follows it decides between seq and map */ + { + _c4dbgpf("there's a stored scalar: '{}'", m_state->scalar); + + csubstr saved_scalar; + bool is_quoted; + if(_scan_scalar(&saved_scalar, &is_quoted)) + { + rem = m_state->line_contents.rem; + _c4dbgpf("... and there's also a scalar next! 
'{}'", saved_scalar); + if(rem.begins_with_any(" \t")) + { + size_t n = rem.first_not_of(" \t"); /* NOTE(review): unlike the whitespace branch further down in this function, n is not compared against npos here - confirm a whitespace-only remainder cannot reach this point */ + _c4dbgpf("skipping {} spaces/tabs", n); + rem = rem.sub(n); + _line_progressed(n); + } + } + + _c4dbgpf("rem='{}'", rem); + + if(rem.begins_with(", ")) /* ',' after the stored scalar: it was the first value of a flow seq */ + { + _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); + _start_seq(start_as_child); + add_flags(FLOW); + _append_val(_consume_scalar()); + _line_progressed(2); + } + else if(rem.begins_with(',')) + { + _c4dbgpf("got a ',' -- it's a seq (as_child={})", start_as_child); + _start_seq(start_as_child); + add_flags(FLOW); + _append_val(_consume_scalar()); + _line_progressed(1); + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("got a ': ' -- it's a map (as_child={})", start_as_child); + _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair + _line_progressed(2); + } + else if(rem == ":" || rem.begins_with(":\"") || rem.begins_with(":'")) + { + if(rem == ":") { _c4dbgpf("got a ':' -- it's a map (as_child={})", start_as_child); } + else { _c4dbgpf("got a '{}' -- it's a map (as_child={})", rem.first(2), start_as_child); } + _start_map_unk(start_as_child); // wait for the val scalar to append the key-val pair + _line_progressed(1); // advance only 1 + } + else if(rem.begins_with('}')) + { + if(!has_all(RMAP|FLOW)) + { + _c4err("invalid token: not reading a map"); + } + if(!has_all(SSCL)) + { + _c4err("no scalar stored"); + } + _append_key_val(saved_scalar); + _stop_map(); + _line_progressed(1); + } + else if(rem.begins_with("...")) + { + _c4dbgp("got stream end '...'"); + _end_stream(); + _line_progressed(3); + } + else if(rem.begins_with('#')) + { + _c4dbgpf("it's a comment: '{}'", rem); + _scan_comment(); + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(" ") || rem.begins_with("\t")) + { + size_t n = rem.first_not_of(" \t"); + if(n == npos) + n = rem.len; + 
_c4dbgpf("has {} spaces/tabs, skip...", n); /* n was clamped to rem.len just above when the remainder is only whitespace */ + _line_progressed(n); + return true; + } + else if(rem.empty()) + { + // nothing to do + } + else if(rem == "---" || rem.begins_with("--- ")) + { + _c4dbgp("caught ---: starting doc"); + _start_new_doc(rem); + return true; + } + else if(rem.begins_with('%')) + { + _c4dbgp("caught a directive: ignoring..."); + _line_progressed(rem.len); + return true; + } + else + { + _c4err("parse error"); + } + + if( ! saved_scalar.empty()) /* a scalar scanned in this call is stored for the next call */ + { + _store_scalar(saved_scalar, is_quoted); + } + + return true; + } + else /* no scalar stored yet: try to scan one and peek at what follows it */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL)); + csubstr scalar; + size_t indentation = m_state->line_contents.indentation; // save + bool is_quoted; + if(_scan_scalar(&scalar, &is_quoted)) + { + _c4dbgpf("got a {} scalar", is_quoted ? "quoted" : ""); + rem = m_state->line_contents.rem; + { + size_t first = rem.first_not_of(" \t"); + if(first && first != npos) + { + _c4dbgpf("skip {} whitespace characters", first); + _line_progressed(first); + rem = rem.sub(first); + } + } + _store_scalar(scalar, is_quoted); + if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("got a ': ' next -- it's a map (as_child={})", start_as_child); + _push_level(); + _start_map(start_as_child); // wait for the val scalar to append the key-val pair + _set_indentation(indentation); + _line_progressed(2); // call this AFTER saving the indentation + } + else if(rem == ":") + { + _c4dbgpf("got a ':' next -- it's a map (as_child={})", start_as_child); + _push_level(); + _start_map(start_as_child); // wait for the val scalar to append the key-val pair + _set_indentation(indentation); + _line_progressed(1); // call this AFTER saving the indentation + } + else + { + // we still don't know whether it's a seq or a map + // so just store the scalar + } + return true; + } + else if(rem.begins_with_any(" \t")) + { + csubstr ws = rem.left_of(rem.first_not_of(" \t")); + rem = rem.right_of(ws); + if(has_all(RTOP) 
&& rem.begins_with("---")) + { + _c4dbgp("there's a doc starting, and it's indented"); + _set_indentation(ws.len); + } + _c4dbgpf("skipping {} spaces/tabs", ws.len); + _line_progressed(ws.len); + return true; + } + } + + return false; +} + + +//----------------------------------------------------------------------------- +/* Advances the line past the leading run of character c; asserts that the remainder starts with c. */ C4_ALWAYS_INLINE void Parser::_skipchars(char c) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with(c)); + size_t pos = m_state->line_contents.rem.first_not_of(c); + if(pos == npos) + pos = m_state->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} '{}'", pos, c); + _line_progressed(pos); +} + +/* Overload taking a character-array literal: advances past the leading run of any character contained in chars. N is the array length, deduced from the argument; the template parameter list had been stripped from this header and is restored here. */ +template<size_t N> +C4_ALWAYS_INLINE void Parser::_skipchars(const char (&chars)[N]) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begins_with_any(chars)); + size_t pos = m_state->line_contents.rem.first_not_of(chars); + if(pos == npos) + pos = m_state->line_contents.rem.len; // maybe the line is just whitespace + _c4dbgpf("skip {} characters", pos); + _line_progressed(pos); +} + + +//----------------------------------------------------------------------------- +/* Handles input while inside a flow sequence (asserts RSEQ|FLOW and no RKEY): leading spaces/tabs and comments are skipped and ']' pops the level; the remaining cases depend on the RVAL/RNXT flags. */ bool Parser::_handle_seq_flow() +{ + _c4dbgpf("handle_seq_flow: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW)); + + if(rem.begins_with(' ')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with spaces"); + _skipchars(' '); + return true; + } + _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + { + _c4dbgp("starts with tabs"); + _skipchars('\t'); + return true; + }) + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); // also progresses the line + return true; + } + else if(rem.begins_with(']')) + { + _c4dbgp("end the sequence"); + _pop_level(); + _line_progressed(1); + if(has_all(RSEQIMAP)) + 
{ + _stop_seqimap(); + _pop_level(); + } + return true; + } + + if(has_any(RVAL)) /* RVAL: a value is expected next */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + bool is_quoted; + if(_scan_scalar(&rem, &is_quoted)) /* on success rem holds the scanned scalar, which is appended as a value */ + { + _c4dbgp("it's a scalar"); + addrem_flags(RNXT, RVAL); + _append_val(rem, is_quoted); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem == ':') + { + _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(1); + return true; + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgpf("found '? 
' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(SSCL) && m_state->scalar == ""); + addrem_flags(QMRK|RKEY, RVAL|SSCL); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(", ")) /* a ',' where a value was expected: a null value is appended */ + { + _c4dbgp("found ',' -- the value was null"); + _append_val_null(rem.str - 1); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("found ',' -- the value was null"); + _append_val_null(rem.str - 1); + _line_progressed(1); + return true; + } + else if(rem.begins_with('\t')) + { + _skipchars('\t'); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RNXT)) /* RNXT: a value was just read; only ',' or an implicit-map ':' is accepted */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + if(rem.begins_with(", ")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + _c4dbgp("seq: expect next val"); + addrem_flags(RVAL, RNXT); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + _c4dbgp("seq: expect next val"); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + return true; + } + else if(rem == ':') + { + _c4dbgpf("found ':' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(1); + return true; + } + else if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgpf("found ': ' -- there's an implicit map in the seq node[{}]", m_state->node_id); + _start_seqimap(); + _line_progressed(2); + return true; + } + else + { + _c4err("was expecting a comma"); + } + } + else + { + _c4err("internal error"); + } + + return true; +} + +//----------------------------------------------------------------------------- +/* Handles input while inside a non-flow sequence (asserts RSEQ, no RKEY, no FLOW); indentation changes are delegated to _handle_indentation(). */ bool Parser::_handle_seq_blck() +{ + _c4dbgpf("handle_seq_impl: node_id={} level={}", m_state->node_id, 
m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + + if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); + return true; + } + + if(has_any(RNXT)) /* RNXT: the next '-' switches the state to RVAL */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + if(_handle_indentation()) + return true; + + if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgp("expect another val"); + addrem_flags(RVAL, RNXT); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgp("expect another val"); + addrem_flags(RVAL, RNXT); + _line_progressed(1); + return true; + } + else if(rem.begins_with_any(" \t")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + _skipchars(" \t"); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("got stream end '...'"); + _end_stream(); + _line_progressed(3); + return true; + } + else if(rem.begins_with("---")) + { + _c4dbgp("got document start '---'"); + _start_new_doc(rem); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) /* RVAL: a value for the current entry is expected */ + { + // there can be empty values + if(_handle_indentation()) + return true; + + csubstr s; + bool is_quoted; + if(_scan_scalar(&s, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? 
" quoted" : ""); + + rem = m_state->line_contents.rem; + if(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(rem.begins_with_any(" \t"), rem.begins_with(' '))) + { + _c4dbgp("skipping whitespace..."); + size_t skip = rem.first_not_of(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(skip == csubstr::npos) + skip = rem.len; // maybe the line is just whitespace + _line_progressed(skip); + rem = rem.sub(skip); + } + + _c4dbgpf("rem=[{}]~~~{}~~~", rem.len, rem); + if(!rem.begins_with('#') && (rem.ends_with(':') || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) + { /* the scalar is followed by ':' so it is a map key: a val anchor/tag is moved to the key side only when no key anchor/tag is already pending */ + _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); + if(m_key_anchor.empty()) + _move_val_anchor_to_key_anchor(); + if(m_key_tag.empty()) + _move_val_tag_to_key_tag(); + addrem_flags(RNXT, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _start_map(); + _store_scalar(s, is_quoted); + if( ! _maybe_set_indentation_from_anchor_or_tag()) + { + _c4dbgpf("set indentation from scalar: {}", m_state->scalar_col); + _set_indentation(m_state->scalar_col); // this is the column where the scalar starts + } + _move_key_tag2_to_key_tag(); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + } + else + { + _c4dbgp("appending val to current seq"); + _append_val(s, is_quoted); + addrem_flags(RNXT, RVAL); + } + return true; + } + else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + if(_rval_dash_start_or_continue_seq()) + _line_progressed(2); + return true; + } + else if(rem == '-') + { + if(_rval_dash_start_or_continue_seq()) + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq, flow"); + addrem_flags(RNXT, RVAL); // before _push_level! 
+ _push_level(/*explicit flow*/true); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map, flow"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with("? ")) + { + _c4dbgp("val is a child map + this key is complex"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(); + _start_map(); + addrem_flags(QMRK|RKEY, RVAL); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem.begins_with(' ')) + { + csubstr spc = rem.left_of(rem.first_not_of(' ')); + if(_at_line_begin()) /* both arms progress by spc.len and return true; they differ only in the debug message */ + { + _c4dbgpf("skipping value indentation: {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + else + { + _c4dbgpf("skipping {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + /* pathological case: + * - &key : val + * - &key : + * - : val + */ + else if((!has_all(SSCL)) && + (rem.begins_with(": ") || rem.left_of(rem.find("#")).trimr("\t") == ":")) + { + if(!m_val_anchor.empty() || !m_val_tag.empty()) + { + _c4dbgp("val is a child map + this key is empty, with anchors or tags"); + addrem_flags(RNXT, RVAL); // before _push_level! + _move_val_tag_to_key_tag(); + _move_val_anchor_to_key_anchor(); + _push_level(); + _start_map(); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + RYML_CHECK(_maybe_set_indentation_from_anchor_or_tag()); // one of them must exist + _line_progressed(rem.begins_with(": ") ? 2u : 1u); + return true; + } + else + { + _c4dbgp("val is a child map + this key is empty, no anchors or tags"); + addrem_flags(RNXT, RVAL); // before _push_level! 
+ size_t ind = m_state->indref; + _push_level(); + _start_map(); + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY); + _c4dbgpf("set indentation from map anchor: {}", ind + 2); + _set_indentation(ind + 2); // this is the column where the map starts + _line_progressed(rem.begins_with(": ") ? 2u : 1u); + return true; + } + } + else + { + _c4err("parse error"); + } + } + + return false; +} + +//----------------------------------------------------------------------------- + +/* Called by _handle_seq_blck() on a '-' while a value is expected. When the dash sits exactly at the reference indentation, the previous value was empty: a null value is appended and false is returned, so the caller does not consume the dash. Otherwise a nested seq is started and true is returned. */ bool Parser::_rval_dash_start_or_continue_seq() +{ + size_t ind = m_state->line_contents.current_col(); + _RYML_CB_ASSERT(m_stack.m_callbacks, ind >= m_state->indref); + size_t delta_ind = ind - m_state->indref; + if( ! delta_ind) + { + _c4dbgp("prev val was empty"); + addrem_flags(RNXT, RVAL); + _append_val_null(&m_state->line_contents.full[ind]); + return false; + } + _c4dbgp("val is a nested seq, indented"); + addrem_flags(RNXT, RVAL); // before _push_level! + _push_level(); + _start_seq(); + _save_indentation(); + return true; +} + +//----------------------------------------------------------------------------- +/* Handles input while inside a flow map (asserts RMAP|FLOW): leading spaces/tabs and comments are skipped and '}' closes the map; the remaining cases depend on the RNXT/RKEY/RVAL flags. */ bool Parser::_handle_map_flow() +{ + // explicit flow, ie, inside {}, separated by commas + _c4dbgpf("handle_map_flow: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP|FLOW)); + + if(rem.begins_with(' ')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with spaces"); + _skipchars(' '); + return true; + } + _RYML_WITH_TAB_TOKENS(else if(rem.begins_with('\t')) + { + // with explicit flow, indentation does not matter + _c4dbgp("starts with tabs"); + _skipchars('\t'); + return true; + }) + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); // also progresses the line + return true; + } + else if(rem.begins_with('}')) + { + _c4dbgp("end the map"); + if(has_all(SSCL)) + { + _c4dbgp("the last val was null"); + 
_append_key_val_null(rem.str - 1); + rem_flags(RVAL); + } + _pop_level(); + _line_progressed(1); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RSEQIMAP)); + + if(rem.begins_with(", ")) + { + _c4dbgp("seq: expect next keyval"); + addrem_flags(RKEY, RNXT); + _line_progressed(2); + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("seq: expect next keyval"); + addrem_flags(RKEY, RNXT); + _line_progressed(1); + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + bool is_quoted; + if(has_none(SSCL) && _scan_scalar(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + _store_scalar(rem, is_quoted); + rem = m_state->line_contents.rem; + csubstr trimmed = rem.triml(" \t"); + if(trimmed.len && (trimmed.begins_with(": ") || trimmed.begins_with_any(":,}") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t")))) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= rem.str); + size_t num = static_cast(trimmed.str - rem.str); + _c4dbgpf("trimming {} whitespace after the scalar: '{}' --> '{}'", num, rem, rem.sub(num)); + rem = rem.sub(num); + _line_progressed(num); + } + } + + if(rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(2); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem == ':') + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to 
empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem.begins_with('?')) + { + _c4dbgp("complex key"); + add_flags(QMRK); + _line_progressed(1); + return true; + } + else if(rem.begins_with(',')) /* ',' right after a key: the pair is appended with a null value */ + { + _c4dbgp("prev scalar was a key with null value"); + _append_key_val_null(rem.str - 1); + _line_progressed(1); + return true; + } + else if(rem.begins_with('}')) + { + _c4dbgp("map terminates after a key..."); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + _c4dbgp("the last val was null"); + _append_key_val_null(rem.str - 1); + rem_flags(RVAL); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + _pop_level(); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else if(rem == "") + { + return true; + } + else + { + size_t pos = rem.first_not_of(" \t"); + if(pos == csubstr::npos) /* all-blank remainder: pos falls back to 0, so rem is left unchanged by the sub() below */ + pos = 0; + rem = rem.sub(pos); + if(rem.begins_with(':')) + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(pos + 1); + if(!has_all(SSCL)) + { + _c4dbgp("no key was found, defaulting to empty key ''"); + _store_scalar_null(rem.str); + } + return true; + } + else if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + _line_progressed(pos); + rem = _scan_comment(); // also progresses the line + return true; + } + else + { + _c4err("parse error"); + } + } + } + else if(has_any(RVAL)) /* RVAL: the key is stored (SSCL asserted) and its value is expected */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + bool is_quoted; + if(_scan_scalar(&rem, &is_quoted)) + { + _c4dbgp("it's a scalar"); + addrem_flags(RNXT, RVAL|RKEY); + _append_key_val(rem, is_quoted); + if(has_all(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + else 
if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq"); + addrem_flags(RNXT, RVAL|RKEY); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map"); + addrem_flags(RNXT, RVAL|RKEY); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_map(); + addrem_flags(FLOW|RKEY, RNXT|RVAL); + _line_progressed(1); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with(',')) + { + _c4dbgp("appending empty val"); + _append_key_val_null(rem.str - 1); + addrem_flags(RKEY, RVAL); + _line_progressed(1); + if(has_any(RSEQIMAP)) + { + _c4dbgp("stopping implicitly nested 1x map"); + _stop_seqimap(); + _pop_level(); + } + return true; + } + else if(has_any(RSEQIMAP) && rem.begins_with(']')) /* the ']' is not consumed in this branch (no _line_progressed call) */ + { + _c4dbgp("stopping implicitly nested 1x map"); + if(has_any(SSCL)) + { + _append_key_val_null(rem.str - 1); + } + _stop_seqimap(); + _pop_level(); + return true; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("internal error"); + } + + return false; +} + +//----------------------------------------------------------------------------- +/* Handles input while inside a non-flow map (asserts RMAP and no FLOW). */ bool Parser::_handle_map_blck() +{ + _c4dbgpf("handle_map_impl: node_id={} level={}", m_state->node_id, m_state->level); + csubstr rem = m_state->line_contents.rem; + + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RMAP)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW)); + + if(rem.begins_with('#')) + { + _c4dbgp("it's a comment"); + rem = _scan_comment(); + return true; + } + + if(has_any(RNXT)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + // actually, we don't need RNXT in indent-based maps. 
+ addrem_flags(RKEY, RNXT); + } + + if(_handle_indentation()) + return true; + + if(has_any(RKEY)) /* RKEY: a key is expected */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RVAL)); + + _c4dbgp("read scalar?"); + bool is_quoted; + if(_scan_scalar(&rem, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + if(has_all(QMRK|SSCL)) + { + _c4dbgpf("current key is QMRK; SSCL is set. so take store scalar='{}' as key and add an empty val", m_state->scalar); + _append_key_val_null(rem.str - 1); + } + _store_scalar(rem, is_quoted); + if(has_all(QMRK|RSET)) + { + _c4dbgp("it's a complex key, so use null value '~'"); + _append_key_val_null(rem.str); + } + rem = m_state->line_contents.rem; + + if(rem.begins_with(':')) /* ':' right after the key: switch to RVAL and skip the blanks that follow */ + { + _c4dbgp("wait for val"); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + rem = m_state->line_contents.rem; + if(rem.begins_with_any(" \t")) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! _at_line_begin()); + rem = rem.left_of(rem.first_not_of(" \t")); + _c4dbgpf("skip {} spaces/tabs", rem.len); + _line_progressed(rem.len); + } + } + return true; + } + else if(rem.begins_with_any(" \t")) + { + size_t pos = rem.first_not_of(" \t"); + if(pos == npos) + pos = rem.len; + _c4dbgpf("skip {} spaces/tabs", pos); + _line_progressed(pos); + return true; + } + else if(rem == '?' || rem.begins_with("? ")) + { + _c4dbgp("it's a complex key"); + _line_progressed(rem.begins_with("? ") ? 2u : 1u); + if(has_any(SSCL)) + _append_key_val_null(rem.str - 1); + add_flags(QMRK); + return true; + } + else if(has_all(QMRK) && rem.begins_with(':')) + { + _c4dbgp("complex key finished"); + if(!has_any(SSCL)) + _store_scalar_null(rem.str); + addrem_flags(RVAL, RKEY|QMRK); + _line_progressed(1); + rem = m_state->line_contents.rem; + if(rem.begins_with(' ')) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
_at_line_begin()); + _skipchars(' '); + } + return true; + } + else if(rem == ':' || rem.begins_with(": ") _RYML_WITH_TAB_TOKENS( || rem.begins_with(":\t"))) + { + _c4dbgp("key finished"); + if(!has_all(SSCL)) /* ':' with no stored scalar: the key is stored as a null scalar */ + { + _c4dbgp("key was empty..."); + _store_scalar_null(rem.str); + rem_flags(QMRK); + } + addrem_flags(RVAL, RKEY); + _line_progressed(rem == ':' ? 1 : 2); + return true; + } + else if(rem.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + _line_progressed(3); + return true; + } + else if(rem.begins_with("---")) + { + _c4dbgp("start new document '---'"); + _start_new_doc(rem); + return true; + } + else if(_handle_types()) + { + return true; + } + else if(_handle_key_anchors_and_refs()) + { + return true; + } + else + { + _c4err("parse error"); + } + } + else if(has_any(RVAL)) /* RVAL: the value for the stored key is expected */ + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RNXT)); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RKEY)); + + csubstr s; + bool is_quoted; + if(_scan_scalar(&s, &is_quoted)) // this also progresses the line + { + _c4dbgpf("it's a{} scalar", is_quoted ? " quoted" : ""); + + rem = m_state->line_contents.rem; + + if(rem.begins_with(": ")) + { + _c4dbgp("actually, the scalar is the first key of a map"); + addrem_flags(RKEY, RVAL); // before _push_level! This prepares the current level for popping by setting it to RNXT + _push_level(); + _move_scalar_from_top(); + _move_val_anchor_to_key_anchor(); + _start_map(); + _save_indentation(m_state->scalar_col); + addrem_flags(RVAL, RKEY); + _line_progressed(2); + } + else if(rem.begins_with(':')) + { + _c4dbgp("actually, the scalar is the first key of a map, and it opens a new scope"); + addrem_flags(RKEY, RVAL); // before _push_level! 
This prepares the current level for popping by setting it to RNXT + _push_level(); + _move_scalar_from_top(); + _move_val_anchor_to_key_anchor(); + _start_map(); + _save_indentation(/*behind*/s.len); + addrem_flags(RVAL, RKEY); + _line_progressed(1); + } + else /* plain value: append the key-value pair and go back to expecting a key */ + { + _c4dbgp("appending keyval to current map"); + _append_key_val(s, is_quoted); + addrem_flags(RKEY, RVAL); + } + return true; + } + else if(rem.begins_with("- ") _RYML_WITH_TAB_TOKENS( || rem.begins_with("-\t"))) + { + _c4dbgp("val is a nested seq, indented"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(); + _move_scalar_from_top(); + _start_seq(); + _save_indentation(); + _line_progressed(2); + return true; + } + else if(rem == '-') + { + _c4dbgp("maybe a seq. start unknown, indented"); + _start_unk(); + _save_indentation(); + _line_progressed(1); + return true; + } + else if(rem.begins_with('[')) + { + _c4dbgp("val is a child seq, flow"); + addrem_flags(RKEY, RVAL); // before _push_level! + _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_seq(); + add_flags(FLOW); + _line_progressed(1); + return true; + } + else if(rem.begins_with('{')) + { + _c4dbgp("val is a child map, flow"); + addrem_flags(RKEY, RVAL); // before _push_level! 
+ _push_level(/*explicit flow*/true); + _move_scalar_from_top(); + _start_map(); + addrem_flags(FLOW|RKEY, RVAL); + _line_progressed(1); + return true; + } + else if(rem.begins_with(' ')) + { + csubstr spc = rem.left_of(rem.first_not_of(' ')); + if(_at_line_begin()) + { + _c4dbgpf("skipping value indentation: {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + else + { + _c4dbgpf("skipping {} spaces", spc.len); + _line_progressed(spc.len); + return true; + } + } + else if(_handle_types()) + { + return true; + } + else if(_handle_val_anchors_and_refs()) + { + return true; + } + else if(rem.begins_with("--- ") || rem == "---" || rem.begins_with("---\t")) + { + _start_new_doc(rem); + return true; + } + else + { + _c4err("parse error"); + } + } + else + { + _c4err("internal error"); + } + + return false; +} + + +//----------------------------------------------------------------------------- +bool Parser::_handle_top() +{ + _c4dbgp("handle_top"); + csubstr rem = m_state->line_contents.rem; + + if(rem.begins_with('#')) + { + _c4dbgp("a comment line"); + _scan_comment(); + return true; + } + + csubstr trimmed = rem.triml(' '); + + if(trimmed.begins_with('%')) + { + _handle_directive(trimmed); + _line_progressed(rem.len); + return true; + } + else if(trimmed.begins_with("--- ") || trimmed == "---" || trimmed.begins_with("---\t")) + { + _start_new_doc(rem); + if(trimmed.len < rem.len) + { + _line_progressed(rem.len - trimmed.len); + _save_indentation(); + } + return true; + } + else if(trimmed.begins_with("...")) + { + _c4dbgp("end current document"); + _end_stream(); + if(trimmed.len < rem.len) + { + _line_progressed(rem.len - trimmed.len); + } + _line_progressed(3); + return true; + } + else + { + _c4err("parse error"); + } + + return false; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_handle_key_anchors_and_refs() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RVAL)); + const csubstr rem 
= m_state->line_contents.rem; + if(rem.begins_with('&')) + { + _c4dbgp("found a key anchor!!!"); + if(has_all(QMRK|SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); + _c4dbgp("there is a stored key, so this anchor is for the next element"); + _append_key_val_null(rem.str - 1); + rem_flags(QMRK); + return true; + } + csubstr anchor = rem.left_of(rem.first_of(' ')); + _line_progressed(anchor.len); + anchor = anchor.sub(1); // skip the first character + _move_key_anchor_to_val_anchor(); + _c4dbgpf("key anchor value: '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + return true; + } + else if(C4_UNLIKELY(rem.begins_with('*'))) + { + _c4err("not implemented - this should have been catched elsewhere"); + C4_NEVER_REACH(); + return false; + } + return false; +} + +bool Parser::_handle_val_anchors_and_refs() +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !has_any(RKEY)); + const csubstr rem = m_state->line_contents.rem; + if(rem.begins_with('&')) + { + csubstr anchor = rem.left_of(rem.first_of(' ')); + _line_progressed(anchor.len); + anchor = anchor.sub(1); // skip the first character + _c4dbgpf("val: found an anchor: '{}', indentation={}!!!", anchor, m_state->line_contents.current_col(rem)); + if(m_val_anchor.empty()) + { + _c4dbgpf("save val anchor: '{}'", anchor); + m_val_anchor = anchor; + m_val_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("there is a pending val anchor '{}'", m_val_anchor); + if(m_tree->is_seq(m_state->node_id)) + { + if(m_tree->has_children(m_state->node_id)) + { + _c4dbgpf("current node={} is a seq, has {} children", m_state->node_id, m_tree->num_children(m_state->node_id)); + _c4dbgpf("... 
so take the new one as a key anchor '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("current node={} is a seq, has no children", m_state->node_id); + if(m_tree->has_val_anchor(m_state->node_id)) + { + _c4dbgpf("... node={} already has val anchor: '{}'", m_state->node_id, m_tree->val_anchor(m_state->node_id)); + _c4dbgpf("... so take the new one as a key anchor '{}'", anchor); + m_key_anchor = anchor; + m_key_anchor_indentation = m_state->line_contents.current_col(rem); + } + else + { + _c4dbgpf("... so set pending val anchor: '{}' on current node {}", m_val_anchor, m_state->node_id); + m_tree->set_val_anchor(m_state->node_id, m_val_anchor); + m_val_anchor = anchor; + m_val_anchor_indentation = m_state->line_contents.current_col(rem); + } + } + } + } + return true; + } + else if(C4_UNLIKELY(rem.begins_with('*'))) + { + _c4err("not implemented - this should have been catched elsewhere"); + C4_NEVER_REACH(); + return false; + } + return false; +} + +void Parser::_move_key_anchor_to_val_anchor() +{ + if(m_key_anchor.empty()) + return; + _c4dbgpf("move current key anchor to val slot: key='{}' -> val='{}'", m_key_anchor, m_val_anchor); + if(!m_val_anchor.empty()) + _c4err("triple-pending anchor"); + m_val_anchor = m_key_anchor; + m_val_anchor_indentation = m_key_anchor_indentation; + m_key_anchor = {}; + m_key_anchor_indentation = {}; +} + +void Parser::_move_val_anchor_to_key_anchor() +{ + if(m_val_anchor.empty()) + return; + if(!_token_is_from_this_line(m_val_anchor)) + return; + _c4dbgpf("move current val anchor to key slot: key='{}' <- val='{}'", m_key_anchor, m_val_anchor); + if(!m_key_anchor.empty()) + _c4err("triple-pending anchor"); + m_key_anchor = m_val_anchor; + m_key_anchor_indentation = m_val_anchor_indentation; + m_val_anchor = {}; + m_val_anchor_indentation = {}; +} + +void Parser::_move_key_tag_to_val_tag() +{ + if(m_key_tag.empty()) + return; + _c4dbgpf("move key tag 
to val tag: key='{}' -> val='{}'", m_key_tag, m_val_tag); + m_val_tag = m_key_tag; + m_val_tag_indentation = m_key_tag_indentation; + m_key_tag.clear(); + m_key_tag_indentation = 0; +} + +void Parser::_move_val_tag_to_key_tag() +{ + if(m_val_tag.empty()) + return; + if(!_token_is_from_this_line(m_val_tag)) + return; + _c4dbgpf("move val tag to key tag: key='{}' <- val='{}'", m_key_tag, m_val_tag); + m_key_tag = m_val_tag; + m_key_tag_indentation = m_val_tag_indentation; + m_val_tag.clear(); + m_val_tag_indentation = 0; +} + +void Parser::_move_key_tag2_to_key_tag() +{ + if(m_key_tag2.empty()) + return; + _c4dbgpf("move key tag2 to key tag: key='{}' <- key2='{}'", m_key_tag, m_key_tag2); + m_key_tag = m_key_tag2; + m_key_tag_indentation = m_key_tag2_indentation; + m_key_tag2.clear(); + m_key_tag2_indentation = 0; +} + + +//----------------------------------------------------------------------------- + +bool Parser::_handle_types() +{ + csubstr rem = m_state->line_contents.rem.triml(' '); + csubstr t; + + if(rem.begins_with("!!")) + { + _c4dbgp("begins with '!!'"); + t = rem.left_of(rem.first_of(" ,")); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); + //t = t.sub(2); + if(t == "!!set") + add_flags(RSET); + } + else if(rem.begins_with("!<")) + { + _c4dbgp("begins with '!<'"); + t = rem.left_of(rem.first_of('>'), true); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 2); + //t = t.sub(2, t.len-1); + } + else if(rem.begins_with("!h!")) + { + _c4dbgp("begins with '!h!'"); + t = rem.left_of(rem.first_of(' ')); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 3); + //t = t.sub(3); + } + else if(rem.begins_with('!')) + { + _c4dbgp("begins with '!'"); + t = rem.left_of(rem.first_of(' ')); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); + //t = t.sub(1); + } + + if(t.empty()) + return false; + + if(has_all(QMRK|SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RKEY)); + _c4dbgp("there is a stored key, so this tag is for the next element"); + 
_append_key_val_null(rem.str - 1); + rem_flags(QMRK); + } + + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + const char *tag_beginning = rem.str; + #endif + size_t tag_indentation = m_state->line_contents.current_col(t); + _c4dbgpf("there was a tag: '{}', indentation={}", t, tag_indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.end() > m_state->line_contents.rem.begin()); + _line_progressed(static_cast(t.end() - m_state->line_contents.rem.begin())); + { + size_t pos = m_state->line_contents.rem.first_not_of(" \t"); + if(pos != csubstr::npos) + _line_progressed(pos); + } + + if(has_all(RMAP|RKEY)) + { + _c4dbgpf("saving map key tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_key_tag.empty()); + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + else if(has_all(RMAP|RVAL)) + { + /* foo: !!str + * !!str : bar */ + rem = m_state->line_contents.rem; + rem = rem.left_of(rem.find("#")); + rem = rem.trimr(" \t"); + _c4dbgpf("rem='{}'", rem); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(rem == ':' || rem.begins_with(": ")) + { + _c4dbgp("the last val was null, and this is a tag from a null key"); + _append_key_val_null(tag_beginning - 1); + _store_scalar_null(rem.str - 1); + // do not change the flag to key, it is ~ + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begin() > m_state->line_contents.rem.begin()); + size_t token_len = rem == ':' ? 
1 : 2; + _line_progressed(static_cast(token_len + rem.begin() - m_state->line_contents.rem.begin())); + } + #endif + _c4dbgpf("saving map val tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else if(has_all(RSEQ|RVAL) || has_all(RTOP|RUNK|NDOC)) + { + if(m_val_tag.empty()) + { + _c4dbgpf("saving seq/doc val tag '{}'", t); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else + { + _c4dbgpf("saving seq/doc key tag '{}'", t); + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + } + else if(has_all(RTOP|RUNK) || has_any(RUNK)) + { + rem = m_state->line_contents.rem; + rem = rem.left_of(rem.find("#")); + rem = rem.trimr(" \t"); + if(rem.empty()) + { + _c4dbgpf("saving val tag '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_tag.empty()); + m_val_tag = t; + m_val_tag_indentation = tag_indentation; + } + else + { + _c4dbgpf("saving key tag '{}'", t); + if(m_key_tag.empty()) + { + m_key_tag = t; + m_key_tag_indentation = tag_indentation; + } + else + { + /* handle this case: + * !!str foo: !!map + * !!int 1: !!float 20.0 + * !!int 3: !!float 40.0 + * + * (m_key_tag would be !!str and m_key_tag2 would be !!int) + */ + m_key_tag2 = t; + m_key_tag2_indentation = tag_indentation; + } + } + } + else + { + _c4err("internal error"); + } + + if(m_val_tag.not_empty()) + { + YamlTag_e tag = to_tag(t); + if(tag == TAG_STR) + { + _c4dbgpf("tag '{}' is a str-type tag", t); + if(has_all(RTOP|RUNK|NDOC)) + { + _c4dbgpf("docval. slurping the string. pos={}", m_state->pos.offset); + csubstr scalar = _slurp_doc_scalar(); + _c4dbgpf("docval. after slurp: {}, at node {}: '{}'", m_state->pos.offset, m_state->node_id, scalar); + m_tree->to_val(m_state->node_id, scalar, DOC); + _c4dbgpf("docval. 
val tag {} -> {}", m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + if(!m_val_anchor.empty()) + { + _c4dbgpf("setting val anchor[{}]='{}'", m_state->node_id, m_val_anchor); + m_tree->set_val_anchor(m_state->node_id, m_val_anchor); + m_val_anchor.clear(); + } + _end_stream(); + } + } + } + return true; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_slurp_doc_scalar() +{ + csubstr s = m_state->line_contents.rem; + size_t pos = m_state->pos.offset; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.find("---") != csubstr::npos); + _c4dbgpf("slurp 0 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + if(s.len == 0) + { + _line_ended(); + _scan_line(); + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + } + + size_t skipws = s.first_not_of(" \t"); + _c4dbgpf("slurp 1 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + if(skipws != npos) + { + _line_progressed(skipws); + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + _c4dbgpf("slurp 2 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, m_val_anchor.empty()); + _handle_val_anchors_and_refs(); + if(!m_val_anchor.empty()) + { + s = m_state->line_contents.rem; + skipws = s.first_not_of(" \t"); + if(skipws != npos) + { + _line_progressed(skipws); + } + s = m_state->line_contents.rem; + pos = m_state->pos.offset; + _c4dbgpf("slurp 3 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + } + + if(s.begins_with('\'')) + { + m_state->scalar_col = m_state->line_contents.current_col(s); + return _scan_squot_scalar(); + } + else if(s.begins_with('"')) + { + m_state->scalar_col = m_state->line_contents.current_col(s); + return _scan_dquot_scalar(); + } + else if(s.begins_with('|') || s.begins_with('>')) + { + return _scan_block(); + } + + _c4dbgpf("slurp 4 '{}'. 
REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() + pos); + _line_progressed(static_cast(s.end() - (m_buf.begin() + pos))); + + _c4dbgpf("slurp 5 '{}'. REM='{}'", s, m_buf.sub(m_state->pos.offset)); + + if(_at_line_end()) + { + _c4dbgpf("at line end. curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + return s; +} + +//----------------------------------------------------------------------------- +bool Parser::_scan_scalar(csubstr *C4_RESTRICT scalar, bool *C4_RESTRICT quoted) +{ + csubstr s = m_state->line_contents.rem; + if(s.len == 0) + return false; + s = s.trim(" \t"); + if(s.len == 0) + return false; + + if(s.begins_with('\'')) + { + _c4dbgp("got a ': scanning single-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_squot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('"')) + { + _c4dbgp("got a \": scanning double-quoted scalar"); + m_state->scalar_col = m_state->line_contents.current_col(s); + *scalar = _scan_dquot_scalar(); + *quoted = true; + return true; + } + else if(s.begins_with('|') || s.begins_with('>')) + { + *scalar = _scan_block(); + *quoted = false; + return true; + } + else if(has_any(RTOP) && _is_doc_sep(s)) + { + return false; + } + else if(has_any(RSEQ)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(RKEY)); + if(has_all(RVAL)) + { + _c4dbgp("RSEQ|RVAL"); + if( ! 
_is_scalar_next__rseq_rval(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + if(s.ends_with(':')) + { + --s.len; + } + else + { + auto first = s.first_of_any(": " _RYML_WITH_TAB_TOKENS( , ":\t"), " #"); + if(first) + s.len = first.pos; + } + if(has_all(FLOW)) + { + _c4dbgp("RSEQ|RVAL|EXPL"); + s = s.left_of(s.first_of(",]")); + } + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + } + else + { + _c4err("internal error"); + } + } + else if(has_any(RMAP)) + { + if( ! _is_scalar_next__rmap(s)) + return false; + size_t colon_space = s.find(": "); + if(colon_space == npos) + { + _RYML_WITH_OR_WITHOUT_TAB_TOKENS( + // with tab tokens + colon_space = s.find(":\t"); + if(colon_space == npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + colon_space = s.find(':'); + if(colon_space != s.len-1) + colon_space = npos; + } + , + // without tab tokens + colon_space = s.find(':'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len > 0); + if(colon_space != s.len-1) + colon_space = npos; + ) + } + + if(has_all(RKEY)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !s.begins_with(' ')); + if(has_any(QMRK)) + { + _c4dbgp("RMAP|RKEY|CPLX"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_any(RMAP)); + if(s.begins_with("? ") || s == '?') + return false; + s = s.left_of(colon_space); + s = s.left_of(s.first_of("#")); + if(has_any(FLOW)) + s = s.left_of(s.first_of(':')); + s = s.trimr(" \t"); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _c4dbgp("RMAP|RKEY"); + _RYML_CB_CHECK(m_stack.m_callbacks, !s.begins_with('{')); + if(s.begins_with("? 
") || s == '?') + return false; + s = s.left_of(colon_space); + s = s.trimr(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(has_any(FLOW)) + { + _c4dbgpf("RMAP|RKEY|EXPL: '{}'", s); + s = s.left_of(s.first_of(",}")); + if(s.ends_with(':')) + s = s.offs(0, 1); + } + else if(s.begins_with("---")) + { + return false; + } + else if(s.begins_with("...")) + { + return false; + } + } + } + else if(has_all(RVAL)) + { + _c4dbgp("RMAP|RVAL"); + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(QMRK)); + if( ! _is_scalar_next__rmap_val(s)) + return false; + _RYML_WITH_TAB_TOKENS(else if(s.begins_with("-\t")) + return false; + ) + s = s.left_of(s.find(" #")); // is there a comment? + s = s.left_of(s.find("\t#")); // is there a comment? + if(has_any(FLOW)) + { + _c4dbgp("RMAP|RVAL|EXPL"); + if(has_none(RSEQIMAP)) + s = s.left_of(s.first_of(",}")); + else + s = s.left_of(s.first_of(",]")); + } + s = s.trim(_RYML_WITH_OR_WITHOUT_TAB_TOKENS(" \t", ' ')); + if(s.begins_with("---")) + return false; + else if(s.begins_with("...")) + return false; + } + else + { + _c4err("parse error"); + } + } + else if(has_all(RUNK)) + { + _c4dbgpf("RUNK '[{}]~~~{}~~~", s.len, s); + if( ! _is_scalar_next__runk(s)) + { + _c4dbgp("RUNK: no scalar next"); + return false; + } + s = s.left_of(s.find(" #")); + size_t pos = s.find(": "); + if(pos != npos) + s = s.left_of(pos); + else if(s.ends_with(':')) + s = s.left_of(s.len-1); + _RYML_WITH_TAB_TOKENS( + else if((pos = s.find(":\t")) != npos) // TABS + s = s.left_of(pos); + ) + else + s = s.left_of(s.first_of(',')); + s = s.trim(" \t"); + _c4dbgpf("RUNK: scalar='{}'", s); + } + else + { + _c4err("not implemented"); + } + + if(s.empty()) + return false; + + m_state->scalar_col = m_state->line_contents.current_col(s); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.str >= m_state->line_contents.rem.str); + _line_progressed(static_cast(s.str - m_state->line_contents.rem.str) + s.len); + + if(_at_line_end() && s != '~') + { + _c4dbgpf("at line end. 
curr='{}'", s); + s = _extend_scanned_scalar(s); + } + + _c4dbgpf("scalar was '{}'", s); + + *scalar = s; + *quoted = false; + return true; +} + +//----------------------------------------------------------------------------- + +csubstr Parser::_extend_scanned_scalar(csubstr s) +{ + if(has_all(RMAP|RKEY|QMRK)) + { + size_t scalar_indentation = has_any(FLOW) ? 0 : m_state->scalar_col; + _c4dbgpf("extend_scalar: explicit key! indref={} scalar_indentation={} scalar_col={}", m_state->indref, scalar_indentation, m_state->scalar_col); + csubstr n = _scan_to_next_nonempty_line(scalar_indentation); + if(!n.empty()) + { + substr full = _scan_complex_key(s, n).trimr(" \t\r\n"); + if(full != s) + s = _filter_plain_scalar(full, scalar_indentation); + } + } + // deal with plain (unquoted) scalars that continue to the next line + else if(!s.begins_with_any("*")) // cannot be a plain scalar if it starts with * (that's an anchor reference) + { + _c4dbgpf("extend_scalar: line ended, scalar='{}'", s); + if(has_none(FLOW)) + { + size_t scalar_indentation = m_state->indref + 1; + if(has_all(RUNK) && scalar_indentation == 1) + scalar_indentation = 0; + csubstr n = _scan_to_next_nonempty_line(scalar_indentation); + if(!n.empty()) + { + _c4dbgpf("rscalar[IMPL]: state_indref={} state_indentation={} scalar_indentation={}", m_state->indref, m_state->line_contents.indentation, scalar_indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.full.is_super(n)); + substr full = _scan_plain_scalar_blck(s, n, scalar_indentation); + if(full.len >= s.len) + s = _filter_plain_scalar(full, scalar_indentation); + } + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(FLOW)); + csubstr n = _scan_to_next_nonempty_line(/*indentation*/0); + if(!n.empty()) + { + _c4dbgp("rscalar[FLOW]"); + substr full = _scan_plain_scalar_flow(s, n); + s = _filter_plain_scalar(full, /*indentation*/0); + } + } + } + + return s; +} + + 
+//-----------------------------------------------------------------------------
+
+//! Extend a plain (unquoted) scalar over the following lines; this is the
+//! variant used by _extend_scanned_scalar() when the FLOW flag is set.
+//!
+//! @param currscalar  the scalar scanned so far; its start is the start of
+//!                    the returned span
+//! @param peeked_line the next non-empty line, as previously returned by
+//!                    _scan_to_next_nonempty_line(0)
+//! @return the span of m_buf going from the start of currscalar up to the
+//!         parser offset reached by the scan, with trailing "\n\r "
+//!         characters trimmed
+//!
+//! Lines keep being appended until one of these is found:
+//!  - any of the characters "[]{}?#,": the scalar stops right before it;
+//!  - while reading a map key (RMAP|RKEY) or in the unknown state (RUNK):
+//!    a line which, left-trimmed of spaces, is ":" or begins with ": "
+//!    (nothing of that line is consumed), or a ':' found before the first
+//!    special character (the scalar stops right before the colon).
+//! When no continuation line is available, an error is raised through
+//! _c4err().
+substr Parser::_scan_plain_scalar_flow(csubstr currscalar, csubstr peeked_line)
+{
+    static constexpr const csubstr chars = "[]{}?#,";
+    size_t pos = peeked_line.first_of(chars);
+    bool first = true;
+    // pos == 0 means the peeked line starts with a special character, in
+    // which case nothing from it belongs to the scalar
+    while(pos != 0)
+    {
+        if(has_all(RMAP|RKEY) || has_any(RUNK))
+        {
+            csubstr tpkl = peeked_line.triml(' ').trimr("\r\n");
+            if(tpkl.begins_with(": ") || tpkl == ':')
+            {
+                _c4dbgpf("rscalar[EXPL]: map value starts on the peeked line: '{}'", peeked_line);
+                peeked_line = peeked_line.first(0);
+                break;
+            }
+            else
+            {
+                auto colon_pos = peeked_line.first_of_any(": ", ":");
+                if(colon_pos && colon_pos.pos < pos)
+                {
+                    peeked_line = peeked_line.first(colon_pos.pos);
+                    _c4dbgpf("rscalar[EXPL]: found colon at {}. peeked='{}'", colon_pos.pos, peeked_line);
+                    // the assertion guarantees a non-negative difference,
+                    // so the conversion to size_t is lossless
+                    _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());
+                    _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));
+                    break;
+                }
+            }
+        }
+        if(pos != npos)
+        {
+            _c4dbgpf("rscalar[EXPL]: found special character '{}' at {}, stopping: '{}'", peeked_line[pos], pos, peeked_line.left_of(pos).trimr("\r\n"));
+            peeked_line = peeked_line.left_of(pos);
+            // same as above: asserted non-negative before converting
+            _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.end() >= m_state->line_contents.rem.begin());
+            _line_progressed(static_cast<size_t>(peeked_line.end() - m_state->line_contents.rem.begin()));
+            break;
+        }
+        _c4dbgpf("rscalar[EXPL]: append another line, full: '{}'", peeked_line.trimr("\r\n"));
+        // the explicit advance is skipped only on the first iteration
+        if(!first)
+        {
+            RYML_CHECK(_advance_to_peeked());
+        }
+        peeked_line = _scan_to_next_nonempty_line(/*indentation*/0);
+        if(peeked_line.empty())
+        {
+            _c4err("expected token or continuation");
+        }
+        pos = peeked_line.first_of(chars);
+        first = false;
+    }
+    // (begin, end) span: from the start of the scalar to the current offset
+    substr full(m_buf.str + (currscalar.str - m_buf.str), m_buf.begin() + m_state->pos.offset);
+    full = full.trimr("\n\r ");
+    return full;
+}
+
+
+//----------------------------------------------------------------------------- + +substr Parser::_scan_plain_scalar_blck(csubstr currscalar, csubstr peeked_line, size_t indentation) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); + // NOTE. there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice + // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar + _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); + size_t offs = static_cast(currscalar.end() - m_buf.begin()); + _RYML_CB_ASSERT(m_stack.m_callbacks, peeked_line.begins_with(' ', indentation)); + while(true) + { + _c4dbgpf("rscalar[IMPL]: continuing... ref_indentation={}", indentation); + if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + { + _c4dbgpf("rscalar[IMPL]: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + else if(( ! peeked_line.begins_with(' ', indentation))) // is the line deindented? + { + if(!peeked_line.trim(" \r\n\t").empty()) // is the line not blank? + { + _c4dbgpf("rscalar[IMPL]: deindented line, not blank -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + _c4dbgpf("rscalar[IMPL]: line is blank and has less indentation: ref={} line={}: '{}'", indentation, peeked_line.first_not_of(' ') == csubstr::npos ? 0 : peeked_line.first_not_of(' '), peeked_line.trimr("\r\n")); + _c4dbgpf("rscalar[IMPL]: ... searching for a line starting at indentation {}", indentation); + csubstr next_peeked = _scan_to_next_nonempty_line(indentation); + if(next_peeked.empty()) + { + _c4dbgp("rscalar[IMPL]: ... finished."); + break; + } + _c4dbgp("rscalar[IMPL]: ... 
continuing."); + peeked_line = next_peeked; + } + + _c4dbgpf("rscalar[IMPL]: line contents: '{}'", peeked_line.right_of(indentation, true).trimr("\r\n")); + size_t token_pos; + if(peeked_line.find(": ") != npos) + { + _line_progressed(peeked_line.find(": ")); + _c4err("': ' is not a valid token in plain flow (unquoted) scalars"); + } + else if(peeked_line.ends_with(':')) + { + _line_progressed(peeked_line.find(':')); + _c4err("lines cannot end with ':' in plain flow (unquoted) scalars"); + } + else if((token_pos = peeked_line.find(" #")) != npos) + { + _line_progressed(token_pos); + break; + //_c4err("' #' is not a valid token in plain flow (unquoted) scalars"); + } + + _c4dbgpf("rscalar[IMPL]: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); + if(!_advance_to_peeked()) + { + _c4dbgp("rscalar[IMPL]: file finishes after the scalar"); + break; + } + peeked_line = m_state->line_contents.rem; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); + substr full(m_buf.str + (currscalar.str - m_buf.str), + currscalar.len + (m_state->pos.offset - offs)); + full = full.trimr("\r\n "); + return full; +} + +substr Parser::_scan_complex_key(csubstr currscalar, csubstr peeked_line) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(currscalar)); + // NOTE. 
there's a problem with _scan_to_next_nonempty_line(), as it counts newlines twice + // size_t offs = m_state->pos.offset; // so we workaround by directly counting from the end of the given scalar + _RYML_CB_ASSERT(m_stack.m_callbacks, currscalar.end() >= m_buf.begin()); + size_t offs = static_cast(currscalar.end() - m_buf.begin()); + while(true) + { + _c4dbgp("rcplxkey: continuing..."); + if(peeked_line.begins_with("...") || peeked_line.begins_with("---")) + { + _c4dbgpf("rcplxkey: document termination next -- bail now '{}'", peeked_line.trimr("\r\n")); + break; + } + else + { + size_t pos = peeked_line.first_of("?:[]{}"); + if(pos == csubstr::npos) + { + pos = peeked_line.find("- "); + } + if(pos != csubstr::npos) + { + _c4dbgpf("rcplxkey: found special characters at pos={}: '{}'", pos, peeked_line.trimr("\r\n")); + _line_progressed(pos); + break; + } + } + + _c4dbgpf("rcplxkey: no special chars found '{}'", peeked_line.trimr("\r\n")); + csubstr next_peeked = _scan_to_next_nonempty_line(0); + if(next_peeked.empty()) + { + _c4dbgp("rcplxkey: empty ... finished."); + break; + } + _c4dbgp("rcplxkey: ... 
continuing."); + peeked_line = next_peeked; + + _c4dbgpf("rcplxkey: line contents: '{}'", peeked_line.trimr("\r\n")); + size_t colpos; + if((colpos = peeked_line.find(": ")) != npos) + { + _c4dbgp("rcplxkey: found ': ', stopping."); + _line_progressed(colpos); + break; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if((colpos = peeked_line.ends_with(':'))) + { + _c4dbgp("rcplxkey: ends with ':', stopping."); + _line_progressed(colpos); + break; + } + #endif + _c4dbgpf("rcplxkey: append another line: (len={})'{}'", peeked_line.len, peeked_line.trimr("\r\n")); + if(!_advance_to_peeked()) + { + _c4dbgp("rcplxkey: file finishes after the scalar"); + break; + } + peeked_line = m_state->line_contents.rem; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= offs); + substr full(m_buf.str + (currscalar.str - m_buf.str), + currscalar.len + (m_state->pos.offset - offs)); + return full; +} + +//! scans to the next non-blank line starting with the given indentation +csubstr Parser::_scan_to_next_nonempty_line(size_t indentation) +{ + csubstr next_peeked; + while(true) + { + _c4dbgpf("rscalar: ... curr offset: {} indentation={}", m_state->pos.offset, indentation); + next_peeked = _peek_next_line(m_state->pos.offset); + csubstr next_peeked_triml = next_peeked.triml(' '); + _c4dbgpf("rscalar: ... next peeked line='{}'", next_peeked.trimr("\r\n")); + if(next_peeked_triml.begins_with('#')) + { + _c4dbgp("rscalar: ... first non-space character is #"); + return {}; + } + else if(next_peeked.begins_with(' ', indentation)) + { + _c4dbgpf("rscalar: ... begins at same indentation {}, assuming continuation", indentation); + _advance_to_peeked(); + return next_peeked; + } + else // check for de-indentation + { + csubstr trimmed = next_peeked_triml.trimr("\t\r\n"); + _c4dbgpf("rscalar: ... deindented! trimmed='{}'", trimmed); + if(!trimmed.empty()) + { + _c4dbgp("rscalar: ... and not empty. 
bailing out."); + return {}; + } + } + if(!_advance_to_peeked()) + { + _c4dbgp("rscalar: file finished"); + return {}; + } + } + return {}; +} + +// returns false when the file finished +bool Parser::_advance_to_peeked() +{ + _line_progressed(m_state->line_contents.rem.len); + _line_ended(); // advances to the peeked-at line, consuming all remaining (probably newline) characters on the current line + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.first_of("\r\n") == csubstr::npos); + _c4dbgpf("advance to peeked: scan more... pos={} len={}", m_state->pos.offset, m_buf.len); + _scan_line(); // puts the peeked-at line in the buffer + if(_finished_file()) + { + _c4dbgp("rscalar: finished file!"); + return false; + } + return true; +} + +//----------------------------------------------------------------------------- + +C4_ALWAYS_INLINE size_t _extend_from_combined_newline(char nl, char following) +{ + return (nl == '\n' && following == '\r') || (nl == '\r' && following == '\n'); +} + +//! look for the next newline chars, and jump to the right of those +csubstr from_next_line(csubstr rem) +{ + size_t nlpos = rem.first_of("\r\n"); + if(nlpos == csubstr::npos) + return {}; + const char nl = rem[nlpos]; + rem = rem.right_of(nlpos); + if(rem.empty()) + return {}; + if(_extend_from_combined_newline(nl, rem.front())) + rem = rem.sub(1); + return rem; +} + +csubstr Parser::_peek_next_line(size_t pos) const +{ + csubstr rem{}; // declare here because of the goto + size_t nlpos{}; // declare here because of the goto + pos = pos == npos ? 
m_state->pos.offset : pos; + if(pos >= m_buf.len) + goto next_is_empty; + + // look for the next newline chars, and jump to the right of those + rem = from_next_line(m_buf.sub(pos)); + if(rem.empty()) + goto next_is_empty; + + // now get everything up to and including the following newline chars + nlpos = rem.first_of("\r\n"); + if((nlpos != csubstr::npos) && (nlpos + 1 < rem.len)) + nlpos += _extend_from_combined_newline(rem[nlpos], rem[nlpos+1]); + rem = rem.left_of(nlpos, /*include_pos*/true); + + _c4dbgpf("peek next line @ {}: (len={})'{}'", pos, rem.len, rem.trimr("\r\n")); + return rem; + +next_is_empty: + _c4dbgpf("peek next line @ {}: (len=0)''", pos); + return {}; +} + + +//----------------------------------------------------------------------------- +void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset) +{ + RYML_ASSERT(offset <= buf.len); + char const* C4_RESTRICT b = &buf[offset]; + char const* C4_RESTRICT e = b; + // get the current line stripped of newline chars + while(e < buf.end() && (*e != '\n' && *e != '\r')) + ++e; + RYML_ASSERT(e >= b); + const csubstr stripped_ = buf.sub(offset, static_cast(e - b)); + // advance pos to include the first line ending + if(e != buf.end() && *e == '\r') + ++e; + if(e != buf.end() && *e == '\n') + ++e; + RYML_ASSERT(e >= b); + const csubstr full_ = buf.sub(offset, static_cast(e - b)); + reset(full_, stripped_); +} + +void Parser::_scan_line() +{ + if(m_state->pos.offset >= m_buf.len) + { + m_state->line_contents.reset(m_buf.last(0), m_buf.last(0)); + return; + } + m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset); +} + + +//----------------------------------------------------------------------------- +void Parser::_line_progressed(size_t ahead) +{ + _c4dbgpf("line[{}] ({} cols) progressed by {}: col {}-->{} offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead); 
+    m_state->pos.offset += ahead;
+    m_state->pos.col += ahead;
+    // col is 1-based here: it may sit at most one past the last stripped char
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col <= m_state->line_contents.stripped.len+1);
+    m_state->line_contents.rem = m_state->line_contents.rem.sub(ahead);
+}
+
+/** Move the parse position past the current line's terminator.
+ * Asserts that the column is one past the stripped line, then advances
+ * pos.offset by the terminator length (full.len - stripped.len),
+ * increments pos.line and resets pos.col to 1. */
+void Parser::_line_ended()
+{
+    _c4dbgpf("line[{}] ({} cols) ended! offset {}-->{}", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == m_state->line_contents.stripped.len+1);
+    m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len;
+    ++m_state->pos.line;
+    m_state->pos.col = 1;
+}
+
+/** Reverse the position changes made by _line_ended(): requires col == 1
+ * and line > 0, subtracts the terminator length from pos.offset,
+ * decrements pos.line and puts pos.col back at stripped.len + 1. */
+void Parser::_line_ended_undo()
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.col == 1u);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line > 0u);
+    // guards the unsigned subtraction performed below
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.offset >= m_state->line_contents.full.len - m_state->line_contents.stripped.len);
+    _c4dbgpf("line[{}] undo ended! 
line {}-->{}, offset {}-->{}", m_state->pos.line, m_state->pos.line, m_state->pos.line - 1, m_state->pos.offset, m_state->pos.offset - (m_state->line_contents.full.len - m_state->line_contents.stripped.len)); + m_state->pos.offset -= m_state->line_contents.full.len - m_state->line_contents.stripped.len; + --m_state->pos.line; + m_state->pos.col = m_state->line_contents.stripped.len + 1u; +} + +//----------------------------------------------------------------------------- +void Parser::_set_indentation(size_t indentation) +{ + m_state->indref = indentation; + _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); +} + +void Parser::_save_indentation(size_t behind) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->line_contents.rem.begin() >= m_state->line_contents.full.begin()); + m_state->indref = static_cast(m_state->line_contents.rem.begin() - m_state->line_contents.full.begin()); + _RYML_CB_ASSERT(m_stack.m_callbacks, behind <= m_state->indref); + m_state->indref -= behind; + _c4dbgpf("state[{}]: saving indentation: {}", m_state-m_stack.begin(), m_state->indref); +} + +bool Parser::_maybe_set_indentation_from_anchor_or_tag() +{ + if(m_key_anchor.not_empty()) + { + _c4dbgpf("set indentation from key anchor: {}", m_key_anchor_indentation); + _set_indentation(m_key_anchor_indentation); // this is the column where the anchor starts + return true; + } + else if(m_key_tag.not_empty()) + { + _c4dbgpf("set indentation from key tag: {}", m_key_tag_indentation); + _set_indentation(m_key_tag_indentation); // this is the column where the tag starts + return true; + } + return false; +} + + +//----------------------------------------------------------------------------- +void Parser::_write_key_anchor(size_t node_id) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->has_key(node_id)); + if( ! 
m_key_anchor.empty()) + { + _c4dbgpf("node={}: set key anchor to '{}'", node_id, m_key_anchor); + m_tree->set_key_anchor(node_id, m_key_anchor); + m_key_anchor.clear(); + m_key_anchor_was_before = false; + m_key_anchor_indentation = 0; + } + else if( ! m_tree->is_key_quoted(node_id)) + { + csubstr r = m_tree->key(node_id); + if(r.begins_with('*')) + { + _c4dbgpf("node={}: set key reference: '{}'", node_id, r); + m_tree->set_key_ref(node_id, r.sub(1)); + } + else if(r == "<<") + { + m_tree->set_key_ref(node_id, r); + _c4dbgpf("node={}: it's an inheriting reference", node_id); + if(m_tree->is_seq(node_id)) + { + _c4dbgpf("node={}: inheriting from seq of {}", node_id, m_tree->num_children(node_id)); + for(size_t i = m_tree->first_child(node_id); i != NONE; i = m_tree->next_sibling(i)) + { + if( ! (m_tree->val(i).begins_with('*'))) + _c4err("malformed reference: '{}'", m_tree->val(i)); + } + } + else if( ! m_tree->val(node_id).begins_with('*')) + { + _c4err("malformed reference: '{}'", m_tree->val(node_id)); + } + //m_tree->set_key_ref(node_id, r); + } + } +} + +//----------------------------------------------------------------------------- +void Parser::_write_val_anchor(size_t node_id) +{ + if( ! m_val_anchor.empty()) + { + _c4dbgpf("node={}: set val anchor to '{}'", node_id, m_val_anchor); + m_tree->set_val_anchor(node_id, m_val_anchor); + m_val_anchor.clear(); + } + csubstr r = m_tree->has_val(node_id) ? m_tree->val(node_id) : ""; + if(!m_tree->is_val_quoted(node_id) && r.begins_with('*')) + { + _c4dbgpf("node={}: set val reference: '{}'", node_id, r); + RYML_CHECK(!m_tree->has_val_anchor(node_id)); + m_tree->set_val_ref(node_id, r.sub(1)); + } +} + +//----------------------------------------------------------------------------- +void Parser::_push_level(bool explicit_flow_chars) +{ + _c4dbgpf("pushing level! 
currnode={} currlevel={} stacksize={} stackcap={}", m_state->node_id, m_state->level, m_stack.size(), m_stack.capacity()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top()); + if(node(m_state) == nullptr) + { + _c4dbgp("pushing level! actually no, current node is null"); + //_RYML_CB_ASSERT(m_stack.m_callbacks, ! explicit_flow_chars); + return; + } + flag_t st = RUNK; + if(explicit_flow_chars || has_all(FLOW)) + { + st |= FLOW; + } + m_stack.push_top(); + m_state = &m_stack.top(); + set_flags(st); + m_state->node_id = (size_t)NONE; + m_state->indref = (size_t)NONE; + ++m_state->level; + _c4dbgpf("pushing level: now, currlevel={}", m_state->level); +} + +void Parser::_pop_level() +{ + _c4dbgpf("popping level! currnode={} currlevel={}", m_state->node_id, m_state->level); + if(has_any(RMAP) || m_tree->is_map(m_state->node_id)) + { + _stop_map(); + } + if(has_any(RSEQ) || m_tree->is_seq(m_state->node_id)) + { + _stop_seq(); + } + if(m_tree->is_doc(m_state->node_id)) + { + _stop_doc(); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.size() > 1); + _prepare_pop(); + m_stack.pop(); + m_state = &m_stack.top(); + /*if(has_any(RMAP)) + { + _toggle_key_val(); + }*/ + if(m_state->line_contents.indentation == 0) + { + //_RYML_CB_ASSERT(m_stack.m_callbacks, has_none(RTOP)); + add_flags(RTOP); + } + _c4dbgpf("popping level: now, currnode={} currlevel={}", m_state->node_id, m_state->level); +} + +//----------------------------------------------------------------------------- +void Parser::_start_unk(bool /*as_child*/) +{ + _c4dbgp("start_unk"); + _push_level(); + _move_scalar_from_top(); +} + +//----------------------------------------------------------------------------- +void Parser::_start_doc(bool as_child) +{ + _c4dbgpf("start_doc (as child={})", as_child); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_root(parent_id)); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + _c4dbgpf("start_doc: parent={}", parent_id); + if( ! m_tree->is_stream(parent_id)) + { + _c4dbgp("start_doc: rearranging with root as STREAM"); + m_tree->set_root_as_stream(); + } + m_state->node_id = m_tree->append_child(parent_id); + m_tree->to_doc(m_state->node_id); + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(parent_id) || m_tree->empty(parent_id)); + m_state->node_id = parent_id; + if( ! m_tree->is_doc(parent_id)) + { + m_tree->to_doc(parent_id, DOC); + } + } + #endif + _c4dbgpf("start_doc: id={}", m_state->node_id); + add_flags(RUNK|RTOP|NDOC); + _handle_types(); + rem_flags(NDOC); +} + +void Parser::_stop_doc() +{ + size_t doc_node = m_state->node_id; + _c4dbgpf("stop_doc[{}]", doc_node); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_doc(doc_node)); + if(!m_tree->is_seq(doc_node) && !m_tree->is_map(doc_node) && !m_tree->is_val(doc_node)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); + _c4dbgpf("stop_doc[{}]: there was nothing; adding null val", doc_node); + m_tree->to_val(doc_node, {}, DOC); + } +} + +void Parser::_end_stream() +{ + _c4dbgpf("end_stream, level={} node_id={}", m_state->level, m_state->node_id); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! 
m_stack.empty()); + NodeData *added = nullptr; + if(has_any(SSCL)) + { + if(m_tree->is_seq(m_state->node_id)) + { + _c4dbgp("append val..."); + added = _append_val(_consume_scalar()); + } + else if(m_tree->is_map(m_state->node_id)) + { + _c4dbgp("append null key val..."); + added = _append_key_val_null(m_state->line_contents.rem.str); + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(has_any(RSEQIMAP)) + { + _stop_seqimap(); + _pop_level(); + } + #endif + } + else if(m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE) + { + NodeType_e quoted = has_any(QSCL) ? VALQUO : NOTYPE; // do this before consuming the scalar + csubstr scalar = _consume_scalar(); + _c4dbgpf("node[{}]: to docval '{}'{}", m_state->node_id, scalar, quoted == VALQUO ? ", quoted" : ""); + m_tree->to_val(m_state->node_id, scalar, DOC|quoted); + added = m_tree->get(m_state->node_id); + } + else + { + _c4err("internal error"); + } + } + else if(has_all(RSEQ|RVAL) && has_none(FLOW)) + { + _c4dbgp("add last..."); + added = _append_val_null(m_state->line_contents.rem.str); + } + else if(!m_val_tag.empty() && (m_tree->is_doc(m_state->node_id) || m_tree->type(m_state->node_id) == NOTYPE)) + { + csubstr scalar = m_state->line_contents.rem.first(0); + _c4dbgpf("node[{}]: add null scalar as docval", m_state->node_id); + m_tree->to_val(m_state->node_id, scalar, DOC); + added = m_tree->get(m_state->node_id); + } + + if(added) + { + size_t added_id = m_tree->id(added); + if(m_tree->is_seq(m_state->node_id) || m_tree->is_doc(m_state->node_id)) + { + if(!m_key_anchor.empty()) + { + _c4dbgpf("node[{}]: move key to val anchor: '{}'", added_id, m_key_anchor); + m_val_anchor = m_key_anchor; + m_key_anchor = {}; + } + if(!m_key_tag.empty()) + { + _c4dbgpf("node[{}]: move key to val tag: '{}'", added_id, m_key_tag); + m_val_tag = m_key_tag; + m_key_tag = {}; + } + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(!m_key_anchor.empty()) + { + _c4dbgpf("node[{}]: set key anchor='{}'", added_id, 
m_key_anchor); + m_tree->set_key_anchor(added_id, m_key_anchor); + m_key_anchor = {}; + } + #endif + if(!m_val_anchor.empty()) + { + _c4dbgpf("node[{}]: set val anchor='{}'", added_id, m_val_anchor); + m_tree->set_val_anchor(added_id, m_val_anchor); + m_val_anchor = {}; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(!m_key_tag.empty()) + { + _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", added_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(added_id, normalize_tag(m_key_tag)); + m_key_tag = {}; + } + #endif + if(!m_val_tag.empty()) + { + _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", added_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(added_id, normalize_tag(m_val_tag)); + m_val_tag = {}; + } + } + + while(m_stack.size() > 1) + { + _c4dbgpf("popping level: {} (stack sz={})", m_state->level, m_stack.size()); + _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_any(SSCL, &m_stack.top())); + if(has_all(RSEQ|FLOW)) + _err("closing ] not found"); + _pop_level(); + } + add_flags(NDOC); +} + +void Parser::_start_new_doc(csubstr rem) +{ + _c4dbgp("_start_new_doc"); + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.begins_with("---")); + C4_UNUSED(rem); + + _end_stream(); + + size_t indref = m_state->indref; + _c4dbgpf("start a document, indentation={}", indref); + _line_progressed(3); + _push_level(); + _start_doc(); + _set_indentation(indref); +} + + +//----------------------------------------------------------------------------- +void Parser::_start_map(bool as_child) +{ + _c4dbgpf("start_map (as child={})", as_child); + addrem_flags(RMAP|RVAL, RKEY|RUNK); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + m_state->node_id = m_tree->append_child(parent_id); + if(has_all(SSCL)) + { + type_bits key_quoted = NOTYPE; + if(m_state->flags & QSCL) // before consuming the scalar + key_quoted |= KEYQUO; + csubstr key = _consume_scalar(); + m_tree->to_map(m_state->node_id, key, key_quoted); + _c4dbgpf("start_map: id={} key='{}'", m_state->node_id, m_tree->key(m_state->node_id)); + _write_key_anchor(m_state->node_id); + if( ! m_key_tag.empty()) + { + _c4dbgpf("node[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + } + else + { + m_tree->to_map(m_state->node_id); + _c4dbgpf("start_map: id={}", m_state->node_id); + } + m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; + _write_val_anchor(m_state->node_id); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + m_state->node_id = parent_id; + _c4dbgpf("start_map: id={}", m_state->node_id); + type_bits as_doc = 0; + if(m_tree->is_doc(m_state->node_id)) + as_doc |= DOC; + if(!m_tree->is_map(parent_id)) + { + RYML_CHECK(!m_tree->has_children(parent_id)); + m_tree->to_map(parent_id, as_doc); + } + else + { + m_tree->_add_flags(parent_id, as_doc); + } + _move_scalar_from_top(); + if(m_key_anchor.not_empty()) + m_key_anchor_was_before = true; + _write_val_anchor(parent_id); + if(m_stack.size() >= 2) + { + State const& parent_state = m_stack.top(1); + if(parent_state.flags & RSET) + add_flags(RSET); + } + m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + if( ! 
m_val_tag.empty()) + { + _c4dbgpf("node[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag)); + m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag)); + m_val_tag.clear(); + } +} + +void Parser::_start_map_unk(bool as_child) +{ + if(!m_key_anchor_was_before) + { + _c4dbgpf("stash key anchor before starting map... '{}'", m_key_anchor); + csubstr ka = m_key_anchor; + m_key_anchor = {}; + _start_map(as_child); + m_key_anchor = ka; + } + else + { + _start_map(as_child); + m_key_anchor_was_before = false; + } + if(m_key_tag2.not_empty()) + { + m_key_tag = m_key_tag2; + m_key_tag_indentation = m_key_tag2_indentation; + m_key_tag2.clear(); + m_key_tag2_indentation = 0; + } +} + +void Parser::_stop_map() +{ + _c4dbgpf("stop_map[{}]", m_state->node_id); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id)); + if(has_all(QMRK|RKEY) && !has_all(SSCL)) + { + _c4dbgpf("stop_map[{}]: RKEY", m_state->node_id); + _store_scalar_null(m_state->line_contents.rem.str); + _append_key_val_null(m_state->line_contents.rem.str); + } +} + + +//----------------------------------------------------------------------------- +void Parser::_start_seq(bool as_child) +{ + _c4dbgpf("start_seq (as child={})", as_child); + if(has_all(RTOP|RUNK)) + { + _c4dbgpf("start_seq: moving key tag to val tag: '{}'", m_key_tag); + m_val_tag = m_key_tag; + m_key_tag.clear(); + } + addrem_flags(RSEQ|RVAL, RUNK); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_stack.bottom()) == node(m_root_id)); + size_t parent_id = m_stack.size() < 2 ? 
m_root_id : m_stack.top(1).node_id; + _RYML_CB_ASSERT(m_stack.m_callbacks, parent_id != NONE); + _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) == nullptr || node(m_state) == node(m_root_id)); + if(as_child) + { + m_state->node_id = m_tree->append_child(parent_id); + if(has_all(SSCL)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(parent_id)); + type_bits key_quoted = 0; + if(m_state->flags & QSCL) // before consuming the scalar + key_quoted |= KEYQUO; + csubstr key = _consume_scalar(); + m_tree->to_seq(m_state->node_id, key, key_quoted); + _c4dbgpf("start_seq: id={} name='{}'", m_state->node_id, m_tree->key(m_state->node_id)); + _write_key_anchor(m_state->node_id); + if( ! m_key_tag.empty()) + { + _c4dbgpf("start_seq[{}]: set key tag='{}' -> '{}'", m_state->node_id, m_key_tag, normalize_tag(m_key_tag)); + m_tree->set_key_tag(m_state->node_id, normalize_tag(m_key_tag)); + m_key_tag.clear(); + } + } + else + { + type_bits as_doc = 0; + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_tree->is_doc(m_state->node_id)); + m_tree->to_seq(m_state->node_id, as_doc); + _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as doc" : ""); + } + _write_val_anchor(m_state->node_id); + m_tree->_p(m_state->node_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + else + { + m_state->node_id = parent_id; + type_bits as_doc = 0; + if(m_tree->is_doc(m_state->node_id)) + as_doc |= DOC; + if(!m_tree->is_seq(parent_id)) + { + RYML_CHECK(!m_tree->has_children(parent_id)); + m_tree->to_seq(parent_id, as_doc); + } + else + { + m_tree->_add_flags(parent_id, as_doc); + } + _move_scalar_from_top(); + _c4dbgpf("start_seq: id={}{}", m_state->node_id, as_doc ? " as_doc" : ""); + _write_val_anchor(parent_id); + m_tree->_p(parent_id)->m_val.scalar.str = m_state->line_contents.rem.str; + } + if( ! 
m_val_tag.empty())
+    {
+        _c4dbgpf("start_seq[{}]: set val tag='{}' -> '{}'", m_state->node_id, m_val_tag, normalize_tag(m_val_tag));
+        m_tree->set_val_tag(m_state->node_id, normalize_tag(m_val_tag));
+        m_val_tag.clear();
+    }
+}
+
+/** Close a sequence. Only checks (via assertion) that the current node
+ * is a sequence; no state is modified here. */
+void Parser::_stop_seq()
+{
+    _c4dbgp("stop_seq");
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id));
+}
+
+
+//-----------------------------------------------------------------------------
+/** Open a map implicitly inside a flow sequence (state must have
+ * RSEQ|FLOW). If the sequence's last child holds a val, that child is
+ * removed and its scalar, anchor and tag are re-used as the pending key
+ * of the new map; otherwise a null scalar is stored as the key. */
+void Parser::_start_seqimap()
+{
+    _c4dbgpf("start_seqimap at node={}. has_children={}", m_state->node_id, m_tree->has_children(m_state->node_id));
+    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQ|FLOW));
+    // create a map, and turn the last scalar of this sequence
+    // into the key of the map's first child. This scalar was
+    // understood to be a value in the sequence, but it is
+    // actually a key of a map, implicitly opened here.
+    // Eg [val, key: val]
+    //
+    // Yep, YAML is crazy.
+    if(m_tree->has_children(m_state->node_id) && m_tree->has_val(m_tree->last_child(m_state->node_id)))
+    {
+        size_t prev = m_tree->last_child(m_state->node_id);
+        NodeType ty = m_tree->_p(prev)->m_type; // don't use type() because it masks out the quotes
+        // copy the val scalar before the node is removed below
+        NodeScalar tmp = m_tree->valsc(prev);
+        _c4dbgpf("has children and last child={} has val. 
saving the scalars, val='{}' quoted={}", prev, tmp.scalar, ty.is_val_quoted());
+        m_tree->remove(prev);
+        _push_level();
+        _start_map();
+        // the former sequence value becomes the pending key of the new map
+        _store_scalar(tmp.scalar, ty.is_val_quoted());
+        m_key_anchor = tmp.anchor;
+        m_key_tag = tmp.tag;
+    }
+    else
+    {
+        _c4dbgpf("node {} has no children yet, using empty key", m_state->node_id);
+        _push_level();
+        _start_map();
+        _store_scalar_null(m_state->line_contents.rem.str);
+    }
+    add_flags(RSEQIMAP|FLOW);
+}
+
+/** Close an implicit map opened by _start_seqimap(). Only asserts that
+ * the RSEQIMAP flag is set; no state is modified here. */
+void Parser::_stop_seqimap()
+{
+    _c4dbgp("stop_seqimap");
+    _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(RSEQIMAP));
+}
+
+
+//-----------------------------------------------------------------------------
+/** Append @p val as a new child val node of the current node, which is
+ * asserted to be a sequence. No scalar may be pending (SSCL unset).
+ * @param val    the scalar to append
+ * @param quoted when nonzero the new node is flagged VALQUO
+ * @return the node data of the appended child */
+NodeData* Parser::_append_val(csubstr val, flag_t quoted)
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks, ! has_all(SSCL));
+    _RYML_CB_ASSERT(m_stack.m_callbacks, node(m_state) != nullptr);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_seq(m_state->node_id));
+    type_bits additional_flags = quoted ? VALQUO : NOTYPE;
+    _c4dbgpf("append val: '{}' to parent id={} (level={}){}", val, m_state->node_id, m_state->level, quoted ? " VALQUO!" : "");
+    size_t nid = m_tree->append_child(m_state->node_id);
+    m_tree->to_val(nid, val, additional_flags);
+
+    _c4dbgpf("append val: id={} val='{}'", nid, m_tree->get(nid)->m_val.scalar);
+    if( ! 
m_val_tag.empty())
+    {
+        // a pending val tag is applied (normalized) to the new node and then cleared
+        _c4dbgpf("append val[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag));
+        m_tree->set_val_tag(nid, normalize_tag(m_val_tag));
+        m_val_tag.clear();
+    }
+    _write_val_anchor(nid);
+    return m_tree->get(nid);
+}
+
+/** Append a key/val child to the current node, which is asserted to be a
+ * map. The key is the scalar currently stored in the state (taken with
+ * _consume_scalar()); pending key/val tags and anchors are applied to the
+ * new node, and the QMRK flag is removed.
+ * @param val        the value scalar
+ * @param val_quoted when nonzero the new node is flagged VALQUO
+ * @return the node data of the appended child */
+NodeData* Parser::_append_key_val(csubstr val, flag_t val_quoted)
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_tree->is_map(m_state->node_id));
+    type_bits additional_flags = 0;
+    // QSCL must be read before _consume_scalar(), which removes it
+    if(m_state->flags & QSCL)
+        additional_flags |= KEYQUO;
+    if(val_quoted)
+        additional_flags |= VALQUO;
+
+    csubstr key = _consume_scalar();
+    _c4dbgpf("append keyval: '{}' '{}' to parent id={} (level={}){}{}", key, val, m_state->node_id, m_state->level, (additional_flags & KEYQUO) ? " KEYQUO!" : "", (additional_flags & VALQUO) ? " VALQUO!" : "");
+    size_t nid = m_tree->append_child(m_state->node_id);
+    m_tree->to_keyval(nid, key, val, additional_flags);
+    _c4dbgpf("append keyval: id={} key='{}' val='{}'", nid, m_tree->key(nid), m_tree->val(nid));
+    if( ! m_key_tag.empty())
+    {
+        _c4dbgpf("append keyval[{}]: set key tag='{}' -> '{}'", nid, m_key_tag, normalize_tag(m_key_tag));
+        m_tree->set_key_tag(nid, normalize_tag(m_key_tag));
+        m_key_tag.clear();
+    }
+    if( ! 
m_val_tag.empty())
+    {
+        _c4dbgpf("append keyval[{}]: set val tag='{}' -> '{}'", nid, m_val_tag, normalize_tag(m_val_tag));
+        m_tree->set_val_tag(nid, normalize_tag(m_val_tag));
+        m_val_tag.clear();
+    }
+    _write_key_anchor(nid);
+    _write_val_anchor(nid);
+    rem_flags(QMRK);
+    return m_tree->get(nid);
+}
+
+
+//-----------------------------------------------------------------------------
+/** Stash @p s as the pending scalar of the current state and raise SSCL
+ * (plus QSCL when @p is_quoted is nonzero). Fails the RYML_CHECK if a
+ * scalar is already pending. */
+void Parser::_store_scalar(csubstr s, flag_t is_quoted)
+{
+    _c4dbgpf("state[{}]: storing scalar '{}' (flag: {}) (old scalar='{}')",
+             m_state-m_stack.begin(), s, m_state->flags & SSCL, m_state->scalar);
+    RYML_CHECK(has_none(SSCL));
+    // is_quoted * QSCL yields QSCL when is_quoted == 1 and 0 when it is 0
+    add_flags(SSCL | (is_quoted * QSCL));
+    m_state->scalar = s;
+}
+
+/** Take the pending scalar out of the current state: returns it, clears
+ * the stored scalar and removes SSCL and QSCL. Fails the RYML_CHECK if no
+ * scalar is pending. */
+csubstr Parser::_consume_scalar()
+{
+    _c4dbgpf("state[{}]: consuming scalar '{}' (flag: {}))", m_state-m_stack.begin(), m_state->scalar, m_state->flags & SSCL);
+    RYML_CHECK(m_state->flags & SSCL);
+    csubstr s = m_state->scalar;
+    rem_flags(SSCL | QSCL);
+    m_state->scalar.clear();
+    return s;
+}
+
+/** If the state one below the top of the stack has a pending scalar,
+ * transfer it (together with its SSCL/QSCL flags) to the current state
+ * and clear it from that lower state. Does nothing when the stack holds
+ * fewer than two states. */
+void Parser::_move_scalar_from_top()
+{
+    if(m_stack.size() < 2) return;
+    State &prev = m_stack.top(1);
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state == &m_stack.top());
+    _RYML_CB_ASSERT(m_stack.m_callbacks, m_state != &prev);
+    if(prev.flags & SSCL)
+    {
+        _c4dbgpf("moving scalar '{}' from state[{}] to state[{}] (overwriting '{}')", prev.scalar, &prev-m_stack.begin(), m_state-m_stack.begin(), m_state->scalar);
+        add_flags(prev.flags & (SSCL | QSCL));
+        m_state->scalar = prev.scalar;
+        rem_flags(SSCL | QSCL, &prev);
+        prev.scalar.clear();
+    }
+}
+
+//-----------------------------------------------------------------------------
+/** @todo this function is a monster and needs love. */
+bool Parser::_handle_indentation()
+{
+    _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(FLOW));
+    if( ! 
_at_line_begin()) + return false; + + size_t ind = m_state->line_contents.indentation; + csubstr rem = m_state->line_contents.rem; + /** @todo instead of trimming, we should use the indentation index from above */ + csubstr remt = rem.triml(' '); + + if(remt.empty() || remt.begins_with('#')) // this is a blank or comment line + { + _line_progressed(rem.size()); + return true; + } + + _c4dbgpf("indentation? ind={} indref={}", ind, m_state->indref); + if(ind == m_state->indref) + { + if(has_all(SSCL|RVAL) && ! rem.sub(ind).begins_with('-')) + { + if(has_all(RMAP)) + { + _append_key_val_null(rem.str + ind - 1); + addrem_flags(RKEY, RVAL); + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(has_all(RSEQ)) + { + _append_val(_consume_scalar()); + addrem_flags(RNXT, RVAL); + } + else + { + _c4err("internal error"); + } + #endif + } + else if(has_all(RSEQ|RNXT) && ! rem.sub(ind).begins_with('-')) + { + if(m_stack.size() > 2) // do not pop to root level + { + _c4dbgp("end the indentless seq"); + _pop_level(); + return true; + } + } + else + { + _c4dbgpf("same indentation ({}) -- nothing to see here", ind); + } + _line_progressed(ind); + return ind > 0; + } + else if(ind < m_state->indref) + { + _c4dbgpf("smaller indentation ({} < {})!!!", ind, m_state->indref); + if(has_all(RVAL)) + { + _c4dbgp("there was an empty val -- appending"); + if(has_all(RMAP)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_all(SSCL)); + _append_key_val_null(rem.sub(ind).str - 1); + } + else if(has_all(RSEQ)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, has_none(SSCL)); + _append_val_null(rem.sub(ind).str - 1); + } + } + // search the stack frame to jump to based on its indentation + State const* popto = nullptr; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_stack.is_contiguous()); // this search relies on the stack being contiguous + for(State const* s = m_state-1; s >= m_stack.begin(); --s) + { + _c4dbgpf("searching for state with indentation {}. 
curr={} (level={},node={})", ind, s->indref, s->level, s->node_id); + if(s->indref == ind) + { + _c4dbgpf("gotit!!! level={} node={}", s->level, s->node_id); + popto = s; + // while it may be tempting to think we're done at this + // point, we must still determine whether we're jumping to a + // parent with the same indentation. Consider this case with + // an indentless sequence: + // + // product: + // - sku: BL394D + // quantity: 4 + // description: Basketball + // price: 450.00 + // - sku: BL4438H + // quantity: 1 + // description: Super Hoop + // price: 2392.00 # jumping one level here would be wrong. + // tax: 1234.5 # we must jump two levels + if(popto > m_stack.begin()) + { + auto parent = popto - 1; + if(parent->indref == popto->indref) + { + _c4dbgpf("the parent (level={},node={}) has the same indentation ({}). is this in an indentless sequence?", parent->level, parent->node_id, popto->indref); + _c4dbgpf("isseq(popto)={} ismap(parent)={}", m_tree->is_seq(popto->node_id), m_tree->is_map(parent->node_id)); + if(m_tree->is_seq(popto->node_id) && m_tree->is_map(parent->node_id)) + { + if( ! 
remt.begins_with('-')) + { + _c4dbgp("this is an indentless sequence"); + popto = parent; + } + else + { + _c4dbgp("not an indentless sequence"); + } + } + } + } + break; + } + } + if(!popto || popto >= m_state || popto->level >= m_state->level) + { + _c4err("parse error: incorrect indentation?"); + } + _c4dbgpf("popping {} levels: from level {} to level {}", m_state->level-popto->level, m_state->level, popto->level); + while(m_state != popto) + { + _c4dbgpf("popping level {} (indentation={})", m_state->level, m_state->indref); + _pop_level(); + } + _RYML_CB_ASSERT(m_stack.m_callbacks, ind == m_state->indref); + _line_progressed(ind); + return true; + } + else + { + _c4dbgpf("larger indentation ({} > {})!!!", ind, m_state->indref); + _RYML_CB_ASSERT(m_stack.m_callbacks, ind > m_state->indref); + if(has_all(RMAP|RVAL)) + { + if(_is_scalar_next__rmap_val(remt) && remt.first_of(":?") == npos) + { + _c4dbgpf("actually it seems a value: '{}'", remt); + } + else + { + addrem_flags(RKEY, RVAL); + _start_unk(); + //_move_scalar_from_top(); + _line_progressed(ind); + _save_indentation(); + return true; + } + } + else if(has_all(RSEQ|RVAL)) + { + // nothing to do here + } + else + { + _c4err("parse error - indentation should not increase at this point"); + } + } + + return false; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_comment() +{ + csubstr s = m_state->line_contents.rem; + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('#')); + _line_progressed(s.len); + // skip the # character + s = s.sub(1); + // skip leading whitespace + s = s.right_of(s.first_not_of(' '), /*include_pos*/true); + _c4dbgpf("comment was '{}'", s); + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_squot_scalar() +{ + // quoted scalars can spread over multiple lines! 
+    // nice explanation here: http://yaml-multiline.info/
+
+    // a span to the end of the file
+    size_t b = m_state->pos.offset;
+    substr s = m_buf.sub(b);
+    // skip any spaces in front of the opening quote, advancing the parse
+    // position by the same amount
+    if(s.begins_with(' '))
+    {
+        s = s.triml(' ');
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s));
+        _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
+        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
+    }
+    b = m_state->pos.offset; // take this into account
+    _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('\''));
+
+    // skip the opening quote
+    _line_progressed(1);
+    s = s.sub(1);
+
+    bool needs_filter = false;
+
+    size_t numlines = 1; // we already have one line
+    size_t pos = npos; // find the pos of the matching quote
+    // scan line by line until the closing quote is found or the file ends
+    while( ! _finished_file())
+    {
+        const csubstr line = m_state->line_contents.rem;
+        bool line_is_blank = true;
+        _c4dbgpf("scanning single quoted scalar @ line[{}]: ~~~{}~~~", m_state->pos.line, line);
+        for(size_t i = 0; i < line.len; ++i)
+        {
+            const char curr = line.str[i];
+            if(curr == '\'') // single quotes are escaped with two single quotes
+            {
+                // '~' is a stand-in for "no next char" at the end of the line
+                const char next = i+1 < line.len ? 
line.str[i+1] : '~';
+                if(next != '\'') // so just look for the first quote
+                {                // without another after it
+                    pos = i;
+                    break;
+                }
+                else
+                {
+                    needs_filter = true; // needs filter to remove escaped quotes
+                    ++i; // skip the escaped quote
+                }
+            }
+            else if(curr != ' ')
+            {
+                line_is_blank = false;
+            }
+        }
+
+        // leading whitespace also needs filtering
+        needs_filter = needs_filter
+            || numlines > 1
+            || line_is_blank
+            || (_at_line_begin() && line.begins_with(' '))
+            || (m_state->line_contents.full.last_of('\r') != csubstr::npos);
+
+        if(pos == npos)
+        {
+            // no closing quote on this line: consume it whole and go on
+            _line_progressed(line.len);
+            ++numlines;
+        }
+        else
+        {
+            _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len);
+            _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '\'');
+            _line_progressed(pos + 1); // progress beyond the quote
+            // from here on pos is relative to b (the opening quote), not to the line
+            pos = m_state->pos.offset - b - 1; // but we stop before it
+            break;
+        }
+
+        _line_ended();
+        _scan_line();
+    }
+
+    if(pos == npos)
+    {
+        _c4err("reached end of file while looking for closing quote");
+    }
+    else
+    {
+        _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end());
+        _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '\'');
+        // s starts one char after b (the opening quote was skipped), hence pos-1
+        s = s.sub(0, pos-1);
+    }
+
+    if(needs_filter)
+    {
+        csubstr ret = _filter_squot_scalar(s);
+        _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty());
+        _c4dbgpf("final scalar: \"{}\"", ret);
+        return ret;
+    }
+
+    _c4dbgpf("final scalar: \"{}\"", s);
+
+    return s;
+}
+
+//-----------------------------------------------------------------------------
+csubstr Parser::_scan_dquot_scalar()
+{
+    // quoted scalars can spread over multiple lines! 
+    // nice explanation here: http://yaml-multiline.info/
+
+    // a span to the end of the file
+    size_t b = m_state->pos.offset;
+    substr s = m_buf.sub(b);
+    // skip any spaces in front of the opening quote, advancing the parse
+    // position by the same amount
+    if(s.begins_with(' '))
+    {
+        s = s.triml(' ');
+        _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.sub(b).is_super(s));
+        _RYML_CB_ASSERT(m_stack.m_callbacks, s.begin() >= m_buf.sub(b).begin());
+        _line_progressed((size_t)(s.begin() - m_buf.sub(b).begin()));
+    }
+    b = m_state->pos.offset; // take this into account
+    _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('"'));
+
+    // skip the opening quote
+    _line_progressed(1);
+    s = s.sub(1);
+
+    bool needs_filter = false;
+
+    size_t numlines = 1; // we already have one line
+    size_t pos = npos; // find the pos of the matching quote
+    // scan line by line until the closing quote is found or the file ends
+    while( ! _finished_file())
+    {
+        const csubstr line = m_state->line_contents.rem;
+        bool line_is_blank = true;
+        _c4dbgpf("scanning double quoted scalar @ line[{}]: line='{}'", m_state->pos.line, line);
+        for(size_t i = 0; i < line.len; ++i)
+        {
+            const char curr = line.str[i];
+            if(curr != ' ')
+                line_is_blank = false;
+            // every \ is an escape
+            if(curr == '\\')
+            {
+                // '~' is a stand-in for "no next char" at the end of the line
+                const char next = i+1 < line.len ? 
line.str[i+1] : '~'; + needs_filter = true; + if(next == '"' || next == '\\') + ++i; + } + else if(curr == '"') + { + pos = i; + break; + } + } + + // leading whitespace also needs filtering + needs_filter = needs_filter + || numlines > 1 + || line_is_blank + || (_at_line_begin() && line.begins_with(' ')) + || (m_state->line_contents.full.last_of('\r') != csubstr::npos); + + if(pos == npos) + { + _line_progressed(line.len); + ++numlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos >= 0 && pos < m_buf.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf[m_state->pos.offset + pos] == '"'); + _line_progressed(pos + 1); // progress beyond the quote + pos = m_state->pos.offset - b - 1; // but we stop before it + break; + } + + _line_ended(); + _scan_line(); + } + + if(pos == npos) + { + _c4err("reached end of file looking for closing quote"); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, pos > 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() == m_buf.end() || *s.end() == '"'); + _RYML_CB_ASSERT(m_stack.m_callbacks, s.end() >= m_buf.begin() && s.end() <= m_buf.end()); + s = s.sub(0, pos-1); + } + + if(needs_filter) + { + csubstr ret = _filter_dquot_scalar(s); + _c4dbgpf("final scalar: [{}]\"{}\"", ret.len, ret); + _RYML_CB_ASSERT(m_stack.m_callbacks, ret.len <= s.len || s.empty() || s.trim(' ').empty()); + return ret; + } + + _c4dbgpf("final scalar: \"{}\"", s); + + return s; +} + +//----------------------------------------------------------------------------- +csubstr Parser::_scan_block() +{ + // nice explanation here: http://yaml-multiline.info/ + csubstr s = m_state->line_contents.rem; + csubstr trimmed = s.triml(' '); + if(trimmed.str > s.str) + { + _c4dbgp("skipping whitespace"); + _RYML_CB_ASSERT(m_stack.m_callbacks, trimmed.str >= s.str); + _line_progressed(static_cast(trimmed.str - s.str)); + s = trimmed; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with('|') || s.begins_with('>')); + + _c4dbgpf("scanning block: specs=\"{}\"", s); + 
+ // parse the spec + BlockStyle_e newline = s.begins_with('>') ? BLOCK_FOLD : BLOCK_LITERAL; + BlockChomp_e chomp = CHOMP_CLIP; // default to clip unless + or - are used + size_t indentation = npos; // have to find out if no spec is given + csubstr digits; + if(s.len > 1) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, s.begins_with_any("|>")); + csubstr t = s.sub(1); + _c4dbgpf("scanning block: spec is multichar: '{}'", t); + _RYML_CB_ASSERT(m_stack.m_callbacks, t.len >= 1); + size_t pos = t.first_of("-+"); + _c4dbgpf("scanning block: spec chomp char at {}", pos); + if(pos != npos) + { + if(t[pos] == '-') + chomp = CHOMP_STRIP; + else if(t[pos] == '+') + chomp = CHOMP_KEEP; + if(pos == 0) + t = t.sub(1); + else + t = t.first(pos); + } + // from here to the end, only digits are considered + digits = t.left_of(t.first_not_of("0123456789")); + if( ! digits.empty()) + { + if( ! c4::atou(digits, &indentation)) + _c4err("parse error: could not read decimal"); + _c4dbgpf("scanning block: indentation specified: {}. add {} from curr state -> {}", indentation, m_state->indref, indentation+m_state->indref); + indentation += m_state->indref; + } + } + + // finish the current line + _line_progressed(s.len); + _line_ended(); + _scan_line(); + + _c4dbgpf("scanning block: style={} chomp={} indentation={}", newline==BLOCK_FOLD ? "fold" : "literal", + chomp==CHOMP_CLIP ? "clip" : (chomp==CHOMP_STRIP ? "strip" : "keep"), indentation); + + // start with a zero-length block, already pointing at the right place + substr raw_block(m_buf.data() + m_state->pos.offset, size_t(0));// m_state->line_contents.full.sub(0, 0); + _RYML_CB_ASSERT(m_stack.m_callbacks, raw_block.begin() == m_state->line_contents.full.begin()); + + // read every full line into a raw block, + // from which newlines are to be stripped as needed. + // + // If no explicit indentation was given, pick it from the first + // non-empty line. 
See + // https://yaml.org/spec/1.2.2/#8111-block-indentation-indicator + size_t num_lines = 0, first = m_state->pos.line, provisional_indentation = npos; + LineContents lc; + while(( ! _finished_file())) + { + // peek next line, but do not advance immediately + lc.reset_with_next_line(m_buf, m_state->pos.offset); + _c4dbgpf("scanning block: peeking at '{}'", lc.stripped); + // evaluate termination conditions + if(indentation != npos) + { + // stop when the line is deindented and not empty + if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty())) + { + _c4dbgpf("scanning block: indentation decreased ref={} thisline={}", indentation, lc.indentation); + break; + } + else if(indentation == 0) + { + if((lc.rem == "..." || lc.rem.begins_with("... ")) + || + (lc.rem == "---" || lc.rem.begins_with("--- "))) + { + _c4dbgp("scanning block: stop. indentation=0 and stream ended"); + break; + } + } + } + else + { + _c4dbgpf("scanning block: indentation ref not set. firstnonws={}", lc.stripped.first_not_of(' ')); + if(lc.stripped.first_not_of(' ') != npos) // non-empty line + { + _c4dbgpf("scanning block: line not empty. indref={} indprov={} indentation={}", m_state->indref, provisional_indentation, lc.indentation); + if(provisional_indentation == npos) + { + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + if(lc.indentation < m_state->indref) + { + _c4dbgpf("scanning block: block terminated indentation={} < indref={}", lc.indentation, m_state->indref); + break; + } + else + #endif + if(lc.indentation == m_state->indref) + { + if(has_any(RSEQ|RMAP)) + { + _c4dbgpf("scanning block: block terminated. 
reading container and indentation={}==indref={}", lc.indentation, m_state->indref); + break; + } + } + _c4dbgpf("scanning block: set indentation ref from this line: ref={}", lc.indentation); + indentation = lc.indentation; + } + else + { + if(lc.indentation >= provisional_indentation) + { + _c4dbgpf("scanning block: set indentation ref from provisional indentation: provisional_ref={}, thisline={}", provisional_indentation, lc.indentation); + //indentation = provisional_indentation ? provisional_indentation : lc.indentation; + indentation = lc.indentation; + } + else + { + break; + //_c4err("parse error: first non-empty block line should have at least the original indentation"); + } + } + } + else // empty line + { + _c4dbgpf("scanning block: line empty or {} spaces. line_indentation={} prov_indentation={}", lc.stripped.len, lc.indentation, provisional_indentation); + if(provisional_indentation != npos) + { + if(lc.stripped.len >= provisional_indentation) + { + _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.stripped.len); + provisional_indentation = lc.stripped.len; + } + #ifdef RYML_NO_COVERAGE__TO_BE_DELETED + else if(lc.indentation >= provisional_indentation && lc.indentation != npos) + { + _c4dbgpf("scanning block: increase provisional_ref {} -> {}", provisional_indentation, lc.indentation); + provisional_indentation = lc.indentation; + } + #endif + } + else + { + provisional_indentation = lc.indentation ? lc.indentation : has_any(RSEQ|RVAL); + _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); + if(provisional_indentation == npos) + { + provisional_indentation = lc.stripped.len ? 
lc.stripped.len : has_any(RSEQ|RVAL); + _c4dbgpf("scanning block: initialize provisional_ref={}", provisional_indentation); + } + } + } + } + // advance now that we know the folded scalar continues + m_state->line_contents = lc; + _c4dbgpf("scanning block: append '{}'", m_state->line_contents.rem); + raw_block.len += m_state->line_contents.full.len; + _line_progressed(m_state->line_contents.rem.len); + _line_ended(); + ++num_lines; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, m_state->pos.line == (first + num_lines)); + C4_UNUSED(num_lines); + C4_UNUSED(first); + + if(indentation == npos) + { + _c4dbgpf("scanning block: set indentation from provisional: {}", provisional_indentation); + indentation = provisional_indentation; + } + + if(num_lines) + _line_ended_undo(); + + _c4dbgpf("scanning block: raw=~~~{}~~~", raw_block); + + // ok! now we strip the newlines and spaces according to the specs + s = _filter_block_scalar(raw_block, newline, chomp, indentation); + + _c4dbgpf("scanning block: final=~~~{}~~~", s); + + return s; +} + + +//----------------------------------------------------------------------------- + +template +bool Parser::_filter_nl(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfnl(fmt, ...) _c4dbgpf("filter_nl[{}]: " fmt, *i, __VA_ARGS__) + #else + #define _c4dbgfnl(...) + #endif + + const char curr = r[*i]; + bool replaced = false; + + _RYML_CB_ASSERT(m_stack.m_callbacks, indentation != npos); + _RYML_CB_ASSERT(m_stack.m_callbacks, curr == '\n'); + + _c4dbgfnl("found newline. sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + size_t ii = *i; + size_t numnl_following = count_following_newlines(r, &ii, indentation); + if(numnl_following) + { + _c4dbgfnl("{} consecutive (empty) lines {} in the middle. totalws={}", 1+numnl_following, ii < r.len ? 
"in the middle" : "at the end", ii - *i); + for(size_t j = 0; j < numnl_following; ++j) + m_filter_arena.str[(*pos)++] = '\n'; + } + else + { + if(r.first_not_of(" \t", *i+1) != npos) + { + m_filter_arena.str[(*pos)++] = ' '; + _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); + replaced = true; + } + else + { + if C4_IF_CONSTEXPR (keep_trailing_whitespace) + { + m_filter_arena.str[(*pos)++] = ' '; + _c4dbgfnl("single newline. convert to space. ii={}/{}. sofar=[{}]~~~{}~~~", ii, r.len, *pos, m_filter_arena.first(*pos)); + replaced = true; + } + else + { + _c4dbgfnl("last newline, everything else is whitespace. ii={}/{}", ii, r.len); + *i = r.len; + } + } + if C4_IF_CONSTEXPR (backslash_is_escape) + { + if(ii < r.len && r.str[ii] == '\\') + { + const char next = ii+1 < r.len ? r.str[ii+1] : '\0'; + if(next == ' ' || next == '\t') + { + _c4dbgfnl("extend skip to backslash{}", ""); + ++ii; + } + } + } + } + *i = ii - 1; // correct for the loop increment + + #undef _c4dbgfnl + + return replaced; +} + + +//----------------------------------------------------------------------------- + +template +void Parser::_filter_ws(substr r, size_t *C4_RESTRICT i, size_t *C4_RESTRICT pos) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfws(fmt, ...) _c4dbgpf("filt_nl[{}]: " fmt, *i, __VA_ARGS__) + #else + #define _c4dbgfws(...) + #endif + + const char curr = r[*i]; + _c4dbgfws("found whitespace '{}'", _c4prc(curr)); + _RYML_CB_ASSERT(m_stack.m_callbacks, curr == ' ' || curr == '\t'); + + size_t first = *i > 0 ? r.first_not_of(" \t", *i) : r.first_not_of(' ', *i); + if(first != npos) + { + if(r[first] == '\n' || r[first] == '\r') // skip trailing whitespace + { + _c4dbgfws("whitespace is trailing on line. firstnonws='{}'@{}", _c4prc(r[first]), first); + *i = first - 1; // correct for the loop increment + } + else // a legit whitespace + { + m_filter_arena.str[(*pos)++] = curr; + _c4dbgfws("legit whitespace. 
sofar=[{}]~~~{}~~~", *pos, m_filter_arena.first(*pos)); + } + } + else + { + _c4dbgfws("... everything else is trailing whitespace{}", ""); + if C4_IF_CONSTEXPR (keep_trailing_whitespace) + for(size_t j = *i; j < r.len; ++j) + m_filter_arena.str[(*pos)++] = r[j]; + *i = r.len; + } + + #undef _c4dbgfws +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_plain_scalar(substr s, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfps(...) _c4dbgpf("filt_plain_scalar" __VA_ARGS__) + #else + #define _c4dbgfps(...) + #endif + + _c4dbgfps("before=~~~{}~~~", s); + + substr r = s.triml(" \t"); + _grow_filter_arena(r.len); + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfps("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl(r, &i, &pos, indentation); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else + { + m_filter_arena.str[pos++] = r[i]; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgfps("#filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfps + return r; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_squot_scalar(substr s) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfsq(...) _c4dbgpf("filt_squo_scalar") + #else + #define _c4dbgfsq(...) 
+ #endif + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/single-quoted + + _c4dbgfsq(": before=~~~{}~~~", s); + + _grow_filter_arena(s.len); + substr r = s; + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r[i]; + _c4dbgfsq("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else if(curr == '\'') + { + char next = i+1 < r.len ? r[i+1] : '\0'; + if(next == '\'') + { + _c4dbgfsq("[{}]: two consecutive quotes", i); + filtered_chars = true; + m_filter_arena.str[pos++] = '\''; + ++i; + } + } + else + { + m_filter_arena.str[pos++] = curr; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfsq + return r; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_dquot_scalar(substr s) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfdq(...) _c4dbgpf("filt_dquo_scalar" __VA_ARGS__) + #else + #define _c4dbgfdq(...) + #endif + + _c4dbgfdq(": before=~~~{}~~~", s); + + // from the YAML spec for double-quoted scalars: + // https://yaml.org/spec/1.2-old/spec.html#style/flow/double-quoted + // + // All leading and trailing white space characters are excluded + // from the content. Each continuation line must therefore contain + // at least one non-space character. Empty lines, if any, are + // consumed as part of the line folding. 
+ + _grow_filter_arena(s.len + 2u * s.count('\\')); + substr r = s; + size_t pos = 0; // the filtered size + bool filtered_chars = false; + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r[i]; + _c4dbgfdq("[{}]: '{}'", i, _c4prc(curr)); + if(curr == ' ' || curr == '\t') + { + _filter_ws(r, &i, &pos); + } + else if(curr == '\n') + { + filtered_chars = _filter_nl(r, &i, &pos, /*indentation*/0); + } + else if(curr == '\r') // skip \r --- https://stackoverflow.com/questions/1885900 + { + ; + } + else if(curr == '\\') + { + char next = i+1 < r.len ? r[i+1] : '\0'; + _c4dbgfdq("[{}]: backslash, next='{}'", i, _c4prc(next)); + filtered_chars = true; + if(next == '\r') + { + if(i+2 < r.len && r[i+2] == '\n') + { + ++i; // newline escaped with \ -- skip both (add only one as i is loop-incremented) + next = '\n'; + _c4dbgfdq("[{}]: was \\r\\n, now next='\\n'", i); + } + } + // remember the loop will also increment i + if(next == '\n') + { + size_t ii = i + 2; + for( ; ii < r.len; ++ii) + { + if(r.str[ii] == ' ' || r.str[ii] == '\t') // skip leading whitespace + ; + else + break; + } + i += ii - i - 1; + } + else if(next == '"' || next == '/' || next == ' ' || next == '\t') // escapes for json compatibility + { + m_filter_arena.str[pos++] = next; + ++i; + } + else if(next == '\r') + { + //++i; + } + else if(next == 'n') + { + m_filter_arena.str[pos++] = '\n'; + ++i; + } + else if(next == 'r') + { + m_filter_arena.str[pos++] = '\r'; + ++i; // skip + } + else if(next == 't') + { + m_filter_arena.str[pos++] = '\t'; + ++i; + } + else if(next == '\\') + { + m_filter_arena.str[pos++] = '\\'; + ++i; + } + else if(next == 'x') // UTF8 + { + if(i + 1u + 2u >= r.len) + _c4err("\\x requires 2 hex digits"); + uint8_t byteval = {}; + if(!read_hex(r.sub(i + 2u, 2u), &byteval)) + _c4err("failed to read \\x codepoint"); + m_filter_arena.str[pos++] = *(char*)&byteval; + i += 1u + 2u; + } + else if(next == 'u') // UTF16 + { + if(i + 1u + 4u >= r.len) + _c4err("\\u requires 4 hex 
digits"); + char readbuf[8]; + csubstr codepoint = r.sub(i + 2u, 4u); + uint32_t codepoint_val = {}; + if(!read_hex(codepoint, &codepoint_val)) + _c4err("failed to parse \\u codepoint"); + size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + C4_ASSERT(numbytes <= 4); + memcpy(m_filter_arena.str + pos, readbuf, numbytes); + pos += numbytes; + i += 1u + 4u; + } + else if(next == 'U') // UTF32 + { + if(i + 1u + 8u >= r.len) + _c4err("\\U requires 8 hex digits"); + char readbuf[8]; + csubstr codepoint = r.sub(i + 2u, 8u); + uint32_t codepoint_val = {}; + if(!read_hex(codepoint, &codepoint_val)) + _c4err("failed to parse \\U codepoint"); + size_t numbytes = decode_code_point((uint8_t*)readbuf, sizeof(readbuf), codepoint_val); + C4_ASSERT(numbytes <= 4); + memcpy(m_filter_arena.str + pos, readbuf, numbytes); + pos += numbytes; + i += 1u + 8u; + } + // https://yaml.org/spec/1.2.2/#rule-c-ns-esc-char + else if(next == '0') + { + m_filter_arena.str[pos++] = '\0'; + ++i; + } + else if(next == 'b') // backspace + { + m_filter_arena.str[pos++] = '\b'; + ++i; + } + else if(next == 'f') // form feed + { + m_filter_arena.str[pos++] = '\f'; + ++i; + } + else if(next == 'a') // bell character + { + m_filter_arena.str[pos++] = '\a'; + ++i; + } + else if(next == 'v') // vertical tab + { + m_filter_arena.str[pos++] = '\v'; + ++i; + } + else if(next == 'e') // escape character + { + m_filter_arena.str[pos++] = '\x1b'; + ++i; + } + else if(next == '_') // unicode non breaking space \u00a0 + { + // https://www.compart.com/en/unicode/U+00a0 + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x60, 0xa0); + ++i; + } + else if(next == 'N') // unicode next line \u0085 + { + // https://www.compart.com/en/unicode/U+0085 + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x3e, 0xc2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x7b, 0x85); + ++i; + } + else if(next == 'L') // unicode line separator \u2028 + { + 
// https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x58, 0xa8); + ++i; + } + else if(next == 'P') // unicode paragraph separator \u2029 + { + // https://www.utf8-chartable.de/unicode-utf8-table.pl?start=8192&number=1024&names=-&utf8=0x&unicodeinhtml=hex + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x1e, 0xe2); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x80, 0x80); + m_filter_arena.str[pos++] = _RYML_CHCONST(-0x57, 0xa9); + ++i; + } + _c4dbgfdq("[{}]: backslash...sofar=[{}]~~~{}~~~", i, pos, m_filter_arena.first(pos)); + } + else + { + m_filter_arena.str[pos++] = curr; + } + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + if(pos < r.len || filtered_chars) + { + r = _finish_filter_arena(r, pos); + } + + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= r.len); + _c4dbgpf(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + + #undef _c4dbgfdq + + return r; +} + + +//----------------------------------------------------------------------------- +bool Parser::_apply_chomp(substr buf, size_t *C4_RESTRICT pos, BlockChomp_e chomp) +{ + substr trimmed = buf.first(*pos).trimr('\n'); + bool added_newline = false; + switch(chomp) + { + case CHOMP_KEEP: + if(trimmed.len == *pos) + { + _c4dbgpf("chomp=KEEP: add missing newline @{}", *pos); + //m_filter_arena.str[(*pos)++] = '\n'; + added_newline = true; + } + break; + case CHOMP_CLIP: + if(trimmed.len == *pos) + { + _c4dbgpf("chomp=CLIP: add missing newline @{}", *pos); + m_filter_arena.str[(*pos)++] = '\n'; + added_newline = true; + } + else + { + _c4dbgpf("chomp=CLIP: include single trailing newline @{}", trimmed.len+1); + *pos = trimmed.len + 1; + } + break; + case CHOMP_STRIP: + _c4dbgpf("chomp=STRIP: strip {}-{}-{} newlines", *pos, trimmed.len, *pos-trimmed.len); + *pos = 
trimmed.len; + break; + default: + _c4err("unknown chomp style"); + } + return added_newline; +} + + +//----------------------------------------------------------------------------- +csubstr Parser::_filter_block_scalar(substr s, BlockStyle_e style, BlockChomp_e chomp, size_t indentation) +{ + // a debugging scaffold: + #if 0 + #define _c4dbgfbl(fmt, ...) _c4dbgpf("filt_block" fmt, __VA_ARGS__) + #else + #define _c4dbgfbl(...) + #endif + + _c4dbgfbl(": indentation={} before=[{}]~~~{}~~~", indentation, s.len, s); + + if(chomp != CHOMP_KEEP && s.trim(" \n\r\t").len == 0u) + { + _c4dbgp("filt_block: empty scalar"); + return s.first(0); + } + + substr r = s; + + switch(style) + { + case BLOCK_LITERAL: + { + _c4dbgp("filt_block: style=literal"); + // trim leading whitespace up to indentation + { + size_t numws = r.first_not_of(' '); + if(numws != npos) + { + if(numws > indentation) + r = r.sub(indentation); + else + r = r.sub(numws); + _c4dbgfbl(": after triml=[{}]~~~{}~~~", r.len, r); + } + else + { + if(chomp != CHOMP_KEEP || r.len == 0) + { + _c4dbgfbl(": all spaces {}, return empty", r.len); + return r.first(0); + } + else + { + r[0] = '\n'; + return r.first(1); + } + } + } + _grow_filter_arena(s.len + 2u); // use s.len! 
because we may need to add a newline at the end, so the leading indentation will allow space for that newline + size_t pos = 0; // the filtered size + for(size_t i = 0; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfbl("[{}]='{}' pos={}", i, _c4prc(curr), pos); + if(curr == '\r') + continue; + m_filter_arena.str[pos++] = curr; + if(curr == '\n') + { + _c4dbgfbl("[{}]: found newline", i); + // skip indentation on the next line + csubstr rem = r.sub(i+1); + size_t first = rem.first_not_of(' '); + if(first != npos) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); + _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, rem.str[first]); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + } + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); + first = rem.len; + _c4dbgfbl("[{}]: {} spaces to the end", i, first); + if(first) + { + if(first < indentation) + { + _c4dbgfbl("[{}]: skip everything", i); + --pos; + break; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + } + } + else if(i+1 == r.len) + { + if(chomp == CHOMP_STRIP) + --pos; + break; + } + } + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, s.len >= pos); + _c4dbgfbl(": #filteredchars={} after=~~~{}~~~", s.len - r.len, r); + bool changed = _apply_chomp(m_filter_arena, &pos, chomp); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= s.len); + if(pos < r.len || changed) + { + r = _finish_filter_arena(s, pos); // write into s + } + break; + } + case BLOCK_FOLD: + { + _c4dbgp("filt_block: style=fold"); + _grow_filter_arena(r.len + 2); + size_t pos = 0; // the filtered size + bool 
filtered_chars = false; + bool started = false; + bool is_indented = false; + size_t i = r.first_not_of(' '); + _c4dbgfbl(": first non space at {}", i); + if(i > indentation) + { + is_indented = true; + i = indentation; + } + _c4dbgfbl(": start folding at {}, is_indented={}", i, (int)is_indented); + auto on_change_indentation = [&](size_t numnl_following, size_t last_newl, size_t first_non_whitespace){ + _c4dbgfbl("[{}]: add 1+{} newlines", i, numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + for(i = last_newl + 1 + indentation; i < first_non_whitespace; ++i) + { + if(r.str[i] == '\r') + continue; + _c4dbgfbl("[{}]: add '{}'", i, _c4prc(r.str[i])); + m_filter_arena.str[pos++] = r.str[i]; + } + --i; + }; + for( ; i < r.len; ++i) + { + const char curr = r.str[i]; + _c4dbgfbl("[{}]='{}'", i, _c4prc(curr)); + if(curr == '\n') + { + filtered_chars = true; + // skip indentation on the next line, and advance over the next non-indented blank lines as well + size_t first_non_whitespace; + size_t numnl_following = (size_t)-1; + while(r[i] == '\n') + { + ++numnl_following; + csubstr rem = r.sub(i+1); + size_t first = rem.first_not_of(' '); + _c4dbgfbl("[{}]: found newline. 
first={} rem.len={}", i, first, rem.len); + if(first != npos) + { + first_non_whitespace = first + i+1; + while(first_non_whitespace < r.len && r[first_non_whitespace] == '\r') + ++first_non_whitespace; + _RYML_CB_ASSERT(m_stack.m_callbacks, first < rem.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1+first < r.len); + _c4dbgfbl("[{}]: {} spaces follow before next nonws character @ [{}]='{}'", i, first, i+1+first, _c4prc(rem.str[first])); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip {}<{} spaces from indentation", i, first, indentation); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + if(first > indentation) + { + _c4dbgfbl("[{}]: {} further indented than {}, stop newlining", i, first, indentation); + goto finished_counting_newlines; + } + } + // prepare the next while loop iteration + // by setting i at the next newline after + // an empty line + if(r[first_non_whitespace] == '\n') + i = first_non_whitespace; + else + goto finished_counting_newlines; + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 <= r.len); + first = rem.len; + first_non_whitespace = first + i+1; + if(first) + { + _c4dbgfbl("[{}]: {} spaces to the end", i, first); + if(first < indentation) + { + _c4dbgfbl("[{}]: skip everything", i); + i += first; + } + else + { + _c4dbgfbl("[{}]: skip {} spaces from indentation", i, indentation); + i += indentation; + if(first > indentation) + { + _c4dbgfbl("[{}]: {} spaces missing. not done yet", i, indentation - first); + goto finished_counting_newlines; + } + } + } + else // if(i+1 == r.len) + { + _c4dbgfbl("[{}]: it's the final newline", i); + _RYML_CB_ASSERT(m_stack.m_callbacks, i+1 == r.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, rem.len == 0); + } + goto end_of_scalar; + } + } + end_of_scalar: + // Write all the trailing newlines. Since we're + // at the end no folding is needed, so write every + // newline (add 1). 
+ _c4dbgfbl("[{}]: add {} trailing newlines", i, 1+numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + break; + finished_counting_newlines: + _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); + while(first_non_whitespace < r.len && r[first_non_whitespace] == '\t') + ++first_non_whitespace; + _c4dbgfbl("[{}]: #newlines={} firstnonws={}", i, numnl_following, first_non_whitespace); + _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace <= r.len); + size_t last_newl = r.last_of('\n', first_non_whitespace); + size_t this_indentation = first_non_whitespace - last_newl - 1; + _c4dbgfbl("[{}]: #newlines={} firstnonws={} lastnewl={} this_indentation={} vs indentation={}", i, numnl_following, first_non_whitespace, last_newl, this_indentation, indentation); + _RYML_CB_ASSERT(m_stack.m_callbacks, first_non_whitespace >= last_newl + 1); + _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation >= indentation); + if(!started) + { + _c4dbgfbl("[{}]: #newlines={}. 
write all leading newlines", i, numnl_following); + for(size_t j = 0; j < 1 + numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + if(this_indentation > indentation) + { + is_indented = true; + _c4dbgfbl("[{}]: advance ->{}", i, last_newl + indentation); + i = last_newl + indentation; + } + else + { + i = first_non_whitespace - 1; + _c4dbgfbl("[{}]: advance ->{}", i, first_non_whitespace); + } + } + else if(this_indentation == indentation) + { + _c4dbgfbl("[{}]: same indentation", i); + if(!is_indented) + { + if(numnl_following == 0) + { + _c4dbgfbl("[{}]: fold!", i); + m_filter_arena.str[pos++] = ' '; + } + else + { + _c4dbgfbl("[{}]: add {} newlines", i, 1 + numnl_following); + for(size_t j = 0; j < numnl_following; ++j) + m_filter_arena.str[pos++] = '\n'; + } + i = first_non_whitespace - 1; + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + else + { + _c4dbgfbl("[{}]: back to ref indentation", i); + is_indented = false; + on_change_indentation(numnl_following, last_newl, first_non_whitespace); + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + } + else + { + _c4dbgfbl("[{}]: increased indentation.", i); + is_indented = true; + _RYML_CB_ASSERT(m_stack.m_callbacks, this_indentation > indentation); + on_change_indentation(numnl_following, last_newl, first_non_whitespace); + _c4dbgfbl("[{}]: advance {}->{}", i, i, first_non_whitespace); + } + } + else if(curr != '\r') + { + if(curr != '\t') + started = true; + m_filter_arena.str[pos++] = curr; + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _c4dbgfbl(": #filteredchars={} after=[{}]~~~{}~~~", (int)s.len - (int)pos, pos, m_filter_arena.first(pos)); + bool changed = _apply_chomp(m_filter_arena, &pos, chomp); + if(pos < r.len || filtered_chars || changed) + { + r = _finish_filter_arena(s, pos); // write into s + } + } + break; + default: + _c4err("unknown block style"); + } + + _c4dbgfbl(": final=[{}]~~~{}~~~", r.len, r); + + #undef _c4dbgfbl + 
+ return r; +} + +//----------------------------------------------------------------------------- +size_t Parser::_count_nlines(csubstr src) +{ + return 1 + src.count('\n'); +} + +//----------------------------------------------------------------------------- +void Parser::_handle_directive(csubstr directive_) +{ + csubstr directive = directive_; + if(directive.begins_with("%TAG")) + { + TagDirective td; + _c4dbgpf("%TAG directive: {}", directive_); + directive = directive.sub(4); + if(!directive.begins_with(' ')) + _c4err("malformed tag directive: {}", directive_); + directive = directive.triml(' '); + size_t pos = directive.find(' '); + if(pos == npos) + _c4err("malformed tag directive: {}", directive_); + td.handle = directive.first(pos); + directive = directive.sub(td.handle.len).triml(' '); + pos = directive.find(' '); + if(pos != npos) + directive = directive.first(pos); + td.prefix = directive; + td.next_node_id = m_tree->size(); + if(m_tree->size() > 0) + { + size_t prev = m_tree->size() - 1; + if(m_tree->is_root(prev) && m_tree->type(prev) != NOTYPE && !m_tree->is_stream(prev)) + ++td.next_node_id; + } + _c4dbgpf("%TAG: handle={} prefix={} next_node={}", td.handle, td.prefix, td.next_node_id); + m_tree->add_tag_directive(td); + } + else if(directive.begins_with("%YAML")) + { + _c4dbgpf("%YAML directive! 
ignoring...: {}", directive); + } +} + +//----------------------------------------------------------------------------- +void Parser::set_flags(flag_t f, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64]; + csubstr buf1 = _prfl(buf1_, f); + csubstr buf2 = _prfl(buf2_, s->flags); + _c4dbgpf("state[{}]: setting flags to {}: before={}", s-m_stack.begin(), buf1, buf2); +#endif + s->flags = f; +} + +void Parser::add_flags(flag_t on, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = _prfl(buf1_, on); + csubstr buf2 = _prfl(buf2_, s->flags); + csubstr buf3 = _prfl(buf3_, s->flags|on); + _c4dbgpf("state[{}]: adding flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#endif + s->flags |= on; +} + +void Parser::addrem_flags(flag_t on, flag_t off, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64], buf4_[64]; + csubstr buf1 = _prfl(buf1_, on); + csubstr buf2 = _prfl(buf2_, off); + csubstr buf3 = _prfl(buf3_, s->flags); + csubstr buf4 = _prfl(buf4_, ((s->flags|on)&(~off))); + _c4dbgpf("state[{}]: adding flags {} / removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3, buf4); +#endif + s->flags |= on; + s->flags &= ~off; +} + +void Parser::rem_flags(flag_t off, State * s) +{ +#ifdef RYML_DBG + char buf1_[64], buf2_[64], buf3_[64]; + csubstr buf1 = _prfl(buf1_, off); + csubstr buf2 = _prfl(buf2_, s->flags); + csubstr buf3 = _prfl(buf3_, s->flags&(~off)); + _c4dbgpf("state[{}]: removing flags {}: before={} after={}", s-m_stack.begin(), buf1, buf2, buf3); +#endif + s->flags &= ~off; +} + +//----------------------------------------------------------------------------- + +csubstr Parser::_prfl(substr buf, flag_t flags) +{ + size_t pos = 0; + bool gotone = false; + + #define _prflag(fl) \ + if((flags & fl) == (fl)) \ + { \ + if(gotone) \ + { \ + if(pos + 1 < buf.len) \ + buf[pos] = '|'; \ + ++pos; \ + } \ + csubstr fltxt = #fl; \ + if(pos + fltxt.len <= buf.len) \ + 
memcpy(buf.str + pos, fltxt.str, fltxt.len); \ + pos += fltxt.len; \ + gotone = true; \ + } + + _prflag(RTOP); + _prflag(RUNK); + _prflag(RMAP); + _prflag(RSEQ); + _prflag(FLOW); + _prflag(QMRK); + _prflag(RKEY); + _prflag(RVAL); + _prflag(RNXT); + _prflag(SSCL); + _prflag(QSCL); + _prflag(RSET); + _prflag(NDOC); + _prflag(RSEQIMAP); + + #undef _prflag + + RYML_ASSERT(pos <= buf.len); + + return buf.first(pos); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +void Parser::_grow_filter_arena(size_t num_characters_needed) +{ + _c4dbgpf("grow: arena={} numchars={}", m_filter_arena.len, num_characters_needed); + if(num_characters_needed <= m_filter_arena.len) + return; + size_t sz = m_filter_arena.len << 1; + _c4dbgpf("grow: sz={}", sz); + sz = num_characters_needed > sz ? num_characters_needed : sz; + _c4dbgpf("grow: sz={}", sz); + sz = sz < 128u ? 
128u : sz; + _c4dbgpf("grow: sz={}", sz); + _RYML_CB_ASSERT(m_stack.m_callbacks, sz >= num_characters_needed); + _resize_filter_arena(sz); +} + +void Parser::_resize_filter_arena(size_t num_characters) +{ + if(num_characters > m_filter_arena.len) + { + _c4dbgpf("resize: sz={}", num_characters); + char *prev = m_filter_arena.str; + if(m_filter_arena.str) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_filter_arena.len > 0); + _RYML_CB_FREE(m_stack.m_callbacks, m_filter_arena.str, char, m_filter_arena.len); + } + m_filter_arena.str = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, char, num_characters, prev); + m_filter_arena.len = num_characters; + } +} + +substr Parser::_finish_filter_arena(substr dst, size_t pos) +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= m_filter_arena.len); + _RYML_CB_ASSERT(m_stack.m_callbacks, pos <= dst.len); + memcpy(dst.str, m_filter_arena.str, pos); + return dst.first(pos); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +csubstr Parser::location_contents(Location const& loc) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, loc.offset < m_buf.len); + return m_buf.sub(loc.offset); +} + +Location Parser::location(NodeRef node) const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, node.valid()); + return location(*node.tree(), node.id()); +} + +Location Parser::location(Tree const& tree, size_t node) const +{ + _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.str == m_newline_offsets_buf.str); + _RYML_CB_CHECK(m_stack.m_callbacks, m_buf.len == m_newline_offsets_buf.len); + if(tree.has_key(node)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, tree.key(node).is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.key(node))); + return val_location(tree.key(node).str); + } + else if(tree.has_val(node)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, 
tree.val(node).is_sub(m_buf)); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.is_super(tree.val(node))); + return val_location(tree.val(node).str); + } + else if(tree.is_container(node)) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, !tree.has_key(node)); + if(!tree.is_stream(node)) + { + const char *node_start = tree._p(node)->m_val.scalar.str; // this was stored in the container + if(tree.has_children(node)) + { + size_t child = tree.first_child(node); + if(tree.has_key(child)) + { + // when a map starts, the container was set after the key + csubstr k = tree.key(child); + if(node_start > k.str) + node_start = k.str; + } + } + return val_location(node_start); + } + else // it's a stream + { + return val_location(m_buf.str); // just return the front of the buffer + } + } + _RYML_CB_ASSERT(m_stack.m_callbacks, tree.type(node) == NOTYPE); + return val_location(m_buf.str); +} + +Location Parser::val_location(const char *val) const +{ + if(_locations_dirty()) + _prepare_locations(); + csubstr src = m_buf; + _RYML_CB_CHECK(m_stack.m_callbacks, src.str == m_newline_offsets_buf.str); + _RYML_CB_CHECK(m_stack.m_callbacks, src.len == m_newline_offsets_buf.len); + _RYML_CB_CHECK(m_stack.m_callbacks, val >= src.begin() && val <= src.end()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets != nullptr); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size > 0); + using linetype = size_t const* C4_RESTRICT; + linetype line = nullptr; + size_t offset = (size_t)(val - src.begin()); + if(m_newline_offsets_size < 30) + { + // do a linear search if the size is small. + for(linetype curr = m_newline_offsets; curr < m_newline_offsets + m_newline_offsets_size; ++curr) + { + if(*curr > offset) + { + line = curr; + break; + } + } + } + else + { + // Do a bisection search if the size is not small. + // + // We could use std::lower_bound but this is simple enough and + // spares the include of . 
+ size_t count = m_newline_offsets_size; + size_t step; + linetype it; + line = m_newline_offsets; + while(count) + { + step = count >> 1; + it = line + step; + if(*it < offset) + { + line = ++it; + count -= step + 1; + } + else + { + count = step; + } + } + } + if(line) + { + _RYML_CB_ASSERT(m_stack.m_callbacks, *line > offset); + } + else + { + _RYML_CB_ASSERT(m_stack.m_callbacks, m_buf.empty()); + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == 1); + line = m_newline_offsets; + } + _RYML_CB_ASSERT(m_stack.m_callbacks, line >= m_newline_offsets && line < m_newline_offsets + m_newline_offsets_size);; + Location loc = {}; + loc.name = m_file; + loc.offset = offset; + loc.line = (size_t)(line - m_newline_offsets); + if(line > m_newline_offsets) + loc.col = (offset - *(line-1) - 1u); + else + loc.col = offset; + return loc; +} + +void Parser::_prepare_locations() const +{ + _RYML_CB_ASSERT(m_stack.m_callbacks, !m_file.empty()); + size_t numnewlines = 1u + m_buf.count('\n'); + _resize_locations(numnewlines); + m_newline_offsets_size = 0; + for(size_t i = 0; i < m_buf.len; i++) + if(m_buf[i] == '\n') + m_newline_offsets[m_newline_offsets_size++] = i; + m_newline_offsets[m_newline_offsets_size++] = m_buf.len; + _RYML_CB_ASSERT(m_stack.m_callbacks, m_newline_offsets_size == numnewlines); +} + +void Parser::_resize_locations(size_t numnewlines) const +{ + if(numnewlines > m_newline_offsets_capacity) + { + if(m_newline_offsets) + _RYML_CB_FREE(m_stack.m_callbacks, m_newline_offsets, size_t, m_newline_offsets_capacity); + m_newline_offsets = _RYML_CB_ALLOC_HINT(m_stack.m_callbacks, size_t, numnewlines, m_newline_offsets); + m_newline_offsets_capacity = numnewlines; + } +} + +void Parser::_mark_locations_dirty() +{ + m_newline_offsets_size = 0u; + m_newline_offsets_buf = m_buf; +} + +bool Parser::_locations_dirty() const +{ + return !m_newline_offsets_size; +} + +} // namespace yml +} // namespace c4 + + +#if defined(_MSC_VER) +# pragma warning(pop) +#elif 
defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/parse.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/node.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + + +namespace c4 { +namespace yml { + +size_t NodeRef::set_key_serialized(c4::fmt::const_base64_wrapper w) +{ + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_key(encoded); + return encoded.len; +} + +size_t NodeRef::set_val_serialized(c4::fmt::const_base64_wrapper w) +{ + _apply_seed(); + csubstr encoded = this->to_arena(w); + this->set_val(encoded); + return encoded.len; +} + +size_t NodeRef::deserialize_key(c4::fmt::base64_wrapper w) const +{ + RYML_ASSERT( ! is_seed()); + RYML_ASSERT(valid()); + RYML_ASSERT(get() != nullptr); + return from_chars(key(), &w); +} + +size_t NodeRef::deserialize_val(c4::fmt::base64_wrapper w) const +{ + RYML_ASSERT( ! 
is_seed()); + RYML_ASSERT(valid()); + RYML_ASSERT(get() != nullptr); + return from_chars(val(), &w); +} + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/node.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/preprocess.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_PREPROCESS_HPP_ +#define _C4_YML_PREPROCESS_HPP_ + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. */ + +/** @defgroup Preprocessors Preprocessor functions + * + * These are the existing preprocessors: + * + * @code{.cpp} + * size_t preprocess_json(csubstr json, substr buf) + * size_t preprocess_rxmap(csubstr json, substr buf) + * @endcode + */ + +#ifndef _C4_YML_COMMON_HPP_ +//included above: +//#include "./common.hpp" +#endif +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/substr.hpp +//#include +#if !defined(C4_SUBSTR_HPP_) && !defined(_C4_SUBSTR_HPP_) +#error "amalgamate: file c4/substr.hpp must have been included at this point" +#endif /* C4_SUBSTR_HPP_ */ + + + +namespace c4 { +namespace yml { + +namespace detail { +using Preprocessor = size_t(csubstr, substr); +template +substr preprocess_into_container(csubstr input, CharContainer *out) +{ + // try to write once. the preprocessor will stop writing at the end of + // the container, but will process all the input to determine the + // required container size. 
+ size_t sz = PP(input, to_substr(*out)); + // if the container size is not enough, resize, and run again in the + // resized container + if(sz > out->size()) + { + out->resize(sz); + sz = PP(input, to_substr(*out)); + } + return to_substr(*out).first(sz); +} +} // namespace detail + + +//----------------------------------------------------------------------------- + +/** @name preprocess_rxmap + * Convert flow-type relaxed maps (with implicit bools) into strict YAML + * flow map. + * + * @code{.yaml} + * {a, b, c, d: [e, f], g: {a, b}} + * # is converted into this: + * {a: 1, b: 1, c: 1, d: [e, f], g: {a, b}} + * @endcode + + * @note this is NOT recursive - conversion happens only in the top-level map + * @param rxmap A relaxed map + * @param buf output buffer + * @param out output container + */ + +//@{ + +/** Write into a given output buffer. This function is safe to call with + * empty or small buffers; it won't write beyond the end of the buffer. + * + * @return the number of characters required for output + */ +RYML_EXPORT size_t preprocess_rxmap(csubstr rxmap, substr buf); + + +/** Write into an existing container. It is resized to contained the output. + * @return a substr of the container + * @overload preprocess_rxmap */ +template +substr preprocess_rxmap(csubstr rxmap, CharContainer *out) +{ + return detail::preprocess_into_container(rxmap, out); +} + + +/** Create a container with the result. 
+ * @overload preprocess_rxmap */ +template +CharContainer preprocess_rxmap(csubstr rxmap) +{ + CharContainer out; + preprocess_rxmap(rxmap, &out); + return out; +} + +//@} + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_PREPROCESS_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/preprocess.cpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.cpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifdef RYML_SINGLE_HDR_DEFINE_NOW +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +//#include "c4/yml/preprocess.hpp" +#if !defined(C4_YML_PREPROCESS_HPP_) && !defined(_C4_YML_PREPROCESS_HPP_) +#error "amalgamate: file c4/yml/preprocess.hpp must have been included at this point" +#endif /* C4_YML_PREPROCESS_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/parser_dbg.hpp +//#include "c4/yml/detail/parser_dbg.hpp" +#if !defined(C4_YML_DETAIL_PARSER_DBG_HPP_) && !defined(_C4_YML_DETAIL_PARSER_DBG_HPP_) +#error "amalgamate: file c4/yml/detail/parser_dbg.hpp must have been included at this point" +#endif /* C4_YML_DETAIL_PARSER_DBG_HPP_ */ + + +/** @file preprocess.hpp Functions for preprocessing YAML prior to parsing. 
*/ + +namespace c4 { +namespace yml { + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +namespace { +C4_ALWAYS_INLINE bool _is_idchar(char c) +{ + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || (c == '_' || c == '-' || c == '~' || c == '$'); +} + +typedef enum { kReadPending = 0, kKeyPending = 1, kValPending = 2 } _ppstate; +C4_ALWAYS_INLINE _ppstate _next(_ppstate s) +{ + int n = (int)s + 1; + return (_ppstate)(n <= (int)kValPending ? n : 0); +} +} // empty namespace + + +//----------------------------------------------------------------------------- + +size_t preprocess_rxmap(csubstr s, substr buf) +{ + detail::_SubstrWriter writer(buf); + _ppstate state = kReadPending; + size_t last = 0; + + if(s.begins_with('{')) + { + RYML_CHECK(s.ends_with('}')); + s = s.offs(1, 1); + } + + writer.append('{'); + + for(size_t i = 0; i < s.len; ++i) + { + const char curr = s[i]; + const char next = i+1 < s.len ? 
s[i+1] : '\0'; + + if(curr == '\'' || curr == '"') + { + csubstr ss = s.sub(i).pair_range_esc(curr, '\\'); + i += static_cast(ss.end() - (s.str + i)); + state = _next(state); + } + else if(state == kReadPending && _is_idchar(curr)) + { + state = _next(state); + } + + switch(state) + { + case kKeyPending: + { + if(curr == ':' && next == ' ') + { + state = _next(state); + } + else if(curr == ',' && next == ' ') + { + writer.append(s.range(last, i)); + writer.append(": 1, "); + last = i + 2; + } + break; + } + case kValPending: + { + if(curr == '[' || curr == '{' || curr == '(') + { + csubstr ss = s.sub(i).pair_range_nested(curr, '\\'); + i += static_cast(ss.end() - (s.str + i)); + state = _next(state); + } + else if(curr == ',' && next == ' ') + { + state = _next(state); + } + break; + } + default: + // nothing to do + break; + } + } + + writer.append(s.sub(last)); + if(state == kKeyPending) + writer.append(": 1"); + writer.append('}'); + + return writer.pos; +} + + +} // namespace yml +} // namespace c4 + +#endif /* RYML_SINGLE_HDR_DEFINE_NOW */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.cpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/checks.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/checks.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_YML_DETAIL_CHECKS_HPP_ +#define C4_YML_DETAIL_CHECKS_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + + +#ifdef __clang__ +# pragma clang 
diagnostic push +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wtype-limits" // error: comparison of unsigned expression >= 0 is always true +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4296/*expression is always 'boolean_value'*/) +#endif + +namespace c4 { +namespace yml { + + +void check_invariants(Tree const& t, size_t node=NONE); +void check_free_list(Tree const& t); +void check_arena(Tree const& t); + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_invariants(Tree const& t, size_t node) +{ + if(node == NONE) + { + if(t.size() == 0) return; + node = t.root_id(); + } + + auto const& n = *t._p(node); +#ifdef RYML_DBG + if(n.m_first_child != NONE || n.m_last_child != NONE) + { + printf("check(%zu): fc=%zu lc=%zu\n", node, n.m_first_child, n.m_last_child); + } + else + { + printf("check(%zu)\n", node); + } +#endif + + C4_CHECK(n.m_parent != node); + if(n.m_parent == NONE) + { + C4_CHECK(t.is_root(node)); + } + else //if(n.m_parent != NONE) + { + C4_CHECK(t.has_child(n.m_parent, node)); + + auto const& p = *t._p(n.m_parent); + if(n.m_prev_sibling == NONE) + { + C4_CHECK(p.m_first_child == node); + C4_CHECK(t.first_sibling(node) == node); + } + else + { + C4_CHECK(p.m_first_child != node); + C4_CHECK(t.first_sibling(node) != node); + } + + if(n.m_next_sibling == NONE) + { + C4_CHECK(p.m_last_child == node); + C4_CHECK(t.last_sibling(node) == node); + } + else + { + C4_CHECK(p.m_last_child != node); + C4_CHECK(t.last_sibling(node) != node); + } + } + + C4_CHECK(n.m_first_child != node); + C4_CHECK(n.m_last_child != node); + if(n.m_first_child != NONE || n.m_last_child != NONE) + { + C4_CHECK(n.m_first_child != NONE); + C4_CHECK(n.m_last_child != NONE); + } + + 
C4_CHECK(n.m_prev_sibling != node); + C4_CHECK(n.m_next_sibling != node); + if(n.m_prev_sibling != NONE) + { + C4_CHECK(t._p(n.m_prev_sibling)->m_next_sibling == node); + C4_CHECK(t._p(n.m_prev_sibling)->m_prev_sibling != node); + } + if(n.m_next_sibling != NONE) + { + C4_CHECK(t._p(n.m_next_sibling)->m_prev_sibling == node); + C4_CHECK(t._p(n.m_next_sibling)->m_next_sibling != node); + } + + size_t count = 0; + for(size_t i = n.m_first_child; i != NONE; i = t.next_sibling(i)) + { +#ifdef RYML_DBG + printf("check(%zu): descend to child[%zu]=%zu\n", node, count, i); +#endif + auto const& ch = *t._p(i); + C4_CHECK(ch.m_parent == node); + C4_CHECK(ch.m_next_sibling != i); + ++count; + } + C4_CHECK(count == t.num_children(node)); + + if(n.m_prev_sibling == NONE && n.m_next_sibling == NONE) + { + if(n.m_parent != NONE) + { + C4_CHECK(t.num_children(n.m_parent) == 1); + C4_CHECK(t.num_siblings(node) == 1); + } + } + + if(node == t.root_id()) + { + C4_CHECK(t.size() == t.m_size); + C4_CHECK(t.capacity() == t.m_cap); + C4_CHECK(t.m_cap == t.m_size + t.slack()); + check_free_list(t); + check_arena(t); + } + + for(size_t i = t.first_child(node); i != NONE; i = t.next_sibling(i)) + { + check_invariants(t, i); + } +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_free_list(Tree const& t) +{ + if(t.m_free_head == NONE) + { + C4_CHECK(t.m_free_tail == t.m_free_head); + return; + } + + C4_CHECK(t.m_free_head >= 0 && t.m_free_head < t.m_cap); + C4_CHECK(t.m_free_tail >= 0 && t.m_free_tail < t.m_cap); + + auto const& head = *t._p(t.m_free_head); + //auto const& tail = *t._p(t.m_free_tail); + + //C4_CHECK(head.m_prev_sibling == NONE); + //C4_CHECK(tail.m_next_sibling == NONE); + + size_t count = 0; + for(size_t i = t.m_free_head, prev = NONE; i != NONE; i = 
t._p(i)->m_next_sibling) + { + auto const& elm = *t._p(i); + if(&elm != &head) + { + C4_CHECK(elm.m_prev_sibling == prev); + } + prev = i; + ++count; + } + C4_CHECK(count == t.slack()); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline void check_arena(Tree const& t) +{ + C4_CHECK(t.m_arena.len == 0 || (t.m_arena_pos >= 0 && t.m_arena_pos <= t.m_arena.len)); + C4_CHECK(t.arena_size() == t.m_arena_pos); + C4_CHECK(t.arena_slack() + t.m_arena_pos == t.m_arena.len); +} + + +} /* namespace yml */ +} /* namespace c4 */ + +#ifdef __clang__ +# pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop +#elif defined(_MSC_VER) +# pragma warning(pop) +#endif + +#endif /* C4_YML_DETAIL_CHECKS_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/checks.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/detail/print.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef C4_YML_DETAIL_PRINT_HPP_ +#define C4_YML_DETAIL_PRINT_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && 
!defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + + + +namespace c4 { +namespace yml { + + +inline size_t print_node(Tree const& p, size_t node, int level, size_t count, bool print_children) +{ + printf("[%zd]%*s[%zd] %p", count, (2*level), "", node, (void*)p.get(node)); + if(p.is_root(node)) + { + printf(" [ROOT]"); + } + printf(" %s:", p.type_str(node)); + if(p.has_key(node)) + { + if(p.has_key_anchor(node)) + { + csubstr ka = p.key_anchor(node); + printf(" &%.*s", (int)ka.len, ka.str); + } + if(p.has_key_tag(node)) + { + csubstr kt = p.key_tag(node); + csubstr k = p.key(node); + printf(" %.*s '%.*s'", (int)kt.len, kt.str, (int)k.len, k.str); + } + else + { + csubstr k = p.key(node); + printf(" '%.*s'", (int)k.len, k.str); + } + } + else + { + RYML_ASSERT( ! p.has_key_tag(node)); + } + if(p.has_val(node)) + { + if(p.has_val_tag(node)) + { + csubstr vt = p.val_tag(node); + csubstr v = p.val(node); + printf(" %.*s '%.*s'", (int)vt.len, vt.str, (int)v.len, v.str); + } + else + { + csubstr v = p.val(node); + printf(" '%.*s'", (int)v.len, v.str); + } + } + else + { + if(p.has_val_tag(node)) + { + csubstr vt = p.val_tag(node); + printf(" %.*s", (int)vt.len, vt.str); + } + } + if(p.has_val_anchor(node)) + { + auto &a = p.val_anchor(node); + printf(" valanchor='&%.*s'", (int)a.len, a.str); + } + printf(" (%zd sibs)", p.num_siblings(node)); + + ++count; + + if(p.is_container(node)) + { + printf(" %zd children:\n", p.num_children(node)); + if(print_children) + { + for(size_t i = p.first_child(node); i != NONE; i = p.next_sibling(i)) + { + count = print_node(p, i, level+1, count, print_children); + } + } + } + else + { + printf("\n"); + } + + return count; +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- 
+//----------------------------------------------------------------------------- + +inline void print_node(NodeRef const& p, int level=0) +{ + print_node(*p.tree(), p.id(), level, 0, true); +} + + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +inline size_t print_tree(Tree const& p, size_t node=NONE) +{ + printf("--------------------------------------\n"); + size_t ret = 0; + if(!p.empty()) + { + if(node == NONE) + node = p.root_id(); + ret = print_node(p, node, 0, 0, true); + } + printf("#nodes=%zd vs #printed=%zd\n", p.size(), ret); + printf("--------------------------------------\n"); + return ret; +} + + +} /* namespace yml */ +} /* namespace c4 */ + + +#endif /* C4_YML_DETAIL_PRINT_HPP_ */ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/detail/print.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/c4/yml/yml.hpp +// https://github.com/biojppm/rapidyaml/src/c4/yml/yml.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _C4_YML_YML_HPP_ +#define _C4_YML_YML_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/tree.hpp +//#include "c4/yml/tree.hpp" +#if !defined(C4_YML_TREE_HPP_) && !defined(_C4_YML_TREE_HPP_) +#error "amalgamate: file c4/yml/tree.hpp must have been included at this point" +#endif /* C4_YML_TREE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/node.hpp +//#include "c4/yml/node.hpp" +#if !defined(C4_YML_NODE_HPP_) && !defined(_C4_YML_NODE_HPP_) +#error "amalgamate: file c4/yml/node.hpp 
must have been included at this point" +#endif /* C4_YML_NODE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/emit.hpp +//#include "c4/yml/emit.hpp" +#if !defined(C4_YML_EMIT_HPP_) && !defined(_C4_YML_EMIT_HPP_) +#error "amalgamate: file c4/yml/emit.hpp must have been included at this point" +#endif /* C4_YML_EMIT_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/parse.hpp +//#include "c4/yml/parse.hpp" +#if !defined(C4_YML_PARSE_HPP_) && !defined(_C4_YML_PARSE_HPP_) +#error "amalgamate: file c4/yml/parse.hpp must have been included at this point" +#endif /* C4_YML_PARSE_HPP_ */ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/preprocess.hpp +//#include "c4/yml/preprocess.hpp" +#if !defined(C4_YML_PREPROCESS_HPP_) && !defined(_C4_YML_PREPROCESS_HPP_) +#error "amalgamate: file c4/yml/preprocess.hpp must have been included at this point" +#endif /* C4_YML_PREPROCESS_HPP_ */ + + +#endif // _C4_YML_YML_HPP_ + + +// (end https://github.com/biojppm/rapidyaml/src/c4/yml/yml.hpp) + + + +//******************************************************************************** +//-------------------------------------------------------------------------------- +// src/ryml.hpp +// https://github.com/biojppm/rapidyaml/src/ryml.hpp +//-------------------------------------------------------------------------------- +//******************************************************************************** + +#ifndef _RYML_HPP_ +#define _RYML_HPP_ + +// amalgamate: removed include of +// https://github.com/biojppm/rapidyaml/src/c4/yml/yml.hpp +//#include "c4/yml/yml.hpp" +#if !defined(C4_YML_YML_HPP_) && !defined(_C4_YML_YML_HPP_) +#error "amalgamate: file c4/yml/yml.hpp must have been included at this point" +#endif /* C4_YML_YML_HPP_ */ + + +namespace ryml { +using namespace c4::yml; +using namespace c4; +} + +#endif /* _RYML_HPP_ */ + + +// (end 
https://github.com/biojppm/rapidyaml/src/ryml.hpp) + +#endif /* _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ */ + diff --git a/src/time_formats.am b/src/time_formats.am index a91b7749..9d437610 100644 --- a/src/time_formats.am +++ b/src/time_formats.am @@ -49,6 +49,7 @@ TIME_FORMATS = \ "%m/%d/%Y %H:%M:%S" \ "%d/%b/%y %H:%M:%S" \ "%m%d %H:%M:%S" \ + "%Y%m%d %H:%M:%S" \ "%Y%m%d.%H%M%S" \ "%H:%M:%S" \ "%M:%S" \ diff --git a/src/view_curses.cc b/src/view_curses.cc index d3095a82..3f0235bc 100644 --- a/src/view_curses.cc +++ b/src/view_curses.cc @@ -325,7 +325,8 @@ view_curses::mvwattrline(WINDOW* window, } else if (iter->sa_type == &VC_STYLE) { attrs = iter->sa_value.get(); } else if (iter->sa_type == &SA_LEVEL) { - attrs = vc.vc_level_attrs[iter->sa_value.get()].first; + attrs = vc.attrs_for_level( + (log_level_t) iter->sa_value.get()); } else if (iter->sa_type == &VC_ROLE) { auto role = iter->sa_value.get(); attrs = vc.attrs_for_role(role); @@ -581,13 +582,26 @@ attr_for_colors(nonstd::optional fg, nonstd::optional bg) return retval; } -std::pair +view_colors::role_attrs view_colors::to_attrs(const lnav_theme& lt, - const style_config& sc, - const style_config& fallback_sc, + const positioned_property& pp_sc, + const positioned_property& pp_fallback_sc, lnav_config_listener::error_reporter& reporter) { + const auto& sc = pp_sc.pp_value; + const auto& fallback_sc = pp_fallback_sc.pp_value; std::string fg1, bg1, fg_color, bg_color; + intern_string_t role_class; + + if (!pp_sc.pp_path.empty()) { + auto role_class_path + = ghc::filesystem::path(pp_sc.pp_path.to_string()).parent_path(); + auto inner = role_class_path.filename().string(); + auto outer = role_class_path.parent_path().filename().string(); + + role_class = intern_string::lookup( + fmt::format(FMT_STRING("-lnav_{}_{}"), outer, inner)); + } fg1 = sc.sc_color; if (fg1.empty()) { @@ -629,7 +643,7 @@ view_colors::to_attrs(const lnav_theme& lt, retval2.ta_attrs |= A_BOLD; } - return {retval1, retval2}; + return {retval1, 
retval2, role_class}; } void @@ -695,14 +709,17 @@ view_colors::init_roles(const lnav_theme& lt, if (lnav_config.lc_ui_dim_text) { this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_TEXT)] - .first.ta_attrs + .ra_normal.ta_attrs |= A_DIM; this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_TEXT)] - .second.ta_attrs + .ra_reverse.ta_attrs |= A_DIM; } this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_SEARCH)] - = std::make_pair(text_attrs{A_REVERSE}, text_attrs{A_REVERSE}); + = role_attrs{text_attrs{A_REVERSE}, text_attrs{A_REVERSE}}; + this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_SEARCH)] + .ra_class_name + = intern_string::lookup("-lnav_styles_search"); this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_IDENTIFIER)] = this->to_attrs( lt, lt.lt_style_identifier, lt.lt_style_text, reporter); @@ -743,17 +760,19 @@ view_colors::init_roles(const lnav_theme& lt, = this->to_attrs( lt, lt.lt_style_active_status, lt.lt_style_status, reporter); this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_ACTIVE_STATUS2)] - = std::make_pair(this->vc_role_attrs[lnav::enums::to_underlying( - role_t::VCR_ACTIVE_STATUS)] - .first, - this->vc_role_attrs[lnav::enums::to_underlying( - role_t::VCR_ACTIVE_STATUS)] - .second); + = role_attrs{ + this->vc_role_attrs[lnav::enums::to_underlying( + role_t::VCR_ACTIVE_STATUS)] + .ra_normal, + this->vc_role_attrs[lnav::enums::to_underlying( + role_t::VCR_ACTIVE_STATUS)] + .ra_reverse, + }; this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_ACTIVE_STATUS2)] - .first.ta_attrs + .ra_normal.ta_attrs |= A_BOLD; this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_ACTIVE_STATUS2)] - .second.ta_attrs + .ra_reverse.ta_attrs |= A_BOLD; this->vc_role_attrs[lnav::enums::to_underlying(role_t::VCR_STATUS_TITLE)] = this->to_attrs( @@ -824,61 +843,69 @@ view_colors::init_roles(const lnav_theme& lt, lt, lt.lt_style_snippet_border, lt.lt_style_text, reporter); { - style_config stitch_sc; + 
positioned_property stitch_sc; - stitch_sc.sc_color = lt.lt_style_status_subtitle.sc_background_color; - stitch_sc.sc_background_color - = lt.lt_style_status_title.sc_background_color; + stitch_sc.pp_value.sc_color + = lt.lt_style_status_subtitle.pp_value.sc_background_color; + stitch_sc.pp_value.sc_background_color + = lt.lt_style_status_title.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_STATUS_STITCH_TITLE_TO_SUB)] = this->to_attrs(lt, stitch_sc, lt.lt_style_status, reporter); } { - style_config stitch_sc; + positioned_property stitch_sc; - stitch_sc.sc_color = lt.lt_style_status_title.sc_background_color; - stitch_sc.sc_background_color - = lt.lt_style_status_subtitle.sc_background_color; + stitch_sc.pp_value.sc_color + = lt.lt_style_status_title.pp_value.sc_background_color; + stitch_sc.pp_value.sc_background_color + = lt.lt_style_status_subtitle.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_STATUS_STITCH_SUB_TO_TITLE)] = this->to_attrs(lt, stitch_sc, lt.lt_style_status, reporter); } { - style_config stitch_sc; + positioned_property stitch_sc; - stitch_sc.sc_color = lt.lt_style_status.sc_background_color; - stitch_sc.sc_background_color - = lt.lt_style_status_subtitle.sc_background_color; + stitch_sc.pp_value.sc_color + = lt.lt_style_status.pp_value.sc_background_color; + stitch_sc.pp_value.sc_background_color + = lt.lt_style_status_subtitle.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_STATUS_STITCH_SUB_TO_NORMAL)] = this->to_attrs(lt, stitch_sc, lt.lt_style_status, reporter); } { - style_config stitch_sc; + positioned_property stitch_sc; - stitch_sc.sc_color = lt.lt_style_status_subtitle.sc_background_color; - stitch_sc.sc_background_color = lt.lt_style_status.sc_background_color; + stitch_sc.pp_value.sc_color + = lt.lt_style_status_subtitle.pp_value.sc_background_color; + stitch_sc.pp_value.sc_background_color + = 
lt.lt_style_status.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_STATUS_STITCH_NORMAL_TO_SUB)] = this->to_attrs(lt, stitch_sc, lt.lt_style_status, reporter); } { - style_config stitch_sc; + positioned_property stitch_sc; - stitch_sc.sc_color = lt.lt_style_status.sc_background_color; - stitch_sc.sc_background_color - = lt.lt_style_status_title.sc_background_color; + stitch_sc.pp_value.sc_color + = lt.lt_style_status.pp_value.sc_background_color; + stitch_sc.pp_value.sc_background_color + = lt.lt_style_status_title.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_STATUS_STITCH_TITLE_TO_NORMAL)] = this->to_attrs(lt, stitch_sc, lt.lt_style_status, reporter); } { - style_config stitch_sc; + positioned_property stitch_sc; - stitch_sc.sc_color = lt.lt_style_status_title.sc_background_color; - stitch_sc.sc_background_color = lt.lt_style_status.sc_background_color; + stitch_sc.pp_value.sc_color + = lt.lt_style_status_title.pp_value.sc_background_color; + stitch_sc.pp_value.sc_background_color + = lt.lt_style_status.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_STATUS_STITCH_NORMAL_TO_TITLE)] = this->to_attrs(lt, stitch_sc, lt.lt_style_status, reporter); @@ -909,19 +936,21 @@ view_colors::init_roles(const lnav_theme& lt, = this->to_attrs( lt, lt.lt_style_scrollbar, lt.lt_style_status, reporter); { - style_config bar_sc; + positioned_property bar_sc; - bar_sc.sc_color = lt.lt_style_error.sc_color; - bar_sc.sc_background_color = lt.lt_style_scrollbar.sc_background_color; + bar_sc.pp_value.sc_color = lt.lt_style_error.pp_value.sc_color; + bar_sc.pp_value.sc_background_color + = lt.lt_style_scrollbar.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_SCROLLBAR_ERROR)] = this->to_attrs(lt, bar_sc, lt.lt_style_alert_status, reporter); } { - style_config bar_sc; + positioned_property bar_sc; - bar_sc.sc_color = 
lt.lt_style_warning.sc_color; - bar_sc.sc_background_color = lt.lt_style_scrollbar.sc_background_color; + bar_sc.pp_value.sc_color = lt.lt_style_warning.pp_value.sc_color; + bar_sc.pp_value.sc_background_color + = lt.lt_style_scrollbar.pp_value.sc_background_color; this->vc_role_attrs[lnav::enums::to_underlying( role_t::VCR_SCROLLBAR_WARNING)] = this->to_attrs(lt, bar_sc, lt.lt_style_warn_status, reporter); @@ -984,7 +1013,7 @@ view_colors::init_roles(const lnav_theme& lt, if (level_iter == lt.lt_level_styles.end()) { this->vc_level_attrs[level] - = std::make_pair(text_attrs{}, text_attrs{}); + = role_attrs{text_attrs{}, text_attrs{}}; } else { this->vc_level_attrs[level] = this->to_attrs( lt, level_iter->second, lt.lt_style_text, reporter); @@ -995,6 +1024,28 @@ view_colors::init_roles(const lnav_theme& lt, this->vc_color_pair_end = 1; } this->vc_dyn_pairs.clear(); + + for (int32_t role_index = 0; + role_index < lnav::enums::to_underlying(role_t::VCR__MAX); + role_index++) + { + const auto& ra = this->vc_role_attrs[role_index]; + if (ra.ra_class_name.empty()) { + continue; + } + + this->vc_class_to_role[ra.ra_class_name.to_string()] + = VC_ROLE.value(role_t(role_index)); + } + for (int level_index = 0; level_index < LEVEL__MAX; level_index++) { + const auto& ra = this->vc_level_attrs[level_index]; + if (ra.ra_class_name.empty()) { + continue; + } + + this->vc_class_to_role[ra.ra_class_name.to_string()] + = SA_LEVEL.value(level_index); + } } int @@ -1198,12 +1249,6 @@ lab_color::operator!=(const lab_color& rhs) const return !(rhs == *this); } -string_attr_pair -view_colors::roles::file() -{ - return VC_ROLE.value(role_t::VCR_FILE); -} - #include Result diff --git a/src/view_curses.hh b/src/view_curses.hh index a3a24bae..c6687369 100644 --- a/src/view_curses.hh +++ b/src/view_curses.hh @@ -56,6 +56,7 @@ #include #include #include +#include #include #include "base/attr_line.hh" @@ -228,16 +229,8 @@ public: require(role < role_t::VCR__MAX); return selected - ? 
this->vc_role_attrs[lnav::enums::to_underlying(role)].second - : this->vc_role_attrs[lnav::enums::to_underlying(role)].first; - } - - text_attrs reverse_attrs_for_role(role_t role) const - { - require(role > role_t::VCR_NONE); - require(role < role_t::VCR__MAX); - - return this->vc_role_reverse_colors[lnav::enums::to_underlying(role)]; + ? this->vc_role_attrs[lnav::enums::to_underlying(role)].ra_reverse + : this->vc_role_attrs[lnav::enums::to_underlying(role)].ra_normal; } nonstd::optional color_for_ident(const char* str, size_t len) const; @@ -259,6 +252,11 @@ public: return this->attrs_for_ident(str.c_str(), str.length()); } + text_attrs attrs_for_level(log_level_t level) const + { + return this->vc_level_attrs[level].ra_normal; + } + int ensure_color_pair(short fg, short bg); int ensure_color_pair(nonstd::optional fg, @@ -272,22 +270,12 @@ public: nonstd::optional match_color(const styling::color_unit& color) const; - std::pair to_attrs( - const lnav_theme& lt, - const style_config& sc, - const style_config& fallback_sc, - lnav_config_listener::error_reporter& reporter); - - std::pair vc_level_attrs[LEVEL__MAX]; - short ansi_to_theme_color(short ansi_fg) const { return this->vc_ansi_to_theme[ansi_fg]; } - struct roles { - static string_attr_pair file(); - }; + std::unordered_map vc_class_to_role; static bool initialized; @@ -301,12 +289,21 @@ private: int dp_color_pair; }; + struct role_attrs { + text_attrs ra_normal; + text_attrs ra_reverse; + intern_string_t ra_class_name; + }; + + role_attrs to_attrs(const lnav_theme& lt, + const positioned_property& sc, + const positioned_property& fallback_sc, + lnav_config_listener::error_reporter& reporter); + + role_attrs vc_level_attrs[LEVEL__MAX]; + /** Map of role IDs to attribute values. */ - std::pair - vc_role_attrs[lnav::enums::to_underlying(role_t::VCR__MAX)]; - /** Map of role IDs to reverse-video attribute values. 
*/ - text_attrs - vc_role_reverse_colors[lnav::enums::to_underlying(role_t::VCR__MAX)]; + role_attrs vc_role_attrs[lnav::enums::to_underlying(role_t::VCR__MAX)]; short vc_ansi_to_theme[8]; short vc_highlight_colors[HI_COLOR_COUNT]; int vc_color_pair_end{0}; diff --git a/src/view_helpers.cc b/src/view_helpers.cc index cce80b0e..6a279f15 100644 --- a/src/view_helpers.cc +++ b/src/view_helpers.cc @@ -45,6 +45,7 @@ #include "shlex.hh" #include "sql_help.hh" #include "sql_util.hh" +#include "static_file_vtab.hh" #include "view_helpers.crumbs.hh" #include "view_helpers.examples.hh" #include "view_helpers.hist.hh" @@ -108,6 +109,7 @@ open_schema_view() schema += "\n\n-- Virtual Table Definitions --\n\n"; schema += ENVIRON_CREATE_STMT; + schema += STATIC_FILE_CREATE_STMT; schema += vtab_module_schemas; for (const auto& vtab_iter : *lnav_data.ld_vtab_manager) { schema += "\n" + vtab_iter.second->get_table_statement(); diff --git a/src/views_vtab.cc b/src/views_vtab.cc index 0f3b8cb3..567ceb86 100644 --- a/src/views_vtab.cc +++ b/src/views_vtab.cc @@ -162,12 +162,14 @@ static const typed_json_path_container breadcrumb_crumb_handlers struct top_line_meta { nonstd::optional tlm_time; nonstd::optional tlm_file; + nonstd::optional tlm_anchor; std::vector tlm_crumbs; }; static const typed_json_path_container top_line_meta_handlers = { yajlpp::property_handler("time").for_field(&top_line_meta::tlm_time), yajlpp::property_handler("file").for_field(&top_line_meta::tlm_file), + yajlpp::property_handler("anchor").for_field(&top_line_meta::tlm_anchor), yajlpp::property_handler("breadcrumbs#") .for_field(&top_line_meta::tlm_crumbs) .with_children(breadcrumb_crumb_handlers), @@ -283,6 +285,7 @@ CREATE TABLE lnav_views ( if (tss != nullptr && tss->text_line_count() > 0) { auto* time_source = dynamic_cast( tc.get_sub_source()); + auto* ta = dynamic_cast(tc.get_sub_source()); std::vector crumbs; tss->text_crumbs_for_line(tc.get_top(), crumbs); @@ -302,6 +305,9 @@ CREATE TABLE lnav_views ( 
tlm.tlm_time = timestamp; } } + if (ta != nullptr) { + tlm.tlm_anchor = ta->anchor_for_row(tc.get_top()); + } tlm.tlm_file = tc.map_top_row([](const auto& al) { return get_string_attr(al.get_attrs(), logline::L_FILE) | [](const auto wrapper) { @@ -385,6 +391,50 @@ CREATE TABLE lnav_views ( return SQLITE_ERROR; } } + if (top_meta != nullptr) { + static const intern_string_t SQL_SRC + = intern_string::lookup("top_meta"); + + auto parse_res = top_line_meta_handlers.parser_for(SQL_SRC).of( + string_fragment::from_c_str(top_meta)); + if (parse_res.isErr()) { + auto errmsg = parse_res.unwrapErr(); + tab->zErrMsg = sqlite3_mprintf( + "Invalid top_meta: %s", + errmsg[0].to_attr_line().get_string().c_str()); + return SQLITE_ERROR; + } + + auto tlm = parse_res.unwrap(); + + if (index == LNV_TEXT && tlm.tlm_file) { + if (!lnav_data.ld_text_source.to_front(tlm.tlm_file.value())) { + /* parse_res is Ok on this path, so unwrapErr() must not be called here. */ + tab->zErrMsg = sqlite3_mprintf("unknown top_meta.file: %s", + tlm.tlm_file->c_str()); + return SQLITE_ERROR; + } + } + + auto* ta = dynamic_cast(tc.get_sub_source()); + if (ta != nullptr && tlm.tlm_anchor + && !tlm.tlm_anchor.value().empty()) + { + auto req_anchor = tlm.tlm_anchor.value(); + auto req_anchor_top = ta->row_for_anchor(req_anchor); + if (req_anchor_top) { + auto curr_anchor = ta->anchor_for_row(tc.get_top()); + + if (!curr_anchor || curr_anchor.value() != req_anchor) { + tc.set_top(req_anchor_top.value()); + } + } else { + tab->zErrMsg = sqlite3_mprintf( + "unknown top_meta.anchor: %s", req_anchor.c_str()); + return SQLITE_ERROR; + } + } + } tc.set_left(left); tc.set_paused(is_paused); tc.execute_search(search); diff --git a/src/vtab_module.hh b/src/vtab_module.hh index 95811eae..5ef52a97 100644 --- a/src/vtab_module.hh +++ b/src/vtab_module.hh @@ -413,6 +413,8 @@ struct sqlite_func_adapter { Return retval = f(from_sqlite()(argc, argv, Idx)...); to_sqlite(context, std::move(retval)); + } catch (const lnav::console::user_message& um) { + 
to_sqlite(context, um); } catch (from_sqlite_conversion_error& e) { char buffer[256]; @@ -935,6 +937,8 @@ struct tvt_iterator_cursor { template struct tvt_no_update : public T { + using T::T; + int delete_row(sqlite3_vtab* vt, sqlite3_int64 rowid) { vt->zErrMsg = sqlite3_mprintf("Rows cannot be deleted from this table"); diff --git a/src/yajlpp/yajlpp.cc b/src/yajlpp/yajlpp.cc index 394de199..cc9382c5 100644 --- a/src/yajlpp/yajlpp.cc +++ b/src/yajlpp/yajlpp.cc @@ -683,7 +683,6 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers, } this->ypc_sibling_handlers = orig_handlers; - pcre_input pi(&this->ypc_path[0], 0, this->ypc_path.size() - 1); this->ypc_callbacks = DEFAULT_CALLBACKS; @@ -731,7 +730,7 @@ yajlpp_parse_context::update_callbacks(const json_path_container* orig_handlers, || index != yajlpp_provider_context::nindex)) { this->ypc_obj_stack.push(jph.jph_obj_provider( - {{this->ypc_pcre_context, pi}, index}, + {{this->ypc_pcre_context, pi}, index, this}, this->ypc_obj_stack.top())); } } @@ -1080,7 +1079,7 @@ const intern_string_t yajlpp_parse_context::get_full_path() const { if (this->ypc_path.size() <= 1) { - static intern_string_t SLASH = intern_string::lookup("/"); + static const intern_string_t SLASH = intern_string::lookup("/"); return SLASH; } diff --git a/src/yajlpp/yajlpp.hh b/src/yajlpp/yajlpp.hh index 6ccd920f..b005840f 100644 --- a/src/yajlpp/yajlpp.hh +++ b/src/yajlpp/yajlpp.hh @@ -94,6 +94,7 @@ class yajlpp_parse_context; struct yajlpp_provider_context { pcre_extractor ypc_extractor; size_t ypc_index{0}; + yajlpp_parse_context* ypc_parse_context{nullptr}; static constexpr size_t nindex = static_cast(-1); @@ -135,10 +136,7 @@ public: this->ye_msg = reinterpret_cast(yajl_msg.in()); } - const char* what() const noexcept override - { - return this->ye_msg.c_str(); - } + const char* what() const noexcept override { return this->ye_msg.c_str(); } private: std::string ye_msg; @@ -517,10 +515,7 @@ public: return yajl_gen_status_ok; } 
- yajl_gen_status operator()() - { - return yajl_gen_null(this->yg_handle); - } + yajl_gen_status operator()() { return yajl_gen_null(this->yg_handle); } private: yajl_gen yg_handle; @@ -543,10 +538,7 @@ public: yajl_gen_map_open(handle); } - ~yajlpp_map() - { - yajl_gen_map_close(this->ycb_handle); - } + ~yajlpp_map() { yajl_gen_map_close(this->ycb_handle); } }; class yajlpp_array : public yajlpp_container_base { @@ -556,10 +548,7 @@ public: yajl_gen_array_open(handle); } - ~yajlpp_array() - { - yajl_gen_array_close(this->ycb_handle); - } + ~yajlpp_array() { yajl_gen_array_close(this->ycb_handle); } }; class yajlpp_gen_context { @@ -606,15 +595,9 @@ public: this->yg_handle = yajl_gen_alloc(nullptr); } - yajl_gen get_handle() const - { - return this->yg_handle.in(); - } + yajl_gen get_handle() const { return this->yg_handle.in(); } - operator yajl_gen() - { - return this->yg_handle.in(); - } + operator yajl_gen() { return this->yg_handle.in(); } string_fragment to_string_fragment(); diff --git a/src/yajlpp/yajlpp_def.hh b/src/yajlpp/yajlpp_def.hh index 2fc1c155..53b522d9 100644 --- a/src/yajlpp/yajlpp_def.hh +++ b/src/yajlpp/yajlpp_def.hh @@ -37,7 +37,6 @@ #include "config.h" #include "mapbox/variant.hpp" #include "relative_time.hh" -#include "view_curses.hh" #include "yajlpp.hh" #define FOR_FIELD(T, FIELD) for_field() @@ -432,11 +431,13 @@ struct json_path_handler : public json_path_handler_base { } if ((rc = yajl_gen_string(handle, pair.first)) - != yajl_gen_status_ok) { + != yajl_gen_status_ok) + { return rc; } if ((rc = yajl_gen_string(handle, pair.second)) - != yajl_gen_status_ok) { + != yajl_gen_status_ok) + { return rc; } } @@ -669,6 +670,22 @@ struct json_path_handler : public json_path_handler_base { return *this; } + template + json_path_handler& for_child(positioned_property(T::*field)) + { + this->jph_obj_provider + = [field](const yajlpp_provider_context& ypc, void* root) -> void* { + auto& child = json_path_handler::get_field(root, field); + + if 
(ypc.ypc_parse_context != nullptr && child.pp_path.empty()) { + child.pp_path = ypc.ypc_parse_context->get_full_path(); + } + return &child.pp_value; + }; + + return *this; + } + template json_path_handler& for_child(Args... args) { @@ -685,7 +702,8 @@ struct json_path_handler : public json_path_handler_base { template, Args...>::value, - bool> = true> + bool> + = true> json_path_handler& for_field(Args... args) { this->add_cb(str_field_cb2); @@ -732,7 +750,8 @@ struct json_path_handler : public json_path_handler_base { template, Args...>::value, - bool> = true> + bool> + = true> json_path_handler& for_field(Args... args) { this->add_cb(bool_field_cb); @@ -859,7 +878,8 @@ struct json_path_handler : public json_path_handler_base { template< typename... Args, std::enable_if_t, Args...>::value, - bool> = true> + bool> + = true> json_path_handler& for_field(Args... args) { this->add_cb(str_field_cb2); @@ -920,7 +940,8 @@ struct json_path_handler : public json_path_handler_base { template, Args...>::value, - bool> = true> + bool> + = true> json_path_handler& for_field(Args... args) { this->add_cb(str_field_cb2); @@ -975,9 +996,9 @@ struct json_path_handler : public json_path_handler_base { return *this; } - template< - typename... Args, - std::enable_if_t::value, bool> = true> + template::value, bool> + = true> json_path_handler& for_field(Args... args) { this->add_cb(str_field_cb2); @@ -1031,7 +1052,8 @@ struct json_path_handler : public json_path_handler_base { template, Args...>::value, - bool> = true> + bool> + = true> json_path_handler& for_field(Args... args) { this->add_cb(str_field_cb2); @@ -1191,9 +1213,10 @@ struct json_path_handler : public json_path_handler_base { return *this; } - template::value, - bool> = true> + template< + typename... Args, + std::enable_if_t::value, bool> + = true> json_path_handler& for_field(Args... 
args) { this->add_cb(str_field_cb2); diff --git a/src/yaml-extension-functions.cc b/src/yaml-extension-functions.cc new file mode 100644 index 00000000..0a586527 --- /dev/null +++ b/src/yaml-extension-functions.cc @@ -0,0 +1,103 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * @file yaml-extension-functions.cc + */ + +#include + +#define RYML_SINGLE_HDR_DEFINE_NOW + +#include "ryml_all.hpp" +#include "sqlite-extension-func.hh" +#include "vtab_module.hh" +#include "yajlpp/yajlpp.hh" + +using namespace lnav::roles::literals; + +static void +ryml_error_to_um(const char* msg, size_t len, ryml::Location loc, void* ud) +{ + intern_string_t src = intern_string::lookup( + string_fragment::from_bytes(loc.name.data(), loc.name.size())); + auto& sf = *(static_cast(ud)); + auto msg_str = string_fragment::from_bytes(msg, len).trim().to_string(); + + if (loc.offset == sf.length()) { + loc.line -= 1; + } + throw lnav::console::user_message::error("failed to parse YAML content") + .with_reason(msg_str) + .with_snippet(lnav::console::snippet::from( + source_location{src, (int32_t) loc.line}, "")); +} + +static text_auto_buffer +yaml_to_json(string_fragment in) +{ + ryml::Callbacks callbacks(&in, nullptr, nullptr, ryml_error_to_um); + + ryml::set_callbacks(callbacks); + auto tree = ryml::parse_in_arena( + "input", ryml::csubstr{in.data(), (size_t) in.length()}); + + auto output = ryml::emit_json( + tree, tree.root_id(), ryml::substr{}, /*error_on_excess*/ false); + auto buf = auto_buffer::alloc(output.len); + buf.resize(output.len); + output = ryml::emit_json(tree, + tree.root_id(), + ryml::substr(buf.in(), buf.size()), + /*error_on_excess*/ true); + + return {std::move(buf)}; +} + +int +yaml_extension_functions(struct FuncDef** basic_funcs, + struct FuncDefAgg** agg_funcs) +{ + static struct FuncDef yaml_funcs[] = { + sqlite_func_adapter::builder( + help_text("yaml_to_json", + "Convert a YAML document to a JSON-encoded string") + .sql_function() + .with_parameter({"yaml", "The YAML value to convert to JSON."}) + .with_tags({"json", "yaml"}) + .with_example({ + "To convert the document \"abc: def\"", + "SELECT yaml_to_json('abc: def')", + })), + + {nullptr}, + }; + + *basic_funcs = yaml_funcs; + + return SQLITE_OK; +} diff --git a/test/Makefile.am 
b/test/Makefile.am index 1250728e..33dc3804 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -208,6 +208,7 @@ dist_noinst_SCRIPTS = \ test_sql_time_func.sh \ test_sql_views_vtab.sh \ test_sql_xml_func.sh \ + test_sql_yaml_func.sh \ test_text_file.sh \ test_tui.sh \ test_view_colors.sh \ @@ -411,6 +412,7 @@ TESTS = \ test_sql_time_func.sh \ test_sql_views_vtab.sh \ test_sql_xml_func.sh \ + test_sql_yaml_func.sh \ test_text_file.sh \ test_tui.sh \ test_data_parser.sh \ diff --git a/test/document.sections.tests.cc b/test/document.sections.tests.cc index 01d99ce7..f9b41100 100644 --- a/test/document.sections.tests.cc +++ b/test/document.sections.tests.cc @@ -118,6 +118,9 @@ SYNOPSIS DESCRIPTION Lorem ipsum + AbcDef + Lorem ipsum + )"; auto meta = lnav::document::discover_structure(INPUT, line_range{0, -1}); diff --git a/test/expected/expected.am b/test/expected/expected.am index ac7ce85e..bd1cf5df 100644 --- a/test/expected/expected.am +++ b/test/expected/expected.am @@ -302,6 +302,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_meta.sh_41f643bb4f720130625b042563e9591bee4ae588.out \ $(srcdir)/%reldir%/test_meta.sh_45ff39a3d0ac0ca0c95aaca14d043450cec1cedd.err \ $(srcdir)/%reldir%/test_meta.sh_45ff39a3d0ac0ca0c95aaca14d043450cec1cedd.out \ + $(srcdir)/%reldir%/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.err \ + $(srcdir)/%reldir%/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.out \ $(srcdir)/%reldir%/test_meta.sh_4c39b356748c67ccf8a6027a1af88da532f8252a.err \ $(srcdir)/%reldir%/test_meta.sh_4c39b356748c67ccf8a6027a1af88da532f8252a.out \ $(srcdir)/%reldir%/test_meta.sh_7b75763926d832bf9784ca234a060859770aabe7.err \ @@ -912,6 +914,8 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_time_func.sh_f3b1ea49779117bf45f85ad5615fdc5e89193db6.out \ $(srcdir)/%reldir%/test_sql_views_vtab.sh_28e23f4e98b1acd6478e39844fd9306b444550c3.err \ $(srcdir)/%reldir%/test_sql_views_vtab.sh_28e23f4e98b1acd6478e39844fd9306b444550c3.out \ + 
$(srcdir)/%reldir%/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.err \ + $(srcdir)/%reldir%/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.out \ $(srcdir)/%reldir%/test_sql_views_vtab.sh_485a6ac7c69bd4b5d34d3399a9c17f6a2dc89ad3.err \ $(srcdir)/%reldir%/test_sql_views_vtab.sh_485a6ac7c69bd4b5d34d3399a9c17f6a2dc89ad3.out \ $(srcdir)/%reldir%/test_sql_views_vtab.sh_62d15cb9d5a9259f198aa01ca8ed200d6da38d68.err \ @@ -960,12 +964,22 @@ EXPECTED_FILES = \ $(srcdir)/%reldir%/test_sql_xml_func.sh_b036c73528a446cba46625767517cdac868aba72.out \ $(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.err \ $(srcdir)/%reldir%/test_sql_xml_func.sh_fefeb387ae14d4171225ea06cbbff3ec43990cf0.out \ + $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err \ + $(srcdir)/%reldir%/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out \ + $(srcdir)/%reldir%/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err \ + $(srcdir)/%reldir%/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.out \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.err \ $(srcdir)/%reldir%/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out \ + $(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err \ + $(srcdir)/%reldir%/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out \ $(srcdir)/%reldir%/test_text_file.sh_87943c6be50d701a03e901f16493314c839af1ab.err \ $(srcdir)/%reldir%/test_text_file.sh_87943c6be50d701a03e901f16493314c839af1ab.out \ + $(srcdir)/%reldir%/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.err \ + $(srcdir)/%reldir%/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.out \ $(srcdir)/%reldir%/test_text_file.sh_ac872aadda29b9a824361a2c711d62ec1c75d40f.err \ $(srcdir)/%reldir%/test_text_file.sh_ac872aadda29b9a824361a2c711d62ec1c75d40f.out \ 
$(srcdir)/%reldir%/test_text_file.sh_c2a346ca1da2da4346f1d310212e166767993ce9.err \ $(srcdir)/%reldir%/test_text_file.sh_c2a346ca1da2da4346f1d310212e166767993ce9.out \ + $(srcdir)/%reldir%/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.err \ + $(srcdir)/%reldir%/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.out \ $() diff --git a/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out b/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out index bdb6b86c..2b5d658f 100644 --- a/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out +++ b/test/expected/test_cmds.sh_b6a3bb78e9d60e5e1f5ce5b18e40d2f1662707ab.out @@ -389,7 +389,7 @@ can always use  q  to pop the top view off of the stack. Key(s) Action ════════════════════════════════════════════════════════════════════════ - /regexp Start a search for the given regular expression. + /regexp Start a search for the given regular expression. The search is live, so when there is a pause in typing, the currently running search will be canceled and a new one started. The first ten @@ -770,7 +770,9 @@ For support questions, email: :comment text ══════════════════════════════════════════════════════════════════════ - Attach a comment to the top log line + Attach a comment to the top log line. The comment will be displayed + right below the log message it is associated with. The comment can + be formatted using markdown and you can add new-lines with '\n'. 
Parameter text The comment text See Also @@ -1045,12 +1047,13 @@ For support questions, email: -:goto line#|N%|timestamp +:goto line#|N%|timestamp|#anchor ══════════════════════════════════════════════════════════════════════ Go to the given location in the top view Parameter - line#|N%|timestamp A line number, percent - into the file, or a timestamp + line#|N%|timestamp|#anchor A line + number, percent into the file, + timestamp, or an anchor in a text file See Also :next-location, :next-mark, :prev-location, :prev-mark, :relative-goto Examples @@ -1066,6 +1069,10 @@ For support questions, email: :goto 2017-01-01  +#4 To go to the Screenshots section: + :goto #screenshots  + + :help ══════════════════════════════════════════════════════════════════════ @@ -2600,7 +2607,7 @@ For support questions, email: default The default value if the value was not found See Also json_concat(), json_contains(), json_group_array(), - json_group_object() + json_group_object(), yaml_to_json() Examples #1 To get the root of a JSON value: ;SELECT jget('1', '')  @@ -2655,7 +2662,8 @@ For support questions, email: json The initial JSON value. value The value(s) to add to the end of the array. See Also - jget(), json_contains(), json_group_array(), json_group_object() + jget(), json_contains(), json_group_array(), json_group_object(), + yaml_to_json() Examples #1 To append the number 4 to null: ;SELECT json_concat(NULL, 4)  @@ -2677,7 +2685,8 @@ For support questions, email: json The JSON value to query. 
value The value to look for in the first argument See Also - jget(), json_concat(), json_group_array(), json_group_object() + jget(), json_concat(), json_group_array(), json_group_object(), + yaml_to_json() Examples #1 To test if a JSON array contains the number 4: ;SELECT json_contains('[1, 2, 3]', 4)  @@ -2694,7 +2703,8 @@ For support questions, email: Parameter value The values to append to the array See Also - jget(), json_concat(), json_contains(), json_group_object() + jget(), json_concat(), json_contains(), json_group_object(), + yaml_to_json() Examples #1 To create an array from arguments: ;SELECT json_group_array('one', 2, 3.4)  @@ -2712,7 +2722,8 @@ For support questions, email: name The property name for the value value The value to add to the object See Also - jget(), json_concat(), json_contains(), json_group_array() + jget(), json_concat(), json_contains(), json_group_array(), + yaml_to_json() Examples #1 To create an object from arguments: ;SELECT json_group_object('a', 1, 'b', 2)  @@ -4122,6 +4133,20 @@ For support questions, email: +yaml_to_json(yaml) +══════════════════════════════════════════════════════════════════════ + Convert a YAML document to a JSON-encoded string +Parameter + yaml The YAML value to convert to JSON. +See Also + jget(), json_concat(), json_contains(), json_group_array(), + json_group_object() +Example +#1 To convert the document "abc: def": + ;SELECT yaml_to_json('abc: def')  + + + zeroblob(N) ══════════════════════════════════════════════════════════════════════ Returns a BLOB consisting of N bytes of 0x00. 
diff --git a/test/expected/test_cmds.sh_c2b4431dd0cc36c6201d263b727b3305e8cda6b1.err b/test/expected/test_cmds.sh_c2b4431dd0cc36c6201d263b727b3305e8cda6b1.err index f33f5c32..82952722 100644 --- a/test/expected/test_cmds.sh_c2b4431dd0cc36c6201d263b727b3305e8cda6b1.err +++ b/test/expected/test_cmds.sh_c2b4431dd0cc36c6201d263b727b3305e8cda6b1.err @@ -2,6 +2,6 @@ reason: expecting line number/percentage, timestamp, or relative time  --> command-option:1  | :goto invalid  - = help: :goto line#|N%|timestamp + = help: :goto line#|N%|timestamp|#anchor ══════════════════════════════════════════════════════════════════════ Go to the given location in the top view diff --git a/test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.err b/test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.out b/test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.out new file mode 100644 index 00000000..47310702 --- /dev/null +++ b/test/expected/test_meta.sh_48e85ba0c0945a5085fb4ee255771406061a9c17.out @@ -0,0 +1,6 @@ +192.168.202.254 - - [20/Jul/2009:22:59:26 +0000] "GET /vmw/cgi/tramp HTTP/1.0" 200 134 "-" "gPXE/0.9.7" + │ Hello, World! + │ + └ This is  markdown  now! 
+192.168.202.254 - - [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkboot.gz HTTP/1.0" 404 46210 "-" "gPXE/0.9.7" +192.168.202.254 - - [20/Jul/2009:22:59:29 +0000] "GET /vmw/vSphere/default/vmkernel.gz HTTP/1.0" 200 78929 "-" "gPXE/0.9.7" diff --git a/test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.err b/test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.out b/test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.out new file mode 100644 index 00000000..710f6687 --- /dev/null +++ b/test/expected/test_sql_views_vtab.sh_32acc1a8bb5028636fdbf08f077f9a835ab51bec.out @@ -0,0 +1,19 @@ +Build + +Lnav follows the usual GNU style for configuring and installing +software: + +Run  ./autogen.sh  if compiling from a cloned repository. + + ▌$ ./configure  + ▌$ make  + ▌$ sudo make install  + +See Also + +Angle-grinder[1] is a tool to slice and dice log files on the +command-line. If you're familiar with the SumoLogic query language, +you might find this tool more comfortable to work with. 
+ + ▌[1] - https://github.com/rcoh/angle-grinder + diff --git a/test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err b/test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err new file mode 100644 index 00000000..b7f3a79d --- /dev/null +++ b/test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.err @@ -0,0 +1,4 @@ +✘ error: failed to parse YAML content + reason: closing ] not found + --> command-option:1 + | ;SELECT yaml_to_json('[abc')  diff --git a/test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out b/test/expected/test_sql_yaml_func.sh_41c6abde708a69e74f5b7fde865d88fa75f91e0a.out new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err b/test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err new file mode 100644 index 00000000..08a372fe --- /dev/null +++ b/test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.err @@ -0,0 +1,2 @@ +✘ error: unable to open file: non-existent: + reason: failed to ssh to host: ssh: Could not resolve hostname non-existent: nodename nor servname provided, or not known diff --git a/test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.out b/test/expected/test_text_file.sh_2e69c22dcfa37b5c3e8490a6026eacb7ca953998.out new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out b/test/expected/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out index 65c87cc7..da9fbdfe 100644 --- a/test/expected/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out +++ b/test/expected/test_text_file.sh_5b51b55dff7332c5bee2c9b797c401c5614d574a.out @@ -6,17 +6,17 @@ ▌[4] - https://docs.lnav.org ▌[5] - https://coveralls.io/repos/github/tstack/lnav/badge.svg?branch=master ▌[6] - 
https://coveralls.io/github/tstack/lnav?branch=master - ▌[7] - https://snapcraft.io//lnav/badge.svg + ▌[7] - https://snapcraft.io/lnav/badge.svg ▌[8] - https://snapcraft.io/lnav [1] +height="20"/>[1] ▌[1] - https://discord.gg/erBPnKwz7R -This is the source repository for lnav, visit https://lnav.org[1] for -a high level overview. +This is the source repository for lnav, visit https://lnav.org[1] for +a high level overview. ▌[1] - https://lnav.org diff --git a/test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err b/test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out b/test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out new file mode 100644 index 00000000..f3f15de0 --- /dev/null +++ b/test/expected/test_text_file.sh_6a24078983cf1b7a80b6fb65d5186cd125498136.out @@ -0,0 +1,149 @@ +Screenshot + +The following screenshot shows a syslog file. Log lines are displayed +with highlights. Errors are red and warnings are yellow. + +Screenshot[1][2] + + ▌[1] - file://{top_srcdir}/docs/assets/images/lnav-syslog-thumb.png + ▌[2] - file://{top_srcdir}/docs/assets/images/lnav-syslog.png + +Features + + • Log messages from different files are collated together + into a single view + • Automatic detection of log format + • Automatic decompression of GZip and BZip2 files + • Filter log messages based on regular expressions + • Use SQL to analyze your logs + • And more... + +Installation + +Download a statically-linked binary for Linux/MacOS from the release +page[1] + + ▌[1] - https://github.com/tstack/lnav/releases/latest#release-artifacts + +Usage + +The only file installed is the executable,  lnav . 
You can execute it +with no arguments to view the default set of files: + + ▌$ lnav  + +You can view all the syslog messages by running: + + ▌$ lnav /var/log/messages*  + +Usage with  systemd-journald  + +On systems running  systemd-journald , you can use  lnav  as the +pager: + + ▌$ journalctl | lnav  + +or in follow mode: + + ▌$ journalctl -f | lnav  + +Since  journalctl 's default output format omits the year, if you are +viewing logs which span multiple years you will need to change the +output format to include the year, otherwise  lnav  gets confused: + + ▌$ journalctl -o short-iso | lnav  + +It is also possible to use  journalctl 's json output format and  lnav +will make use of additional fields such as PRIORITY and _SYSTEMD_UNIT: + + ▌$ journalctl -o json | lnav  + +In case some MESSAGE fields contain special characters such as ANSI +color codes which are considered as unprintable by journalctl, +specifying  journalctl 's  -a  option might be preferable in order to +output those messages still in a non-binary representation: + + ▌$ journalctl -a -o json | lnav  + +If using systemd v236 or newer, the output fields can be limited to +the ones actually recognized by  lnav  for increased efficiency: + + ▌$ journalctl -o json --output-fields=MESSAGE,PRIORITY,_PID,SYSLOG_IDENTIFIER,_SYSTEMD_UNIT | lnav  + +If your system has been running for a long time, for increased +efficiency you may want to limit the number of log lines fed into  lnav +, e.g. via  journalctl 's  -n  or  --since=...  options. + +In case of a persistent journal, you may want to limit the number of +log lines fed into  lnav  via  journalctl 's  -b  option. + +Support + +Please file issues on this repository or use the discussions section. 
+The following alternatives are also available: + + • support@lnav.org[1] + • Discord[2] + • Google Groups[3] + + ▌[1] - mailto:support@lnav.org + ▌[2] - https://discord.gg/erBPnKwz7R + ▌[3] - https://groups.google.com/g/lnav + +Links + + • Main Site[1] + • Documentation[2] on Read the Docs + • Internal Architecture[3] + + ▌[1] - https://lnav.org + ▌[2] - https://docs.lnav.org + ▌[3] - file://{top_srcdir}/ARCHITECTURE.md + +Contributing + + • Become a Sponsor on GitHub[1] + + ▌[1] - https://github.com/sponsors/tstack + +Building From Source + +Prerequisites + +The following software packages are required to build lnav: + + • gcc/clang - A C++14-compatible compiler. + • libpcre - The Perl Compatible Regular Expression + (PCRE) library. + • sqlite - The SQLite database engine. Version 3.9.0 + or higher is required. + • ncurses - The ncurses text UI library. + • readline - The readline line editing library. + • zlib - The zlib compression library. + • bz2 - The bzip2 compression library. + • libcurl - The cURL library for downloading files + from URLs. Version 7.23.0 or higher is required. + • libarchive - The libarchive library for opening archive + files, like zip/tgz. + • wireshark - The 'tshark' program is used to interpret + pcap files. + +Build + +Lnav follows the usual GNU style for configuring and installing +software: + +Run  ./autogen.sh  if compiling from a cloned repository. + + ▌$ ./configure  + ▌$ make  + ▌$ sudo make install  + +See Also + +Angle-grinder[1] is a tool to slice and dice log files on the +command-line. If you're familiar with the SumoLogic query language, +you might find this tool more comfortable to work with. 
+ + ▌[1] - https://github.com/rcoh/angle-grinder + diff --git a/test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.err b/test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.out b/test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.out new file mode 100644 index 00000000..5a1b89ae --- /dev/null +++ b/test/expected/test_text_file.sh_ac486314c4e02e480d829ea2f077b86c49fedcec.out @@ -0,0 +1,4 @@ +you might find this tool more comfortable to work with. + + ▌[1] - https://github.com/rcoh/angle-grinder + diff --git a/test/expected/test_text_file.sh_c2a346ca1da2da4346f1d310212e166767993ce9.out b/test/expected/test_text_file.sh_c2a346ca1da2da4346f1d310212e166767993ce9.out index 3eb0d516..1efd1ba9 100644 --- a/test/expected/test_text_file.sh_c2a346ca1da2da4346f1d310212e166767993ce9.out +++ b/test/expected/test_text_file.sh_c2a346ca1da2da4346f1d310212e166767993ce9.out @@ -2,6 +2,7 @@ { "top_meta": { "file": "{top_srcdir}/README.md", + "anchor": "#support", "breadcrumbs": [ { "display_value": "README.md", diff --git a/test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.err b/test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.err new file mode 100644 index 00000000..e69de29b diff --git a/test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.out b/test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.out new file mode 100644 index 00000000..f3f15de0 --- /dev/null +++ b/test/expected/test_text_file.sh_e088ea61a5382458cc48a2607e2639e52b0be1da.out @@ -0,0 +1,149 @@ +Screenshot + +The following screenshot shows a syslog file. Log lines are displayed +with highlights. Errors are red and warnings are yellow. 
+ +Screenshot[1][2] + + ▌[1] - file://{top_srcdir}/docs/assets/images/lnav-syslog-thumb.png + ▌[2] - file://{top_srcdir}/docs/assets/images/lnav-syslog.png + +Features + + • Log messages from different files are collated together + into a single view + • Automatic detection of log format + • Automatic decompression of GZip and BZip2 files + • Filter log messages based on regular expressions + • Use SQL to analyze your logs + • And more... + +Installation + +Download a statically-linked binary for Linux/MacOS from the release +page[1] + + ▌[1] - https://github.com/tstack/lnav/releases/latest#release-artifacts + +Usage + +The only file installed is the executable,  lnav . You can execute it +with no arguments to view the default set of files: + + ▌$ lnav  + +You can view all the syslog messages by running: + + ▌$ lnav /var/log/messages*  + +Usage with  systemd-journald  + +On systems running  systemd-journald , you can use  lnav  as the +pager: + + ▌$ journalctl | lnav  + +or in follow mode: + + ▌$ journalctl -f | lnav  + +Since  journalctl 's default output format omits the year, if you are +viewing logs which span multiple years you will need to change the +output format to include the year, otherwise  lnav  gets confused: + + ▌$ journalctl -o short-iso | lnav  + +It is also possible to use  journalctl 's json output format and  lnav +will make use of additional fields such as PRIORITY and _SYSTEMD_UNIT: + + ▌$ journalctl -o json | lnav  + +In case some MESSAGE fields contain special characters such as ANSI +color codes which are considered as unprintable by journalctl, +specifying  journalctl 's  -a  option might be preferable in order to +output those messages still in a non-binary representation: + + ▌$ journalctl -a -o json | lnav  + +If using systemd v236 or newer, the output fields can be limited to +the ones actually recognized by  lnav  for increased efficiency: + + ▌$ journalctl -o json --output-fields=MESSAGE,PRIORITY,_PID,SYSLOG_IDENTIFIER,_SYSTEMD_UNIT 
| lnav  + +If your system has been running for a long time, for increased +efficiency you may want to limit the number of log lines fed into  lnav +, e.g. via  journalctl 's  -n  or  --since=...  options. + +In case of a persistent journal, you may want to limit the number of +log lines fed into  lnav  via  journalctl 's  -b  option. + +Support + +Please file issues on this repository or use the discussions section. +The following alternatives are also available: + + • support@lnav.org[1] + • Discord[2] + • Google Groups[3] + + ▌[1] - mailto:support@lnav.org + ▌[2] - https://discord.gg/erBPnKwz7R + ▌[3] - https://groups.google.com/g/lnav + +Links + + • Main Site[1] + • Documentation[2] on Read the Docs + • Internal Architecture[3] + + ▌[1] - https://lnav.org + ▌[2] - https://docs.lnav.org + ▌[3] - file://{top_srcdir}/ARCHITECTURE.md + +Contributing + + • Become a Sponsor on GitHub[1] + + ▌[1] - https://github.com/sponsors/tstack + +Building From Source + +Prerequisites + +The following software packages are required to build lnav: + + • gcc/clang - A C++14-compatible compiler. + • libpcre - The Perl Compatible Regular Expression + (PCRE) library. + • sqlite - The SQLite database engine. Version 3.9.0 + or higher is required. + • ncurses - The ncurses text UI library. + • readline - The readline line editing library. + • zlib - The zlib compression library. + • bz2 - The bzip2 compression library. + • libcurl - The cURL library for downloading files + from URLs. Version 7.23.0 or higher is required. + • libarchive - The libarchive library for opening archive + files, like zip/tgz. + • wireshark - The 'tshark' program is used to interpret + pcap files. + +Build + +Lnav follows the usual GNU style for configuring and installing +software: + +Run  ./autogen.sh  if compiling from a cloned repository. + + ▌$ ./configure  + ▌$ make  + ▌$ sudo make install  + +See Also + +Angle-grinder[1] is a tool to slice and dice log files on the +command-line. 
If you're familiar with the SumoLogic query language, +you might find this tool more comfortable to work with. + + ▌[1] - https://github.com/rcoh/angle-grinder + diff --git a/test/test_meta.sh b/test/test_meta.sh index 8f4b29e7..caab4f92 100644 --- a/test/test_meta.sh +++ b/test/test_meta.sh @@ -101,3 +101,10 @@ run_cap_test ${lnav_test} -n \ run_cap_test ${lnav_test} -d /tmp/lnav.err -n \ -I ${test_dir} \ ${test_dir}/logfile_xml_msg.0 + +run_cap_test ${lnav_test} -n -f- \ + ${test_dir}/logfile_access_log.0 <<'EOF' +:comment Hello, **World**! + +This is `markdown` now! +EOF diff --git a/test/test_sql.sh b/test/test_sql.sh index 3928cabe..528c0fda 100644 --- a/test/test_sql.sh +++ b/test/test_sql.sh @@ -727,7 +727,7 @@ EOF schema_dump() { - ${lnav_test} -n -c ';.schema' ${test_dir}/logfile_access_log.0 | head -n19 + ${lnav_test} -n -c ';.schema' ${test_dir}/logfile_access_log.0 | head -n21 } run_test schema_dump @@ -735,12 +735,14 @@ run_test schema_dump check_output "schema view is not working" <