aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPřemysl Eric Janouch <p@janouch.name>2022-09-27 17:13:45 +0200
committerPřemysl Eric Janouch <p@janouch.name>2022-09-27 23:27:06 +0200
commitaf2756ee01fa6b1921c6bcb581817e64c30beb48 (patch)
tree879859caaf90ed0d0413fa29378b41148baa84cc
parent688c458095974fcd85b2f92c6b5380edfb564398 (diff)
downloadliberty-af2756ee01fa6b1921c6bcb581817e64c30beb48.tar.gz
liberty-af2756ee01fa6b1921c6bcb581817e64c30beb48.tar.xz
liberty-af2756ee01fa6b1921c6bcb581817e64c30beb48.zip
Add a rudimentary CMake script parser
-rw-r--r--CMakeLists.txt4
-rw-r--r--tools/cmake-dump.awk24
-rw-r--r--tools/cmake-parser.awk250
3 files changed, 278 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fa996bf..af9c910 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,3 +52,7 @@ foreach (name ${tests})
target_link_libraries (test-${name} ${common_libraries})
add_test (NAME test-${name} COMMAND test-${name})
endforeach ()
+
+# Smoke-test the AWK CMake parser by having it parse and dump this very file.
+# awk is run in the C locale.  The NAME/COMMAND signature is used
+# for consistency with the other tests declared above.
+add_test (NAME test-cmake-parser
+	COMMAND env LC_ALL=C awk -f ${PROJECT_SOURCE_DIR}/tools/cmake-parser.awk
+		-f ${PROJECT_SOURCE_DIR}/tools/cmake-dump.awk ${CMAKE_CURRENT_LIST_FILE})
diff --git a/tools/cmake-dump.awk b/tools/cmake-dump.awk
new file mode 100644
index 0000000..d0b68b9
--- /dev/null
+++ b/tools/cmake-dump.awk
@@ -0,0 +1,24 @@
+# cmake-dump.awk: dump parsed CMake scripts as tables
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# Parsed scripts are output in a table, with commands separated using ASCII
+# Record Separators, and arguments using Unit Separators.
+#
+# Example usage: awk -f cmake-parser.awk -f cmake-dump.awk CMakeLists.txt \
+# | sed 'y/\x1F\x1E\t\n/\t\n /' \
+# | sed -n '/^project\t\([^\t]*\).*\tVERSION\t\([^\t]*\).*/{s//\1 \2/p;q;}'
+
+# Pass a string through unchanged, aborting through fatal() if it contains
+# either of the ASCII separators that delimit the output table.
+function sanitize(s) {
+	if (s !~ /[\x1E\x1F]/)
+		return s
+
+	fatal("conflicting ASCII control characters found in source")
+	return s
+}
+
+# For each parsed command, emit its name and arguments joined by Unit
+# Separators, and terminate the row with a Record Separator.
+#
+# The parser fills Args with consecutive indices starting from 1, so walk
+# them numerically: the order of "for (i in Args)" is unspecified in AWK,
+# and could emit the arguments shuffled.
+Command {
+	out = sanitize(Command)
+	for (i = 1; i in Args; i++)
+		out = out "\x1F" sanitize(Args[i])
+	printf "%s\x1E", out
+}
diff --git a/tools/cmake-parser.awk b/tools/cmake-parser.awk
new file mode 100644
index 0000000..7651cd1
--- /dev/null
+++ b/tools/cmake-parser.awk
@@ -0,0 +1,250 @@
+# cmake-parser.awk: rudimentary CMake script parser
+#
+# Copyright (c) 2022, Přemysl Eric Janouch <p@janouch.name>
+# SPDX-License-Identifier: 0BSD
+#
+# Implemented roughly according to the grammar described in cmake-language(7),
+# which is self-conflicting, and not an accurate description.
+#
+# The result of parsing is stored in the case-normalized Command variable,
+# and the Args array. These can be used by subsequent scripts.
+
+# Report a non-fatal problem on standard error, prefixed with the name
+# of the current input file and the line number within it.
+function warning(message,    location) {
+	location = FILENAME ":" FNR
+	print location ": warning: " message > "/dev/stderr"
+}
+
+# Report an unrecoverable problem on standard error, prefixed with the name
+# of the current input file and the line number within it, and terminate
+# the program with exit status 1.
+function fatal(message,    location) {
+	location = FILENAME ":" FNR
+	print location ": fatal error: " message > "/dev/stderr"
+	exit 1
+}
+
+# Assert that a parsing step has succeeded, and pass its result through.
+# Only a numeric zero (or an unset value) counts as a failure; an empty
+# string is accepted, so that functions may successfully return empty values.
+function expect(v) {
+	if (v || v != 0)
+		return v
+
+	fatal("broken expectations at `" $0 "'")
+	return v
+}
+
+# Try to consume the exact string v from the beginning of the remaining input.
+# Returns 1 and advances past it when present, 0 otherwise.
+function literal(v,    n) {
+	n = length(v)
+	if (substr($0, 1, n) == v) {
+		$0 = substr($0, n + 1)
+		return 1
+	}
+	return 0
+}
+
+# Try to consume a match of the extended regular expression re, given as
+# a string, from the beginning of the remaining input.
+# Returns 1 and advances past the match on success, 0 otherwise.
+# RSTART and RLENGTH stay as set by match(), for the caller to inspect.
+function regexp(re) {
+	# Group the expression, so that the anchor applies to all
+	# of its top-level alternatives, rather than just the first one.
+	if (!match($0, "^(" re ")"))
+		return 0
+	$0 = substr($0, RLENGTH + 1)
+	return 1
+}
+
+# Skip a run of spaces and tabs at the beginning of the remaining input.
+# Returns 1 if anything was skipped, 0 otherwise.
+function space(    skipped) {
+	skipped = regexp("[ \t]+")
+	return skipped
+}
+
+# Read the contents of a bracket argument or a bracket comment, starting
+# right after the opening bracket.  The contents may span several lines.
+# len is the number of equals signs within the opening bracket;
+# the closing bracket must contain just as many.
+# Returns the contents, leaving the remaining input right after
+# the closing bracket.  Running out of input is a fatal error.
+function unbracket(len,    v, closing, i, start, first) {
+	# Searching for a literal closing bracket avoids regular expression
+	# interval expressions ("={n}"), which not all AWKs implement.
+	closing = "]"
+	for (i = 0; i < len; i++)
+		closing = closing "="
+	closing = closing "]"
+
+	first = 1
+	do {
+		if ((start = index($0, closing))) {
+			v = v substr($0, 1, start - 1)
+			$0 = substr($0, start + length(closing))
+			return v
+		}
+
+		# cmake-language(7): a newline immediately following
+		# the opening bracket is not part of the contents.
+		if (!first || $0 != "")
+			v = v $0 RS
+		first = 0
+	} while (getline > 0)
+	fatal("unterminated bracket")
+}
+
+# Try to skip a bracket comment at the beginning of the remaining input.
+# Returns 1 when a comment has been skipped, 0 when there is none.
+function bracket_comment(    equals) {
+	if (match($0, /^#\[=*\[/)) {
+		# The "#" and both "[" account for three characters of the match,
+		# the rest of them are equals signs.
+		equals = RLENGTH - 3
+		$0 = substr($0, RSTART + RLENGTH)
+		unbracket(equals)
+		return 1
+	}
+	return 0
+}
+
+# Skip any whitespace and comments at the beginning of the remaining input.
+# Returns 1 if nothing else remains on the line, 0 otherwise.
+function line_ending() {
+	while (space() || bracket_comment()) {}
+	if (/^#/)
+		$0 = ""
+
+	# Compare against the empty string explicitly: a mere "!$0" is also
+	# true for leftovers that look like a numeric zero, such as "0".
+	return $0 == ""
+}
+
+# ------------------------------------------------------------------------------
+
+# While elementary expansion of previously set variables is implementable,
+# it doesn't seem to be worth the effort.
+# Check an argument for variable references, which this parser cannot expand.
+# A warning is issued for the first unescaped reference found.
+# The argument is always returned unchanged.
+function expand(s,    v) {
+	v = s
+	# "(ENV|CACHE)?" is used in place of "(|ENV|CACHE)", because POSIX
+	# leaves empty alternatives in extended regular expressions undefined.
+	while (match(v, /\\*[$](ENV|CACHE)?[{]/)) {
+		# The match starts with any backslashes preceding the dollar sign,
+		# so an odd index of the latter means an even number of them,
+		# i.e., the dollar sign itself is not escaped.
+		if (index(substr(v, RSTART), "$") % 2 != 0) {
+			warning("variable expansion is not supported: " s)
+			return s
+		}
+		v = substr(v, RSTART + RLENGTH)
+	}
+	return s
+}
+
+# Try to parse a backslash escape at the beginning of the remaining input.
+# Returns 0 when the input doesn't start with a backslash, otherwise
+# the character that the escape stands for.  A backslash at the very end
+# of a line reads in the following line, and yields an empty string.
+# Escaped letters and digits other than t, r, n are a fatal error,
+# and so is a backslash at the very end of the input.
+function escape_sequence(    v) {
+	if (!literal("\\"))
+		return 0
+
+	if (literal("t")) return "\t"
+	if (literal("r")) return "\r"
+	if (literal("n")) return "\n"
+
+	# escape_semicolon isn't treated specially here.
+	if (regexp("[A-Za-z0-9]"))
+		fatal("unsupported escape sequence")
+
+	# Test for emptiness explicitly: a plain "if ($0)" is false
+	# for a remainder that looks like a numeric zero, such as " 0",
+	# which would then get mistaken for the end of the line and discarded.
+	if ($0 != "") {
+		v = substr($0, 1, 1)
+		$0 = substr($0, 2)
+		return v
+	}
+	if (getline > 0)
+		return ""
+	fatal("premature end of file")
+}
+
+# Try to parse a quoted argument at the beginning of the remaining input.
+# The argument may span several lines.
+# Returns 0 when the input doesn't start with a double quote, otherwise
+# the contents of the argument, which may be an empty string.
+# Escaped backslashes and dollar signs keep their backslash, and escaped
+# semicolons are prefixed with two of them, for later reprocessing.
+function quoted_argument(    v, unescaped) {
+	if (!literal("\""))
+		return 0
+
+	v = ""
+	while (!literal("\"")) {
+		# Test for emptiness explicitly: "!$0" is also true for lines
+		# that look like a numeric zero, which would then get skipped.
+		if ($0 == "") {
+			if (getline <= 0)
+				fatal("premature end of file")
+			v = v RS
+		} else if ((unescaped = escape_sequence())) {
+			if (unescaped == "\\" || unescaped == "$")
+				v = v "\\"
+			else if (unescaped == ";")
+				v = v "\\\\"
+			v = v unescaped
+		} else if (unescaped == "") {
+			# quoted_continuation
+		} else {
+			v = v substr($0, 1, 1)
+			$0 = substr($0, 2)
+		}
+	}
+	return v
+}
+
+# Try to parse an unquoted argument at the beginning of the remaining input.
+# Returns what has been collected, which is an unset value when the input
+# doesn't start with an unquoted argument.  Escaped backslashes, dollar signs
+# and semicolons keep their backslash.
+function unquoted_argument(    arg, ch) {
+	for (;;) {
+		# Runs of ordinary characters are taken over in bulk.
+		if (match($0, /^[^[:space:]()#"\\]+/)) {
+			arg = arg substr($0, 1, RLENGTH)
+			$0 = substr($0, RLENGTH + 1)
+			continue
+		}
+
+		ch = escape_sequence()
+		if (ch) {
+			if (ch == "\\" || ch == "$" || ch == ";")
+				arg = arg "\\"
+			arg = arg ch
+			continue
+		}
+
+		# An empty string signals a line continuation,
+		# which only quoted arguments may contain.
+		if (ch == "")
+			fatal("unexpected backslash in an unquoted argument")
+		else
+			return arg  # unquoted_legacy is not supported.
+	}
+}
+
+# Note that we keep and reprocess some escape sequences in here.
+#
+# Try to parse one argument at the beginning of the remaining input,
+# appending what it produces to Args, and advancing N accordingly.
+# Returns 1 on success, 0 if no argument could be parsed.
+function argument( arg, expanded, v) {
+ # Bracket arguments are stored as they are.
+ if (regexp("\\[=*\\[")) {
+ # The match consists of two brackets with equals signs in between.
+ Args[++N] = unbracket(RLENGTH - 2)
+ return 1
+ }
+ # An empty quoted argument is still an argument, hence the second test.
+ if ((arg = quoted_argument()) || arg == "") {
+ expanded = expand(arg)
+ # Strip the backslash off each escape sequence that has been kept.
+ while (match(expanded, /\\./)) {
+ v = v substr(expanded, 1, RSTART - 1) \
+ substr(expanded, RSTART + 1, 1)
+ expanded = substr(expanded, RSTART + RLENGTH)
+ }
+ Args[++N] = v expanded
+ return 1
+ }
+ # Unquoted arguments are additionally split on unescaped semicolons,
+ # so they may produce any number of Args entries; empty ones are skipped.
+ if ((arg = unquoted_argument())) {
+ expanded = expand(arg)
+ while (expanded) {
+ if (expanded ~ /^;/) {
+ # A list separator: finish the current element.
+ if (v)
+ Args[++N] = v
+ v = ""
+ expanded = substr(expanded, 2)
+ } else if (expanded ~ /^\\./) {
+ # A kept escape sequence: take the character without the backslash.
+ v = v substr(expanded, 2, 1)
+ expanded = substr(expanded, 3)
+ } else {
+ v = v substr(expanded, 1, 1)
+ expanded = substr(expanded, 2)
+ }
+ }
+ if (v)
+ Args[++N] = v
+ return 1
+ }
+ return 0
+}
+
+# ------------------------------------------------------------------------------
+
+# Try to parse an identifier at the beginning of the remaining input.
+# Returns the identifier and advances past it, or returns 0 if there is none.
+function identifier(    name) {
+	if (match($0, /^[A-Za-z_][A-Za-z0-9_]*/)) {
+		name = substr($0, 1, RLENGTH)
+		$0 = substr($0, RLENGTH + 1)
+		return name
+	}
+	return 0
+}
+
+# Try to skip one unit of separation between arguments: a run of spaces
+# and tabs, a bracket comment, or the rest of the current line, in which
+# case the next line is read in.  Returns 1 if anything has been skipped,
+# 0 otherwise.  Running out of input is a fatal error.
+function separation() {
+	if (space())
+		return 1
+	if (bracket_comment())
+		return 1
+
+	if (line_ending()) {
+		if (getline > 0)
+			return 1
+		fatal("premature end of file")
+	}
+	return 0
+}
+
+# Try to parse a command invocation, which may span several lines.
+# Returns 0, leaving Command false, when the remaining input doesn't start
+# with an identifier.  Otherwise returns 1, with Command set to the lowercased
+# command name, Args to its arguments indexed from 1, and N to their count.
+# Nested parentheses are stored as standalone "(" and ")" arguments.
+function command_invocation(    level) {
+	while (space()) {}
+	Command = identifier()
+	if (!Command)
+		return 0
+	Command = tolower(Command)
+	while (space()) {}
+
+	# Dispose of the arguments of the previous command.
+	for (N in Args)
+		delete Args[N]
+	N = 0
+
+	expect(literal("("))
+	for (;;) {
+		while (separation()) {}
+
+		if (literal("(")) {
+			level++
+			Args[++N] = "("
+		} else if (literal(")")) {
+			# The outermost closing parenthesis ends the command.
+			if (level == 0)
+				return 1
+			level--
+			Args[++N] = ")"
+		} else {
+			expect(argument())
+			# Arguments need to be separated, unless a parenthesis follows.
+			if (!/^[()]/)
+				expect(separation())
+		}
+	}
+}
+
+# Parse a command invocation from each input record, if it contains one.
+# Whatever remains of the line afterwards may only be whitespace or comments.
+# Scripts loaded after this one see the result in Command and Args.
+{
+ command_invocation()
+ expect(line_ending())
+}