From d7396f518cc9228e526ea706b718e75c9f4bd601 Mon Sep 17 00:00:00 2001 From: "Serge S. Gulin" <gulin.serge@gmail.com> Date: Sun, 5 May 2024 22:08:29 +0300 Subject: [PATCH] Size parsing is added, sample to parse bundle without unicode --- out/all.js | 4 ++-- out/all.min.js | 4 ++-- out/all.min.js.gv | 4 ++-- out/all.min.js.tree | 4 ++-- out/all.no_comments.js | 4 ++-- out/all.no_comments.js.gv | 4 ++-- out/all.no_comments.js.tree | 4 ++-- trace_gv.sh | 22 ++++++++++++++----- tree.py | 44 +++++++++++++++++++++++-------------- 9 files changed, 59 insertions(+), 35 deletions(-) diff --git a/out/all.js b/out/all.js index 901ec8f..8f9f962 100644 --- a/out/all.js +++ b/out/all.js @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3f6f6da82aa85903803c737a769622403b662d80910eb39d88e2f9d7d9fe22b -size 7135157 +oid sha256:18131d7755e7b5880b05497132c62adedca9b5d9f9adcc0f3cb87731dd3bbfcc +size 2296048 diff --git a/out/all.min.js b/out/all.min.js index 2ca4965..e8852bd 100644 --- a/out/all.min.js +++ b/out/all.min.js @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1029cea84998d4f293ce5ab0f1fda82e33c887e5e72ab37e21bd33d005cc2345 -size 7127540 +oid sha256:45026a304bdc2678e919c87e9ae8f8bb28460a63f36818495b402fa99b92b99e +size 2207253 diff --git a/out/all.min.js.gv b/out/all.min.js.gv index 6fdbb2a..30554e5 100644 --- a/out/all.min.js.gv +++ b/out/all.min.js.gv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8e5e931d7204556168723acd08f941a091773fd89db10189ef209b87fe87f68d -size 51462260 +oid sha256:3ccab2d642b7ea581647af2f0742a244836e693e2a4bfbdac2ce53c20157d500 +size 39663077 diff --git a/out/all.min.js.tree b/out/all.min.js.tree index 5072bb3..e9d2969 100644 --- a/out/all.min.js.tree +++ b/out/all.min.js.tree @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db9f17162bba8cd156a75d38a275c55c53df663b947aa97cd0e4ff0c0d8b8a0c -size 12248718 +oid sha256:d8b1de60a7e4866223094f5e26f3f8c15860c83f89a46696eb9e51ba4d2c5e1e +size 10215732 diff --git a/out/all.no_comments.js b/out/all.no_comments.js index b88e95e..8844c40 100644 --- a/out/all.no_comments.js +++ b/out/all.no_comments.js @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:711723b76bd80cbd239997b94956beeefc39c1d809c6a24e8084e61dc793c2c6 -size 6993759 +oid sha256:0d4944e9bcc28b0bef853d62dfd760a1d544b27c9befd21ca273dc2bdec62ddc +size 2151862 diff --git a/out/all.no_comments.js.gv b/out/all.no_comments.js.gv index 504fba5..f4c3352 100644 --- a/out/all.no_comments.js.gv +++ b/out/all.no_comments.js.gv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ee08dfb8b632596d85696bb6042e662ec154d96594155095f9fa08ca77650a41 -size 3978735 +oid sha256:7ee2917816ba2d05144da592b310fbc18374e5750fb7cd52338a14265891e9d4 +size 3377112 diff --git a/out/all.no_comments.js.tree b/out/all.no_comments.js.tree index 69fcbb2..330676d 100644 --- a/out/all.no_comments.js.tree +++ b/out/all.no_comments.js.tree @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c5c687ee4de3122eaf01bb978ab41757209abf639d102d7218b2f275bf9aca81 -size 15111090 +oid sha256:b463f24bcf45fa1b3f79b724c65afc772d56094c5066118d23be63e4b375fdfc +size 13096172 diff --git a/trace_gv.sh b/trace_gv.sh index 8c87d54..1c410fa 100755 --- a/trace_gv.sh +++ b/trace_gv.sh @@ -23,9 +23,19 @@ generate_ast_and_tree () { sed -i -r 's/\[source_file.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" echo "Remove [free_call... section, we do not use it" sed -i -r 's/\[free_call.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" - echo "Remove [length... section, we do not use it" - # TODO: use this section for graph weight - sed -i -r 's/\[length.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" + echo "Remove [added_block... section, we do not use it" + sed -i -r 's/\[added_block.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" + echo "Remove [quoted... section, we do not use it" + sed -i -r 's/\[quoted.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" + echo "Remove [is_parenthesized... section, we do not use it" + sed -i -r 's/\[is_parenthesized.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" + echo "Remove [incrdecr... section, we do not use it" + sed -i -r 's/\[incrdecr.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" + echo "Remove [direct_eval... section, we do not use it" + sed -i -r 's/\[direct_eval.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" + echo "Reformat [length... section, leave only number and cut the beginning space" + sed -r 's/ \[length: ([0-9]+)\]/\1/' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" > "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}.sed" && \ + mv -f "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}.sed" "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" echo "String literals may contains newline symbols, we need escape it" # TODO: better to support such case in `tree.py` because multline stringliterals could be present and valid @@ -89,10 +99,11 @@ main () { echo "Source: ${WORKING_DIR}/HelloJS.jsexe/all.no_comments.js" cp "${WORKING_DIR}/HelloJS.jsexe/all.no_comments.js" ./out + # "h\$mainZCZCMainzimain" generate_ast_and_tree \ "${WORKING_DIR}" \ "all.no_comments.js" \ - "h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1" \ + "h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired" \ "h\$o h\$sti h\$stc h\$stl" echo "Prevent inlining for easier removal unicode array" @@ -112,10 +123,11 @@ main () { echo "Source: ${WORKING_DIR}/HelloJS.jsexe/all.min.js" cp "${WORKING_DIR}/HelloJS.jsexe/all.min.js" ./out + # "\$h\$mainZCZCMainzimain\$\$" generate_ast_and_tree \ "${WORKING_DIR}" \ "all.min.js" \ - "\$h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1\$\$" \ + "\$h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired\$\$" \ "\$h\$o\$\$ \$h\$sti\$\$ \$h\$stc\$\$ \$h\$stl\$\$" } diff --git a/tree.py b/tree.py index ee33099..763b8e6 100755 --- a/tree.py +++ b/tree.py @@ -1,30 +1,33 @@ #!/usr/bin/env python3 -# ./tree.py "\$h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1\$\$" /var/folders/9t/tf_ktyc11ng7njtg804grg4c0000gn/T/tmp.OFC7xXPZ/HelloJS.jsexe/all.min.js.tree +# ./tree.py "h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired" "./out/all.no_comments.js.tree" "h\$o h\$sti h\$stc h\$stl" import math import sys LEVEL_DELIM = 4 -# SEARCH_NEEDLE = 'h$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1' SEARCH_NEEDLE = sys.argv[1] def parse_line_elements(elements): + type = elements[0] + if type == 'EMPTY': + return { + "type": type, + "name": '', + "line_number": int(elements[-1].split(':')[0]), + "line_position": int(elements[-1].split(':')[1]), + "size": 0 + } + parsed_elements = { - "type": elements[0], - "name": (' '.join(elements[1:-1])), - "line_number": int(elements[-1].split(':')[0]), - "line_position": int(elements[-1].split(':')[1]) + "type": type, + "name": (' '.join(elements[1:-2])), + "line_number": int(elements[-2].split(':')[0]), + "line_position": int(elements[-2].split(':')[1]), + "size": int(elements[-1]) } return parsed_elements -def print_graph(parsed_result, parsed_index, key, level=0): - if key in parsed_result: - for index in parsed_result[key]: - for next_key in parsed_index[index]: - print('\t'*level+key+' -> '+next_key) - print_graph(parsed_result, next_key, level+1) - # STG_SPECIAL_FNS = ["h$o", "h$sti", "h$stc", "h$stl"] # STG_SPECIAL_FNS = ["$h$o$$", "$h$sti$$", "$h$stc$$", "$h$stl$$"] STG_SPECIAL_FNS = sys.argv[3].strip().split(' ') @@ -34,6 +37,7 @@ def index_full_path_list(lines): full_path_list = [] last_level = -1 horizontal_mode = None + nodes_size = {} i = 0 while i < len(lines): @@ -64,6 +68,12 @@ def index_full_path_list(lines): last_level = current_level if horizontal_mode_switched or (horizontal_mode == None): + if parsed['type'] != "STRINGLIT" and (current_name.startswith('h$') or current_name.startswith('$h$')): + if current_name in nodes_size: + nodes_size[current_name] = max(parsed['size'], nodes_size[current_name]) + else: + nodes_size[current_name] = parsed['size'] + # Control linking manually if horizontal_mode_switched: full_path_list.append((i, full_path)) @@ -72,7 +82,7 @@ def index_full_path_list(lines): i = i + 1 - return full_path_list + return full_path_list, nodes_size def find_edges(lpos, lres, local_needle, level=0, seen={}): for p in lpos[local_needle]: @@ -94,7 +104,7 @@ def find_edges(lpos, lres, local_needle, level=0, seen={}): find_edges(lpos, lres, input_name, level+1, seen) def format_dot_edge(input, output, line, meta, level): - return input+" -> "+output+" [line="+str(line)+" meta="+meta+" level="+str(level)+"]"+";\n" + return input+" -> "+output+" [line="+str(line)+" meta="+meta+" level="+str(level)+"]"+";" def unique_order_kept(seq): seen = set() @@ -104,7 +114,7 @@ def unique_order_kept(seq): # with open('HelloJS.jsexe/all.min.debug.js.tree', 'r') as f: with open(sys.argv[2], 'r') as f: lines = f.read().splitlines() - full_path_list = index_full_path_list(lines) + full_path_list, nodes_size = index_full_path_list(lines) literals_relations = [[]] * len(lines) literals_positions = {} @@ -119,5 +129,7 @@ with open(sys.argv[2], 'r') as f: literals_positions[a] = [i] print("digraph g {") + for node_name, node_size in nodes_size.items(): + print(node_name+" [size="+str(node_size)+"]") find_edges(literals_positions, literals_relations, SEARCH_NEEDLE) print("}") -- GitLab