Skip to content
Snippets Groups Projects
Commit d7396f51 authored by Serge S. Gulin's avatar Serge S. Gulin :construction_worker:
Browse files

Size parsing is added, sample to parse bundle without unicode

parent 8f28635a
No related branches found
No related tags found
No related merge requests found
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
No preview for this file type
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
Source diff could not be displayed: it is stored in LFS. Options to address this: view the blob.
No preview for this file type
......@@ -23,9 +23,19 @@ generate_ast_and_tree () {
sed -i -r 's/\[source_file.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [free_call... section, we do not use it"
sed -i -r 's/\[free_call.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [length... section, we do not use it"
# TODO: use this section for graph weight
sed -i -r 's/\[length.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [added_block... section, we do not use it"
sed -i -r 's/\[added_block.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [quoted... section, we do not use it"
sed -i -r 's/\[quoted.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [is_parenthesized... section, we do not use it"
sed -i -r 's/\[is_parenthesized.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [incrdecr... section, we do not use it"
sed -i -r 's/\[incrdecr.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Remove [direct_eval... section, we do not use it"
sed -i -r 's/\[direct_eval.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "Reformat [length... section, leave only number and cut the beginning space"
sed -r 's/ \[length: ([0-9]+)\]/\1/' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" > "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}.sed" && \
mv -f "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}.sed" "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
echo "String literals may contains newline symbols, we need escape it"
# TODO: better to support such a case in `tree.py`, because multiline string literals can be present and valid
......@@ -89,10 +99,11 @@ main () {
echo "Source: ${WORKING_DIR}/HelloJS.jsexe/all.no_comments.js"
cp "${WORKING_DIR}/HelloJS.jsexe/all.no_comments.js" ./out
# "h\$mainZCZCMainzimain"
generate_ast_and_tree \
"${WORKING_DIR}" \
"all.no_comments.js" \
"h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1" \
"h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired" \
"h\$o h\$sti h\$stc h\$stl"
echo "Prevent inlining for easier removal unicode array"
......@@ -112,10 +123,11 @@ main () {
echo "Source: ${WORKING_DIR}/HelloJS.jsexe/all.min.js"
cp "${WORKING_DIR}/HelloJS.jsexe/all.min.js" ./out
# "\$h\$mainZCZCMainzimain\$\$"
generate_ast_and_tree \
"${WORKING_DIR}" \
"all.min.js" \
"\$h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1\$\$" \
"\$h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired\$\$" \
"\$h\$o\$\$ \$h\$sti\$\$ \$h\$stc\$\$ \$h\$stl\$\$"
}
......
#!/usr/bin/env python3
# ./tree.py "\$h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1\$\$" /var/folders/9t/tf_ktyc11ng7njtg804grg4c0000gn/T/tmp.OFC7xXPZ/HelloJS.jsexe/all.min.js.tree
# ./tree.py "h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired" "./out/all.no_comments.js.tree" "h\$o h\$sti h\$stc h\$stl"
import math
import sys
# NOTE(review): presumably the number of indentation characters that make up
# one nesting level in the tree dump -- confirm against the parsing loop.
LEVEL_DELIM = 4
# SEARCH_NEEDLE = 'h$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1'
# Identifier the edge search starts from; taken from the first command-line
# argument (see the usage examples at the top of the file).
SEARCH_NEEDLE = sys.argv[1]
def parse_line_elements(elements):
    """Turn the whitespace-split tokens of one tree line into a node record.

    Expected token layouts:

    * ``EMPTY <line>:<position>`` -- an empty node; it carries no size
      token, so its size is reported as 0.
    * ``<TYPE> <name tokens...> <line>:<position> <size>`` -- any other
      node; every token between the type and the position is joined with
      single spaces to form the name.

    Args:
        elements: list of string tokens for a single line.

    Returns:
        A dict with the keys ``type``, ``name``, ``line_number``,
        ``line_position`` and ``size``.

    Raises:
        IndexError: if there are too few tokens for the layout.
        ValueError: if the position or size tokens are not integers.
    """
    node_type = elements[0]

    if node_type == 'EMPTY':
        # EMPTY nodes have no trailing size token: the position is last.
        position_parts = elements[-1].split(':')
        return {
            "type": node_type,
            "name": '',
            "line_number": int(position_parts[0]),
            "line_position": int(position_parts[1]),
            "size": 0,
        }

    # For every other node the size is the last token and the
    # "<line>:<position>" pair is the one right before it.
    position_parts = elements[-2].split(':')
    return {
        "type": node_type,
        "name": ' '.join(elements[1:-2]),
        "line_number": int(position_parts[0]),
        "line_position": int(position_parts[1]),
        "size": int(elements[-1]),
    }
def print_graph(parsed_result, parsed_index, key, level=0, _active=None):
    """Print the edges reachable from ``key`` as a tab-indented tree.

    For every index listed under ``parsed_result[key]`` and every name found
    in ``parsed_index[index]``, one ``key -> name`` line is printed, indented
    with ``level`` tab characters, and the walk continues from ``name`` one
    level deeper.

    Args:
        parsed_result: mapping from a name to an iterable of indices into
            ``parsed_index``.
        parsed_index: container indexable by those indices, yielding
            iterables of successor names.
        key: name to start from; nothing is printed if it is not present
            in ``parsed_result``.
        level: current depth, used only for indentation.
        _active: internal -- names on the current recursion path, used to
            stop when the graph loops back on itself. Callers should not
            pass it.
    """
    if key not in parsed_result:
        return

    active = set() if _active is None else _active
    if key in active:
        # This name is already being expanded further up the call stack;
        # descending again would recurse forever on a cyclic graph.
        return

    active.add(key)
    for index in parsed_result[key]:
        for next_key in parsed_index[index]:
            print('\t' * level + key + ' -> ' + next_key)
            # The lookup table has to be forwarded as well, otherwise the
            # arguments shift by one position and the walk stops here.
            print_graph(parsed_result, parsed_index, next_key, level + 1, active)
    active.discard(key)
# Example values for the non-minified and the minified bundle respectively:
# STG_SPECIAL_FNS = ["h$o", "h$sti", "h$stc", "h$stl"]
# STG_SPECIAL_FNS = ["$h$o$$", "$h$sti$$", "$h$stc$$", "$h$stl$$"]
# Names taken from the third command-line argument, given as one string with
# the names separated by single spaces.
# NOTE(review): raises IndexError when the script is started with fewer than
# three arguments -- confirm that every caller passes the list.
STG_SPECIAL_FNS = sys.argv[3].strip().split(' ')
......@@ -34,6 +37,7 @@ def index_full_path_list(lines):
full_path_list = []
last_level = -1
horizontal_mode = None
nodes_size = {}
i = 0
while i < len(lines):
......@@ -64,6 +68,12 @@ def index_full_path_list(lines):
last_level = current_level
if horizontal_mode_switched or (horizontal_mode == None):
if parsed['type'] != "STRINGLIT" and (current_name.startswith('h$') or current_name.startswith('$h$')):
if current_name in nodes_size:
nodes_size[current_name] = max(parsed['size'], nodes_size[current_name])
else:
nodes_size[current_name] = parsed['size']
# Control linking manually
if horizontal_mode_switched:
full_path_list.append((i, full_path))
......@@ -72,7 +82,7 @@ def index_full_path_list(lines):
i = i + 1
return full_path_list
return full_path_list, nodes_size
def find_edges(lpos, lres, local_needle, level=0, seen={}):
for p in lpos[local_needle]:
......@@ -94,7 +104,7 @@ def find_edges(lpos, lres, local_needle, level=0, seen={}):
find_edges(lpos, lres, input_name, level+1, seen)
def format_dot_edge(input, output, line, meta, level):
    """Format one edge of the output graph as a DOT statement.

    Args:
        input: name of the source node (string).
        output: name of the target node (string).
        line: line number stored in the ``line`` attribute.
        meta: free-form string stored in the ``meta`` attribute.
        level: depth stored in the ``level`` attribute.

    Returns:
        ``"<input> -> <output> [line=<line> meta=<meta> level=<level>];"``
        with no trailing newline, so the caller decides how lines are
        terminated.
    """
    attributes = (
        "[line=" + str(line)
        + " meta=" + meta
        + " level=" + str(level) + "]"
    )
    return input + " -> " + output + " " + attributes + ";"
def unique_order_kept(seq):
seen = set()
......@@ -104,7 +114,7 @@ def unique_order_kept(seq):
# with open('HelloJS.jsexe/all.min.debug.js.tree', 'r') as f:
with open(sys.argv[2], 'r') as f:
lines = f.read().splitlines()
full_path_list = index_full_path_list(lines)
full_path_list, nodes_size = index_full_path_list(lines)
literals_relations = [[]] * len(lines)
literals_positions = {}
......@@ -119,5 +129,7 @@ with open(sys.argv[2], 'r') as f:
literals_positions[a] = [i]
print("digraph g {")
for node_name, node_size in nodes_size.items():
print(node_name+" [size="+str(node_size)+"]")
find_edges(literals_positions, literals_relations, SEARCH_NEEDLE)
print("}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment