From d7396f518cc9228e526ea706b718e75c9f4bd601 Mon Sep 17 00:00:00 2001
From: "Serge S. Gulin" <gulin.serge@gmail.com>
Date: Sun, 5 May 2024 22:08:29 +0300
Subject: [PATCH] Add size parsing; sample now parses a bundle without unicode

---
 out/all.js                  |  4 ++--
 out/all.min.js              |  4 ++--
 out/all.min.js.gv           |  4 ++--
 out/all.min.js.tree         |  4 ++--
 out/all.no_comments.js      |  4 ++--
 out/all.no_comments.js.gv   |  4 ++--
 out/all.no_comments.js.tree |  4 ++--
 trace_gv.sh                 | 22 ++++++++++++++-----
 tree.py                     | 44 +++++++++++++++++++++++--------------
 9 files changed, 59 insertions(+), 35 deletions(-)

diff --git a/out/all.js b/out/all.js
index 901ec8f..8f9f962 100644
--- a/out/all.js
+++ b/out/all.js
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3f6f6da82aa85903803c737a769622403b662d80910eb39d88e2f9d7d9fe22b
-size 7135157
+oid sha256:18131d7755e7b5880b05497132c62adedca9b5d9f9adcc0f3cb87731dd3bbfcc
+size 2296048
diff --git a/out/all.min.js b/out/all.min.js
index 2ca4965..e8852bd 100644
--- a/out/all.min.js
+++ b/out/all.min.js
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1029cea84998d4f293ce5ab0f1fda82e33c887e5e72ab37e21bd33d005cc2345
-size 7127540
+oid sha256:45026a304bdc2678e919c87e9ae8f8bb28460a63f36818495b402fa99b92b99e
+size 2207253
diff --git a/out/all.min.js.gv b/out/all.min.js.gv
index 6fdbb2a..30554e5 100644
--- a/out/all.min.js.gv
+++ b/out/all.min.js.gv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e5e931d7204556168723acd08f941a091773fd89db10189ef209b87fe87f68d
-size 51462260
+oid sha256:3ccab2d642b7ea581647af2f0742a244836e693e2a4bfbdac2ce53c20157d500
+size 39663077
diff --git a/out/all.min.js.tree b/out/all.min.js.tree
index 5072bb3..e9d2969 100644
--- a/out/all.min.js.tree
+++ b/out/all.min.js.tree
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db9f17162bba8cd156a75d38a275c55c53df663b947aa97cd0e4ff0c0d8b8a0c
-size 12248718
+oid sha256:d8b1de60a7e4866223094f5e26f3f8c15860c83f89a46696eb9e51ba4d2c5e1e
+size 10215732
diff --git a/out/all.no_comments.js b/out/all.no_comments.js
index b88e95e..8844c40 100644
--- a/out/all.no_comments.js
+++ b/out/all.no_comments.js
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:711723b76bd80cbd239997b94956beeefc39c1d809c6a24e8084e61dc793c2c6
-size 6993759
+oid sha256:0d4944e9bcc28b0bef853d62dfd760a1d544b27c9befd21ca273dc2bdec62ddc
+size 2151862
diff --git a/out/all.no_comments.js.gv b/out/all.no_comments.js.gv
index 504fba5..f4c3352 100644
--- a/out/all.no_comments.js.gv
+++ b/out/all.no_comments.js.gv
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee08dfb8b632596d85696bb6042e662ec154d96594155095f9fa08ca77650a41
-size 3978735
+oid sha256:7ee2917816ba2d05144da592b310fbc18374e5750fb7cd52338a14265891e9d4
+size 3377112
diff --git a/out/all.no_comments.js.tree b/out/all.no_comments.js.tree
index 69fcbb2..330676d 100644
--- a/out/all.no_comments.js.tree
+++ b/out/all.no_comments.js.tree
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5c687ee4de3122eaf01bb978ab41757209abf639d102d7218b2f275bf9aca81
-size 15111090
+oid sha256:b463f24bcf45fa1b3f79b724c65afc772d56094c5066118d23be63e4b375fdfc
+size 13096172
diff --git a/trace_gv.sh b/trace_gv.sh
index 8c87d54..1c410fa 100755
--- a/trace_gv.sh
+++ b/trace_gv.sh
@@ -23,9 +23,19 @@ generate_ast_and_tree () {
   sed -i -r 's/\[source_file.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
   echo "Remove [free_call... section, we do not use it"
   sed -i -r 's/\[free_call.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
-  echo "Remove [length... section, we do not use it"
-  # TODO: use this section for graph weight
-  sed -i -r 's/\[length.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
+  echo "Remove [added_block... section, we do not use it"
+  sed -i -r 's/\[added_block.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
+  echo "Remove [quoted... section, we do not use it"
+  sed -i -r 's/\[quoted.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
+  echo "Remove [is_parenthesized... section, we do not use it"
+  sed -i -r 's/\[is_parenthesized.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
+  echo "Remove [incrdecr... section, we do not use it"
+  sed -i -r 's/\[incrdecr.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
+  echo "Remove [direct_eval... section, we do not use it"
+  sed -i -r 's/\[direct_eval.*//' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
+  echo "Reformat [length... section, leave only number and cut the beginning space"
+  sed -r 's/ \[length: ([0-9]+)\]/\1/' "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}" > "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}.sed" && \
+    mv -f "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}.sed" "${WORKING_DIR}/HelloJS.jsexe/${AST_JS}"
 
   echo "String literals may contains newline symbols, we need escape it"
   # TODO: better to support such case in `tree.py` because multline stringliterals could be present and valid
@@ -89,10 +99,11 @@ main () {
   echo "Source: ${WORKING_DIR}/HelloJS.jsexe/all.no_comments.js"
   cp "${WORKING_DIR}/HelloJS.jsexe/all.no_comments.js" ./out
 
+  # Alternative needle: "h\$mainZCZCMainzimain"
   generate_ast_and_tree \
     "${WORKING_DIR}" \
     "all.no_comments.js" \
-    "h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1" \
+    "h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired" \
     "h\$o h\$sti h\$stc h\$stl"
 
   echo "Prevent inlining for easier removal unicode array"
@@ -112,10 +123,11 @@ main () {
   echo "Source: ${WORKING_DIR}/HelloJS.jsexe/all.min.js"
   cp "${WORKING_DIR}/HelloJS.jsexe/all.min.js" ./out
 
+  # Alternative needle: "\$h\$mainZCZCMainzimain\$\$"
   generate_ast_and_tree \
     "${WORKING_DIR}" \
     "all.min.js" \
-    "\$h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1\$\$" \
+    "\$h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired\$\$" \
     "\$h\$o\$\$ \$h\$sti\$\$ \$h\$stc\$\$ \$h\$stl\$\$"
 }
 
diff --git a/tree.py b/tree.py
index ee33099..763b8e6 100755
--- a/tree.py
+++ b/tree.py
@@ -1,30 +1,33 @@
 #!/usr/bin/env python3
 
-# ./tree.py "\$h\$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1\$\$" /var/folders/9t/tf_ktyc11ng7njtg804grg4c0000gn/T/tmp.OFC7xXPZ/HelloJS.jsexe/all.min.js.tree
+# ./tree.py "h\$ghczminternalZCGHCziInternalziIOziExceptionzizdfExceptionBlockedIndefinitelyOnMVarzuzdcbacktraceDesired" "./out/all.no_comments.js.tree" "h\$o h\$sti h\$stc h\$stl"
 
 import math
 import sys
 
 LEVEL_DELIM = 4
-# SEARCH_NEEDLE = 'h$ghczminternalZCGHCziInternalziUnicodeziCharziUnicodeDataziGeneralCategoryzilvl_1'
 SEARCH_NEEDLE = sys.argv[1]
 
 def parse_line_elements(elements):
+  type = elements[0]
+  if type == 'EMPTY':
+    return {
+      "type": type,
+      "name": '',
+      "line_number": int(elements[-1].split(':')[0]),
+      "line_position": int(elements[-1].split(':')[1]),
+      "size": 0
+    }
+
   parsed_elements = {
-    "type": elements[0],
-    "name": (' '.join(elements[1:-1])),
-    "line_number": int(elements[-1].split(':')[0]),
-    "line_position": int(elements[-1].split(':')[1])
+    "type": type,
+    "name": (' '.join(elements[1:-2])),
+    "line_number": int(elements[-2].split(':')[0]),
+    "line_position": int(elements[-2].split(':')[1]),
+    "size": int(elements[-1])
   }
   return parsed_elements
 
-def print_graph(parsed_result, parsed_index, key, level=0):
-    if key in parsed_result:
-        for index in parsed_result[key]:
-            for next_key in parsed_index[index]:
-              print('\t'*level+key+' -> '+next_key)
-              print_graph(parsed_result, next_key, level+1)
-
 # STG_SPECIAL_FNS = ["h$o", "h$sti", "h$stc", "h$stl"]
 # STG_SPECIAL_FNS = ["$h$o$$", "$h$sti$$", "$h$stc$$", "$h$stl$$"]
 STG_SPECIAL_FNS = sys.argv[3].strip().split(' ')
@@ -34,6 +37,7 @@ def index_full_path_list(lines):
   full_path_list = []
   last_level = -1
   horizontal_mode = None
+  nodes_size = {}
 
   i = 0
   while i < len(lines):
@@ -64,6 +68,12 @@ def index_full_path_list(lines):
 
     last_level = current_level
     if horizontal_mode_switched or (horizontal_mode == None):
+      if parsed['type'] != "STRINGLIT" and (current_name.startswith('h$') or current_name.startswith('$h$')):
+        if current_name in nodes_size:
+          nodes_size[current_name] = max(parsed['size'], nodes_size[current_name])
+        else:
+          nodes_size[current_name] = parsed['size']
+
       # Control linking manually
       if horizontal_mode_switched:
         full_path_list.append((i, full_path))
@@ -72,7 +82,7 @@ def index_full_path_list(lines):
 
     i = i + 1
 
-  return full_path_list
+  return full_path_list, nodes_size
 
 def find_edges(lpos, lres, local_needle, level=0, seen={}):
   for p in lpos[local_needle]:
@@ -94,7 +104,7 @@ def find_edges(lpos, lres, local_needle, level=0, seen={}):
         find_edges(lpos, lres, input_name, level+1, seen)
 
 def format_dot_edge(input, output, line, meta, level):
-  return input+" -> "+output+" [line="+str(line)+" meta="+meta+" level="+str(level)+"]"+";\n"
+  return input+" -> "+output+" [line="+str(line)+" meta="+meta+" level="+str(level)+"]"+";"
 
 def unique_order_kept(seq):
   seen = set()
@@ -104,7 +114,7 @@ def unique_order_kept(seq):
 # with open('HelloJS.jsexe/all.min.debug.js.tree', 'r') as f:
 with open(sys.argv[2], 'r') as f:
   lines = f.read().splitlines()
-  full_path_list = index_full_path_list(lines)
+  full_path_list, nodes_size = index_full_path_list(lines)
 
   literals_relations = [[]] * len(lines)
   literals_positions = {}
@@ -119,5 +129,7 @@ with open(sys.argv[2], 'r') as f:
         literals_positions[a] = [i]
 
   print("digraph g {")
+  for node_name, node_size in nodes_size.items():
+    print(node_name+" [size="+str(node_size)+"]")
   find_edges(literals_positions, literals_relations, SEARCH_NEEDLE)
   print("}")
-- 
GitLab