The cluster model is now written to a pickle file, and you can use it with main.py
to print all function definitions of a C file: python main.py <src.c>
This commit is contained in:
parent
c4f6468cb4
commit
ee3243f1e6
|
@ -2,7 +2,7 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"execution_count": 2,
|
||||
"id": "9bbbd1e6-64a8-4301-81ea-a911315f58f2",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
|
@ -13,7 +13,7 @@
|
|||
"\n",
|
||||
"fd = open(\"onion_crypto.c\", \"r\")\n",
|
||||
"src = fd.read()\n",
|
||||
"\n",
|
||||
"fd.close()\n",
|
||||
"\n",
|
||||
"# remove comments\n",
|
||||
"# this function was taken from:\n",
|
||||
|
@ -35,10 +35,10 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"# remove preprocessor macros\n",
|
||||
"t = False\n",
|
||||
"def preproc_remover(text):\n",
|
||||
" t = False\n",
|
||||
" ret = \"\"\n",
|
||||
" for l in src.split('\\n'):\n",
|
||||
" for l in text.split('\\n'):\n",
|
||||
" if l.lstrip() == '':\n",
|
||||
" continue\n",
|
||||
" if not '#' == l.lstrip()[0] and False == t:\n",
|
||||
|
@ -63,7 +63,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 111,
|
||||
"execution_count": 4,
|
||||
"id": "b72627a9-2bd9-432f-aa97-3c4b1c035efa",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
|
@ -145,7 +145,7 @@
|
|||
"0 0 2 0 memcpy(keys->my_identity, router_get_my_id_..."
|
||||
]
|
||||
},
|
||||
"execution_count": 111,
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -172,7 +172,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 116,
|
||||
"execution_count": 5,
|
||||
"id": "add32db0-81c5-4914-8050-2da4afa48340",
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
|
@ -260,7 +260,7 @@
|
|||
"0 3 int onion_skin_client_handshake(int type, ..."
|
||||
]
|
||||
},
|
||||
"execution_count": 116,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -269,6 +269,7 @@
|
|||
"from sklearn.cluster import KMeans\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"import pickle\n",
|
||||
"\n",
|
||||
"data = c_expr_df\n",
|
||||
"data = data.drop(['c_expr'], axis=1)\n",
|
||||
|
@ -281,8 +282,12 @@
|
|||
" ])\n",
|
||||
"\n",
|
||||
"data['cluster'] = pipe.fit_predict(data)\n",
|
||||
"data['c_expr'] = c_expr_df['c_expr'].apply(lambda x: x.lstrip())\n",
|
||||
"\n",
|
||||
"outFd = open('cluster.pkl', 'wb')\n",
|
||||
"pickle.dump(pipe, outFd)\n",
|
||||
"outFd.close()\n",
|
||||
"\n",
|
||||
"data['c_expr'] = c_expr_df['c_expr'].apply(lambda x: x.lstrip())\n",
|
||||
"data.loc[data['cluster'] == 3, ['cluster', 'c_expr']]"
|
||||
]
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,88 @@
|
|||
import sys
|
||||
import pickle
|
||||
import re
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def comment_remover(text):
    """Strip C/C++ comments from *text* while leaving string literals intact.

    Adapted from:
    https://stackoverflow.com/questions/241327/remove-c-and-c-comments-using-python
    """
    pattern = re.compile(
        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
        re.DOTALL | re.MULTILINE,
    )

    def _swap(match):
        token = match.group(0)
        # Comments collapse to a single space (not an empty string, so tokens
        # separated only by a comment stay separated); literals pass through.
        return " " if token.startswith('/') else token

    return pattern.sub(_swap, text)
|
||||
|
||||
def preproc_remover(text):
    """Remove preprocessor directives (and blank lines) from C source.

    A directive is any non-blank line whose first non-whitespace character is
    '#'.  A directive ending in a backslash continues onto the next line, so
    continuation lines are dropped too.  All other lines are kept verbatim,
    each followed by a newline.

    Fixes over the original: continuation detection used ``'\\' == l[-1]``,
    which misses a backslash followed by trailing whitespace and relies on raw
    indexing; the state flag is now explicit and only ever set while inside a
    directive.
    """
    kept = []
    in_directive = False  # True while consuming a backslash-continued directive
    for line in text.split('\n'):
        stripped = line.lstrip()
        if stripped == '':
            # Blank lines are dropped regardless of context.
            continue
        if in_directive or stripped.startswith('#'):
            # Part of a directive: drop it, and keep dropping while it
            # announces a continuation with a trailing backslash.
            in_directive = line.rstrip().endswith('\\')
        else:
            kept.append(line)
    return ''.join(l + '\n' for l in kept)
|
||||
|
||||
def main():
    """Classify the C expressions of the file named in ``sys.argv[1]``.

    Reads the source, strips comments and preprocessor directives, splits the
    remainder into expressions on ``{ } ; :``, featurizes each expression, and
    prints every expression the pre-trained pipeline (``cluster.pkl``) assigns
    to cluster 3 — identified in the training notebook as function definitions.
    """
    # 'with' guarantees the file is closed even if read() raises.
    with open(sys.argv[1], 'r') as fd:
        src = fd.read()

    src = comment_remover(src)
    src = preproc_remover(src)
    src = src.replace('\n', ' ')

    # Split on the delimiters (kept via the capturing group) and glue each
    # delimiter back onto the expression that precedes it.  Raw string avoids
    # the invalid escape sequences of the original "([\{\};:])".
    c_expr = []
    for piece in re.split(r"([{};:])", src):
        if piece not in ('{', '}', ';', ':'):
            c_expr.append(piece)
        else:
            c_expr[-1] += piece

    # Build one feature row per expression; a single concat at the end avoids
    # the O(n^2) cost of concatenating inside the loop.
    rows = [
        pd.Series({
            '{': e.count('{'),
            '(': e.count('('),
            # counts only the spaces before the first parenthesis
            'space_before_(': e.split("(")[0].strip().count(' '),
            'c_expr': e,
        }).to_frame().T
        for e in c_expr
    ]
    c_expr_df = pd.concat(rows)

    # An expression with no '(' cannot have spaces before one; zero it out.
    c_expr_df['space_before_('] = c_expr_df.apply(
        lambda row: 0 if 0 == row['('] else row['space_before_('], axis=1)

    data = c_expr_df.drop(['c_expr'], axis=1)
    data = data.drop(['('], axis=1)
    data['sb(>0'] = data.apply(
        lambda row: 0 if 0 == row['space_before_('] else 1, axis=1)

    # NOTE(review): unpickling executes arbitrary code — only ship/load
    # cluster.pkl from a trusted source.
    with open('cluster.pkl', "rb") as pickleFd:
        pipe = pickle.load(pickleFd)

    c_expr_df['cluster'] = pipe.predict(data)
    # print function definitions (cluster 3, per the training notebook)
    for _, row in c_expr_df.iterrows():
        if 3 == row['cluster']:
            print(row['c_expr'])
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Script entry point: run the classifier only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue