How to call module written with argparse in iPython notebook
我正在尝试在IPython notebook环境中,把BioPython序列传递给Ilya Stepanov实现的Ukkonen后缀树算法。我在argparse这一部分遇到了困难。
我以前从来没有直接接触过argparse。如何在不重写main()的情况下使用它?
顺便说一下,这篇关于Ukkonen算法的文章非常精彩。
在IPython notebook中使用argparse的另一种方法,是把参数字符串(拆分成列表后)直接传给 parser.parse_args(),而不是让它去读取 sys.argv。
大致如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 | parser = argparse.ArgumentParser( description='Searching longest common substring. ' 'Uses Ukkonen\'s suffix tree algorithm and generalized suffix tree. ' 'Written by Ilya Stepanov (c) 2013') parser.add_argument( 'strings', metavar='STRING', nargs='*', help='String for searching', ) parser.add_argument( '-f', '--file', help='Path for input file. First line should contain number of lines to search in' ) |
然后在调用解析器时直接传入参数列表,例如:args = parser.parse_args('AAA --file /path/to/sequences.txt'.split())
编辑:已验证,这样可以正常工作。
我以前也遇到过类似的问题,不过当时用的是optparse而不是argparse。
您不需要更改原始脚本中的任何内容,只需在调用main()之前为sys.argv赋一个新的列表,如下所示:
1 2 3 4 5 6 | if __name__ =="__main__": from Bio import SeqIO path = '/path/to/sequences.txt' sequences = [str(record.seq) for record in SeqIO.parse(path, 'fasta')] sys.argv = ['-f'] + sequences main() |
如果只是在IPython中做测试,把argparse的参数改写成一个类(class),可以作为快速的临时替代方案。
1 2 3 4 5 6 7 | class Args: data = './data/penn' model = 'LSTM' emsize = 200 nhid = 200 args=Args() |
GitHub页面上的仓库提供了一个用于转换的Web服务:http://35.192.144.192:8000/arg2cls.html ,希望它对您的测试有所帮助。2019年1月9日更新:修复了许多错误。下面是转换脚本的用法,必须使用python3运行。
1 | python3 [arg2cls.py] [argparse_script.py] |
然后复制输出的类定义,粘贴到您的脚本中,替换原来的argparse代码。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 | #!/usr/bin/env python3 from collections import OrderedDict import sys import re DBG = False #add_argument(), set_defaults() only available. ListStartPatt = re.compile(r'\s*\[.*') ListStartPatt2 = re.compile(r'\).*\[.*') # list out of function scope. ListPatt = re.compile(r'(\[.*?\])') GbgPatt = re.compile(r'(.*?)\)[^\)]+') # for float('inf') cmplx. GbgPatt2 = re.compile(r'(.*?)\).*') # general gbg, ? for non greedy. LpRegex = re.compile(r'\({1,}\s{0,}') RpRegex = re.compile(r'\s{0,}\){1,}') PrRegex = re.compile(r'\((.*)(\))(?!.*\))') # from \( to last \). CmRegex = re.compile(r'\s{0,},\s{0,}') StrRegex = re.compile(r'\'(.*?)\'') # Argument dict : {arg_name : value} argDct=OrderedDict() # process 'default=' value. def default_value(tval, dtype=''): # string pattern. 
regres = StrRegex.match(tval) if regres and not re.search('int|float|long|bool|complex', dtype): if DBG: print('default_value: str patt found') tval = regres.group(0) return tval # typed pattern. CommaSeparated = CmRegex.split(tval)[0] if DBG: print('comma sepearated value:', CommaSeparated) if ListStartPatt.match(CommaSeparated) and not ListStartPatt2.match(CommaSeparated): lres = ListPatt.search(tval) if lres: tval = lres.group(1) if DBG: print('list patt exist tval: ', tval) else : tval = CmRegex.split(tval)[0] if DBG: print('no list format tval: ', tval) # if default value is not like - int('inf') , remove characters after ')' garbage chars. ires = RpRegex.split(tval)[0] if not (re.search('int|float|long|bool|complex', ires) and re.search(r'[a-z]+\(',ires)): if DBG: print('not int("inf") format. Rp removed tval : ', tval) tval = re.split(r'\s{0,}\){1,}',tval)[0] gbg = GbgPatt2.search(tval) if gbg: tval = gbg.group(1) if DBG: print('garbage exist & removed. tval : ', tval) # int('inf') patt. else: if DBG: print('type("inf") value garbaging!') gbg = GbgPatt.search(tval) if gbg: if DBG: print('garbage found, extract!') tval = gbg.group(1) return tval # Handling add_argument() def add_argument(arg_line): global argDct if DBG: print(' in add_argument : **Pre regex: ', arg_line) ''' argument name ''' # argname = DdRegex.split(arg_line)[1] # Dash or regex for arg name. argname = re.search('\'--(.*?)\'', arg_line) if not argname: argname = re.search('\'-+(.*?)\'', arg_line) # dest= keyword handling. dest = re.search(r',\s*dest\s*=(.*)', arg_line) if dest: dval = dest.group(1) dval = default_value(dval) argname = StrRegex.search(dval) # hyphen(-) to underscore(_) if argname: argname = argname.group(1).replace('-', '_') else : # naive str argname. 
sres = StrRegex.match(arg_line) if sres: argname = sres.group(1) if not argname: return # no argument name ''' check for syntaxes (type=, default=, required=, action=, help=, choices=) ''' dtype = '' dres = re.search(r',\s*type\s*=\s*(.*)', arg_line) if dres: dtype = dres.group(1) dtype = CmRegex.split(dtype)[0] dfult = re.search(r',\s*default\s*=\s*(.*)', arg_line) rquird = re.search(r',\s*required\s*=\s*(.*)', arg_line) action = re.search(r',\s*action\s*=\s*(.*)', arg_line) hlp = re.search(r',\s*help\s*=\s*(.*)', arg_line) chice = re.search(r',\s*choices\s*=\s*(.*)', arg_line) # help message hlp_msg = '' if hlp: thl = hlp.group(1) if DBG: print('handling help=') hlp_msg = default_value(thl) if hlp_msg: hlp_msg = 'help='+hlp_msg # choice message choice_msg = '' if chice: tch = chice.group(1) if DBG: print('handling choices=') choice_msg = default_value(tch) if choice_msg: choice_msg = 'choices='+choice_msg+' ' ''' argument value ''' # tval: argument value. tval = '' # default exist. if dfult: tval = dfult.group(1) tval = default_value(tval, dtype) if DBG: print('value determined : ', tval) # action or required syntaxes exist. elif action or rquird: if DBG: print('in action/required handling') msg_str = '' if action: tval = action.group(1) msg_str = 'action' elif rquird: tval = rquird.group(1) msg_str = 'required' tval = default_value(tval) tval = ' ** ' + msg_str + ' '+tval+'; '+choice_msg+ hlp_msg # no default, action, required. else : argDct[argname] = ' ** default not found; '+choice_msg+ hlp_msg # value found. if tval: argDct[argname] = tval # Handling set_defaults() def set_defaults(arg_line): global argDct if DBG: print(' in set_defaults arg_line: ', arg_line) # arguments to process. tv='' # arguments of set_default() SetPatt = re.compile(r'(.+=.+\)?)') sres = SetPatt.match(arg_line) if sres: tv = sres.group(1) if DBG: print("setPatt res:", tv) tv = re.sub(r'\s+','', tv) if DBG: print(' set_default values: ', tv) # one arguemnt regex. 
SetArgPatt = re.compile(r',?([^=]+=)[^=,]+,?') # handling multiple set_default() arguments. (may have a bug) while True: tname='' tval ='' tnv='' # func closed. if re.match(r',*\).*',tv): tv='' break if DBG: print('set_default remaining: ', tv) nres = SetArgPatt.match(tv) if nres: tname = nres.group(1) if len(tv.split(tname, 1)) > 1: tval = tv.split(tname,1)[1] tval = default_value(tval) tnv=tname+tval tname = tname.rsplit('=',1)[0] if DBG: print('set_default tnam: ', tname) print('set_default tval: ', tval) if tname: argDct[tname] = tval # split with processed argument. tv = tv.split(tnv) if len(tv) > 1: tv = tv[1] # no more value to process else: break # no arg=value pattern found. else: break # Remove empty line & Concatenate line-separated syntax. def preprocess(fname): try : with open(fname, 'r', encoding='UTF8') as f: txt = f.read() t = txt.splitlines(True) t = list( filter(None, t) ) # remove empty line t = [x for x in t if not re.match(r'\s{0,} ',x)] # concatenate multiple lined arguments. # empl : lines to be deleted from t[]. empl = [] for i in range(len(t)-1, 0, -1): if not re.search('add_argument|set_defaults', t[i]): t[i-1] += t[i] t[i-1]=re.sub(r' {0,}','',t[i-1]) t[i-1]=re.sub(r'\s{1,}',' ',t[i-1]) empl.append(t[i]) for d in empl: t.remove(d) for i, line in enumerate(t): t[i] = line.replace('"', '\'').split('parse_args()')[0] return t except IOError: print('IOError : no such file.', fname) sys.exit() def transform(fname): # t : list() contains add_argument|set_defaults lines. arg_line_list = preprocess(fname) for i, arg_line in enumerate(arg_line_list): t = PrRegex.search(arg_line) if t: t = t.group(1) # t: content of add_argument Parentheses. else : continue # nothing to parse. if re.search(r'add_argument\s*\(', arg_line): add_argument(t) elif re.search(r'set_defaults\s*\(',arg_line): set_defaults(t) else : # Nothing to parse. 
continue print(' class Args:') for i in argDct: print(' ',i, '=', argDct[i]) print() print('args=Args()') def main(): if len(sys.argv) <2: print('Usage : python arg2cls.py [target.py] [target2.py(optional)] ...') sys.exit(0) sys.argv.pop(0) #handling multiple file input. for fname in sys.argv: transform(fname) if(__name__ =="__main__"): main() |
最后我使用Biopython提取序列,然后修改了Ilya Stepanov的实现,去掉了其中的argparse部分。
1 2 3 4 5 6 | import imp seqs = [] lcsm = imp.load_source('lcsm', '/path/to/ukkonen.py') for record in SeqIO.parse('/path/to/sequences.txt', 'fasta'): seqs.append(record) lcsm.main(seqs) |
在算法代码中,我把下面这一行:
1 | suffix_tree.append_string(s) |
改成了:
1 | suffix_tree.append_string(str(s.seq)) |
这看起来有点脆弱,但这就是我现在所拥有的。
我在调用argparse时遇到了类似的问题,是字符串"-f"导致的。把它从sys.argv中去掉就行了。
1 2 3 4 5 | import sys if __name__ == '__main__': if '-f' in sys.argv: sys.argv.remove('-f') main() |