python实现类似awk的简单功能

发布时间:2019-09-22 07:52:59编辑:auto阅读(2040)

    命令行的awk很方便,但遇到稍微灵活一点的处理需求时,由于对awk不够熟悉,专门深入学习又没有太大必要,于是用python实现了一个简化版;更复杂的处理同样可以用python脚本完成,这样程序的一致性也更好。

    #!/usr/bin/python
    #coding:utf-8
    
    import os
    import sys
    from getopt import getopt
    
    # Line-processing generator: run one dealer over every record of a stream.
    def deallines(dealer,instream,paras):
        """Apply ``dealer`` to each line of ``instream`` and yield output lines.

        Every input line has its trailing CR/LF characters stripped and is
        split on ``paras["sep"]`` (a tab when the key is absent).  The dealer
        is called as ``dealer(line, fields)`` with the raw line and the list
        of fields.  It may return a falsy value (e.g. ``None``) to emit
        nothing, or an iterable/generator of results.  Each result is
        rendered as follows:

        * ``str``: yielded unchanged, so the dealer supplies its own newline;
        * any other iterable: items are converted with ``str()``, joined
          with the separator and terminated with a newline;
        * any non-iterable value (int, float, ...): ``str(value)`` plus a
          newline.
        """
        sep = paras.get("sep","\t")
        for line in instream:
            fields = line.strip("\r\n").split(sep)
            # A dealer that is not a generator may return None; treat as empty.
            for result in dealer(line,fields) or ():
                if isinstance(result, str):
                    yield result
                    continue
                try:
                    items = iter(result)
                except TypeError:
                    # Scalars such as int or float are not iterable; emit
                    # them as a single-field record instead of crashing.
                    yield str(result)+"\n"
                else:
                    yield sep.join([str(f) for f in items])+"\n"
    
    # Build dealer functions from code snippets.
    # Counter used to give every generated dealer a unique name.
    dealerid = 0
    def definedealer(dealstr,paras):
        """Compile ``dealstr`` into a function ``dealer_<N>(line, f)`` and return it.

        ``dealstr`` becomes the body of a one-line ``def``, so it must be a
        simple statement (or several separated by ``;``).  Inside it, ``line``
        and ``f`` are the two parameters of the generated function.

        The function is created with this module's globals, so the snippet
        can read module-level names.  ``paras`` is accepted for interface
        compatibility and is not used here.

        NOTE: ``dealstr`` is executed as arbitrary Python code.  Only pass
        text you trust.
        """
        global dealerid
        dealerid += 1
        funcname = "dealer_"+str(dealerid)
        # The tuple form of exec is valid on both Python 2 and Python 3.  An
        # explicit namespace avoids relying on exec mutating function locals.
        namespace = {}
        exec("def "+funcname+"(line,f): "+dealstr, globals(), namespace)
        return namespace[funcname]
    
    # Print the results.
    def printer(instream):
        """Write every chunk of ``instream`` to standard output verbatim.

        No separator or newline is added, so each chunk is expected to carry
        its own line terminator.  ``sys.stdout.write`` is used instead of the
        Python 2 ``print line,`` statement: it works on both Python 2 and 3
        and never inserts a "soft space" between chunks that lack a trailing
        newline.
        """
        for line in instream:
            sys.stdout.write(line)
    
    # Translate escape sequences written literally (backslash + letter).
    # Lookup table: literal two-character escape -> the character it denotes.
    _ESCAPES = {
        "\\t": "\t",
        "\\r": "\r",
        "\\n": "\n",
        "\\\\": "\\",
        "\\'": "'",
        '\\"': '"',
        "\\a": "\a",
        "\\b": "\b",
        # Python has no "\e" escape; ESC must be spelled as "\x1b".
        "\\e": "\x1b",
        "\\v": "\v",
        "\\f": "\f",
    }
    _HEXDIGITS = "0123456789abcdefABCDEF"
    def convertchar(s):
        """Return the character denoted by the escape sequence ``s``.

        Recognised inputs are the two-character escapes ``\\t \\r \\n \\\\
        \\' \\" \\a \\b \\e \\v \\f`` and the four-character hexadecimal
        form ``\\xHH``.  Any other string, including a malformed ``\\x``
        sequence, is returned unchanged.
        """
        if s in _ESCAPES:
            return _ESCAPES[s]
        if len(s) == 4 and s.startswith("\\x"):
            digits = s[2:4]
            # Validate explicitly: int() would also accept "+f" or " f".
            if all(c in _HEXDIGITS for c in digits):
                return chr(int(digits, 16))
        return s
    
    # Convert parsed command-line options into a settings dict.
    def convertparas(paras):
        """Turn ``(option, value)`` pairs into the settings dictionary.

        The returned dict always has a ``"sep"`` entry, which defaults to a
        tab.  Each ``-F`` option replaces it with its value passed through
        ``convertchar``; the last ``-F`` wins.  Other options are ignored.
        """
        settings = dict(sep="\t")
        for option, raw in paras:
            if option != '-F':
                continue
            settings["sep"] = convertchar(raw)
        return settings
    <pre name="code" class="python"><pre name="code" class="python">

    # Number of the record most recently read from the input stream.
    FNR = 0
    def record(instream):
        """Yield the items of ``instream`` unchanged while counting them.

        The module-level ``FNR`` is incremented just before each item is
        yielded, so while a record is being processed downstream ``FNR``
        holds that record's 1-based number.
        """
        global FNR
        for r in instream:
            FNR += 1
            yield r
    # Main function.
    def main(args):
        """Run the pipeline described by the command line ``args``.

        ``args[0]`` is skipped.  ``-F <sep>`` is the only option parsed.
        Every remaining argument is a code snippet compiled into a dealer.
        Standard input is chained through the dealers in order, and the
        final stream is printed.
        """
        paras,deals = getopt(args[1:],'F:')
        paras = convertparas(paras)
        dealers = [definedealer(dealstr,paras) for dealstr in deals]
        stream = record(sys.stdin)
        # Each dealer consumes the previous stage's output lazily.
        for dealer in dealers:
            stream = deallines(dealer,stream,paras)
        printer(stream)

    if __name__ =='__main__':
        main(sys.argv)
    

    #输入数据,姓名 年龄 身高 体重

    cat person.txt
    zs      10      1.3     35.0

    ls      12      1.4     36.0

    #print直接输出

    cat person.txt | dealline 'print line,'

    zs      10      1.3     35.0

    ls      12      1.4     36.0

    #分两步,第一步提取姓名 身高 体重,第二步计算体重指数=体重/身高/身高

    cat person.txt | dealline 'yield f[0],f[2],f[3]' 'yield f[0],float(f[2])/float(f[1])/float(f[1])'
    zs      20.7100591716
    ls      18.3673469388

    注:-F分隔符缺省为\t,line为整行,f为按分隔符split后的数组,FNR为记录号

关键字