awk题目3

发布时间:2019-09-21 11:10:47 编辑:auto 阅读(1745)

    1.

    http://bbs.chinaunix.net/thread-4157887-1-1.html

     

     

     

    cat file:

    >a1
    1
    >a1
    11
    >a1
    111
    >a2
    2
    >a2
    22
    >b1
    3
    >b1
    33

     

    将各个title下边的内容合在一起,输出如下:
    >a1
    1
    11
    111
    >a2
    2
    22
    >b1
    3
    33

     

    awk '/^>/&&!a[$1]++||!/^>/'

    (可以改写成 awk '/>/{if(!a[$0]++){print $0};next}1' )

     

    awk '{if($0~/>/){m=$0}else{a[m]=a[m]"\n"$0}}END{for(i in a)print i,a[i]}'

    (if else语句可以用next代替,改写成awk '/>/{m=$0;next}{a[m]=a[m]"\n"$0}END{for(i in a)print i,a[i]}')

     

    awk '/>/{!a[$0]++;print}' f1
    >a1
    >a1
    >a1
    >a2
    >a2
    >b1
    >b1

     

    awk '/>/{print}' f1
    >a1
    >a1
    >a1
    >a2
    >a2
    >b1
    >b1

     

    awk '/>/{if(!a[$0]++)print $0}' f1
    >a1
    >a2
    >b1

     

    awk '/>/&&!a[$0]++' f1
    >a1
    >a2
    >b1

     

    2.

    http://bbs.chinaunix.net/forum.php?mod=viewthread&tid=4157497&page=1#pid24299288

     

    cat file:

    aaaaaa
    adada
    adadadad
    3a3a3
    a3a3
    a3s0
    2a2a2a2a2

     

    输出每个字符出现次数都在1至4次之间的行,结果应是:
    adada
    adadadad
    3a3a3
    a3a3
    a3s0

     

    awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++;l=asort(a)}a[l]<n' file

     

    awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++}{for(i in a)print i,a[i]}' f5
    a 6
    a 3
    d 2
    a 4
    d 4
    a 2
    3 3
    a 2
    3 2
    a 1
    0 1
    s 1
    3 1
    a 4
    2 5

    (i为每行重复出现的字符,a[i]为每行中某个字符重复出现的次数)

     

    awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++;len=asort(a)}{for(i in a)print i,a[i]}' f5
    1 6
    1 2
    2 3
    1 4
    2 4
    1 2
    2 3
    1 2
    2 2
    4 1
    1 1
    2 1
    3 1
    1 4
    2 5

    (a[i]为每行中某个字符重复出现的次数。i为每行中某个字符的数组下标值(每行去重后,每行有哪几个字符),如第一行只有a重复,去重后下标值为1;第二行中a、d重复,下标值为1、2;a3s0行没有重复,所以有下标值1、2、3、4,至于排序为何为4、1、2、3,可参考 awk 'BEGIN{s="a3s0";split(s,a,"");len=asort(a);for(i in a)print i,a[i]}'
    4 s
    1 0
    2 3
    3 a)

     

    awk -v n=5 -F '' '{delete a;for(i=1;i<=NF;i++)a[$i]++;len=asort(a)}{print a[len]}' f5
    6
    3
    4
    3
    2
    1
    5

    (asort按升序排序后,a[len]是数组的最后一个元素,即每行中出现次数最多的那个字符的出现次数,所以a[len]<n表示只取所有字符出现次数都少于n的行)

     

    3.

    http://bbs.chinaunix.net/forum.php?mod=viewthread&tid=4157298&page=1#pid24298104

     

    df -h
    /dev/sda2        49G   21G   26G  45% /
    tmpfs            16G     0   16G   0% /dev/shm
    /dev/sda1       193M   34M  150M  19% /boot

     

    AB=`df -h`
    echo $AB
    发现输出被合并成了一行,各字段之间以空格分隔(未加引号的 $AB 会被shell分词,换行符被当作普通空白处理)。
    如何实现跟df -h一样的换行显示呢?(给变量引用加上双引号即可,如下)

     

    AB="$(df -h)"
    echo "$AB"

     

    4.

    http://bbs.chinaunix.net/forum.php?mod=viewthread&tid=4156330&page=1#pid24293665

     

     

    有一个文件file1:(tab分隔)
    track name=STRINGENT_LT_1KB
    chr3    23232   asdbaskjdb
    chr3    32434   daffsf
    chr5    43131   dkfjkdjgkdjgk
    chr6    4574857   wejwe
    chr7    11313     sjdjsgd
    chr8    2323     lskdkss
    track name=STRINGENT_HIGH_SCORE
    chr1   1212123      fkekfhd
    chr2   2322342     ererer
    chr2   43444        sdsdsd
    chr3    454545      dkfjdkjfk
    chr5    343434     qsadjhdjh
    track name=STRINGENT_LT_332KB
    track name=STRINGENT_LOW_SCORE
    chr7   2323232    sdsdsdaaa
    chr10  223232    dssdsds
    chr19   677675   dlkslk

     

    统计不同track name下的数目,结果如下:(tab分隔)
    track name=STRINGENT_LT_1KB:6
    track name=STRINGENT_HIGH_SCORE:5
    track name=STRINGENT_LT_332KB:0
    track name=STRINGENT_LOW_SCORE:3

     

    awk '{if(/^t/){if(NR>1)print s":"n;n=0;s=$0}else n++}END{print s":"n}' f8

    (可以改写成next语句:awk '/^t/{if(NR>1)print s":"n;n=0;s=$0;next}{n++}END{print s":"n}' f8) 

     

    awk '{if(/track/){m=$0}else i++;print m,i}' f8
    track name=STRINGENT_LT_1KB
    track name=STRINGENT_LT_1KB 1
    track name=STRINGENT_LT_1KB 2
    track name=STRINGENT_LT_1KB 3
    track name=STRINGENT_LT_1KB 4
    track name=STRINGENT_LT_1KB 5
    track name=STRINGENT_LT_1KB 6
    track name=STRINGENT_HIGH_SCORE 6
    track name=STRINGENT_HIGH_SCORE 7
    track name=STRINGENT_HIGH_SCORE 8

     ……

     

    awk '{if(/track/){m=$0;i=0}else i++;print m,i}' f8
    track name=STRINGENT_LT_1KB 0
    track name=STRINGENT_LT_1KB 1
    track name=STRINGENT_LT_1KB 2
    track name=STRINGENT_LT_1KB 3
    track name=STRINGENT_LT_1KB 4
    track name=STRINGENT_LT_1KB 5
    track name=STRINGENT_LT_1KB 6
    track name=STRINGENT_HIGH_SCORE 0
    track name=STRINGENT_HIGH_SCORE 1
    track name=STRINGENT_HIGH_SCORE 2
    track name=STRINGENT_HIGH_SCORE 3
    track name=STRINGENT_HIGH_SCORE 4
    track name=STRINGENT_HIGH_SCORE 5

    ……

     

    awk '{if(/^t/){m=$0;i=0}else i++}END{print m,i}' f8
    track name=STRINGENT_LOW_SCORE 3

     

    awk '{if(/^t/){m=$0;a[m]=0}else a[m]++}END{print m,a[m]}' f8
    track name=STRINGENT_LOW_SCORE 3

     

    awk '{if(/^t/){m=$0;a[m]=0}else a[m]++}END{for(i in a){print i":"a[i]}}' f8

    (可以改写成next语句:awk '/^t/{m=$0;a[m]=0;next}{a[m]++}END{for(i in a){print i":"a[i]}}' f8)
    track name=STRINGENT_LT_1KB:6
    track name=STRINGENT_LT_332KB:0
    track name=STRINGENT_HIGH_SCORE:5
    track name=STRINGENT_LOW_SCORE:3

     

     

    awk '{if(/^t/){if(NR>1)print s;s=$0}}' f8
    track name=STRINGENT_LT_1KB
    track name=STRINGENT_HIGH_SCORE
    track name=STRINGENT_LT_332KB

     

    awk '{if(/^t/){if(NR>1)print s;s=$0;print s}}' f8
    track name=STRINGENT_LT_1KB
    track name=STRINGENT_LT_1KB
    track name=STRINGENT_HIGH_SCORE
    track name=STRINGENT_HIGH_SCORE
    track name=STRINGENT_LT_332KB
    track name=STRINGENT_LT_332KB
    track name=STRINGENT_LOW_SCORE

     

    5.

    http://bbs.chinaunix.net/thread-4155608-2-1.html

    echo "a b|c"|awk -F' |\\|' '{print NF}'
    3

    echo "a b|c"|awk -F' |\|' '{print NF}'
    awk: 警告: 转义序列“\|”被当作单纯的“|”
    2

    echo "a b|c"|awk -F'[ |]' '{print NF}'
    3

     

      awk -v RS='...'

    这种写法可以少写一个\,但不"规范"。可以少写一个的原因是,最后落单的那个\,
    经过 awk 的字符串解释后还是\,与\\的解释结果是一样的。

    awk 'BEGIN{RS="...";}

    写法就必须写够,一个也不能少。

    $ echo -E '\\\'
    \\\

    $ echo|awk -v v='\\\' '{print "["v"]"}'
    [\\]

    $ echo|awk -v v='\\\\' '{print "["v"]"}'
    [\\]

    $ echo|awk -v v='\' '{print "["v"]"}'
    [\]

    $ echo|awk -v v='\\' '{print "["v"]"}'
    [\]

    $ echo|awk  'BEGIN{v="\\";}{print "["v"]"}'
    [\]

    $ echo|awk  'BEGIN{v="\\\";}{print "["v"]"}'

    出错 

    cat file:

    123
    123
    {\

    123
    123
    123
    {\
    1af
    ewfwf
    af

     

    怎么才能用awk以{\为行分隔符来分割,得到每一段的内容?例如想要得到第一段:
    123
    123

     

    awk -vRS='{\\\\' 'NR==1'

     

     

    6.

    http://bbs.chinaunix.net/thread-3679733-1-1.html

     

    字符串 'aaabcccaaabbbccc',连续的字母作为一个子字符串,从左往右,去重复后,变为 'aaabcccbbb' , awk或sed实现,不使用管道。

     

    awk -F '' '{while(i++<=NF){s=s$i;if($i!=$(i+1)){if(!a[s]++)printf s;s=""}}}'

    awk -F'ccc' '{sub(/[a]+/,"",$2);print $1FS$2}'

     

    7.

    seq 9
    1
    2
    3
    4
    5
    6
    7
    8
    9
    如何得到以下结果:
    1 2 3
    2 3 4
    3 4 5
    4 5 6
    5 6 7
    6 7 8
    7 8 9
    8 9

     

    seq 9|awk '{a[NR]=$0}END{for(i=1;i<NR;i++)print a[i],a[i+1],a[i+2]}'

    seq 9 | awk '{b=($1+1<=9)?$1+1:" ";c=($1+2<=9)?$1+2:" "; print $1" "b" "c}'

    seq 9|awk 'NR==1{a=$1};NR==2{b=$1};NR>2{print a,b,$1;a=b;b=$1}END{print a,b}'

     

    8.

    http://bbs.chinaunix.net/thread-4154751-1-1.html

    [root@localhost ~]# a=1
    [root@localhost ~]# while read num ; do a=$num ; done < <(seq 5) ; echo $a
    5
    [root@localhost ~]# for num in $(seq 5) ; do a=$num ; done ; echo $a
    5

     seq 5 | while ... 这样的方式会产生一个shell子进程,子进程里的赋值自然不能改变父进程中变量的值

     

    9.

    http://bbs.chinaunix.net/thread-4154600-1-1.html

     


    awk '/<frame name="mainFrame" src="/{print a[match($0,/<frame name="mainFrame" src="\/([^"]*)".*/,a)]}'

    grep -Po '(?<=name="mainFrame" src=")[^"]+'

    awk -F '[=" ]+' -v RS=">" '/"mainFrame" src="/{print $6}' 

     

    10.

    http://bbs.chinaunix.net/thread-4154603-1-1.html

    实现这样
    (a=1且b=1)或者(c=1且d=1)成立时
    echo pass
    这样能用if实现么?
    如何来嵌套?

     

    a=1; b=1; c=0; d=0;if [ $a -eq 1 ] && [ $b -eq 1 ];then echo pass;elif [ $c -eq 1 ] && [ $d -eq 1 ];then echo pass;fi

     

    a=1; b=1; c=0; d=0; if (((a==1 && b==1 )||(c==1 && d ==1)));then echo pass;fi

     

    awk 'BEGIN{a=1;b=1;c=0;d=1;if(((a==1&&b==1)||(c==1&&d==1))){print "pass"}else{print "。。"}}'

     

    awk 'BEGIN{a=1; b=1; c=0; d=0;if(a==1&&b==1){print "pass"}else if(c==1&&d==1){print "pass"}else{print "。。"}}'

     

    if [[ ($a = 1 && $b = 1) || ($c = 1 && $d = 1) ]];then
        echo True
    else
        echo False
    fi

     

    11.

    http://bbs.chinaunix.net/thread-4154531-3-1.html

     

    awk 'm~/==/&&/^\s*$/{print m}{m=$0}'

     

    12.

    http://bbs.chinaunix.net/thread-4154504-1-1.html

     

    grep -vf B A

    awk 'NR==FNR{a[$0];next}{for(i in a){if(match($0,i)>0)next}print}' B A

    awk 'NR==FNR{a[$0];next}{for(i in a)if(index($0,i))next}1' B A

     

    awk 'FNR==NR{k=k?k"|"$0:$0;next}$0!~k{print}' B A
    (awk '{k=k?k"|"$0:$0}END{print k}' B
    Now|We|ment)

     

    13.

    http://bbs.chinaunix.net/thread-4154469-1-1.html 

     

    二维数组

    awk 'BEGIN{for(i=1;i<=3;i++)for(j=1;j<=4;j++)a[i,j]=i*j;for(x in a){split(x,b,SUBSEP);print b[1]"*"b[2]"="a[x]}}'

     

    找出文件A有文件B没有的行

    awk 'BEGIN{for(i=1;i<=3;i++){for(j=1;j<=4;j++){s=s"R"i"C"j;print s}}}'

    或者awk 'BEGIN{for(i=1;i<=3;i++)for(j=1;j<=4;j++)print "R"i"C"j}'

    awk 'BEGIN{for(i=1;i<=3;i++)for(j=1;j<=4;j++)a["R"i"C"j];for(i in a)print i}'

    awk '{for(i=1;i<=3;i++)for(j=1;j<=4;j++)a["R"i"C"j]}END{for(i in a)print i}' file

     

    awk 'BEGIN{for(i=1;i<=3;i++){for(j=1;j<=4;j++){s="R"i"C"j;print s>"file"}}}'

    awk 'NR==FNR{a[$2];next}{for(i in a){if($0==i)next}print}'

     

     14.

    http://bbs.chinaunix.net/thread-4153934-1-1.html

     

    awk   '{ORS=NR%2?"\n":" "}1' 

    awk 'NR==1{print;next}NR%2==0{lastline=$0;next}{print lastline,$0}'

    awk 'NR%2==0{m=$0}NR%2==1{print m,$0}'

    awk 'NR==1{print}NR%2==0{x=NR;m=$0}x{getline s;print m,s}'

    awk 'NR==1{print}NR%2==0{x=NR;m=$0}x&&NR-x==1{print m,$0}'

    awk 'NR==1{print}{a[NR]=$0}END{for(i=1;i<=NR;i++){if(i%2==0)print a[i],a[i+1]}}'

     

     

    15.

    http://bbs.chinaunix.net/thread-4150372-1-1.html

     
    \1代表你前面第一个\( \)里面的内容,\2代表第二个,以此类推

    awk --re-interval 'NR==1{a=gensub(/.*(.{10})/,"\\1",1);print a+0}'
    awk -F '' '{m=substr($0,NF-9,NF);gsub(/0+/,"",m);print m}'

     

     16.

    http://bbs.chinaunix.net/thread-4150043-1-1.html

    awk '{a[$1]=$2;b[$1]=$1}END{for(i=1;i<=asort(b,c);i++)print b[c[i]],a[b[c[i]]],i%3==1?++n:n}'

     

    cat file:

    1        a
    3        s
    2        f
    5        g
    7        j
    4        t
    9        r
    6        w
    8        l

     

    根据第一列排序第二列

    awk '{a[$1]=$2}END{for(i=1;i<=NR;i++)print i,a[i]}'

    awk '{a[$1]=$2}END{for(i=1;i<=asort(a,c);i++)print i,a[i]}'

     

    $ cat file
    aaa 125
    ddd 123
    bbb 128
    ccc 120

    $ awk '{a[$2]=$0}END{for(i=1;i<=asort(a);i++)print a[i]}' file
    aaa 125
    bbb 128
    ccc 120
    ddd 123

    $ awk '{a[$2]=$0}END{for(i=1;i<=asorti(a,b);i++)print a[b[i]]}' file
    ccc 120
    ddd 123
    aaa 125
    bbb 128

     

    echo "8 11111 9" | awk '{split($0,a," ");for(i=1;i<=asort(a,c);i++)print i,a[i],c[i]}'
    1 8 8
    2 11111 9
    3 9 11111

     

    echo "8 11111 9" | awk '{split($0,a," ");for(i=1;i<=asort(a);i++)print i,a[i],c[i]}'
    1 8
    2 9
    3 11111

     

    echo "8 11111 9" | awk '{split($0,a," ");for(i=1;i<=asort(a,c);i++)print i,a[2],c[2]}'
    1 11111 9
    2 11111 9
    3 11111 9 

     

    17.

    http://bbs.chinaunix.net/thread-4149019-1-1.html

     

    var="'(]\\{}\$\""

     

    18.

    http://bbs.chinaunix.net/thread-1393874-1-1.html

     

      #! /bin/sh
        A=B  echo  $A
        echo  $A

     

    19.

    http://bbs.chinaunix.net/thread-4148647-1-1.html

     

    awk '/ve/||(sub($2,$2 FS $NF) && NF--)' OFS='\t'

    awk '/ap/{for(i=1;i<=NF;i++){if(i==3)printf $NF"\t"$i"\t";else if($i==$NF)print "";else{printf $i"\t"}}next}1' 

     

    20.

    http://blog.chinaunix.net/uid-20778583-id-4247874.html

     

    awk 'NR==FNR{a[$1" "$2" "$3]=$1;b[$1]++}NR>FNR{counter=0;for(i in a)if($1==a[i]){split(i,m," ");i || $2>m[3] || (m[2]==$2 && m[3]==$3))counter++}print NR,counter}' f1 f1

     

    awk 'NR==FNR{a[$1" "$2" "$3]=$1;b[$1]++}NR>FNR{counter=0;for(i in a){if($1==a[i]){split(i,m," ");] || $2>m[3] || ( m[2]==$2 && m[3]==$3))counter++;else next;if(counter == b[$1])print $0}}}' f1 f1

     

     21.

    http://bbs.chinaunix.net/thread-3620300-1-1.html

    awk '{if($1==x){s=s","$2}else{if(NR>1){print s}s=$1"\t"$2}x=$1}END{print s}'

    awk '{x[$1]=length(x[$1])?x[$1]","$2:$2}END{for(i in x)print i"\t"x[i]}'

    awk '{x[$1]=length(x[$1])?x[$1]","$2:$2;len=asorti(x,b)}END{for(i=1;i<=len;i++)print b[i],x[b[i]]}' f5

     

    awk '/track/{x=NR;a[x]=$0}x{print x,NR,a[x],NR-x}'

    http://bbs.chinaunix.net/forum.php?mod=viewthread&tid=4156330&page=1#pid24396455

     

    7720 98

    7720 99

    7720 218

    7720 219

    7720 220

    awk '{a[$1]=$2}{printf a[$1]","}'

    awk -v RS='7720' '{printf ","$1}'

    awk '{x=$1;if($1==x){s=s","$2}}END{print x,s}'

    awk '{if($1==x){s=s","$2}else{if(NR>1){print s}s=$1"\t"$2}x=$1}END{print s}'

    awk '{if($1==x){if($2==y+1)s=s"-"$2;else s=s","$2}else{if(NR>1){print s}s=$1"\t"$2}x=$1;y=$2}END{gsub(/-[^,]*-/,"-",s);print s}'

     

     

    22.

    http://bbs.chinaunix.net/thread-4119434-1-1.html   练习题(排序合并)

     

    awk '{m=gensub(/([^0-9]+).*/,"\\1",1,$2);n=gensub(/[^0-9]+(.*)/,"\\1",1,$2);t=$1" "sprintf("%s%10s",m,n);a[t]=$1;b[t]=m;c[t]=n}END{for(i=0;i++<asorti(c,d);){print d[i],a[d[i]]}}'

    apple cat        12 apple
    apple cat       106 apple
    apple cat       107 apple
    apple cat       108 apple
    apple cat       109 apple
    apple cat       123 apple
    apple cat       125 apple
    pear dog        11 pear
    pear dog       101 pear
    pear dog       103 pear
    pear dog       104 pear
    pear dog       105 pear
    pear mouse       106 pear
    pear mouse       107 pear
    pear mouse       108 pear
    pear mouse       109 pear
    pear mouse       123 pear
    pear mouse       125 pear

     

    awk '{m=gensub(/([^0-9]+).*/,"\\1",1,$2);n=gensub(/[^0-9]+(.*)/,"\\1",1,$2);t=$1" "sprintf("%s%10s",m,n);a[t]=$1;b[t]=m;c[t]=n}END{for(i=0;i++<asorti(c,d);){if(!k)printf "%s: ",a[d[i]]}}'


    awk '{m=gensub(/([^0-9]+).*/,"\\1",1,$2);n=gensub(/[^0-9]+(.*)/,"\\1",1,$2);t=$1" "sprintf("%s%10s",m,n);a[t]=$1;b[t]=m;c[t]=n}END{for(i=0;i++<asorti(c,d);){if(!k)printf "%s: ",a[d[i]];if(k&&k!=a[d[i]]){printf "%s\n%s: ",s,a[d[i]];s=""}}}'

     

     

    23.

    http://bbs.chinaunix.net/forum.php?mod=viewthread&tid=4147474

     

    awk '{a[$1];for(i=1;i++<NF;)b[$1,i]=b[$1,i]!=""?b[$1,i]"/"$i:$i}END{for(i in a){printf i;for(j=1;j++<NF;)printf " "b[i,j];print ""}}'

     

    awk '{if(a[$1]){split(a[$1],b," ");a[$1]=$1" "b[2]"/"$2" "b[3]"/"$3" "b[4]"/"$4}else{a[$1]=$0}}END{l=asort(a);for(i=1;i<=l;i++)print a[i]}' file

     

    24.

    http://bbs.chinaunix.net/thread-4145403-1-1.html

     

    awk -F '' '{if($3~4)print $0>"a";else print$0>"b"}' f5
    awk -F '' '$3~4{print $0>"e"}$3~5{print$0>"f"}' f5
    awk '/^..4/{print $0>"g"}/^..5/{print$0>"h"}' f5
    awk '{m=substr($0,3,1);if(m==4)print $0>"c";else{print $0>"d"}}' f5
    awk '{print >substr($0,3,1)".txt"}'

关键字