发布时间:2019-09-19 08:00:54 | 编辑:auto | 阅读(1603)
生成激活码
#!/usr/bin/env python
# encoding: utf-8
# Author: sean
#
# Generate random activation codes made of dash-separated groups of
# letters and digits. Runs on both Python 2 and Python 3.
import random
import string

# Alphabet the codes are drawn from: ASCII letters plus digits.
# string.ascii_letters is used instead of string.letters, which is
# locale-dependent on Python 2 and does not exist on Python 3.
field = string.ascii_letters + string.digits

# Activation codes act as secrets, so draw them from the operating system's
# entropy source rather than the predictable default Mersenne Twister.
_rng = random.SystemRandom()


def getRandom():
    """Return one group: four distinct characters sampled from ``field``.

    Sampling is without replacement, so a character never repeats inside
    a single group.
    """
    return ''.join(_rng.sample(field, 4))


def concatenate(group):
    """Return one activation code built from ``group`` dash-joined groups.

    ``group`` is the number of four-character groups; ``0`` yields ``''``.
    """
    return '-'.join(getRandom() for _ in range(group))


def generate(n):
    """Return a list of ``n`` activation codes, each made of four groups."""
    return [concatenate(4) for _ in range(n)]


if __name__ == '__main__':
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(generate(10))
统计单词
#!/usr/bin/env python
# encoding: utf-8
#
# Count how often each word occurs in an English plain-text file.
# Runs on both Python 2 and Python 3.
import io
import re
from collections import Counter

FileSource = './media/abc.txt'

# A token is either a run of ASCII letters, or a number with an optional
# leading "$" and optional trailing "%" (e.g. "$5", "10%", "20").
# The original pattern ended in "$", which anchored the numeric alternative
# to the end of the whole text, so numbers were only counted when they were
# the very last token in the file.
_WORD_PATTERN = re.compile(r'[A-Za-z]+|\$?\d+%?')


def getMostCommonWord(articlefilesource):
    """Count the tokens in an English plain-text file.

    ``articlefilesource`` is the path of the file to read.

    Returns a list of ``(token, count)`` tuples ordered from most to least
    frequent, as produced by ``Counter.most_common()``. Matching is
    case-sensitive, so "The" and "the" are counted separately.
    """
    # io.open accepts an explicit encoding on Python 2 as well as Python 3.
    # Undecodable bytes are replaced rather than raised on, because only
    # ASCII letters and digits can ever match the token pattern.
    with io.open(articlefilesource, encoding='utf-8', errors='replace') as f:
        tokens = _WORD_PATTERN.findall(f.read())
    return Counter(tokens).most_common()


if __name__ == '__main__':
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(getMostCommonWord(FileSource))
提取网页正文
#!/usr/bin/env python
# encoding: utf-8
#
# Extract the main article text of a web page with the goose library.
from goose import Goose
from goose.text import StopWordsChinese
import sys

# URL of the web page to analyse.
# NOTE(review): the original URL literal is missing from the source (the
# string was left unterminated), so this is an empty placeholder. Fill it in
# here or pass the URL as the first command-line argument.
url = ''


def extract(url):
    """Return the cleaned main text of the web page at ``url``.

    A Goose extractor is configured with the Chinese stop-word class and
    the ``cleaned_text`` of the extracted article is returned.
    """
    g = Goose({'stopwords_class': StopWordsChinese})
    article = g.extract(url=url)
    return article.cleaned_text


if __name__ == '__main__':
    # Prefer a URL given on the command line; fall back to the module-level
    # default so the script can still be run with a hard-coded address.
    target = sys.argv[1] if len(sys.argv) > 1 else url
    if not target:
        sys.exit('usage: %s <url>' % sys.argv[0])
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print(extract(target))
上一篇: 构建python的运行环境
下一篇: python dict 与list比较
47840
46386
37279
34729
29312
25970
24909
19948
19541
18020
5790°
6411°
5926°
5960°
7063°
5910°
5942°
6437°
6404°
7775°