发布时间:2019-09-19 08:00:54 | 编辑:auto | 阅读(1821)
生成激活码
#!/usr/bin/env python
# encoding: utf-8
# Author: sean
#
# Generate random activation codes made of dash-separated groups of
# letters and digits.
import random
import string

# Characters (ASCII letters and digits) that may appear in an activation code.
# string.ascii_letters is used instead of string.letters, which is
# locale-dependent and was removed in Python 3.
field = string.ascii_letters + string.digits

# Activation codes should not be predictable, so draw from the operating
# system's entropy source rather than the default Mersenne Twister generator.
_rng = random.SystemRandom()


def getRandom(length=4):
    """Return one group of ``length`` random characters taken from ``field``.

    The characters are sampled without replacement, so a group never
    contains the same character twice.

    Raises:
        ValueError: if ``length`` is negative or larger than ``len(field)``.
    """
    return ''.join(_rng.sample(field, length))


def concatenate(group):
    """Return one activation code: ``group`` random groups joined by '-'."""
    return '-'.join(getRandom() for _ in range(group))


def generate(n, group=4):
    """Return a list of ``n`` activation codes, each made of ``group`` groups.

    The codes are generated independently; the list is not de-duplicated.
    """
    return [concatenate(group) for _ in range(n)]


if __name__ == '__main__':
    # The parenthesised form works on both Python 2 and Python 3.
    print(generate(10))
统计单词
#!/usr/bin/env python
# encoding: utf-8
import re
from collections import Counter

FileSource = './media/abc.txt'


def getMostCommonWord(articlefilesource):
    """Count how often each token occurs in a plain-text English file.

    A token is either a run of ASCII letters or a number with an optional
    leading '$' and an optional trailing '%'. Matching is case-sensitive,
    so 'The' and 'the' are counted separately.

    Args:
        articlefilesource: path of the text file to read.

    Returns:
        A list of ``(token, count)`` tuples ordered from most to least
        frequent, as produced by ``Counter.most_common()``.
    """
    # The pattern previously ended with '$', which (without re.MULTILINE)
    # anchored the numeric alternative to the very end of the file, so
    # numbers anywhere else in the text were silently dropped.
    pattern = r'[A-Za-z]+|\$?\d+%?'
    with open(articlefilesource) as f:
        tokens = re.findall(pattern, f.read())
    return Counter(tokens).most_common()


if __name__ == '__main__':
    # The parenthesised form works on both Python 2 and Python 3.
    print(getMostCommonWord(FileSource))
提取网页正文
#!/usr/bin/env python
# encoding: utf-8
import sys

from goose import Goose
from goose.text import StopWordsChinese

# URL of the web page to analyse.
# NOTE(review): the original value was missing (the string literal was left
# unterminated), so this is empty; set it here or pass a URL on the command
# line.
url = ''


def extract(url):
    """Extract the main body text of a web page.

    Goose is configured with the Chinese stop-word class.

    Args:
        url: address of the page to fetch and analyse.

    Returns:
        The ``cleaned_text`` attribute of the article object that
        ``Goose.extract`` returns for ``url``.
    """
    g = Goose({'stopwords_class': StopWordsChinese})
    article = g.extract(url=url)
    return article.cleaned_text


if __name__ == '__main__':
    # A URL given on the command line takes precedence over the module default.
    target = sys.argv[1] if len(sys.argv) > 1 else url
    if not target:
        sys.exit('usage: %s <url>' % sys.argv[0])
    # The parenthesised form works on both Python 2 and Python 3.
    print(extract(target))
上一篇: 构建python的运行环境
下一篇: python dict 与list比较
49895
49147
39821
36864
31253
28097
27050
21853
21785
20131
58°
64°
250°
268°
889°
1598°
1490°
1260°
2572°
1541°