【python 图像识别】图像识别从菜鸟

发布时间:2019-07-25 09:13:04编辑:auto阅读(3203)

    一、安装配置(python2.7)

    1、pip install pytesseract
    2、pip install pyocr
    3、pip install pillow
    4、安装tesseract-ocr:http://jaist.dl.sourceforge.net/project/tesseract-ocr-alt/tesseract-ocr-setup-3.02.02.exe,安装在C:\Program Files\下
    5、找到 pytesseract.py,将其中的 tesseract_cmd 改为:tesseract_cmd = 'C:/Program Files/Tesseract-OCR/tesseract.exe'

    二、识别英文
    这里写图片描述

    这里写图片描述

    三、识别验证码
    这里写图片描述

    这里写图片描述

    这里写图片描述

    四、实现源代码
    1、识别英文

    #-*-coding:utf-8-*-
    # Example 1: run Tesseract OCR on an image containing English text.
    #
    # Python 2 only: reload(sys) is required because sys.setdefaultencoding is
    # removed from the sys namespace at interpreter start-up; the call below
    # then switches the implicit str/unicode conversion codec to UTF-8.
    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')
    
    import time
    # Start timestamp; it is never read again in this example.
    time1 = time.time()
    from PIL import Image
    import pytesseract
    
    
    # The path is hard-coded to the author's machine; point it at your own image.
    image = Image.open(r'D:\Program Files\Python27\Lib\site-packages\pytesseract\test.png')
    # Hand the opened image to pytesseract and print whatever text it returns.
    code = pytesseract.image_to_string(image)
    print(code)

    2、识别验证码

    #-*-coding:utf-8-*-
    # Example 2: clean up a captcha image, then run Tesseract OCR on it.
    #
    # Python 2 only: reload(sys) is required because sys.setdefaultencoding is
    # removed from the sys namespace at interpreter start-up; the call below
    # then switches the implicit str/unicode conversion codec to UTF-8.
    import sys
    reload(sys)
    sys.setdefaultencoding('utf-8')
    
    import time
    # Start timestamp; it is never read again in this example.
    time1 = time.time()
    from PIL import Image
    import pytesseract
    
    ###########二值化算法
    def binarizing(img,threshold):
        """Turn ``img`` into a two-level (0 / 255) image, in place.

        Every pixel whose value is strictly below ``threshold`` is set to 0;
        every other pixel is set to 255.

        img: object exposing ``load()`` (pixel access indexed by ``[x, y]``)
            and ``size`` as ``(width, height)``; this script passes a
            greyscale image.
        threshold: cut-off value compared against each pixel.

        Returns the same ``img`` object that was passed in.
        """
        pixels = img.load()
        width, height = img.size
        for row in range(height):
            for col in range(width):
                pixels[col, row] = 0 if pixels[col, row] < threshold else 255
        return img
    # Open the captcha image to recognize. The path is hard-coded to the
    # author's machine; replace it with the location of your own image.
    image = Image.open(r'E:\taqu\12.png')
    
    ###########去除干扰线算法
    def depoint(img):
        """Whiten interior pixels that are mostly surrounded by near-white ones.

        For every pixel that is not on the outer border, the four direct
        neighbours (above, below, left, right) are inspected; when at least
        three of them are brighter than 245 the pixel itself is set to 255.

        Pixels are updated in place while scanning row by row, left to right,
        so a pixel whitened earlier already counts as white when its right
        and lower neighbours are examined.

        img: greyscale image exposing ``load()`` and ``size``.

        Returns the same ``img`` object that was passed in.
        """
        pixels = img.load()
        width, height = img.size
        for row in range(1, height - 1):
            for col in range(1, width - 1):
                neighbours = (
                    pixels[col, row - 1],
                    pixels[col, row + 1],
                    pixels[col - 1, row],
                    pixels[col + 1, row],
                )
                bright = sum(1 for value in neighbours if value > 245)
                if bright >= 3:
                    pixels[col, row] = 255
        return img
    
    
    
    # Convert the opened image to greyscale (mode 'L').
    img = image.convert('L')
    # Binarize: pixels below 190 become 0, all others 255.
    # binarizing() modifies its argument in place and returns it, so img1 is img.
    img1=binarizing(img,190)
    # Optional extra clean-up pass; uncomment (and use img2 below) if needed:
    # img2=depoint(img1)
    # Display the binarized image for visual inspection before running OCR.
    img1.show()
    code = pytesseract.image_to_string(img1)
    # Parenthesised print gives the same output on Python 2, is valid on
    # Python 3, and matches the print(code) call used in the first example.
    print("识别该验证码是:" + str(code))

关键字