0
点赞
收藏
分享

微信扫一扫

【图像】搜索相同,或者相似照片

young_d807 2022-03-12 阅读 37

1.

目录

1. 查找完全相同的一对照片

2. 查找相似照片


查找完全相同的一对照片

 

利用 MD5 哈希值,找出两张完全相同(逐字节一致)的图片。

import cv2
import numpy as np
import os

import json
import os
from hashlib import md5

def getmd5(image_path, md5_path):
    """Compute the MD5 digest of every file in a folder and save them as JSON.

    Args:
        image_path: Directory whose direct children are hashed.
        md5_path: Destination JSON file; receives ``{md5_hex: file_path}``.

    Sub-directories are skipped because they cannot be opened as files.
    The digest is the dictionary key, so when several files have identical
    content only the last one listed keeps its path in the output.
    """
    image_md5 = {}
    for name in os.listdir(image_path):
        path = os.path.join(image_path, name)
        if not os.path.isfile(path):
            continue
        digest = md5()
        # Read in chunks so large images are never held in memory whole;
        # the ``with`` block closes the handle even if a read fails.
        with open(path, 'rb') as img:
            for chunk in iter(lambda: img.read(1 << 20), b''):
                digest.update(chunk)
        image_md5[digest.hexdigest()] = path

    with open(md5_path, "w", encoding='utf-8') as fw:
        json.dump(image_md5, fw, indent=2, sort_keys=True, ensure_ascii=False)


def check(md5_val_path, md5_test_path, md5_repeat_path):
    """Report files whose MD5 digest appears in both the val and test sets.

    Args:
        md5_val_path: JSON file mapping ``{md5_hex: path}`` for the val set.
        md5_test_path: JSON file mapping ``{md5_hex: path}`` for the test set.
        md5_repeat_path: Destination JSON file; receives
            ``{md5_hex: {'val': path, 'test': path}}`` for every shared digest.

    Each match is printed as it is found, followed by a summary line with
    the size of both sets and the number of duplicates.
    """
    with open(md5_val_path, "rt", encoding="utf-8") as fr:
        md5_val = json.load(fr)
    with open(md5_test_path, "rt", encoding="utf-8") as fr:
        md5_test = json.load(fr)

    md5_repeat = {}
    for digest, test_file in md5_test.items():
        if digest in md5_val:
            print('[md5]{}    [val]{}    [test]{}'.format(digest, md5_val[digest], test_file))
            md5_repeat[digest] = {'val': md5_val[digest], 'test': test_file}

    with open(md5_repeat_path, "w", encoding='utf-8') as fw:
        json.dump(md5_repeat, fw, indent=2, sort_keys=True, ensure_ascii=False)

    print('------------------ Analysis ------------------')
    # Argument order follows the labels: test count first, then val count.
    print('Test num {}    Val num {}    Repeat {}'.format(len(md5_test), len(md5_val), len(md5_repeat)))


if __name__ == '__main__':
    # Folders holding the reference ("val") pool and the pool to be checked
    # ("test"). Both live under H:\testImages, like the JSON outputs below.
    image_val_path = r'H:\testImages\原始图片池'
    # NOTE(review): the original read r'H:\testImages待查原始图片池' with no
    # separator after "testImages"; assumed to be a typo -- confirm the
    # actual folder name.
    image_test_path = r'H:\testImages\待查原始图片池'

    # Intermediate digest tables and the final duplicate report.
    md5_val_path = 'H:/testImages/md5_val.json'
    md5_test_path = 'H:/testImages/md5_test.json'
    md5_repeat_path = 'H:/testImages/md5_repeat.json'

    getmd5(image_val_path, md5_val_path)
    getmd5(image_test_path, md5_test_path)

    check(md5_val_path, md5_test_path, md5_repeat_path)

2. 查找相似照片

例如,对同一张照片经压缩或手机翻拍后得到的两张内容相同的图片进行辨识(比如 JPG 转存为 PNG)。MD5 具有雪崩效应,要求文件逐字节完全一致;这种方法避开了该限制,增强了鲁棒性。

​​​​​​​

 

 

主要综合四种指标:

1. 感知哈希(Perceptual Hashing,基于离散余弦变换),

2. 平均散列(Average Hashing),

3. 梯度散列,

4. 离散小波变换


import os,imagehash
from PIL import Image



def _hash_folder(folder, extension, hash_size, highfreq_factor, image_scale):
    """Return ``(names, hashes)`` for the images in *folder* with *extension*.

    ``hashes[k]`` is a ``(phash, ahash, dhash, whash)`` tuple for ``names[k]``.
    The extension is compared case-insensitively.
    """
    wanted = extension.lower()
    names = []
    hashes = []
    for name in os.listdir(folder):
        if os.path.splitext(name)[1].lower() != wanted:
            continue
        # Open each image once and close it deterministically; the hash
        # functions work on converted copies, so the handle can be shared.
        with Image.open(os.path.join(folder, name)) as img:
            hashes.append((
                # perceptual hash
                imagehash.phash(img, hash_size=hash_size, highfreq_factor=highfreq_factor),
                # average hash
                imagehash.average_hash(img, hash_size=hash_size),
                # difference (gradient) hash
                imagehash.dhash(img, hash_size=hash_size),
                # wavelet hash
                imagehash.whash(img, image_scale=image_scale, hash_size=hash_size, mode='db4'),
            ))
        names.append(name)
    return names, hashes


def hash(SourcePath, Test, threshold=0.97, source_ext='.png', test_ext='.jpg'):
    """Print the source images that look similar to any image in the test folder.

    Every ``source_ext`` file in *SourcePath* is compared with every
    ``test_ext`` file in *Test* using four image hashes (perceptual,
    average, difference and wavelet). A pair's similarity is the best of
    the four scores, each computed as ``1 - differing_bits / total_bits``.
    For each pair scoring above *threshold*, the source file name and its
    size in KB are printed, followed by a separator line.

    Args:
        SourcePath: Folder with the reference images.
        Test: Folder with the images to look up.
        threshold: Minimum similarity (exclusive) for a pair to be reported.
        source_ext: File extension selected in *SourcePath*.
        test_ext: File extension selected in *Test*.
    """
    highfreq_factor = 4  # oversampling factor used by phash before the DCT
    hash_size = 32  # hashes are hash_size x hash_size bits
    image_scale = 64  # working image size for the wavelet hash

    source_names, source_hashes = _hash_folder(
        SourcePath, source_ext, hash_size, highfreq_factor, image_scale)
    _, test_hashes = _hash_folder(
        Test, test_ext, hash_size, highfreq_factor, image_scale)

    for test_set in test_hashes:
        for source_name, source_set in zip(source_names, source_hashes):
            # Subtracting two ImageHash objects yields the number of
            # differing bits; normalise by the total bit count.
            best = max(
                1 - (t - s) / t.hash.size
                for t, s in zip(test_set, source_set)
            )
            if best > threshold:
                size_kb = os.path.getsize(os.path.join(SourcePath, source_name)) / 1024
                print(source_name, str(size_kb) + 'KB')
                print('***********************')

if __name__ == '__main__':
    imagesPath1 = r'H:\testImages\原始照片池'  # reference pool
    imagesPath2 = r'H:\testImages\对照照片池'  # pool to compare against it
    # The original passed imagesPath1 twice and left imagesPath2 unused, so
    # the reference pool was only ever compared with itself.
    hash(imagesPath1, imagesPath2)
举报

相关推荐

0 条评论