HEX
Server: LiteSpeed
System: Linux CentOS-79-64-minimal 3.10.0-1160.119.1.el7.x86_64 #1 SMP Tue Jun 4 14:43:51 UTC 2024 x86_64
User: vishn3436 (5293)
PHP: 8.0.15
Disabled: NONE
Upload Files
File: //data/ownphotos/playground/bench.py
import os
import hashlib
from datetime import datetime

import exifread
import PIL
import face_recognition

import ipdb
from tqdm import tqdm

import numpy as np
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import fcluster
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram
from sklearn.manifold import TSNE

import matplotlib.pyplot as plt


# Directory holding the input photos; every directory entry is taken as an
# image path (no filtering by extension or file type is done here).
images_path = '../data/samplephotos'
image_paths = [os.path.abspath(os.path.join(images_path,p)) for p in os.listdir(images_path)]

# Output directory for generated thumbnails. makedirs(exist_ok=True) replaces
# the isdir()/mkdir() pair: it has no check-then-create race and also creates
# missing parent directories.
thumbnails_path = '../data/thumbnails'
os.makedirs(thumbnails_path, exist_ok=True)

# Bounding box handed to Image.thumbnail() by generate_thumbnail().
THUMBNAIL_SIZE = (1024,1024)

def generate_thumbnail(fname_in,fname_out,thumbnails_path):
    """Write a JPEG thumbnail of the image at *fname_in*.

    The image is shrunk in place to fit within THUMBNAIL_SIZE and saved as
    JPEG to ``thumbnails_path/fname_out`` (no file extension is appended).

    Args:
        fname_in: path of the source image.
        fname_out: file name of the thumbnail inside *thumbnails_path*.
        thumbnails_path: directory the thumbnail is written to.
    """
    # A bare `import PIL` does not guarantee the Image submodule is loaded;
    # import it explicitly so this function does not depend on import order.
    import PIL.Image

    out_path = os.path.abspath(os.path.join(thumbnails_path,fname_out))
    # The context manager closes the source file once the thumbnail is saved.
    with PIL.Image.open(fname_in) as source:
        # The JPEG writer rejects modes such as RGBA and P; convert those to
        # RGB up front instead of failing in save().
        if source.mode in ('RGB', 'L', 'CMYK'):
            image = source
        else:
            image = source.convert('RGB')
        # LANCZOS is the filter ANTIALIAS used to alias; the ANTIALIAS name
        # was removed in Pillow 10.
        image.thumbnail(THUMBNAIL_SIZE, PIL.Image.LANCZOS)
        image.save(out_path,'JPEG')

def md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is read in binary mode in 4096-byte pieces, so it is never
    loaded into memory as a whole.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()

def extract_exif(fname):
    """Read the EXIF tags of the image file at *fname*.

    Args:
        fname: path of the image file to read.

    Returns:
        A dict with two entries:
        ``'raw'``: every tag returned by exifread, mapped to its ``.values``.
        ``'processed'``: a dict with ``'timestamp'`` (a datetime parsed from
        ``EXIF DateTimeOriginal``, or None when the tag is missing or not in
        ``YYYY:MM:DD HH:MM:SS`` form), ``'gps_lat'`` and ``'gps_lon'`` (the
        values of ``GPS GPSLatitude`` / ``GPS GPSLongitude``, or None when
        absent). The GPS values are stored as exifread returns them; the
        hemisphere reference tags are not consulted here.
    """
    # Open the file that was passed in, not the module-level loop variable.
    with open(fname,'rb') as fimg:
        exif_raw = exifread.process_file(fimg,details=False)

    timestamp = None
    if 'EXIF DateTimeOriginal' in exif_raw:
        tst_str = exif_raw['EXIF DateTimeOriginal'].values
        try:
            timestamp = datetime.strptime(tst_str,"%Y:%m:%d %H:%M:%S")
        except (TypeError, ValueError):
            # Blank or malformed timestamps are treated as "no timestamp"
            # rather than aborting the whole indexing run.
            timestamp = None

    # Latitude comes from the latitude tag and longitude from the longitude
    # tag (these two were previously swapped).
    gps_lat = exif_raw['GPS GPSLatitude'].values if 'GPS GPSLatitude' in exif_raw else None
    gps_lon = exif_raw['GPS GPSLongitude'].values if 'GPS GPSLongitude' in exif_raw else None

    exif_processed = {
        'timestamp': timestamp,
        'gps_lat': gps_lat,
        'gps_lon': gps_lon,
    }
    exif_raw = {key: value.values for key,value in exif_raw.items()}
    return {'raw':exif_raw, 'processed':exif_processed}

def extract_faces(fname):
    """Detect faces in the image at *fname* and compute their encodings.

    Args:
        fname: path of the image file to analyse.

    Returns:
        A dict with ``'locations'`` (the list returned by
        ``face_recognition.face_locations``; each entry unpacks as
        top, right, bottom, left) and ``'encodings'`` (the list returned by
        ``face_recognition.face_encodings`` for those same locations).
    """
    image = face_recognition.load_image_file(fname)
    # Detect once and hand the boxes to face_encodings(): this avoids running
    # the detector a second time and keeps encodings[i] paired with
    # locations[i].
    face_locations = face_recognition.face_locations(image)
    face_encodings = face_recognition.face_encodings(image, known_face_locations=face_locations)
    return {'encodings':face_encodings, 'locations':face_locations}


# Index every sample photo under the MD5 of its file contents. For each photo
# a thumbnail is generated, EXIF data is read from the original file and
# faces are extracted from the thumbnail.
hash2metadata = {}
for image_path in tqdm(image_paths):
    image_hash = md5(image_path)

    # The thumbnail file is named after the content hash.
    generate_thumbnail(image_path, image_hash, thumbnails_path)
    thumbnail_path = os.path.abspath(os.path.join(thumbnails_path, image_hash))

    metadata = {
        'img_path': image_path,
        'thumbnail_path': thumbnail_path,
        'exif': extract_exif(image_path),
        'faces': extract_faces(thumbnail_path),
    }
    hash2metadata[image_hash] = metadata



# Map image hash -> capture timestamp, keeping only photos whose processed
# EXIF timestamp is truthy (i.e. present).
hash2tst = {
    image_hash: meta['exif']['processed']['timestamp']
    for image_hash, meta in hash2metadata.items()
    if meta['exif']['processed']['timestamp']
}

    



# Collect the face encodings of all indexed photos into one flat list.
# extend() with an empty list is a no-op, so no length check is needed.
faces_all = []
for value in hash2metadata.values():
    faces_all.extend(value['faces']['encodings'])

# Both the 2-component PCA and the hierarchical linkage below need at least
# two observations; stop with a clear message instead of a library error.
if len(faces_all) < 2:
    raise SystemExit('need at least two face encodings to cluster, found %d' % len(faces_all))

# Project the encodings onto their first two principal components for plotting.
vecs = np.array(faces_all)
pca = PCA(n_components=2)
vis = pca.fit_transform(vecs)

plt.scatter(vis.T[0],vis.T[1],marker='o')
plt.show()

# Ward hierarchical clustering of the full encodings (not the 2-D projection).
Z = linkage(vecs,metric='euclidean',method='ward')

# Draw the dendrogram on its own figure so the cluster-count curve below is
# not overlaid on the same axes.
plt.figure()
dendrogram(Z)

# Number of flat clusters obtained at each of 100 distance thresholds in [0, 1].
labels = [fcluster(Z,t,criterion='distance') for t in np.linspace(0,1,100)]
lens = [len(set(label)) for label in labels]

plt.figure()
plt.plot(lens)
plt.grid()
plt.show()

# Cut the tree into at most two clusters and colour the PCA scatter by cluster.
clusters = fcluster(Z,2,criterion='maxclust')
plt.scatter(vis.T[0],vis.T[1],marker='o',s=10,c=clusters)
plt.show()