吴裕雄--天生自然 PYTHON数据分析:糖尿病视网膜病变数据分析(完整版)

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here‘s several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
#from imgaug import augmenters as iaa
from tqdm import tqdm
import PIL
from PIL import Image, ImageOps
import cv2
from sklearn.utils import class_weight, shuffle
from keras.losses import binary_crossentropy
from keras.applications.resnet50 import preprocess_input
import keras.backend as K
import tensorflow as tf
from sklearn.metrics import f1_score, fbeta_score
from keras.utils import Sequence
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

WORKERS = 2
CHANNEL = 3

import warnings
warnings.filterwarnings("ignore")
IMG_SIZE = 512
NUM_CLASSES = 5
SEED = 77
TRAIN_NUM = 1000 # use 1000 when you just want to explore new idea, use -1 for full train
df_train = pd.read_csv(‘F:\\kaggleDataSet\\diabeticRetinopathy\\trainLabels19.csv‘)
df_test = pd.read_csv(‘F:\\kaggleDataSet\\diabeticRetinopathy\\testImages19.csv‘)

x = df_train[‘id_code‘]
y = df_train[‘diagnosis‘]

x, y = shuffle(x, y, random_state=SEED)
train_x, valid_x, train_y, valid_y = train_test_split(x, y, test_size=0.15,stratify=y, random_state=SEED)
print(train_x.shape, train_y.shape, valid_x.shape, valid_y.shape)
train_y.hist()
valid_y.hist()

%%time
fig = plt.figure(figsize=(25, 16))
# display 10 images from each class
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_train.loc[df_train[‘diagnosis‘] == class_id].sample(5, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, 5, class_id * 5 + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\"+str(row[‘id_code‘])+".jpg"
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        plt.imshow(image)
        ax.set_title(‘Label: %d-%d-%s‘ % (class_id, idx, row[‘id_code‘]) )

%%time
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_train.loc[df_train[‘diagnosis‘] == class_id].sample(5, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, 5, class_id * 5 + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\"+str(row[‘id_code‘])+".jpg"
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#         image=cv2.addWeighted ( image, 0 , cv2.GaussianBlur( image , (0 ,0 ) , 10) ,-4 ,128)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        plt.imshow(image, cmap=‘gray‘)
        ax.set_title(‘Label: %d-%d-%s‘ % (class_id, idx, row[‘id_code‘]) )

dpi = 80 #inch

# path=f"../input/aptos2019-blindness-detection/train_images/5c7ab966a3ee.png" # notice upper part
path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\cd54d022e37d.jpg" # lower-right, this still looks not so severe, can be class3
image = cv2.imread(path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
height, width = image.shape
print(height, width)

SCALE=2
figsize = (width / float(dpi))/SCALE, (height / float(dpi))/SCALE

fig = plt.figure(figsize=figsize)
plt.imshow(image, cmap=‘gray‘)

%%time
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_train.loc[df_train[‘diagnosis‘] == class_id].sample(5, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, 5, class_id * 5 + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\"+str(row[‘id_code‘])+".jpg"
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        image=cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , IMG_SIZE/10) ,-4 ,128) # the trick is to add this line

        plt.imshow(image, cmap=‘gray‘)
        ax.set_title(‘Label: %d-%d-%s‘ % (class_id, idx, row[‘id_code‘]) )

def crop_image1(img,tol=7):
    # img is image data
    # tol  is tolerance
    mask = img>tol
    return img[np.ix_(mask.any(1),mask.any(0))]

def crop_image_from_gray(img,tol=7):
    if img.ndim ==2:
        mask = img>tol
        return img[np.ix_(mask.any(1),mask.any(0))]
    elif img.ndim==3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        mask = gray_img>tol
        check_shape = img[:,:,0][np.ix_(mask.any(1),mask.any(0))].shape[0]
        if (check_shape == 0): # image is too dark so that we crop out everything,
            return img # return original image
        else:
            img1=img[:,:,0][np.ix_(mask.any(1),mask.any(0))]
            img2=img[:,:,1][np.ix_(mask.any(1),mask.any(0))]
            img3=img[:,:,2][np.ix_(mask.any(1),mask.any(0))]
    #         print(img1.shape,img2.shape,img3.shape)
            img = np.stack([img1,img2,img3],axis=-1)
    #         print(img.shape)
        return img
def load_ben_color(path, sigmaX=10):
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = crop_image_from_gray(image)
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    image=cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , sigmaX) ,-4 ,128)
    return image
%%time

NUM_SAMP=7
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_train.loc[df_train[‘diagnosis‘] == class_id].sample(NUM_SAMP, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, NUM_SAMP, class_id * NUM_SAMP + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\"+str(row[‘id_code‘])+".jpg"
        image = load_ben_color(path,sigmaX=30)
        plt.imshow(image)
        ax.set_title(‘%d-%d-%s‘ % (class_id, idx, row[‘id_code‘]) )

def circle_crop(img, sigmaX=10):
    """
    Create circular crop around image centre
    """
    img = cv2.imread(img)
    img = crop_image_from_gray(img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    height, width, depth = img.shape
    x = int(width/2)
    y = int(height/2)
    r = np.amin((x,y))
    circle_img = np.zeros((height, width), np.uint8)
    cv2.circle(circle_img, (x,y), int(r), 1, thickness=-1)
    img = cv2.bitwise_and(img, img, mask=circle_img)
    img = crop_image_from_gray(img)
    img=cv2.addWeighted ( img,4, cv2.GaussianBlur( img , (0,0) , sigmaX) ,-4 ,128)
    return img 
%%time
## try circle crop
NUM_SAMP=7
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_train.loc[df_train[‘diagnosis‘] == class_id].sample(NUM_SAMP, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, NUM_SAMP, class_id * NUM_SAMP + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\"+str(row[‘id_code‘])+".jpg"
        image = circle_crop(path,sigmaX=30)
        plt.imshow(image)
        ax.set_title(‘%d-%d-%s‘ % (class_id, idx, row[‘id_code‘]) )

dpi = 80 #inch
path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized train 19\\cd54d022e37d.jpg"
image = load_ben_color(path,sigmaX=10)

height, width = IMG_SIZE, IMG_SIZE
print(height, width)

SCALE=1
figsize = (width / float(dpi))/SCALE, (height / float(dpi))/SCALE

fig = plt.figure(figsize=figsize)
plt.imshow(image, cmap=‘gray‘)

%%time
NUM_SAMP=10
fig = plt.figure(figsize=(25, 16))
for jj in range(5):
    for i, (idx, row) in enumerate(df_test.sample(NUM_SAMP,random_state=SEED+jj).iterrows()):
        ax = fig.add_subplot(5, NUM_SAMP, jj * NUM_SAMP + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized test 19\\"+str(row[‘id_code‘])+".jpg"
        image = load_ben_color(path,sigmaX=30)
        plt.imshow(image)
        ax.set_title(‘%d-%s‘ % (idx, row[‘id_code‘]) )

%%time
NUM_SAMP=10
fig = plt.figure(figsize=(25, 16))
for jj in range(5):
    for i, (idx, row) in enumerate(df_test.sample(NUM_SAMP,random_state=SEED+jj).iterrows()):
        ax = fig.add_subplot(5, NUM_SAMP, jj * NUM_SAMP + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized test 19\\"+str(row[‘id_code‘])+".jpg"
        image = load_ben_color(path,sigmaX=50)
        plt.imshow(image, cmap=‘gray‘)
        ax.set_title(‘%d-%s‘ % (idx, row[‘id_code‘]) )

df_old = pd.read_csv(‘F:\\kaggleDataSet\\diabeticRetinopathy\\trainLabels.csv‘)
df_old.head()

NUM_SAMP=10
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_old.loc[df_old[‘level‘] == class_id].sample(NUM_SAMP, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, NUM_SAMP, class_id * NUM_SAMP + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized_train\\"+row[‘image‘]+".jpeg"
        image = load_ben_color(path,sigmaX=30)
        plt.imshow(image)
        ax.set_title(‘%d-%d-%s‘ % (class_id, idx, row[‘image‘]) )

NUM_SAMP=10
fig = plt.figure(figsize=(25, 16))
for class_id in sorted(train_y.unique()):
    for i, (idx, row) in enumerate(df_old.loc[df_old[‘level‘] == class_id].sample(NUM_SAMP, random_state=SEED).iterrows()):
        ax = fig.add_subplot(5, NUM_SAMP, class_id * NUM_SAMP + i + 1, xticks=[], yticks=[])
        path="F:\\kaggleDataSet\\diabeticRetinopathy\\resized_train\\"+row[‘image‘]+".jpeg"
        image = cv2.imread(path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        plt.imshow(image, cmap=‘gray‘)
        ax.set_title(‘%d-%d-%s‘ % (class_id, idx, row[‘image‘]) )

dpi = 80 #inch

path=f"F:\\kaggleDataSet\\diabeticRetinopathy\\resized_train\\31590_right.jpeg" # too many vessels?
image = load_ben_color(path,sigmaX=30)
# image = cv2.imread(path)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# image = crop_image1(image)
# image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
# image=cv2.addWeighted ( image,4, cv2.GaussianBlur( image , (0,0) , IMG_SIZE/10) ,-4 ,128)

height, width = IMG_SIZE, IMG_SIZE
print(height, width)
SCALE=1
figsize = (width / float(dpi))/SCALE, (height / float(dpi))/SCALE
fig = plt.figure(figsize=figsize)
plt.imshow(image, cmap=‘gray‘)

dpi = 80 #inch

path_jpg=f"F:\\kaggleDataSet\\diabeticRetinopathy\\resized_train_cropped\\18017_left.jpeg" # too many vessels?
path_png=f"F:\\kaggleDataSet\\diabeticRetinopathy\\rescaled_train_896\\18017_left.png" # details are lost
image = cv2.imread(path_png)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))

image2 =  cv2.imread(path_jpg)
image2 = cv2.cvtColor(image2, cv2.COLOR_BGR2RGB)
image2 = cv2.resize(image2, (IMG_SIZE, IMG_SIZE))

height, width = IMG_SIZE, IMG_SIZE
print(height, width)

SCALE=1/4
figsize = (width / float(dpi))/SCALE, (height / float(dpi))/SCALE

fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(2, 2, 1, xticks=[], yticks=[])
ax.set_title(‘png format original‘ )
plt.imshow(image, cmap=‘gray‘)
ax = fig.add_subplot(2, 2, 2, xticks=[], yticks=[])
ax.set_title(‘jpg format original‘ )
plt.imshow(image2, cmap=‘gray‘)

image = load_ben_color(path_png,sigmaX=30)
image2 = load_ben_color(path_jpg,sigmaX=30)
ax = fig.add_subplot(2, 2, 3, xticks=[], yticks=[])
ax.set_title(‘png format transformed‘ )
plt.imshow(image, cmap=‘gray‘)
ax = fig.add_subplot(2, 2, 4, xticks=[], yticks=[])
ax.set_title(‘jpg format transformed‘ )
plt.imshow(image2, cmap=‘gray‘)

import json
import math
import os

import cv2
from PIL import Image
import numpy as np
from keras import layers
from keras.applications import DenseNet121
from keras.callbacks import Callback, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score, accuracy_score
import scipy
from tqdm import tqdm

%matplotlib inline
train_df = pd.read_csv(‘F:\\kaggleDataSet\\diabeticRetinopathy\\trainLabels19.csv‘)
test_df = pd.read_csv(‘F:\\kaggleDataSet\\diabeticRetinopathy\\testImages19.csv‘)
print(train_df.shape)
print(test_df.shape)
test_df.head()

def get_pad_width(im, new_shape, is_rgb=True):
    pad_diff = new_shape - im.shape[0], new_shape - im.shape[1]
    t, b = math.floor(pad_diff[0]/2), math.ceil(pad_diff[0]/2)
    l, r = math.floor(pad_diff[1]/2), math.ceil(pad_diff[1]/2)
    if is_rgb:
        pad_width = ((t,b), (l,r), (0, 0))
    else:
        pad_width = ((t,b), (l,r))
    return pad_width

def preprocess_image(image_path, desired_size=224):
    im = Image.open(image_path)
    im = im.resize((desired_size, )*2, resample=Image.LANCZOS)
    return im
N = test_df.shape[0]
x_test = np.empty((N, 224, 224, 3), dtype=np.uint8)

for i, image_id in enumerate(tqdm(test_df[‘id_code‘])):
    x_test[i, :, :, :] = preprocess_image("F:\\kaggleDataSet\\diabeticRetinopathy\\resized test 19\\"+str(image_id)+".jpg")

# model.summary()
def load_image_ben_orig(path,resize=True,crop=False,norm255=True,keras=False):
    image = cv2.imread(path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image=cv2.addWeighted( image,4, cv2.GaussianBlur( image , (0,0) ,  10) ,-4 ,128)
    if norm255:
        return image/255
    elif keras:
        #see https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py for mode
        #see https://github.com/keras-team/keras-applications/blob/master/keras_applications/xception.py for inception,xception mode
        #the use of tf based preprocessing (- and / by 127 respectively) will results in [-1,1] so it will not visualize correctly (directly)
        image = np.expand_dims(image, axis=0)
        return preprocess_input(image)[0]
    else:
        return image.astype(np.int16)
    return image

def transform_image_ben(img,resize=True,crop=False,norm255=True,keras=False):
    image=cv2.addWeighted( img,4, cv2.GaussianBlur( img , (0,0) ,  10) ,-4 ,128)
    if norm255:
        return image/255
    elif keras:
        image = np.expand_dims(image, axis=0)
        return preprocess_input(image)[0]
    else:
        return image.astype(np.int16)
    return image
def display_samples(df, columns=5, rows=2, Ben=True):
    fig=plt.figure(figsize=(5*columns, 4*rows))
    for i in range(columns*rows):
        image_path = df.loc[i,‘id_code‘]
        path = f"F:\\kaggleDataSet\\diabeticRetinopathy\\resized test 19\\"+str(image_path)+".jpg"
        if Ben:
            img = load_image_ben_orig(path)
        else:
            img = cv2.imread(path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        fig.add_subplot(rows, columns, i+1)
        plt.imshow(img)
    plt.tight_layout()
display_samples(test_df, Ben=False)
display_samples(test_df, Ben=True)

from keras import layers
from keras.models import Model
import keras.backend as K
K.clear_session()
densenet = DenseNet121(weights=None,include_top=False,input_shape=(None,None,3))
GAP_layer = layers.GlobalAveragePooling2D()
drop_layer = layers.Dropout(0.5)
dense_layer = layers.Dense(5, activation=‘sigmoid‘, name=‘final_output‘)
def build_model_sequential():
    model = Sequential()
    model.add(densenet)
    model.add(GAP_layer)
    model.add(drop_layer)
    model.add(dense_layer)
    return model
modelA = build_model_sequential()
modelA.load_weights(‘F:\\kaggleDataSet\\diabeticRetinopathy\\dense_xhlulu_731.h5‘)
modelA.summary()
model = build_model_functional() # with pretrained weights, and layers we want
model.summary()

y_test = model.predict(x_test) > 0.5
y_test = y_test.astype(int).sum(axis=1) - 1
import seaborn as sns
import cv2

SIZE=224
def create_pred_hist(pred_level_y,title=‘NoTitle‘):
    results = pd.DataFrame({‘diagnosis‘:pred_level_y})
    f, ax = plt.subplots(figsize=(7, 4))
    ax = sns.countplot(x="diagnosis", data=results, palette="GnBu_d")
    sns.despine()
    plt.title(title)
    plt.show()

create_pred_hist(y_test,title=‘predicted level distribution in test set‘)

def gen_heatmap_img(img, model0, layer_name=‘last_conv_layer‘,viz_img=None,orig_img=None):
    preds_raw = model0.predict(img[np.newaxis])
    preds = preds_raw > 0.5 # use the same threshold as @xhlulu original kernel
    class_idx = (preds.astype(int).sum(axis=1) - 1)[0]
    class_output_tensor = model0.output[:, class_idx]

    viz_layer = model0.get_layer(layer_name)
    grads = K.gradients(class_output_tensor ,viz_layer.output)[0] # gradients of viz_layer wrt output_tensor of predicted class
    pooled_grads=K.mean(grads,axis=(0,1,2))
    iterate=K.function([model0.input],[pooled_grads, viz_layer.output[0]])
    pooled_grad_value, viz_layer_out_value = iterate([img[np.newaxis]])
    for i in range(pooled_grad_value.shape[0]):
        viz_layer_out_value[:,:,i] *= pooled_grad_value[i]
    heatmap = np.mean(viz_layer_out_value, axis=-1)
    heatmap = np.maximum(heatmap,0)
    heatmap /= np.max(heatmap)
    viz_img=cv2.resize(viz_img,(img.shape[1],img.shape[0]))
    heatmap=cv2.resize(heatmap,(viz_img.shape[1],viz_img.shape[0]))
    heatmap_color = cv2.applyColorMap(np.uint8(heatmap*255), cv2.COLORMAP_SPRING)/255
    heated_img = heatmap_color*0.5 + viz_img*0.5
    print(‘raw output from model : ‘)
    print_pred(preds_raw)
    if orig_img is None:
        show_Nimages([img,viz_img,heatmap_color,heated_img])
    else:
        show_Nimages([orig_img,img,viz_img,heatmap_color,heated_img])
    plt.show()
    return heated_img
def show_image(image,figsize=None,title=None):
    if figsize is not None:
        fig = plt.figure(figsize=figsize)
    if image.ndim == 2:
        plt.imshow(image,cmap=‘gray‘)
    else:
        plt.imshow(image)
    if title is not None:
        plt.title(title)

def show_Nimages(imgs,scale=1):
    N=len(imgs)
    fig = plt.figure(figsize=(25/scale, 16/scale))
    for i, img in enumerate(imgs):
        ax = fig.add_subplot(1, N, i + 1, xticks=[], yticks=[])
        show_image(img)

def print_pred(array_of_classes):
    xx = array_of_classes
    s1,s2 = xx.shape
    for i in range(s1):
        for j in range(s2):
            print(‘%.3f ‘ % xx[i,j],end=‘‘)
        print(‘‘)
NUM_SAMP=10
SEED=77
layer_name = ‘relu‘ #‘conv5_block16_concat‘
for i, (idx, row) in enumerate(test_df[:NUM_SAMP].iterrows()):
    path=f"F:\\kaggleDataSet\\diabeticRetinopathy\\resized test 19\\"+str(row["id_code"])+".jpg"
    ben_img = load_image_ben_orig(path)
    input_img = np.empty((1,224, 224, 3), dtype=np.uint8)
    input_img[0,:,:,:] = preprocess_image(path)
    print(‘test pic no.%d‘ % (i+1))
    _ = gen_heatmap_img(input_img[0],model, layer_name=layer_name,viz_img=ben_img)

from albumentations import *
import time

IMG_SIZE = (224,224)

‘‘‘Use case from https://www.kaggle.com/alexanderliao/image-augmentation-demo-with-albumentation/‘‘‘
def albaugment(aug0, img):
    return aug0(image=img)[‘image‘]
idx=8
image1=x_test[idx]

‘‘‘1. Rotate or Flip‘‘‘
aug1 = OneOf([Rotate(p=0.99, limit=160, border_mode=0,value=0), Flip(p=0.5)],p=1)

‘‘‘2. Adjust Brightness or Contrast‘‘‘
aug2 = RandomBrightnessContrast(brightness_limit=0.45, contrast_limit=0.45,p=1)
h_min=np.round(IMG_SIZE[1]*0.72).astype(int)
h_max= np.round(IMG_SIZE[1]*0.9).astype(int)
print(h_min,h_max)

‘‘‘3. Random Crop and then Resize‘‘‘
#w2h_ratio = aspect ratio of cropping
aug3 = RandomSizedCrop((h_min, h_max),IMG_SIZE[1],IMG_SIZE[0], w2h_ratio=IMG_SIZE[0]/IMG_SIZE[1],p=1)

‘‘‘4. CutOut Augmentation‘‘‘
max_hole_size = int(IMG_SIZE[1]/10)
aug4 = Cutout(p=1,max_h_size=max_hole_size,max_w_size=max_hole_size,num_holes=8 )#default num_holes=8

‘‘‘5. SunFlare Augmentation‘‘‘
aug5 = RandomSunFlare(src_radius=max_hole_size,num_flare_circles_lower=10,num_flare_circles_upper=20,p=1)

‘‘‘6. Ultimate Augmentation -- combine everything‘‘‘
final_aug = Compose([aug1,aug2,aug3,aug4,aug5],p=1)

img1 = albaugment(aug1,image1)
img2 = albaugment(aug1,image1)
print(‘Rotate or Flip‘)
show_Nimages([image1,img1,img2],scale=2)
# time.sleep(1)

img1 = albaugment(aug2,image1)
img2 = albaugment(aug2,image1)
img3 = albaugment(aug2,image1)
print(‘Brightness or Contrast‘)
show_Nimages([img3,img1,img2],scale=2)
# time.sleep(1)

img1 = albaugment(aug3,image1)
img2 = albaugment(aug3,image1)
img3 = albaugment(aug3,image1)
print(‘Rotate and Resize‘)
show_Nimages([img3,img1,img2],scale=2)
print(img1.shape,img2.shape)
# time.sleep(1)

img1 = albaugment(aug4,image1)
img2 = albaugment(aug4,image1)
img3 = albaugment(aug4,image1)
print(‘CutOut‘)
show_Nimages([img3,img1,img2],scale=2)
# time.sleep(1)

img1 = albaugment(aug5,image1)
img2 = albaugment(aug5,image1)
img3 = albaugment(aug5,image1)
print(‘Sun Flare‘)
show_Nimages([img3,img1,img2],scale=2)
# time.sleep(1)

img1 = albaugment(final_aug,image1)
img2 = albaugment(final_aug,image1)
img3 = albaugment(final_aug,image1)
print(‘All above combined‘)
show_Nimages([img3,img1,img2],scale=2)
print(img1.shape,img2.shape)

aug_list = [aug5, aug2, aug3, aug4, aug1, final_aug]
aug_name = [‘SunFlare‘, ‘brightness or contrast‘, ‘crop and resized‘, ‘CutOut‘, ‘rotate or flip‘, ‘Everything Combined‘]

idx=8
layer_name = ‘relu‘
for i in range(len(aug_list)):
    path=f"F:\\kaggleDataSet\\diabeticRetinopathy\\resized test 19\\"+str(row["id_code"])+".jpg"
    input_img = np.empty((1,224, 224, 3), dtype=np.uint8)
    input_img[0,:,:,:] = preprocess_image(path)
    aug_img = albaugment(aug_list[i],input_img[0,:,:,:])
    ben_img = transform_image_ben(aug_img)
    print(‘test pic no.%d -- augmentation: %s‘ % (i+1, aug_name[i]))
    _ = gen_heatmap_img(aug_img,model, layer_name=layer_name,viz_img=ben_img,orig_img=input_img[0])

原文地址:https://www.cnblogs.com/tszr/p/11238752.html

时间: 2024-10-07 22:13:32

吴裕雄--天生自然 PYTHON数据分析:糖尿病视网膜病变数据分析(完整版)的相关文章

吴裕雄--天生自然 PYTHON数据分析:人类发展报告——HDI, GDI,健康,全球人口数据数据分析

import pandas as pd # Data analysis import numpy as np #Data analysis import seaborn as sns # Data visualization import matplotlib.pyplot as plt # Data Visualization import matplotlib.gridspec as gridspec # subplots and grid from wordcloud import Wor

吴裕雄--天生自然 PYTHON语言数据分析:ESA的火星快车操作数据集分析

import os import numpy as np import pandas as pd from datetime import datetime import matplotlib import matplotlib.pyplot as plt import seaborn as sns sns.set_style('white') %matplotlib inline %load_ext autoreload %autoreload 2 def to_utms(ut): retur

吴裕雄--天生自然python编程:turtle模块绘图(3)

turtle(海龟)是Python重要的标准库之一,它能够进行基本的图形绘制.turtle图形绘制的概念诞生于1969年,成功应用于LOGO编程语言. turtle库绘制图形有一个基本框架:一个小海龟在坐标系中爬行,其爬行轨迹形成了绘制图形.刚开始绘制时,小海龟位于画布正中央,此处坐标为(0,0),前进方向为水平右方. Python——turtle库 turtle库包含100多个功能函数,主要包括窗体函数.画笔状态函数和画笔运动函数3类. 画笔运动函数 turtle通过一组函数控制画笔的行进动作

吴裕雄--天生自然python编程:正则表达式

re.match函数 re.match 尝试从字符串的起始位置匹配一个模式,如果不是起始位置匹配成功的话,match()就返回none. 函数语法: re.match(pattern, string, flags=0) 函数参数说明: 参数 描述 pattern 匹配的正则表达式 string 要匹配的字符串. flags 标志位,用于控制正则表达式的匹配方式,如:是否区分大小写,多行匹配等等. 匹配成功re.match方法返回一个匹配的对象,否则返回None. 我们可以使用group(num)

吴裕雄--天生自然python机器学习:决策树算法

我们经常使用决策树处理分类问题’近来的调查表明决策树也是最经常使用的数据挖掘算法. 它之所以如此流行,一个很重要的原因就是使用者基本上不用了解机器学习算法,也不用深究它 是如何工作的. K-近邻算法可以完成很多分类任务,但是它最大的缺点就是无法给出数据的内 在含义,决策树的主要优势就在于数据形式非常容易理解. 决策树很多任务都 是为了数据中所蕴含的知识信息,因此决策树可以使用不熟悉的数据集合,并从中提取出一系列 规则,机器学习算法最终将使用这些机器从数据集中创造的规则.专家系统中经常使用决策树,

吴裕雄--天生自然python机器学习:朴素贝叶斯算法

分类器有时会产生错误结果,这时可以要求分类器给出一个最优的类别猜测结果,同 时给出这个猜测的概率估计值. 概率论是许多机器学习算法的基础 在计算 特征值取某个值的概率时涉及了一些概率知识,在那里我们先统计特征在数据集中取某个特定值 的次数,然后除以数据集的实例总数,就得到了特征取该值的概率. 首先从一个最简单的概率分类器开始,然后给 出一些假设来学习朴素贝叶斯分类器.我们称之为“朴素”,是因为整个形式化过程只做最原始.最简单的假设. 基于贝叶斯决策理论的分类方法 朴素贝叶斯是贝叶斯决策理论的一部

吴裕雄--天生自然python Google深度学习框架:Tensorflow实现迁移学习

import glob import os.path import numpy as np import tensorflow as tf from tensorflow.python.platform import gfile import tensorflow.contrib.slim as slim # 加载通过TensorFlow-Slim定义好的inception_v3模型. import tensorflow.contrib.slim.python.slim.nets.incepti

吴裕雄--天生自然python机器学习:使用K-近邻算法改进约会网站的配对效果

在约会网站使用K-近邻算法 准备数据:从文本文件中解析数据 海伦收集约会数据巳经有了一段时间,她把这些数据存放在文本文件(1如1^及抓 比加 中,每 个样本数据占据一行,总共有1000行.海伦的样本主要包含以下3种特征: 每年获得的飞行常客里程数 玩视频游戏所耗时间百分比 每周消费的冰淇淋公升数 将文本记录到转换NumPy的解析程序 import operator from numpy import * from os import listdir def file2matrix(filenam

吴裕雄--天生自然python机器学习:使用朴素贝叶斯过滤垃圾邮件

使用朴素贝叶斯解决一些现实生活中 的问题时,需要先从文本内容得到字符串列表,然后生成词向量. 准备数据:切分文本 测试算法:使用朴素贝叶斯进行交叉验证 文件解析及完整的垃圾邮件测试函数 def createVocabList(dataSet): vocabSet = set([]) #create empty set for document in dataSet: vocabSet = vocabSet | set(document) #union of the two sets return