!pip install --upgrade pip
!pip install imblearn
!pip install xgboost
import warnings
warnings.filterwarnings('ignore')

import s3fs
import pickle as pkl
import tarfile
import xgboost
import io
import os
import matplotlib.pyplot as plt
import seaborn as sn
import numpy as np 
import pandas as pd 
import boto3
import sagemaker
from scipy.stats                       import ks_2samp
from sklearn.metrics                   import confusion_matrix, auc, roc_curve, recall_score,accuracy_score, precision_score, roc_auc_score, precision_recall_curve, average_precision_score, f1_score
from sagemaker.amazon.amazon_estimator import get_image_uri
from time                              import gmtime, strftime, sleep
from sagemaker                         import get_execution_role
from sklearn.model_selection           import train_test_split
from imblearn.over_sampling            import SMOTE
%matplotlib inline

Requirement already satisfied: pip in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (21.0.1)
Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Collecting imbalanced-learn
  Downloading imbalanced_learn-0.8.0-py3-none-any.whl (206 kB)
     |████████████████████████████████| 206 kB 17.7 MB/s eta 0:00:01
Requirement already satisfied: joblib>=0.11 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from imbalanced-learn->imblearn) (1.0.1)
Requirement already satisfied: numpy>=1.13.3 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from imbalanced-learn->imblearn) (1.19.5)
Requirement already satisfied: scipy>=0.19.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from imbalanced-learn->imblearn) (1.5.3)
Requirement already satisfied: scikit-learn>=0.24 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from imbalanced-learn->imblearn) (0.24.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from scikit-learn>=0.24->imbalanced-learn->imblearn) (2.1.0)
Installing collected packages: imbalanced-learn, imblearn
Successfully installed imbalanced-learn-0.8.0 imblearn-0.0
Collecting xgboost
  Downloading xgboost-1.4.0-py3-none-manylinux2010_x86_64.whl (166.7 MB)
     |████████████████████████████████| 166.7 MB 23 kB/s s eta 0:00:01
Requirement already satisfied: numpy in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from xgboost) (1.19.5)
Requirement already satisfied: scipy in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from xgboost) (1.5.3)
Installing collected packages: xgboost
Successfully installed xgboost-1.4.0

# Shared AWS / SageMaker handles used by the rest of the notebook:
# SDK session, region name, low-level SageMaker client, execution role
# and the session's default S3 bucket.
sagemaker_session = sagemaker.Session()
boto_session      = boto3.Session()
region            = boto_session.region_name
smclient          = boto_session.client('sagemaker')
role              = sagemaker.get_execution_role()
bucket            = sagemaker_session.default_bucket()

# Load the source dataset (semicolon-delimited CSV) into a DataFrame.
base_raiz = pd.read_csv('TB_TBN_SISCAN_HISTO_MAMA_PACNT.csv', sep=';')

base_raiz.columns

Index(['CO_SEQ_SISCAN_HISTO_MAMA_PACNT', 'CO_UF_RESIDENCIA',
       'CO_MUN_RESIDENCIA', 'NU_ANO_COMPETENCIA', 'NU_ANO_MES_COMPETENCIA',
       'CO_RACA_COR', 'CO_IDADE_PACIENTE', 'CO_ESCOLARIDADE',
       'CO_INTERVALO_COLETA', 'CO_INTERVALO_EXAME', 'CO_TEMPO_EXAME',
       'TP_RISCO_ELEVADO', 'TP_EXAME_HISTOPATOLOGICO', 'TP_DETECCAO_LESAO',
       'TP_LATERALIDADE_LESAO', 'TP_TAMANHO_LESAO',
       'TP_LINFONODO_AXILAR_PALPAVEL', 'TP_MATER_ENVIA_PROCEDENTE',
       'TP_PROCEDIMENTO_CIRURGICO', 'TP_ADEQUABILIDAD_MATERIAL',
       'ST_MICROCALCIFICACAO', 'TP_LESAO', 'TP_LESAO_CARAT_NEOPL_MALI',
       'TP_GRAU_HISTOLOGICO', 'TP_MARGEM_CIRURGICA', 'CO_PACIENTE', 'SG_SEXO',
       'TP_DIAGNOSTICO_IMAGEM', 'TP_TAMANHO_TUMOR', 'CO_ANO_RESULTADO',
       'TP_LAUDO_HISTOPATOLOGICO', 'SG_UF_RESIDENCIA'],
      dtype='object')

# Correlation matrix of the dataset, plotted only as a visual aid.
corrMatrix = base_raiz.corr()
sn.heatmap(corrMatrix, annot=True)
plt.show()

# List columns that are highly correlated (|r| >= 0.95) with an earlier column.
# Only the strict upper triangle (k=1 skips the diagonal) is inspected, so each
# pair is checked once and a column is never compared with itself.
# The builtin `bool` replaces the `np.bool` alias, which NumPy deprecated in
# 1.20 and removed in 1.24.
corr_matrix = corrMatrix.abs()
upper_mask  = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
upper       = corr_matrix.where(upper_mask)
to_drop     = [column for column in upper.columns if (upper[column] >= 0.95).any()]
to_drop

['CO_MUN_RESIDENCIA', 'NU_ANO_MES_COMPETENCIA', 'TP_LAUDO_HISTOPATOLOGICO']

# Columns that are not used for this problem and are removed up front.
unused_columns = [
    'NU_ANO_COMPETENCIA',
    'NU_ANO_MES_COMPETENCIA',
    'CO_TEMPO_EXAME',
    'TP_EXAME_HISTOPATOLOGICO',
    'TP_MATER_ENVIA_PROCEDENTE',
    'TP_PROCEDIMENTO_CIRURGICO',
    'TP_ADEQUABILIDAD_MATERIAL',
    'CO_ANO_RESULTADO',
    'TP_LESAO_CARAT_NEOPL_MALI',
    'TP_GRAU_HISTOLOGICO',
    'TP_MARGEM_CIRURGICA',
    'TP_DIAGNOSTICO_IMAGEM',
    'TP_LESAO',
    'CO_PACIENTE',
]
base_raiz = base_raiz.drop(columns=unused_columns)

# Discard every remaining object-typed column.
base_raiz = base_raiz.select_dtypes(exclude=['object'])
base_raiz.columns

Index(['CO_SEQ_SISCAN_HISTO_MAMA_PACNT', 'CO_UF_RESIDENCIA',
       'CO_MUN_RESIDENCIA', 'CO_RACA_COR', 'CO_IDADE_PACIENTE',
       'CO_ESCOLARIDADE', 'CO_INTERVALO_COLETA', 'CO_INTERVALO_EXAME',
       'TP_RISCO_ELEVADO', 'TP_DETECCAO_LESAO', 'TP_LATERALIDADE_LESAO',
       'TP_TAMANHO_LESAO', 'TP_LINFONODO_AXILAR_PALPAVEL', 'TP_TAMANHO_TUMOR',
       'TP_LAUDO_HISTOPATOLOGICO'],
      dtype='object')

# Row counts per target value, before any resampling.
base_raiz.groupby('TP_LAUDO_HISTOPATOLOGICO').count()

# Shuffle the rows. The shuffle is seeded: without a seed the row order changes
# on every run, so the seeded split below would not be reproducible either.
base_raiz = base_raiz.sample(frac = 1, random_state = 666)
Y_column  = base_raiz['TP_LAUDO_HISTOPATOLOGICO']
X_columns = base_raiz.drop(columns = 'TP_LAUDO_HISTOPATOLOGICO')

# Hold out 15% of the rows as the final test set.
# This is a random hold-out; the split is not time-based.
X_train_val, X_test, Y_train_val, Y_test = train_test_split(X_columns,
                                                            Y_column,
                                                            test_size    = 0.15,
                                                            random_state = 666)

# Re-join features and target of the train/validation part for the
# preprocessing steps that follow.
base_treino_val = pd.concat([X_train_val, Y_train_val], axis = 1)
base_treino_val.columns

Index(['CO_SEQ_SISCAN_HISTO_MAMA_PACNT', 'CO_UF_RESIDENCIA',
       'CO_MUN_RESIDENCIA', 'CO_RACA_COR', 'CO_IDADE_PACIENTE',
       'CO_ESCOLARIDADE', 'CO_INTERVALO_COLETA', 'CO_INTERVALO_EXAME',
       'TP_RISCO_ELEVADO', 'TP_DETECCAO_LESAO', 'TP_LATERALIDADE_LESAO',
       'TP_TAMANHO_LESAO', 'TP_LINFONODO_AXILAR_PALPAVEL', 'TP_TAMANHO_TUMOR',
       'TP_LAUDO_HISTOPATOLOGICO'],
      dtype='object')

# Write one randomly chosen held-out feature row (index and header included)
# to a local CSV; presumably a sample payload for a Lambda test - confirm.
lambda_sample = X_test.sample(1)
lambda_sample.to_csv('teste_lambda.csv')

CORRIGE NAN NA BASE DE TREINO & VALIDAÇÃO

# Mean imputation: every NaN is replaced by the mean of its own column.
# NOTE(review): this also applies to the target column if it contains NaN - confirm
# that is intended.
column_means    = base_treino_val.mean()
base_treino_val = base_treino_val.fillna(value = column_means)

# Peek at one random row as a sanity check.
base_treino_val.sample(n = 1)

CORRIGE NAN NA BASE DE TESTE

# Rebuild the held-out set as a single frame and shuffle it (seeded so the
# exported test files are reproducible).
base_teste   = pd.concat([X_test, Y_test], axis = 1)
base_teste   = base_teste.sample(frac = 1, random_state = 666)

# Impute with the TRAINING column means rather than the test set's own means:
# preprocessing statistics must come from the training data so that the test
# set (and any single row scored later) is transformed exactly like the data
# the model was fitted on. Mean-imputing the training frame does not change
# its column means, so they can be recomputed here.
# NOTE(review): as before, a NaN in the target column is also filled - confirm.
column_means = base_treino_val.mean()
base_teste   = base_teste.fillna(column_means)

# Re-encode the report code as a zero-based class id:
# 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 3 and every other value -> 4.
laudo = base_teste['TP_LAUDO_HISTOPATOLOGICO']
base_teste['flag_raiz'] = np.select([laudo == 1, laudo == 2, laudo == 3, laudo == 4],
                                    [0, 1, 2, 3],
                                    default = 4)
base_teste = base_teste.drop(columns = 'TP_LAUDO_HISTOPATOLOGICO')

# Split the prepared test frame into features and a one-column label frame.
X_test = base_teste.drop(columns = 'flag_raiz')
Y_test = base_teste['flag_raiz'].to_frame()

X_test.sample(n = 2)

Y_test

PREPARA BASE DE TREINO E VALIDAÇÃO

base_treino_val.groupby('TP_LAUDO_HISTOPATOLOGICO').count()

# One 0/1 indicator column per report code (2, 3 and 6); these are the
# targets of the SMOTE oversampling passes further down.
for laudo_code in (2, 3, 6):
    is_code = base_treino_val['TP_LAUDO_HISTOPATOLOGICO'] == laudo_code
    base_treino_val['flag_{}'.format(laudo_code)] = np.where(is_code, 1, 0)
base_treino_val.columns

Index(['CO_SEQ_SISCAN_HISTO_MAMA_PACNT', 'CO_UF_RESIDENCIA',
       'CO_MUN_RESIDENCIA', 'CO_RACA_COR', 'CO_IDADE_PACIENTE',
       'CO_ESCOLARIDADE', 'CO_INTERVALO_COLETA', 'CO_INTERVALO_EXAME',
       'TP_RISCO_ELEVADO', 'TP_DETECCAO_LESAO', 'TP_LATERALIDADE_LESAO',
       'TP_TAMANHO_LESAO', 'TP_LINFONODO_AXILAR_PALPAVEL', 'TP_TAMANHO_TUMOR',
       'TP_LAUDO_HISTOPATOLOGICO', 'flag_2', 'flag_3', 'flag_6'],
      dtype='object')

from collections import Counter


def _oversample_flag(frame, flag_column, ratio):
    """Oversample the positive class of one binary flag column with SMOTE.

    The flag column is used as the resampling target and every other column of
    ``frame`` (including the original report code) is treated as a feature.
    Returns the fitted sampler plus the resampled features and target.
    """
    sampler = SMOTE(sampling_strategy = ratio,
                    random_state = 666)
    resampled_X, resampled_y = sampler.fit_resample(frame.drop(columns=flag_column),
                                                    frame[flag_column])
    return sampler, resampled_X, resampled_y


# Pass 1: report code 2, minority/majority ratio 0.05.
smote, X_raiz, Y_raiz = _oversample_flag(base_treino_val, 'flag_2', 0.05)
base_treino_val = X_raiz
base_treino_val.groupby('TP_LAUDO_HISTOPATOLOGICO').count()

# Pass 2: report code 3, ratio 0.03.
smote, X_raiz, Y_raiz = _oversample_flag(base_treino_val, 'flag_3', 0.03)
base_treino_val = X_raiz
base_treino_val.groupby('TP_LAUDO_HISTOPATOLOGICO').count()

# Pass 3: report code 6, ratio 0.05.
smote, X_raiz, Y_raiz = _oversample_flag(base_treino_val, 'flag_6', 0.05)
base_treino_val = X_raiz
base_treino_val.groupby('TP_LAUDO_HISTOPATOLOGICO').count()

# Re-encode the report code as a zero-based class id:
# 1 -> 0, 2 -> 1, 3 -> 2, 4 -> 3 and every other value -> 4.
laudo_treino = base_treino_val['TP_LAUDO_HISTOPATOLOGICO']
base_treino_val['flag_raiz'] = np.select(
    [laudo_treino == 1, laudo_treino == 2, laudo_treino == 3, laudo_treino == 4],
    [0, 1, 2, 3],
    default = 4,
)

# The raw report code is no longer needed once the class id exists.
base_treino_val.drop(columns = 'TP_LAUDO_HISTOPATOLOGICO', inplace = True)
base_treino_val.groupby('flag_raiz').count()

# Shuffle the rows. The shuffle is seeded: without a seed the row order changes
# on every run, so the seeded split below would not be reproducible either.
base_treino_val = base_treino_val.sample(frac = 1, random_state = 666)
Y_column        = base_treino_val['flag_raiz']
X_columns       = base_treino_val.drop(columns = 'flag_raiz')

# 70% training / 30% validation.
# NOTE(review): the SMOTE passes run before this split, so the validation part
# also contains synthetic rows - confirm this is acceptable.
X_train, X_val, Y_train, Y_val = train_test_split(X_columns,
                                                  Y_column,
                                                  test_size    = 0.3,
                                                  random_state = 666)

# The feature parts are already DataFrames; only the label Series need to be
# turned into one-column frames for the CSV export that follows.
Y_train = Y_train.to_frame()
Y_val   = Y_val.to_frame()

INICIA PROCESSO DE TREINAMENTO

# Local working directory and the S3 key prefixes used further down.
data_directory = 'histo_detection'
prefix_model   = 'model'
prefix_other   = 'predict'
prefix_tuning  = 'SGM - Output Hyper.Tun'

# exist_ok=True creates the directory only when it is missing, without the
# separate check-then-create step.
os.makedirs(data_directory, exist_ok=True)

# Every file is written without header and without index.
# Held-out test set: features and labels in separate files.
X_test.to_csv(os.path.join(data_directory, 'test_X.csv'), header=False, index=False)
Y_test.to_csv(os.path.join(data_directory, 'test_Y.csv'), header=False, index=False)

# Feature-only copies of the training and validation sets.
X_train.to_csv(os.path.join(data_directory, 'train_X.csv'), header=False, index=False)
X_val.to_csv(os.path.join(data_directory, 'val_X.csv'), header=False, index=False)

# Training/validation files with the label as the FIRST column, followed by
# the features.
pd.concat([Y_val  , X_val]  , axis=1).to_csv(os.path.join(data_directory, 'valid.csv'), header=False, index=False)
pd.concat([Y_train, X_train], axis=1).to_csv(os.path.join(data_directory, 'train.csv'), header=False, index=False)

def _upload_from_data_dir(filename, key_prefix):
    """Upload one file from the local data directory to S3 and return the value of upload_data."""
    local_path = os.path.join(data_directory, filename)
    return sagemaker_session.upload_data(local_path, key_prefix=key_prefix)


# Upload all data to S3.
# train.csv / valid.csv (label + features) go under the model prefix.
train_loc   = _upload_from_data_dir('train.csv', prefix_model)
# Feature-only and label-only files go under the prediction prefix.
train_X_loc = _upload_from_data_dir('train_X.csv', prefix_other)

valid_loc   = _upload_from_data_dir('valid.csv', prefix_model)
valid_X_loc = _upload_from_data_dir('val_X.csv', prefix_other)

test_X_loc  = _upload_from_data_dir('test_X.csv', prefix_other)
test_Y_loc  = _upload_from_data_dir('test_Y.csv', prefix_other)

def _param_range(name, min_value, max_value):
    """Return one hyperparameter range entry; bounds are given as strings."""
    return {"Name": name, "MinValue": min_value, "MaxValue": max_value}


# Job name = fixed prefix + UTC day-hour-minute-second stamp.
tuning_job_name = "xgboost-tuningjob-{}".format(strftime("%d-%H-%M-%S", gmtime()))
print(tuning_job_name)

# Search space for the continuous hyperparameters.
_continuous_ranges = [
    _param_range("eta", "0", "0.5"),
    _param_range("gamma", "0", "5"),
    _param_range("min_child_weight", "0", "120"),
    _param_range("alpha", "0", "1000"),
    _param_range("subsample", "0.5", "1"),
    _param_range("colsample_bylevel", "0.5", "1"),
    _param_range("colsample_bytree", "0.5", "1"),
    _param_range("lambda", "0", "1000"),
]

# Search space for the integer hyperparameters.
_integer_ranges = [
    _param_range("max_depth", "1", "10"),
    _param_range("num_round", "100", "4000"),
    _param_range("max_delta_step", "0", "10"),
]

# Bayesian search that minimizes the validation multiclass error, limited to
# 6 training jobs in total with at most 2 running in parallel.
tuning_job_config = {
    "ParameterRanges": {
        "CategoricalParameterRanges": [],
        "ContinuousParameterRanges": _continuous_ranges,
        "IntegerParameterRanges": _integer_ranges,
    },
    "ResourceLimits": {
        "MaxNumberOfTrainingJobs": 6,
        "MaxParallelTrainingJobs": 2,
    },
    "Strategy": "Bayesian",
    "HyperParameterTuningJobObjective": {
        "MetricName": "validation:merror",
        "Type": "Minimize",
    },
}

xgboost-tuningjob-31-00-43-18

# Resolve the built-in XGBoost container image for the current region.
# `image_uris.retrieve` is the sagemaker>=2 replacement for the deprecated
# `get_image_uri`; the same 'latest' version is requested as before.
training_image = sagemaker.image_uris.retrieve(framework='xgboost',
                                               region=boto3.Session().region_name,
                                               version='latest')

# S3 prefixes of the uploaded train.csv / valid.csv files.
s3_input_train = 's3://{}/{}/train'.format(bucket, prefix_model)
s3_input_validation ='s3://{}/{}/valid'.format(bucket, prefix_model)


def _csv_channel(channel_name, s3_uri):
    """Return one uncompressed, fully replicated CSV input channel definition."""
    return {
        "ChannelName": channel_name,
        "CompressionType": "None",
        "ContentType": "csv",
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": s3_uri
            }
        }
    }


# Definition shared by every training job the tuner launches.
training_job_definition = {
    "AlgorithmSpecification": {
      "TrainingImage": training_image,
      "TrainingInputMode": "File"
    },
    "InputDataConfig": [
      _csv_channel("train", s3_input_train),
      _csv_channel("validation", s3_input_validation)
    ],
    "OutputDataConfig": {
      "S3OutputPath": "s3://{}/{}/output".format(bucket,prefix_tuning)
    },
    "ResourceConfig": {
      "InstanceCount": 1,
      "InstanceType": "ml.m4.xlarge",
      "VolumeSizeInGB": 10
    },
    "RoleArn": role,
    # Hyperparameters held fixed across the search: 5-class soft-probability
    # objective evaluated with the multiclass error rate.
    "StaticHyperParameters": {
      "eval_metric": "merror",
      "objective": "multi:softprob",
      "num_class" : '5',
      "seed" : "42"
    },
    # Each training job may run for at most 12 hours (43200 seconds).
    "StoppingCondition": {
      "MaxRuntimeInSeconds": 43200
    }
}

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.

# Submit the hyperparameter tuning job; the API response is the cell's value.
tuning_request = {
    "HyperParameterTuningJobName": tuning_job_name,
    "HyperParameterTuningJobConfig": tuning_job_config,
    "TrainingJobDefinition": training_job_definition,
}
smclient.create_hyper_parameter_tuning_job(**tuning_request)

{'HyperParameterTuningJobArn': 'arn:aws:sagemaker:us-east-1:771330539858:hyper-parameter-tuning-job/xgboost-tuningjob-31-00-43-18',
 'ResponseMetadata': {'RequestId': '7f2ca6de-30d9-43ce-8a09-75ed50b7aba8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7f2ca6de-30d9-43ce-8a09-75ed50b7aba8',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '130',
   'date': 'Wed, 31 Mar 2021 00:45:34 GMT'},
  'RetryAttempts': 0}}

# Look up the tuning job and show its current status string.
tuning_description = smclient.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuning_job_name)
tuning_description['HyperParameterTuningJobStatus']

'InProgress'

# Resolve the built-in XGBoost container image for the current region.
# `image_uris.retrieve` is the sagemaker>=2 replacement for the deprecated
# `get_image_uri`; the same 'latest' version is requested as before.
container = sagemaker.image_uris.retrieve(framework='xgboost',
                                          region=boto3.Session().region_name,
                                          version='latest')

# CSV training / validation channels pointing at the uploaded files.
# NOTE: `bucket` and `prefix_model` come from earlier cells, which must have
# been run in the current kernel session.
s3_input_train = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket, prefix_model), content_type='csv')
s3_input_validation = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/valid'.format(bucket, prefix_model), content_type='csv')

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-1a0846a4636c> in <module>
      1 container = get_image_uri(boto3.Session().region_name, 'xgboost', repo_version='latest')
      2 
----> 3 s3_input_train = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/train'.format(bucket, prefix_model), content_type='csv')
      4 s3_input_validation = sagemaker.inputs.TrainingInput(s3_data='s3://{}/{}/valid'.format(bucket, prefix_model), content_type='csv')

NameError: name 'prefix_model' is not defined

# Single-instance estimator for the final model. `instance_count` and
# `instance_type` are the sagemaker>=2 names of the former
# `train_instance_count` / `train_instance_type` arguments.
xgb = sagemaker.estimator.Estimator(container,
                                    role,
                                    instance_count=1,
                                    instance_type='ml.m4.xlarge',
                                    output_path='s3://{}/{}/output'.format(bucket, prefix_model),
                                    sagemaker_session=sagemaker_session)

# Hyperparameters copied from the best training job of the tuning run, as
# shown in the SageMaker console once the tuning job had finished.
xgb.set_hyperparameters(max_depth        = 6,
                        alpha            = 1.3590152328600276,
                        max_delta_step   = 6,
                        min_child_weight = 1.8323029919604323,
                        subsample        = 0.9891388904541443,
                        eta              = 0.4258935309521509,
                        gamma            = 0.3573669255797674,
                        num_round        = 27,  # NOTE(review): the original comment mentions 194 here - confirm which value is intended
                        seed             = 666,
                        silent           = 0,
                        num_class        = 5,
                        objective        = 'multi:softprob',
                        eval_metric      = 'merror')

# Train on the 'train' channel and evaluate on the 'validation' channel.
xgb.fit({'train': s3_input_train,
         'validation': s3_input_validation})

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.

2021-03-30 01:05:09 Starting - Starting the training job...
2021-03-30 01:05:22 Starting - Launching requested ML instancesProfilerReport-1617066308: InProgress
......
2021-03-30 01:06:35 Starting - Preparing the instances for training.........
2021-03-30 01:08:04 Downloading - Downloading input data...
2021-03-30 01:08:33 Training - Training image download completed. Training in progress.Arguments: train
[2021-03-30:01:08:34:INFO] Running standalone xgboost training.
[2021-03-30:01:08:34:INFO] File size need to be processed in the node: 7.0mb. Available memory size in the node: 8426.58mb
[2021-03-30:01:08:34:INFO] Determined delimiter of CSV input is ','
[01:08:34] S3DistributionType set as FullyReplicated
[01:08:34] 85853x14 matrix with 1201942 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,
[2021-03-30:01:08:34:INFO] Determined delimiter of CSV input is ','
[01:08:34] S3DistributionType set as FullyReplicated
[01:08:34] 36795x14 matrix with 515130 entries loaded from /opt/ml/input/data/validation?format=csv&label_column=0&delimiter=,
[01:08:34] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 108 extra nodes, 4 pruned nodes, max_depth=6
[01:08:34] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=6
[01:08:34] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 72 extra nodes, 4 pruned nodes, max_depth=6
[01:08:34] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 112 extra nodes, 2 pruned nodes, max_depth=6
[01:08:34] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 80 extra nodes, 0 pruned nodes, max_depth=6
[0]#011train-merror:0.29667#011validation-merror:0.294632
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 114 extra nodes, 2 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 4 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 76 extra nodes, 0 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 118 extra nodes, 6 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 84 extra nodes, 0 pruned nodes, max_depth=6
[1]#011train-merror:0.290823#011validation-merror:0.291127
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 108 extra nodes, 8 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 6 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 118 extra nodes, 0 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=6
[2]#011train-merror:0.285779#011validation-merror:0.286805
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 112 extra nodes, 10 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 2 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 68 extra nodes, 2 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 120 extra nodes, 0 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=6
[3]#011train-merror:0.28451#011validation-merror:0.286153
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 106 extra nodes, 14 pruned nodes, max_depth=6
[01:08:35] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 2 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 76 extra nodes, 0 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 108 extra nodes, 6 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=6
[4]#011train-merror:0.282693#011validation-merror:0.285881
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 104 extra nodes, 12 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 6 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 4 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 114 extra nodes, 2 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=6
[5]#011train-merror:0.280142#011validation-merror:0.28387
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 90 extra nodes, 10 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 0 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 116 extra nodes, 2 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=6
[6]#011train-merror:0.278744#011validation-merror:0.281451
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 92 extra nodes, 4 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 0 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 112 extra nodes, 4 pruned nodes, max_depth=6
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 4 pruned nodes, max_depth=6
[7]#011train-merror:0.277043#011validation-merror:0.27944
[01:08:36] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 104 extra nodes, 6 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 8 pruned nodes, max_depth=6
[8]#011train-merror:0.27405#011validation-merror:0.278706
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 84 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 4 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 94 extra nodes, 14 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 6 pruned nodes, max_depth=6
[9]#011train-merror:0.271184#011validation-merror:0.275391
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 90 extra nodes, 4 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 6 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 86 extra nodes, 16 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 0 pruned nodes, max_depth=6
[10]#011train-merror:0.26974#011validation-merror:0.274494
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 102 extra nodes, 4 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 0 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 2 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 124 extra nodes, 2 pruned nodes, max_depth=6
[01:08:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 6 pruned nodes, max_depth=6
[11]#011train-merror:0.267061#011validation-merror:0.27164
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 66 extra nodes, 4 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 4 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 4 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 98 extra nodes, 0 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 0 pruned nodes, max_depth=6
[12]#011train-merror:0.266153#011validation-merror:0.271015
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 14 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 2 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 0 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 80 extra nodes, 2 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 50 extra nodes, 2 pruned nodes, max_depth=6
[13]#011train-merror:0.26486#011validation-merror:0.270037
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 12 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 0 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 4 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 116 extra nodes, 0 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=6
[14]#011train-merror:0.263474#011validation-merror:0.26895
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 118 extra nodes, 2 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 4 pruned nodes, max_depth=6
[01:08:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 104 extra nodes, 12 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 6 pruned nodes, max_depth=6
[15]#011train-merror:0.261598#011validation-merror:0.26808
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 100 extra nodes, 8 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 0 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 6 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 100 extra nodes, 6 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 6 pruned nodes, max_depth=6
[16]#011train-merror:0.260562#011validation-merror:0.267754
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 102 extra nodes, 14 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 4 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 4 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 78 extra nodes, 2 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=6
[17]#011train-merror:0.259187#011validation-merror:0.267047
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 80 extra nodes, 10 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 54 extra nodes, 0 pruned nodes, max_depth=6
[01:08:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 38 extra nodes, 6 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 96 extra nodes, 8 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 0 pruned nodes, max_depth=6
[18]#011train-merror:0.258477#011validation-merror:0.266422
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 74 extra nodes, 10 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 2 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 80 extra nodes, 0 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 4 pruned nodes, max_depth=6
[19]#011train-merror:0.256566#011validation-merror:0.26558
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 76 extra nodes, 4 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 4 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 122 extra nodes, 2 pruned nodes, max_depth=6
[01:08:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 6 pruned nodes, max_depth=6
[20]#011train-merror:0.255052#011validation-merror:0.263541
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 114 extra nodes, 4 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 2 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 8 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 64 extra nodes, 2 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 4 pruned nodes, max_depth=6
[21]#011train-merror:0.254109#011validation-merror:0.262536
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 12 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 2 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 42 extra nodes, 10 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 72 extra nodes, 14 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 2 pruned nodes, max_depth=6
[22]#011train-merror:0.253655#011validation-merror:0.26221
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 100 extra nodes, 4 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 62 extra nodes, 0 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 48 extra nodes, 6 pruned nodes, max_depth=6
[01:08:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 88 extra nodes, 6 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 52 extra nodes, 4 pruned nodes, max_depth=6
[23]#011train-merror:0.25256#011validation-merror:0.260987
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 122 extra nodes, 2 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 60 extra nodes, 4 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 58 extra nodes, 4 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 84 extra nodes, 2 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=6
[24]#011train-merror:0.25108#011validation-merror:0.260117
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 102 extra nodes, 4 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 56 extra nodes, 2 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 44 extra nodes, 2 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 86 extra nodes, 0 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 0 pruned nodes, max_depth=6
[25]#011train-merror:0.250475#011validation-merror:0.259437
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 100 extra nodes, 2 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 102 extra nodes, 12 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 76 extra nodes, 14 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 46 extra nodes, 8 pruned nodes, max_depth=6
[01:08:42] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 122 extra nodes, 2 pruned nodes, max_depth=6
[26]#011train-merror:0.248914#011validation-merror:0.258079

2021-03-30 01:09:04 Uploading - Uploading generated training model
2021-03-30 01:09:04 Completed - Training job completed
Training seconds: 69
Billable seconds: 69

PREDICTION AND RESULTS¶

BATCH TRANSFORM¶

To assess how good the model is, I run it as an endpoint in batch transform mode on each of the three datasets:

1) training dataset (endpoint in batch transform mode; afterwards the predictions are copied to the local directory)

2) validation dataset (endpoint in batch transform mode; afterwards the predictions are copied to the local directory)

3) test dataset (endpoint in batch transform mode; afterwards the predictions are copied to the local directory)

# Build a batch-transform handle from `xgb` (presumably the estimator fitted
# in the training step above), served by a single ml.m4.xlarge instance.
xgb_transformer = xgb.transformer(
    instance_count=1,
    instance_type='ml.m4.xlarge',
)

# Score the training features. The input is declared as CSV and is split on
# line boundaries before being sent to the model container.
xgb_transformer.transform(
    train_X_loc,
    content_type='text/csv',
    split_type='Line',
)

# Block until the transform job has finished so that its output can be
# downloaded in the next cell.
xgb_transformer.wait()

...............................Arguments: serve
[2021-03-30 01:14:36 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:14:36 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:14:36 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:14:36 +0000] [20] [INFO] Booting worker with pid: 20
[2021-03-30 01:14:36 +0000] [21] [INFO] Booting worker with pid: 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 21
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 20
[2021-03-30 01:14:36 +0000] [22] [INFO] Booting worker with pid: 22
[2021-03-30 01:14:36 +0000] [23] [INFO] Booting worker with pid: 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 23
[2021-03-30:01:14:41:INFO] Sniff delimiter as ','
[2021-03-30:01:14:41:INFO] Determined delimiter of CSV input is ','
2021-03-30T01:14:40.905:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD

Arguments: serve
Arguments: serve
[2021-03-30 01:14:36 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:14:36 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:14:36 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:14:36 +0000] [20] [INFO] Booting worker with pid: 20
[2021-03-30 01:14:36 +0000] [21] [INFO] Booting worker with pid: 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 21
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 20
[2021-03-30 01:14:36 +0000] [22] [INFO] Booting worker with pid: 22
[2021-03-30 01:14:36 +0000] [23] [INFO] Booting worker with pid: 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30 01:14:36 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:14:36 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:14:36 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:14:36 +0000] [20] [INFO] Booting worker with pid: 20
[2021-03-30 01:14:36 +0000] [21] [INFO] Booting worker with pid: 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 21
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 20
[2021-03-30 01:14:36 +0000] [22] [INFO] Booting worker with pid: 22
[2021-03-30 01:14:36 +0000] [23] [INFO] Booting worker with pid: 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 23
[2021-03-30:01:14:36:INFO] Model loaded successfully for worker : 23
[2021-03-30:01:14:41:INFO] Sniff delimiter as ','
[2021-03-30:01:14:41:INFO] Determined delimiter of CSV input is ','
[2021-03-30:01:14:41:INFO] Sniff delimiter as ','
[2021-03-30:01:14:41:INFO] Determined delimiter of CSV input is ','
2021-03-30T01:14:40.905:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD

# Recursively copy everything under the transformer's S3 output path into the
# local data directory, so the prediction file (train_X.csv.out) written by the
# batch transform job can be read locally.
!aws s3 cp --recursive $xgb_transformer.output_path $data_directory

download: s3://sagemaker-us-east-1-771330539858/xgboost-2021-03-30-01-09-33-523/train_X.csv.out to histo_detection/train_X.csv.out

# Score the validation features with the same transformer. The input is
# declared as CSV and is split on line boundaries.
xgb_transformer.transform(
    valid_X_loc,
    content_type='text/csv',
    split_type='Line',
)

# Block until the transform job has finished before downloading its output.
xgb_transformer.wait()

.............................Arguments: serve
[2021-03-30 01:21:24 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:21:24 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:21:24 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:21:24 +0000] [20] [INFO] Booting worker with pid: 20
[2021-03-30 01:21:24 +0000] [21] [INFO] Booting worker with pid: 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 20
[2021-03-30 01:21:24 +0000] [22] [INFO] Booting worker with pid: 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 22
[2021-03-30 01:21:24 +0000] [23] [INFO] Booting worker with pid: 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 23
[2021-03-30:01:21:29:INFO] Sniff delimiter as ','
[2021-03-30:01:21:29:INFO] Determined delimiter of CSV input is ','

2021-03-30T01:21:28.617:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD
Arguments: serve
Arguments: serve
[2021-03-30 01:21:24 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:21:24 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:21:24 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:21:24 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:21:24 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:21:24 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:21:24 +0000] [20] [INFO] Booting worker with pid: 20
[2021-03-30 01:21:24 +0000] [21] [INFO] Booting worker with pid: 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 20
[2021-03-30 01:21:24 +0000] [22] [INFO] Booting worker with pid: 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 22
[2021-03-30 01:21:24 +0000] [23] [INFO] Booting worker with pid: 23
[2021-03-30 01:21:24 +0000] [20] [INFO] Booting worker with pid: 20
[2021-03-30 01:21:24 +0000] [21] [INFO] Booting worker with pid: 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 20
[2021-03-30 01:21:24 +0000] [22] [INFO] Booting worker with pid: 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 22
[2021-03-30 01:21:24 +0000] [23] [INFO] Booting worker with pid: 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)', 'requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:21:24:INFO] Model loaded successfully for worker : 23
[2021-03-30:01:21:29:INFO] Sniff delimiter as ','
[2021-03-30:01:21:29:INFO] Sniff delimiter as ','
[2021-03-30:01:21:29:INFO] Determined delimiter of CSV input is ','
[2021-03-30:01:21:29:INFO] Determined delimiter of CSV input is ','
2021-03-30T01:21:28.617:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD

# Recursively copy everything under the transformer's S3 output path into the
# local data directory, so the prediction file (val_X.csv.out) written by the
# batch transform job can be read locally.
!aws s3 cp --recursive $xgb_transformer.output_path $data_directory

download: s3://sagemaker-us-east-1-771330539858/xgboost-2021-03-30-01-16-45-584/val_X.csv.out to histo_detection/val_X.csv.out

# Score the test features with the same transformer. The input is declared
# as CSV and is split on line boundaries.
xgb_transformer.transform(
    test_X_loc,
    content_type='text/csv',
    split_type='Line',
)

# Block until the transform job has finished before downloading its output.
xgb_transformer.wait()

..............................
Arguments: serve
[2021-03-30 01:27:03 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:27:03 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:27:03 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:27:03 +0000] [21] [INFO] Booting worker with pid: 21
[2021-03-30 01:27:04 +0000] [22] [INFO] Booting worker with pid: 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 22
[2021-03-30 01:27:04 +0000] [23] [INFO] Booting worker with pid: 23
[2021-03-30 01:27:04 +0000] [24] [INFO] Booting worker with pid: 24
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 24
[2021-03-30:01:27:08:INFO] Sniff delimiter as ','
[2021-03-30:01:27:08:INFO] Determined delimiter of CSV input is ','
2021-03-30T01:27:08.151:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD
Arguments: serve
Arguments: serve
[2021-03-30 01:27:03 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:27:03 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:27:03 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:27:03 +0000] [1] [INFO] Starting gunicorn 19.9.0
[2021-03-30 01:27:03 +0000] [1] [INFO] Listening at: http://0.0.0.0:8080 (1)
[2021-03-30 01:27:03 +0000] [1] [INFO] Using worker: gevent
[2021-03-30 01:27:03 +0000] [21] [INFO] Booting worker with pid: 21
[2021-03-30 01:27:04 +0000] [22] [INFO] Booting worker with pid: 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 22
[2021-03-30 01:27:04 +0000] [23] [INFO] Booting worker with pid: 23
[2021-03-30 01:27:04 +0000] [24] [INFO] Booting worker with pid: 24
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 24
[2021-03-30 01:27:03 +0000] [21] [INFO] Booting worker with pid: 21
[2021-03-30 01:27:04 +0000] [22] [INFO] Booting worker with pid: 22
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 21
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 22
[2021-03-30 01:27:04 +0000] [23] [INFO] Booting worker with pid: 23
[2021-03-30 01:27:04 +0000] [24] [INFO] Booting worker with pid: 24
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 23
/opt/amazon/lib/python3.7/site-packages/gunicorn/workers/ggevent.py:65: MonkeyPatchWarning: Monkey-patching ssl after ssl has already been imported may lead to errors, including RecursionError on Python 3.6. It may also silently lead to incorrect behaviour on Python 3.7. Please monkey-patch earlier. See https://github.com/gevent/gevent/issues/1016. Modules that had direct imports (NOT patched): ['requests.packages.urllib3.util (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/__init__.py)', 'requests.packages.urllib3.util.ssl_ (/opt/amazon/lib/python3.7/site-packages/requests/packages/urllib3/util/ssl_.py)']. 
  monkey.patch_all(subprocess=True)
[2021-03-30:01:27:04:INFO] Model loaded successfully for worker : 24
[2021-03-30:01:27:08:INFO] Sniff delimiter as ','
[2021-03-30:01:27:08:INFO] Determined delimiter of CSV input is ','
[2021-03-30:01:27:08:INFO] Sniff delimiter as ','
[2021-03-30:01:27:08:INFO] Determined delimiter of CSV input is ','
2021-03-30T01:27:08.151:[sagemaker logs]: MaxConcurrentTransforms=4, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD

# Shell escape: recursively copy everything under the transformer's S3 output path
# into the local data directory (the variables are expanded by IPython).
!aws s3 cp --recursive $xgb_transformer.output_path $data_directory

download: s3://sagemaker-us-east-1-771330539858/xgboost-2021-03-30-01-22-14-909/test_X.csv.out to histo_detection/test_X.csv.out

FINAL PREDICTIONS¶

In the cell below, I load the model's predictions and add a column with the true class.

def _load_predicted_class(out_file):
    """Return the arg-max column label for each row of a transform output file.

    The file is read from ``data_directory`` without a header. The code assumes
    each line is a bracketed list of scores, so the opening bracket ends up
    glued to the first column and the closing bracket to the last one; both are
    stripped before the scores are cast to float. Using the first/last column
    (instead of the literal indices 0 and 4) keeps this working for any number
    of score columns.
    """
    scores = pd.read_csv(os.path.join(data_directory, out_file), header=None)
    first_col, last_col = scores.columns[0], scores.columns[-1]
    scores[first_col] = scores[first_col].map(lambda cell: str(cell).replace('[', ''))
    scores[last_col] = scores[last_col].map(lambda cell: str(cell).replace(']', ''))
    # With header=None the column labels are 0..k-1, so idxmax yields the
    # index of the highest-scoring column for each row.
    return scores.astype(float).idxmax(axis=1)


def _load_true_class(label_file):
    """Return the first column of a header-less CSV in ``data_directory``.

    NOTE(review): presumably the rows of the label file are in the same order
    as the corresponding ``.out`` file - confirm against the cell that wrote them.
    """
    return pd.read_csv(os.path.join(data_directory, label_file), header=None)[0]


# Each frame ends up with exactly two columns:
#   'Max'  - predicted class (arg-max over the score columns)
#   'True' - label taken from the first column of the matching CSV
pred_train = pd.DataFrame({'Max': _load_predicted_class('train_X.csv.out')})
pred_val = pd.DataFrame({'Max': _load_predicted_class('val_X.csv.out')})
pred_test = pd.DataFrame({'Max': _load_predicted_class('test_X.csv.out')})

pred_train['True'] = _load_true_class('train.csv')
pred_val['True'] = _load_true_class('valid.csv')
pred_test['True'] = _load_true_class('test_Y.csv')

# Training split: true labels are passed first and predicted labels second, so
# (by scikit-learn's convention) rows index the true class and columns the predicted class.
confusion_matrix(pred_train['True'], pred_train['Max'])

array([[ 9759,   783,   351, 17898,   979],
       [ 1302,    86,    56,  2306,   110],
       [  710,    62,    37,  1465,    91],
       [14942,  1184,   525, 27626,  1511],
       [ 1328,   106,    39,  2454,   143]])

# Validation split: true labels are passed first and predicted labels second, so
# (by scikit-learn's convention) rows index the true class and columns the predicted class.
confusion_matrix(pred_val['True'], pred_val['Max'])

array([[ 4156,   306,   171,  7694,   436],
       [  521,    46,    23,   967,    52],
       [  348,    33,    13,   627,    42],
       [ 6525,   466,   234, 11748,   618],
       [  573,    50,    19,  1073,    54]])

# Test split: true labels are passed first and predicted labels second, so
# (by scikit-learn's convention) rows index the true class and columns the predicted class.
confusion_matrix(pred_test['True'], pred_test['Max'])

array([[2607,   60,    7, 4856,   57],
       [  97,    4,    0,  205,    3],
       [  40,    0,    0,   73,    1],
       [3971,  102,   16, 7261,   81],
       [  55,    0,    0,   93,    1]])

# Overall accuracy (in percent, two decimals) of the predicted class per split.
# The columns are passed as a list: a set literal has no defined order, so the
# layout of the table was arbitrary, and newer pandas releases reject a set here.
table_metrics = pd.DataFrame(
    [
        {
            'Data': split_name,
            'Acc': round(accuracy_score(split_frame['True'], split_frame['Max']) * 100, 2),
        }
        for split_name, split_frame in (
            ('Train', pred_train),
            ('Valid', pred_val),
            ('Test', pred_test),
        )
    ],
    columns=['Data', 'Acc'],
)
table_metrics

# Give the prediction tables their report-facing column names, then write each
# split to a ';'-separated CSV (paths are relative to the working directory).
report_columns = {'Max': 'Laudo Modelo', 'True': 'Laudo Real'}
exports = [
    (pred_train, 'base_treino_predicao.csv'),
    (pred_val, 'base_valid_predicao.csv'),
    (pred_test, 'base_teste_predicao.csv'),
]

# Rename all three frames in place first...
for split_frame, _ in exports:
    split_frame.rename(columns=report_columns, inplace=True)

# ...and only then write them out, in the same order as before.
for split_frame, csv_name in exports:
    split_frame.to_csv(csv_name, sep=';')

MÉTRICAS¶

TABELAS COM MÉTRICAS¶

1) KS

2) ROC

3) F1

4) Recall

5) Precision

6) Acurácia

def _read_raw_scores(out_name):
    """Read a header-less transform output file from ``data_directory``.

    The '[' is removed from column 0 and the ']' from column 4; both columns
    are left as strings here (no numeric cast is done in this cell).
    """
    table = pd.read_csv(os.path.join(data_directory, out_name), header=None)
    table[0] = table[0].map(lambda cell: str(cell).replace('[', ''))
    table[4] = table[4].map(lambda cell: str(cell).replace(']', ''))
    return table


# Reload the per-class scores for every split (this time keeping all columns).
pred_train = _read_raw_scores('train_X.csv.out')
pred_val = _read_raw_scores('val_X.csv.out')
pred_test = _read_raw_scores('test_X.csv.out')

# Attach the reference label as column 'True'. For train/valid it is the first
# column of the CSV; the test labels file is assigned as read.
train_labels = pd.read_csv(os.path.join(data_directory, 'train.csv'), header=None)
pred_train['True'] = train_labels[[0]].rename(columns={0: 'True'})

valid_labels = pd.read_csv(os.path.join(data_directory, 'valid.csv'), header=None)
pred_val['True'] = valid_labels[[0]].rename(columns={0: 'True'})

pred_test['True'] = pd.read_csv(os.path.join(data_directory, 'test_Y.csv'), header=None)

pred_train.head(5)

# Reshape every split into a one-vs-rest layout:
#   'True k' - 1 where the label equals k, otherwise 0
#   'Mod k'  - the score column k, cast to float
# The original multi-class 'True' column is dropped afterwards.
class_ids = range(5)
score_names = {class_id: 'Mod {}'.format(class_id) for class_id in class_ids}

for split_frame in (pred_train, pred_val, pred_test):
    # Indicator columns are appended first so they land after the score columns.
    for class_id in class_ids:
        indicator = np.where(split_frame['True'] == class_id, 1, 0)
        split_frame['True {}'.format(class_id)] = indicator.astype(int)

    # Columns 0 and 4 may still hold strings after the bracket stripping.
    for class_id in class_ids:
        split_frame[class_id] = split_frame[class_id].astype(float)

    split_frame.drop(columns='True', inplace=True)
    split_frame.rename(columns=score_names, inplace=True)

# One-vs-rest metrics for the training split, one row per class.
# For class k the score column 'Mod k' is compared with the 0/1 indicator
# 'True k'. ROC AUC and KS use the raw score; F1, recall, precision and
# accuracy use the score rounded to 0/1. All values are percentages rounded
# to two decimals.
#
# The columns are passed as a list: a set literal has no defined order and
# newer pandas releases reject a set for `columns`.
metric_columns = ['Data', 'ROC', 'Rec', 'Prec', 'Acc', 'KS', 'F1']

# Display names for class indices 0..4 (index 4 is reported as 'Faixa 6').
faixa_names = ['1', '2', '3', '4', '6']

train_rows = []
for class_idx, faixa in enumerate(faixa_names):
    true_col = 'True {}'.format(class_idx)
    score_col = 'Mod {}'.format(class_idx)

    y_true = np.asarray(pred_train[true_col])
    y_score = np.asarray(pred_train[score_col])
    y_hard = np.asarray(pred_train[score_col].round())

    # KS statistic between the score distributions of negatives and positives.
    ks_stat = ks_2samp(pred_train.loc[pred_train[true_col] == 0, score_col],
                       pred_train.loc[pred_train[true_col] == 1, score_col])[0]

    train_rows.append({
        'Data': 'Treino - Faixa ' + faixa,
        'ROC': round(roc_auc_score(y_true, y_score) * 100, 2),
        'KS': round(ks_stat * 100, 2),
        'F1': round(f1_score(y_true, y_hard, average='binary') * 100, 2),
        'Rec': round(recall_score(y_true, y_hard) * 100, 2),
        'Prec': round(precision_score(y_true, y_hard) * 100, 2),
        'Acc': round(accuracy_score(y_true, y_hard, normalize=True) * 100, 2),
    })

metricas_train = pd.DataFrame(train_rows, columns=metric_columns)

# One-vs-rest metrics for the validation split, one row per class.
# For class k the score column 'Mod k' is compared with the 0/1 indicator
# 'True k'. ROC AUC and KS use the raw score; F1, recall, precision and
# accuracy use the score rounded to 0/1. All values are percentages rounded
# to two decimals.
#
# The columns are passed as a list: a set literal has no defined order and
# newer pandas releases reject a set for `columns`.
metric_columns = ['Data', 'ROC', 'Rec', 'Prec', 'Acc', 'KS', 'F1']

# Display names for class indices 0..4 (index 4 is reported as 'Faixa 6').
faixa_names = ['1', '2', '3', '4', '6']

valid_rows = []
for class_idx, faixa in enumerate(faixa_names):
    true_col = 'True {}'.format(class_idx)
    score_col = 'Mod {}'.format(class_idx)

    y_true = np.asarray(pred_val[true_col])
    y_score = np.asarray(pred_val[score_col])
    y_hard = np.asarray(pred_val[score_col].round())

    # KS statistic between the score distributions of negatives and positives.
    ks_stat = ks_2samp(pred_val.loc[pred_val[true_col] == 0, score_col],
                       pred_val.loc[pred_val[true_col] == 1, score_col])[0]

    valid_rows.append({
        'Data': 'Valid - Faixa ' + faixa,
        'ROC': round(roc_auc_score(y_true, y_score) * 100, 2),
        'KS': round(ks_stat * 100, 2),
        'F1': round(f1_score(y_true, y_hard, average='binary') * 100, 2),
        'Rec': round(recall_score(y_true, y_hard) * 100, 2),
        'Prec': round(precision_score(y_true, y_hard) * 100, 2),
        'Acc': round(accuracy_score(y_true, y_hard, normalize=True) * 100, 2),
    })

metricas_valid = pd.DataFrame(valid_rows, columns=metric_columns)

# One-vs-rest metrics for the test split, one row per class.
# For class k the score column 'Mod k' is compared with the 0/1 indicator
# 'True k'. ROC AUC and KS use the raw score; F1, recall, precision and
# accuracy use the score rounded to 0/1. All values are percentages rounded
# to two decimals.
#
# The columns are passed as a list: a set literal has no defined order and
# newer pandas releases reject a set for `columns`.
metric_columns = ['Data', 'ROC', 'Rec', 'Prec', 'Acc', 'KS', 'F1']

# Display names for class indices 0..4 (index 4 is reported as 'Faixa 6').
faixa_names = ['1', '2', '3', '4', '6']

test_rows = []
for class_idx, faixa in enumerate(faixa_names):
    true_col = 'True {}'.format(class_idx)
    score_col = 'Mod {}'.format(class_idx)

    y_true = np.asarray(pred_test[true_col])
    y_score = np.asarray(pred_test[score_col])
    y_hard = np.asarray(pred_test[score_col].round())

    # KS statistic between the score distributions of negatives and positives.
    ks_stat = ks_2samp(pred_test.loc[pred_test[true_col] == 0, score_col],
                       pred_test.loc[pred_test[true_col] == 1, score_col])[0]

    test_rows.append({
        'Data': 'Teste - Faixa ' + faixa,
        'ROC': round(roc_auc_score(y_true, y_score) * 100, 2),
        'KS': round(ks_stat * 100, 2),
        'F1': round(f1_score(y_true, y_hard, average='binary') * 100, 2),
        'Rec': round(recall_score(y_true, y_hard) * 100, 2),
        'Prec': round(precision_score(y_true, y_hard) * 100, 2),
        'Acc': round(accuracy_score(y_true, y_hard, normalize=True) * 100, 2),
    })

metricas_test = pd.DataFrame(test_rows, columns=metric_columns)

# Training summary: index the rows by their 'Data' label and show only KS, ROC
# and accuracy (F1, recall and precision are left out of this view).
metricas_train = metricas_train.set_index('Data')[['KS', 'ROC', 'Acc']]
metricas_train

# Validation summary: index the rows by their 'Data' label and show only KS, ROC
# and accuracy (F1, recall and precision are left out of this view).
metricas_valid = metricas_valid.set_index('Data')[['KS', 'ROC', 'Acc']]
metricas_valid

# Test summary: index the table by the 'Data' label and keep only the
# KS / ROC / Acc columns. 'F1', 'Rec' and 'Prec' are left out of this view;
# add them to the list below to display the full metric set.
metricas_test = metricas_test.set_index('Data')[['KS', 'ROC', 'Acc']]
metricas_test

CURVAS ROC

# Font settings for the ROC figures (also read by later cells).
title_font = {'fontname' : 'Arial',
              'size'     : '17',
              'weight'   : 'bold'}
axis_font  = {'fontname' : 'Arial',
              'size'     : '12'}

# ROC curves on the training predictions: one curve per 'True k' / 'Mod k'
# column pair of pred_train (k = 0..4).
# Chance diagonal. The dash style is given through `linestyle` rather than the
# fmt string 'r--', which asked for red while `color` asked for black.
plt.plot([0.0, 1.0], [0.0, 1.0], linestyle = '--', linewidth = 0.5, label = 'Coin', color = 'black')

# Legend label and line colour for each column index k; the labels are not the
# indices themselves (index 4 is shown as '6').
_roc_styles = [('1', 'blue'), ('2', 'red'), ('3', 'orange'), ('4', 'green'), ('6', 'darkmagenta')]
for _k, (_legend, _colour) in enumerate(_roc_styles):
    _fpr, _tpr, _ = roc_curve(np.asarray(pred_train[f'True {_k}']), np.asarray(pred_train[f'Mod {_k}']))
    plt.plot(_fpr, _tpr, linewidth = 0.5, label = _legend, color = _colour)

plt.title('Curva ROC - Treino', title_font)
plt.xlabel('False Positive Rate', axis_font)
plt.ylabel('True Positive Rate', axis_font)
plt.legend()

<matplotlib.legend.Legend at 0x7f2b9b365278>

findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.
findfont: Font family ['Arial'] not found. Falling back to DejaVu Sans.

# ROC curves on the validation predictions: one curve per 'True k' / 'Mod k'
# column pair of pred_val (k = 0..4).
# Chance diagonal. The dash style is given through `linestyle` rather than the
# fmt string 'r--', which asked for red while `color` asked for black.
plt.plot([0.0, 1.0], [0.0, 1.0], linestyle = '--', linewidth = 0.5, label = 'Coin', color = 'black')

# Legend label and line colour for each column index k; the labels are not the
# indices themselves (index 4 is shown as '6').
_roc_styles = [('1', 'blue'), ('2', 'red'), ('3', 'orange'), ('4', 'green'), ('6', 'darkmagenta')]
for _k, (_legend, _colour) in enumerate(_roc_styles):
    _fpr, _tpr, _ = roc_curve(np.asarray(pred_val[f'True {_k}']), np.asarray(pred_val[f'Mod {_k}']))
    plt.plot(_fpr, _tpr, linewidth = 0.5, label = _legend, color = _colour)

plt.title('Curva ROC - Validação', title_font)
plt.xlabel('False Positive Rate', axis_font)
plt.ylabel('True Positive Rate', axis_font)
plt.legend()

<matplotlib.legend.Legend at 0x7f2b9b435390>

# ROC curves on the test predictions: one curve per 'True k' / 'Mod k'
# column pair of pred_test (k = 0..4).
# Chance diagonal. The dash style is given through `linestyle` rather than the
# fmt string 'r--', which asked for red while `color` asked for black.
plt.plot([0.0, 1.0], [0.0, 1.0], linestyle = '--', linewidth = 0.5, label = 'Coin', color = 'black')

# Legend label and line colour for each column index k; the labels are not the
# indices themselves (index 4 is shown as '6').
_roc_styles = [('1', 'blue'), ('2', 'red'), ('3', 'orange'), ('4', 'green'), ('6', 'darkmagenta')]
for _k, (_legend, _colour) in enumerate(_roc_styles):
    _fpr, _tpr, _ = roc_curve(np.asarray(pred_test[f'True {_k}']), np.asarray(pred_test[f'Mod {_k}']))
    plt.plot(_fpr, _tpr, linewidth = 0.5, label = _legend, color = _colour)

plt.title('Curva ROC - Teste', title_font)
plt.xlabel('False Positive Rate', axis_font)
plt.ylabel('True Positive Rate', axis_font)
plt.legend()

<matplotlib.legend.Legend at 0x7f2b98e0d6a0>

CREATE ENDPOINT FOR PREDICTION

# Deploy the `xgb` estimator to a hosted endpoint backed by a single
# ml.m4.xlarge instance, and keep the returned predictor for later calls.
predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

---------------!

# Send request payloads to the endpoint as CSV.
# `sagemaker.predictor.csv_serializer` is the legacy (SDK v1) name; from
# sagemaker>=2 the serializer lives in `sagemaker.serializers`. Prefer the
# current class and fall back to the legacy object only on an older SDK.
try:
    from sagemaker.serializers import CSVSerializer
except ImportError:
    from sagemaker.predictor import csv_serializer

    predictor.serializer = csv_serializer
else:
    predictor.serializer = CSVSerializer()

# List the instance attribute names stored on the predictor object.
vars(predictor).keys()

dict_keys(['endpoint_name', 'sagemaker_session', 'serializer', 'deserializer', '_endpoint_config_name', '_model_names', '_context'])

# Show the name of the deployed endpoint. `Predictor.endpoint` was renamed to
# `endpoint_name` in sagemaker>=2; the old attribute only triggers a rename notice.
predictor.endpoint_name

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.

'xgboost-2021-03-30-01-28-58-053'

# !!! DELETE THE ENDPOINT WHEN DONE — IT IS BILLED HEAVILY !!!
# (Translated from the original author's warning, which was repeated five
# times for emphasis.) Always run this cell once predictions are finished.
predictor.delete_endpoint()

	CO_SEQ_SISCAN_HISTO_MAMA_PACNT	CO_UF_RESIDENCIA	CO_MUN_RESIDENCIA	CO_RACA_COR	CO_IDADE_PACIENTE	CO_ESCOLARIDADE	CO_INTERVALO_COLETA	CO_INTERVALO_EXAME	TP_RISCO_ELEVADO	TP_DETECCAO_LESAO	TP_LATERALIDADE_LESAO	TP_TAMANHO_LESAO	TP_LINFONODO_AXILAR_PALPAVEL	TP_TAMANHO_TUMOR
TP_LAUDO_HISTOPATOLOGICO
1	50127	50127	50127	50127	50127	420	50126	50127	50127	50127	50127	50127	50127	50127
2	1922	1922	1922	1922	1922	2	1922	1922	1922	1922	1922	1922	1922	1922
3	665	665	665	665	665	2	665	665	665	665	665	665	665	665
4	76821	76821	76821	76821	76821	223	76819	76821	76821	76821	76821	76821	76821	76821
6	1062	1062	1062	1062	1062	7	1062	1062	1062	1062	1062	1062	1062	1062

	flag_raiz
63906	3
49264	0
16897	3
38712	3
125490	3
...	...
39986	3
85524	0
5621	3
96706	3
62064	3

	CO_SEQ_SISCAN_HISTO_MAMA_PACNT	CO_UF_RESIDENCIA	CO_MUN_RESIDENCIA	CO_RACA_COR	CO_IDADE_PACIENTE	CO_ESCOLARIDADE	CO_INTERVALO_COLETA	CO_INTERVALO_EXAME	TP_RISCO_ELEVADO	TP_DETECCAO_LESAO	TP_LATERALIDADE_LESAO	TP_TAMANHO_LESAO	TP_LINFONODO_AXILAR_PALPAVEL	TP_TAMANHO_TUMOR
TP_LAUDO_HISTOPATOLOGICO
1	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540
2	1613	1613	1613	1613	1613	1613	1613	1613	1613	1613	1613	1613	1613	1613
3	551	551	551	551	551	551	551	551	551	551	551	551	551	551
4	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390
6	913	913	913	913	913	913	913	913	913	913	913	913	913	913

	CO_SEQ_SISCAN_HISTO_MAMA_PACNT	CO_UF_RESIDENCIA	CO_MUN_RESIDENCIA	CO_RACA_COR	CO_IDADE_PACIENTE	CO_ESCOLARIDADE	CO_INTERVALO_COLETA	CO_INTERVALO_EXAME	TP_RISCO_ELEVADO	TP_DETECCAO_LESAO	TP_LATERALIDADE_LESAO	TP_TAMANHO_LESAO	TP_LINFONODO_AXILAR_PALPAVEL	TP_TAMANHO_TUMOR	flag_3	flag_6
TP_LAUDO_HISTOPATOLOGICO
1	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540
2	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469
3	551	551	551	551	551	551	551	551	551	551	551	551	551	551	551	551
4	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390
6	913	913	913	913	913	913	913	913	913	913	913	913	913	913	913	913

	CO_SEQ_SISCAN_HISTO_MAMA_PACNT	CO_UF_RESIDENCIA	CO_MUN_RESIDENCIA	CO_RACA_COR	CO_IDADE_PACIENTE	CO_ESCOLARIDADE	CO_INTERVALO_COLETA	CO_INTERVALO_EXAME	TP_RISCO_ELEVADO	TP_DETECCAO_LESAO	TP_LATERALIDADE_LESAO	TP_TAMANHO_LESAO	TP_LINFONODO_AXILAR_PALPAVEL	TP_TAMANHO_TUMOR	flag_6
TP_LAUDO_HISTOPATOLOGICO
1	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540	42540
2	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469	5469
3	3429	3429	3429	3429	3429	3429	3429	3429	3429	3429	3429	3429	3429	3429	3429
4	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390	65390
6	913	913	913	913	913	913	913	913	913	913	913	913	913	913	913

	CO_SEQ_SISCAN_HISTO_MAMA_PACNT	CO_UF_RESIDENCIA	CO_MUN_RESIDENCIA	CO_RACA_COR	CO_IDADE_PACIENTE	CO_ESCOLARIDADE	CO_INTERVALO_COLETA	CO_INTERVALO_EXAME	TP_RISCO_ELEVADO	TP_DETECCAO_LESAO	TP_LATERALIDADE_LESAO	TP_TAMANHO_LESAO	TP_LINFONODO_AXILAR_PALPAVEL	TP_TAMANHO_TUMOR
108077	152425	26	260790	99	29	2.811881	1.0	1	2	1	1	1	2	5
94449	351741	31	312770	99	74	2.811881	1.0	2	1	1	1	2	2	4

	0	1	2	3	4	True
0	0.44944697618484497	0.003712	0.005239	0.526942	0.014658913016319275	3
1	0.11719241738319397	0.007295	0.028363	0.837371	0.009779603220522404	0
2	0.7194904685020447	0.000478	0.000439	0.279143	0.000448645994765684	0
3	0.7211182117462158	0.000424	0.000435	0.277550	0.00047271294170059264	3
4	0.7815982103347778	0.004299	0.008939	0.186499	0.018665609881281853	1

	KS	ROC	Acc
Data
Treino - Faixa 1	56.7	86.97	79.87
Treino - Faixa 2	75.43	95.36	97.15
Treino - Faixa 3	75.89	95.04	97.69
Treino - Faixa 4	58.19	87.75	79.17
Treino - Faixa 6	80.81	96.86	97.3

	KS	ROC	Acc
Data
Valid - Faixa 1	54.92	85.95	79.16
Valid - Faixa 2	70.86	93.69	96.87
Valid - Faixa 3	73.63	94.22	97.59
Valid - Faixa 4	56.99	86.92	78.54
Valid - Faixa 6	77.87	95.68	97.23

	KS	ROC	Acc
Data
Teste - Faixa 1	53.84	85.27	77.75
Teste - Faixa 2	52.83	85.77	98.66
Teste - Faixa 3	42.9	79.17	99.44
Teste - Faixa 4	53.35	84.87	77.14
Teste - Faixa 6	54.36	84.98	99.18

	Acc	Data
0	43.86	Train
1	43.53	Valid
2	50.4	Test