# Source code for pairpro.train_val_wrapper

"""
Wrapper functions for the machine learning components.
"""

from pairpro.train_val_classification import rf_wrapper
from pairpro.train_val_input_cleaning import input_cleaning_wrapper
from pairpro.train_val_featuregen import create_new_dataframe


def train_val_wrapper(dataframe, target, structure=False, features=False):
    """
    Clean the input dataframe, optionally add iFeatureOmega descriptors,
    and train/evaluate the random-forest classifier on the result.

    Args:
        dataframe (pandas dataframe): Input data. It is passed through
            ``input_cleaning_wrapper`` first; after the optional feature
            generation it must still contain the 'm_protein_seq',
            't_protein_seq' and 'pair_id' columns, which are dropped
            before modelling.
        target: Forwarded unchanged to ``rf_wrapper`` (presumably the
            name of the target column -- confirm against ``rf_wrapper``).
        structure (bool): Forwarded unchanged to
            ``input_cleaning_wrapper``.
        features (bool): Sequence descriptors are generated only when
            this is exactly ``True``; any other value skips feature
            generation.

    Returns:
        tuple: ``(score, model)`` exactly as returned by ``rf_wrapper``.
    """
    # clean input dataframe
    dataframe = input_cleaning_wrapper(dataframe, structure)

    # NOTE: the identity check is intentional and preserved -- only the
    # literal True enables feature generation.
    if features is True:
        feature_list = [
            'AAC',
            'GAAC',
            'DistancePair',
            'CTDC',
            'CTDT',
            'CTDD',
            'CTriad',
            'GDPC type 1',
            'GDPC type 2',
            'CKSAAGP type 1',
            'CKSAAGP type 2',
            'PseKRAAC type 2',
            'PseKRAAC type 3A',
            'PseKRAAC type 7',
            'PseKRAAC type 9',
            'Geary',
            'APAAC',
            'QSOrder',
        ]

        # generate features from amino acid sequence
        # NOTE(review): 'sequeneces_b.fasta' looks like a typo for
        # 'sequences_b.fasta'. It is kept byte-for-byte because the name
        # is handed to create_new_dataframe and may be relied on
        # elsewhere -- confirm before renaming.
        dataframe = create_new_dataframe(
            dataframe,
            ['sequences_a.fasta', 'sequeneces_b.fasta'],
            # feature_list is built fresh on every call, so it can be
            # passed directly without copying it first.
            descriptors=feature_list,
        )

    # drop sequences and the pair identifier so they are not passed on
    # to the classifier
    dataframe.drop(
        columns=['m_protein_seq', 't_protein_seq', 'pair_id'],
        inplace=True,
    )

    # run through model
    score, model = rf_wrapper(dataframe, target)

    return score, model