1409 lines
58 KiB
Python
1409 lines
58 KiB
Python
|
# Copyright (c) 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved
|
||
|
#
|
||
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
||
|
# copy of this software and associated documentation files (the
|
||
|
# "Software"), to deal in the Software without restriction, including
|
||
|
# without limitation the rights to use, copy, modify, merge, publish, dis-
|
||
|
# tribute, sublicense, and/or sell copies of the Software, and to permit
|
||
|
# persons to whom the Software is furnished to do so, subject to the fol-
|
||
|
# lowing conditions:
|
||
|
#
|
||
|
# The above copyright notice and this permission notice shall be included
|
||
|
# in all copies or substantial portions of the Software.
|
||
|
#
|
||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||
|
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
|
||
|
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||
|
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||
|
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||
|
# IN THE SOFTWARE.
|
||
|
#
|
||
|
|
||
|
import boto
|
||
|
from boto.compat import json, urlsplit
|
||
|
from boto.connection import AWSQueryConnection
|
||
|
from boto.regioninfo import RegionInfo
|
||
|
from boto.exception import JSONResponseError
|
||
|
from boto.machinelearning import exceptions
|
||
|
|
||
|
|
||
|
class MachineLearningConnection(AWSQueryConnection):
|
||
|
"""
|
||
|
Definition of the public APIs exposed by Amazon Machine Learning
|
||
|
"""
|
||
|
APIVersion = "2014-12-12"
|
||
|
AuthServiceName = 'machinelearning'
|
||
|
DefaultRegionName = "us-east-1"
|
||
|
DefaultRegionEndpoint = "machinelearning.us-east-1.amazonaws.com"
|
||
|
ServiceName = "MachineLearning"
|
||
|
TargetPrefix = "AmazonML_20141212"
|
||
|
ResponseError = JSONResponseError
|
||
|
|
||
|
_faults = {
|
||
|
"InternalServerException": exceptions.InternalServerException,
|
||
|
"LimitExceededException": exceptions.LimitExceededException,
|
||
|
"ResourceNotFoundException": exceptions.ResourceNotFoundException,
|
||
|
"IdempotentParameterMismatchException": exceptions.IdempotentParameterMismatchException,
|
||
|
"PredictorNotMountedException": exceptions.PredictorNotMountedException,
|
||
|
"InvalidInputException": exceptions.InvalidInputException,
|
||
|
}
|
||
|
|
||
|
|
||
|
def __init__(self, **kwargs):
    """
    Build a connection to the Amazon Machine Learning endpoint.

    Two keyword arguments are interpreted here; every remaining keyword
    is handed unchanged to the parent class constructor.

    :param region: Optional region object. When it is missing or falsy,
        a `RegionInfo` is created from `DefaultRegionName` and
        `DefaultRegionEndpoint`. The object must expose `endpoint` and
        `name` attributes, since both are read below.

    :param host: Optional host name. When it is missing or `None`, the
        endpoint of the selected region is used instead.
    """
    # Fall back to the class-level default region when none was given.
    region = kwargs.pop('region', None) or RegionInfo(
        self, self.DefaultRegionName, self.DefaultRegionEndpoint)

    # A missing key and an explicit None are treated the same way.
    if kwargs.get('host') is None:
        kwargs['host'] = region.endpoint

    super(MachineLearningConnection, self).__init__(**kwargs)
    self.region = region
    self.auth_region_name = self.region.name
|
||
|
|
||
|
def _required_auth_capability(self):
|
||
|
return ['hmac-v4']
|
||
|
|
||
|
def create_batch_prediction(self, batch_prediction_id, ml_model_id,
                            batch_prediction_data_source_id, output_uri,
                            batch_prediction_name=None):
    """
    Request predictions for a group of observations.

    The observations live in one or more data files referenced by a
    `DataSource`. The operation creates a new `BatchPrediction` that
    uses an `MLModel` together with those data files as its
    information sources.

    `CreateBatchPrediction` is asynchronous: Amazon Machine Learning
    (Amazon ML) returns immediately and sets the `BatchPrediction`
    status to `PENDING`. Once the `BatchPrediction` has finished,
    Amazon ML sets the status to `COMPLETED`.

    Poll the GetBatchPrediction operation and inspect the `Status`
    parameter of its result to follow progress. When `COMPLETED`
    appears, the results can be found at the location given by the
    `OutputUri` parameter.

    :type batch_prediction_id: string
    :param batch_prediction_id: A user-supplied ID that uniquely
        identifies the `BatchPrediction`.

    :type batch_prediction_name: string
    :param batch_prediction_name: A user-supplied name or description
        of the `BatchPrediction`. `BatchPredictionName` can only use
        the UTF-8 character set.

    :type ml_model_id: string
    :param ml_model_id: The ID of the `MLModel` that will generate
        predictions for the group of observations.

    :type batch_prediction_data_source_id: string
    :param batch_prediction_data_source_id: The ID of the `DataSource`
        that points to the group of observations to predict.

    :type output_uri: string
    :param output_uri: The location of an Amazon Simple Storage
        Service (Amazon S3) bucket or directory in which to store the
        batch prediction results. The s3 key portion of the
        "outputURI" field must not contain any of these substrings:
        ':', '//', '/./', '/../'. Amazon ML needs permissions to
        store and retrieve the logs on your behalf; see the Amazon
        Machine Learning Developer Guide for how to set permissions.

    :return: Whatever `self.make_request` returns for the
        `CreateBatchPrediction` action.
    """
    request = {
        'BatchPredictionId': batch_prediction_id,
        'MLModelId': ml_model_id,
        'BatchPredictionDataSourceId': batch_prediction_data_source_id,
        'OutputUri': output_uri,
    }
    # The name is optional and is only sent when the caller supplied it.
    if batch_prediction_name is not None:
        request['BatchPredictionName'] = batch_prediction_name
    payload = json.dumps(request)
    return self.make_request(action='CreateBatchPrediction', body=payload)
|
||
|
|
||
|
def create_data_source_from_rds(self, data_source_id, rds_data, role_arn,
                                data_source_name=None,
                                compute_statistics=None):
    """
    Create a `DataSource` object from an Amazon Relational Database
    Service (Amazon RDS) database.

    A `DataSource` references data that can be used to perform
    CreateMLModel, CreateEvaluation, or CreateBatchPrediction
    operations.

    `CreateDataSourceFromRDS` is asynchronous: Amazon Machine Learning
    (Amazon ML) returns immediately and sets the `DataSource` status
    to `PENDING`. When the `DataSource` has been created and is ready
    for use, Amazon ML sets the `Status` parameter to `COMPLETED`. A
    `DataSource` in `COMPLETED` or `PENDING` status can only be used
    to perform CreateMLModel, CreateEvaluation, or
    CreateBatchPrediction operations.

    If Amazon ML cannot accept the input source, it sets the `Status`
    parameter to `FAILED` and puts an error message in the `Message`
    attribute of the GetDataSource operation response.

    :type data_source_id: string
    :param data_source_id: A user-supplied ID that uniquely identifies
        the `DataSource`. Typically, an Amazon Resource Number (ARN)
        becomes the ID for a `DataSource`.

    :type data_source_name: string
    :param data_source_name: A user-supplied name or description of
        the `DataSource`.

    :type rds_data: dict
    :param rds_data:
        The data specification of an Amazon RDS `DataSource`:

        + DatabaseInformation -

          + `DatabaseName` - Name of the Amazon RDS database.
          + `InstanceIdentifier` - Unique identifier for the Amazon
            RDS database instance.

        + DatabaseCredentials - AWS Identity and Access Management
          (IAM) credentials that are used to connect to the Amazon RDS
          database.
        + ResourceRole - Role (DataPipelineDefaultResourceRole)
          assumed by an Amazon Elastic Compute Cloud (EC2) instance to
          carry out the copy task from Amazon RDS to Amazon S3. See
          "Role templates" for data pipelines for more information.
        + ServiceRole - Role (DataPipelineDefaultRole) assumed by the
          AWS Data Pipeline service to monitor the progress of the
          copy task from Amazon RDS to Amazon Simple Storage Service
          (S3). See "Role templates" for data pipelines for more
          information.
        + SecurityInfo - Security information to use to access an
          Amazon RDS instance. Appropriate ingress rules must be set
          up for the security entity IDs provided so that the Amazon
          RDS instance can be reached. Specify a [`SubnetId`,
          `SecurityGroupIds`] pair for a VPC-based Amazon RDS
          instance.
        + SelectSqlQuery - Query that is used to retrieve the
          observation data for the `Datasource`.
        + S3StagingLocation - Amazon S3 location for staging RDS
          data. The data retrieved from Amazon RDS using
          `SelectSqlQuery` is stored in this location.
        + DataSchemaUri - Amazon S3 location of the `DataSchema`.
        + DataSchema - A JSON string representing the schema. This is
          not required if `DataSchemaUri` is specified.
        + DataRearrangement - A JSON string representing the
          splitting requirement of a `Datasource`. Sample:
          ``{"randomSeed":"some-random-seed",
          "splitting":{"percentBegin":10,"percentEnd":60}}``

    :type role_arn: string
    :param role_arn: The role that Amazon ML assumes on behalf of the
        user to create and activate a data pipeline in the user's
        account and copy data (using the `SelectSqlQuery` query) from
        Amazon RDS to Amazon S3.

    :type compute_statistics: boolean
    :param compute_statistics: The compute statistics for a
        `DataSource`. The statistics are generated from the
        observation data referenced by a `DataSource`. Amazon ML uses
        the statistics internally during `MLModel` training. This
        parameter must be set to `True` if the `DataSource` needs to
        be used for `MLModel` training.

    :return: Whatever `self.make_request` returns for the
        `CreateDataSourceFromRDS` action.
    """
    request = {
        'DataSourceId': data_source_id,
        'RDSData': rds_data,
        'RoleARN': role_arn,
    }
    # Optional members are only sent when the caller provided a value.
    optional_members = (
        ('DataSourceName', data_source_name),
        ('ComputeStatistics', compute_statistics),
    )
    for member, value in optional_members:
        if value is not None:
            request[member] = value
    return self.make_request(action='CreateDataSourceFromRDS',
                             body=json.dumps(request))
|
||
|
|
||
|
def create_data_source_from_redshift(self, data_source_id, data_spec,
                                     role_arn, data_source_name=None,
                                     compute_statistics=None):
    """
    Create a `DataSource` from Amazon Redshift.

    A `DataSource` references data that can be used to perform either
    CreateMLModel, CreateEvaluation or CreateBatchPrediction
    operations.

    `CreateDataSourceFromRedshift` is asynchronous: Amazon Machine
    Learning (Amazon ML) returns immediately and sets the `DataSource`
    status to `PENDING`. When the `DataSource` has been created and is
    ready for use, Amazon ML sets the `Status` parameter to
    `COMPLETED`. A `DataSource` in `COMPLETED` or `PENDING` status can
    only be used to perform CreateMLModel, CreateEvaluation, or
    CreateBatchPrediction operations.

    If Amazon ML cannot accept the input source, it sets the `Status`
    parameter to `FAILED` and puts an error message in the `Message`
    attribute of the GetDataSource operation response.

    The observations should exist in the database hosted on an Amazon
    Redshift cluster and should be specified by a `SelectSqlQuery`.
    Amazon ML executes the Unload command in Amazon Redshift to
    transfer the result set of `SelectSqlQuery` to
    `S3StagingLocation`.

    Once created, the `DataSource` is ready for use in evaluations and
    batch predictions. Training an `MLModel` with it additionally
    requires a recipe. A recipe describes the observation variables
    that participate in training an `MLModel` and how each input
    variable will be used: whether it is included or excluded, and
    whether it is manipulated, for example combined with another
    variable or split apart into word combinations. For more
    information, see the Amazon Machine Learning Developer Guide.

    :type data_source_id: string
    :param data_source_id: A user-supplied ID that uniquely identifies
        the `DataSource`.

    :type data_source_name: string
    :param data_source_name: A user-supplied name or description of
        the `DataSource`.

    :type data_spec: dict
    :param data_spec:
        The data specification of an Amazon Redshift `DataSource`:

        + DatabaseInformation -

          + `DatabaseName` - Name of the Amazon Redshift database.
          + `ClusterIdentifier` - Unique ID for the Amazon Redshift
            cluster.

        + DatabaseCredentials - AWS Identity and Access Management
          (IAM) credentials that are used to connect to the Amazon
          Redshift database.
        + SelectSqlQuery - Query that is used to retrieve the
          observation data for the `Datasource`.
        + S3StagingLocation - Amazon Simple Storage Service (Amazon
          S3) location for staging Amazon Redshift data. The data
          retrieved using `SelectSqlQuery` is stored in this location.
        + DataSchemaUri - Amazon S3 location of the `DataSchema`.
        + DataSchema - A JSON string representing the schema. This is
          not required if `DataSchemaUri` is specified.
        + DataRearrangement - A JSON string representing the
          splitting requirement of a `Datasource`. Sample:
          ``{"randomSeed":"some-random-seed",
          "splitting":{"percentBegin":10,"percentEnd":60}}``

    :type role_arn: string
    :param role_arn: A fully specified role Amazon Resource Name
        (ARN). Amazon ML assumes the role on behalf of the user to
        create the following:

        + A security group to allow Amazon ML to execute the
          `SelectSqlQuery` query on an Amazon Redshift cluster
        + An Amazon S3 bucket policy to grant Amazon ML read/write
          permissions on the `S3StagingLocation`

    :type compute_statistics: boolean
    :param compute_statistics: The compute statistics for a
        `DataSource`. The statistics are generated from the
        observation data referenced by a `DataSource`. Amazon ML uses
        the statistics internally during `MLModel` training. This
        parameter must be set to `True` if the `DataSource` needs to
        be used for `MLModel` training.

    :return: Whatever `self.make_request` returns for the
        `CreateDataSourceFromRedshift` action.
    """
    request = {
        'DataSourceId': data_source_id,
        'DataSpec': data_spec,
        'RoleARN': role_arn,
    }
    # Optional members are only sent when the caller provided a value.
    optional_members = (
        ('DataSourceName', data_source_name),
        ('ComputeStatistics', compute_statistics),
    )
    for member, value in optional_members:
        if value is not None:
            request[member] = value
    return self.make_request(action='CreateDataSourceFromRedshift',
                             body=json.dumps(request))
|
||
|
|
||
|
def create_data_source_from_s3(self, data_source_id, data_spec,
                               data_source_name=None,
                               compute_statistics=None):
    """
    Create a `DataSource` object.

    A `DataSource` references data that can be used to perform
    CreateMLModel, CreateEvaluation, or CreateBatchPrediction
    operations.

    `CreateDataSourceFromS3` is asynchronous: Amazon Machine Learning
    (Amazon ML) returns immediately and sets the `DataSource` status
    to `PENDING`. When the `DataSource` has been created and is ready
    for use, Amazon ML sets the `Status` parameter to `COMPLETED`. A
    `DataSource` in `COMPLETED` or `PENDING` status can only be used
    to perform CreateMLModel, CreateEvaluation or
    CreateBatchPrediction operations.

    If Amazon ML cannot accept the input source, it sets the `Status`
    parameter to `FAILED` and puts an error message in the `Message`
    attribute of the GetDataSource operation response.

    The observation data used in a `DataSource` should be ready to
    use; that is, it should have a consistent structure, and missing
    data values should be kept to a minimum. The observation data
    must reside in one or more CSV files in an Amazon Simple Storage
    Service (Amazon S3) bucket, along with a schema that describes
    the data items by name and type. The same schema must be used for
    all of the data files referenced by the `DataSource`.

    Once created, the `DataSource` is ready to use in evaluations and
    batch predictions. Training an `MLModel` with it additionally
    requires a recipe. A recipe describes the observation variables
    that participate in training an `MLModel` and how each input
    variable will be used: whether it is included or excluded, and
    whether it is manipulated, for example combined with another
    variable or split apart into word combinations. For more
    information, see the Amazon Machine Learning Developer Guide.

    :type data_source_id: string
    :param data_source_id: A user-supplied identifier that uniquely
        identifies the `DataSource`.

    :type data_source_name: string
    :param data_source_name: A user-supplied name or description of
        the `DataSource`.

    :type data_spec: dict
    :param data_spec:
        The data specification of a `DataSource`:

        + DataLocationS3 - Amazon Simple Storage Service (Amazon S3)
          location of the observation data.
        + DataSchemaLocationS3 - Amazon S3 location of the
          `DataSchema`.
        + DataSchema - A JSON string representing the schema. This is
          not required if `DataSchemaUri` is specified.
        + DataRearrangement - A JSON string representing the
          splitting requirement of a `Datasource`. Sample:
          ``{"randomSeed":"some-random-seed",
          "splitting":{"percentBegin":10,"percentEnd":60}}``

    :type compute_statistics: boolean
    :param compute_statistics: The compute statistics for a
        `DataSource`. The statistics are generated from the
        observation data referenced by a `DataSource`. Amazon ML uses
        the statistics internally during `MLModel` training. This
        parameter must be set to `True` if the `DataSource` needs to
        be used for `MLModel` training.

    :return: Whatever `self.make_request` returns for the
        `CreateDataSourceFromS3` action.
    """
    request = {
        'DataSourceId': data_source_id,
        'DataSpec': data_spec,
    }
    # Optional members are only sent when the caller provided a value.
    optional_members = (
        ('DataSourceName', data_source_name),
        ('ComputeStatistics', compute_statistics),
    )
    for member, value in optional_members:
        if value is not None:
            request[member] = value
    return self.make_request(action='CreateDataSourceFromS3',
                             body=json.dumps(request))
|
||
|
|
||
|
def create_evaluation(self, evaluation_id, ml_model_id,
                      evaluation_data_source_id, evaluation_name=None):
    """
    Create a new `Evaluation` of an `MLModel`.

    An `MLModel` is evaluated on a set of observations associated to
    a `DataSource`. Like a `DataSource` for an `MLModel`, the
    `DataSource` for an `Evaluation` contains values for the Target
    Variable. The `Evaluation` compares the predicted result for each
    observation to the actual outcome and provides a summary so that
    you know how effective the `MLModel` functions on the test data.
    Evaluation generates a relevant performance metric such as
    BinaryAUC, RegressionRMSE or MulticlassAvgFScore based on the
    corresponding `MLModelType`: `BINARY`, `REGRESSION` or
    `MULTICLASS`.

    `CreateEvaluation` is asynchronous: Amazon Machine Learning
    (Amazon ML) returns immediately and sets the evaluation status to
    `PENDING`. When the `Evaluation` has been created and is ready
    for use, Amazon ML sets the status to `COMPLETED`.

    Use the GetEvaluation operation to check the progress of the
    evaluation during the creation operation.

    :type evaluation_id: string
    :param evaluation_id: A user-supplied ID that uniquely identifies
        the `Evaluation`.

    :type evaluation_name: string
    :param evaluation_name: A user-supplied name or description of
        the `Evaluation`.

    :type ml_model_id: string
    :param ml_model_id: The ID of the `MLModel` to evaluate. The
        schema used in creating the `MLModel` must match the schema
        of the `DataSource` used in the `Evaluation`.

    :type evaluation_data_source_id: string
    :param evaluation_data_source_id: The ID of the `DataSource` for
        the evaluation. The schema of the `DataSource` must match the
        schema used to create the `MLModel`.

    :return: Whatever `self.make_request` returns for the
        `CreateEvaluation` action.
    """
    request = {
        'EvaluationId': evaluation_id,
        'MLModelId': ml_model_id,
        'EvaluationDataSourceId': evaluation_data_source_id,
    }
    # The name is optional and is only sent when the caller supplied it.
    if evaluation_name is not None:
        request['EvaluationName'] = evaluation_name
    payload = json.dumps(request)
    return self.make_request(action='CreateEvaluation', body=payload)
|
||
|
|
||
|
def create_ml_model(self, ml_model_id, ml_model_type,
                    training_data_source_id, ml_model_name=None,
                    parameters=None, recipe=None, recipe_uri=None):
    """
    Create a new `MLModel` using the data files and the recipe as
    information sources.

    An `MLModel` is nearly immutable. Users can only update the
    `MLModelName` and the `ScoreThreshold` in an `MLModel` without
    creating a new `MLModel`.

    `CreateMLModel` is asynchronous: Amazon Machine Learning (Amazon
    ML) returns immediately and sets the `MLModel` status to
    `PENDING`. When the `MLModel` has been created and is ready for
    use, Amazon ML sets the status to `COMPLETED`.

    Use the GetMLModel operation to check the progress of the
    `MLModel` during the creation operation.

    CreateMLModel requires a `DataSource` with computed statistics,
    which can be created by setting `ComputeStatistics` to `True` in
    CreateDataSourceFromRDS, CreateDataSourceFromS3, or
    CreateDataSourceFromRedshift operations.

    :type ml_model_id: string
    :param ml_model_id: A user-supplied ID that uniquely identifies
        the `MLModel`.

    :type ml_model_name: string
    :param ml_model_name: A user-supplied name or description of the
        `MLModel`.

    :type ml_model_type: string
    :param ml_model_type: The category of supervised learning that
        this `MLModel` will address. Choose from the following types:

        + Choose `REGRESSION` if the `MLModel` will be used to
          predict a numeric value.
        + Choose `BINARY` if the `MLModel` result has two possible
          values.
        + Choose `MULTICLASS` if the `MLModel` result has a limited
          number of values.

        For more information, see the Amazon Machine Learning
        Developer Guide.

    :type parameters: map
    :param parameters:
        A list of the training parameters in the `MLModel`. The list
        is implemented as a map of key/value pairs.

        The following is the current set of training parameters:

        + `sgd.l1RegularizationAmount` - Coefficient regularization
          L1 norm. It controls overfitting the data by penalizing
          large coefficients. This tends to drive coefficients to
          zero, resulting in a sparse feature set. If you use this
          parameter, start by specifying a small value such as
          1.0E-08. The value is a double that ranges from 0 to
          MAX_DOUBLE. The default is not to use L1 normalization.
          The parameter cannot be used when `L2` is specified. Use
          this parameter sparingly.
        + `sgd.l2RegularizationAmount` - Coefficient regularization
          L2 norm. It controls overfitting the data by penalizing
          large coefficients. This tends to drive coefficients to
          small, nonzero values. If you use this parameter, start by
          specifying a small value such as 1.0E-08. The value is a
          double that ranges from 0 to MAX_DOUBLE. The default is
          not to use L2 normalization. This cannot be used when `L1`
          is specified. Use this parameter sparingly.
        + `sgd.maxPasses` - Number of times that the training
          process traverses the observations to build the `MLModel`.
          The value is an integer that ranges from 1 to 10000. The
          default value is 10.
        + `sgd.maxMLModelSizeInBytes` - Maximum allowed size of the
          model. Depending on the input data, the size of the model
          might affect its performance. The value is an integer that
          ranges from 100000 to 2147483648. The default value is
          33554432.

    :type training_data_source_id: string
    :param training_data_source_id: The `DataSource` that points to
        the training data.

    :type recipe: string
    :param recipe: The data recipe for creating `MLModel`. You must
        specify either the recipe or its URI. If you don't specify a
        recipe or its URI, Amazon ML creates a default.

    :type recipe_uri: string
    :param recipe_uri: The Amazon Simple Storage Service (Amazon S3)
        location and file name that contains the `MLModel` recipe.
        You must specify either the recipe or its URI. If you don't
        specify a recipe or its URI, Amazon ML creates a default.

    :return: Whatever `self.make_request` returns for the
        `CreateMLModel` action.
    """
    request = {
        'MLModelId': ml_model_id,
        'MLModelType': ml_model_type,
        'TrainingDataSourceId': training_data_source_id,
    }
    # Optional members are only sent when the caller provided a value;
    # the order below matches the order in which they are added.
    optional_members = (
        ('MLModelName', ml_model_name),
        ('Parameters', parameters),
        ('Recipe', recipe),
        ('RecipeUri', recipe_uri),
    )
    for member, value in optional_members:
        if value is not None:
            request[member] = value
    return self.make_request(action='CreateMLModel',
                             body=json.dumps(request))
|
||
|
|
||
|
def create_realtime_endpoint(self, ml_model_id):
    """
    Create a real-time endpoint for the `MLModel`.

    The endpoint contains the URI of the `MLModel`; that is, the
    location to send real-time prediction requests for the specified
    `MLModel`.

    :type ml_model_id: string
    :param ml_model_id: The ID assigned to the `MLModel` during
        creation.

    :return: Whatever `self.make_request` returns for the
        `CreateRealtimeEndpoint` action.
    """
    payload = json.dumps({'MLModelId': ml_model_id})
    return self.make_request(action='CreateRealtimeEndpoint', body=payload)
|
||
|
|
||
|
def delete_batch_prediction(self, batch_prediction_id):
    """
    Assign the DELETED status to a `BatchPrediction`, rendering it
    unusable.

    After the `DeleteBatchPrediction` operation, the
    GetBatchPrediction operation can be used to verify that the
    status of the `BatchPrediction` changed to DELETED.

    The result of the `DeleteBatchPrediction` operation is
    irreversible.

    :type batch_prediction_id: string
    :param batch_prediction_id: A user-supplied ID that uniquely
        identifies the `BatchPrediction`.

    :return: Whatever `self.make_request` returns for the
        `DeleteBatchPrediction` action.
    """
    payload = json.dumps({'BatchPredictionId': batch_prediction_id})
    return self.make_request(action='DeleteBatchPrediction', body=payload)
|
||
|
|
||
|
def delete_data_source(self, data_source_id):
    """
    Assign the DELETED status to a `DataSource`, rendering it
    unusable.

    After the `DeleteDataSource` operation, the GetDataSource
    operation can be used to verify that the status of the
    `DataSource` changed to DELETED.

    The results of the `DeleteDataSource` operation are irreversible.

    :type data_source_id: string
    :param data_source_id: A user-supplied ID that uniquely identifies
        the `DataSource`.

    :return: Whatever `self.make_request` returns for the
        `DeleteDataSource` action.
    """
    payload = json.dumps({'DataSourceId': data_source_id})
    return self.make_request(action='DeleteDataSource', body=payload)
|
||
|
|
||
|
def delete_evaluation(self, evaluation_id):
    """
    Assign the `DELETED` status to an `Evaluation`, rendering it
    unusable.

    After the `DeleteEvaluation` operation has been invoked, the
    GetEvaluation operation can be used to verify that the status of
    the `Evaluation` changed to `DELETED`.

    The results of the `DeleteEvaluation` operation are irreversible.

    :type evaluation_id: string
    :param evaluation_id: A user-supplied ID that uniquely identifies
        the `Evaluation` to delete.

    :return: Whatever `self.make_request` returns for the
        `DeleteEvaluation` action.
    """
    payload = json.dumps({'EvaluationId': evaluation_id})
    return self.make_request(action='DeleteEvaluation', body=payload)
|
||
|
|
||
|
def delete_ml_model(self, ml_model_id):
    """
    Assign the DELETED status to an `MLModel`, rendering it unusable.

    After the `DeleteMLModel` operation, the GetMLModel operation can
    be used to verify that the status of the `MLModel` changed to
    DELETED.

    The result of the `DeleteMLModel` operation is irreversible.

    :type ml_model_id: string
    :param ml_model_id: A user-supplied ID that uniquely identifies
        the `MLModel`.

    :return: Whatever `self.make_request` returns for the
        `DeleteMLModel` action.
    """
    payload = json.dumps({'MLModelId': ml_model_id})
    return self.make_request(action='DeleteMLModel', body=payload)
|
||
|
|
||
|
def delete_realtime_endpoint(self, ml_model_id):
    """
    Delete a real time endpoint of an `MLModel`.

    :type ml_model_id: string
    :param ml_model_id: The ID assigned to the `MLModel` during
        creation.

    :return: Whatever `self.make_request` returns for the
        `DeleteRealtimeEndpoint` action.
    """
    payload = json.dumps({'MLModelId': ml_model_id})
    return self.make_request(action='DeleteRealtimeEndpoint', body=payload)
|
||
|
|
||
|
def describe_batch_predictions(self, filter_variable=None, eq=None,
                               gt=None, lt=None, ge=None, le=None,
                               ne=None, prefix=None, sort_order=None,
                               next_token=None, limit=None):
    """
    Lists the `BatchPrediction` operations that match the search
    criteria given in the request.

    Only arguments that are not None are included in the request.

    :type filter_variable: string
    :param filter_variable:
        The variable used to filter the list of `BatchPrediction`.
        Use one of:

        + `CreatedAt` - Searches on the `BatchPrediction` creation date.
        + `Status` - Searches on the `BatchPrediction` status.
        + `Name` - Searches on the contents of the `BatchPrediction`
          `Name`.
        + `IAMUser` - Searches on the user account that invoked the
          `BatchPrediction` creation.
        + `MLModelId` - Searches on the `MLModel` used in the
          `BatchPrediction`.
        + `DataSourceId` - Searches on the `DataSource` used in the
          `BatchPrediction`.
        + `DataURI` - Searches on the data file(s) used in the
          `BatchPrediction`. The URL can identify either a file or an
          Amazon Simple Storage Service (Amazon S3) bucket or directory.

    :type eq: string
    :param eq: The equal to operator. Results have `FilterVariable`
        values that exactly match the value given with `EQ`.

    :type gt: string
    :param gt: The greater than operator. Results have
        `FilterVariable` values greater than the value given with `GT`.

    :type lt: string
    :param lt: The less than operator. Results have `FilterVariable`
        values less than the value given with `LT`.

    :type ge: string
    :param ge: The greater than or equal to operator. Results have
        `FilterVariable` values greater than or equal to the value
        given with `GE`.

    :type le: string
    :param le: The less than or equal to operator. Results have
        `FilterVariable` values less than or equal to the value given
        with `LE`.

    :type ne: string
    :param ne: The not equal to operator. Results have `FilterVariable`
        values not equal to the value given with `NE`.

    :type prefix: string
    :param prefix:
        A string found at the beginning of a variable, such as `Name`
        or `Id`.

        For example, a `BatchPrediction` could have the `Name`
        `2014-09-09-HolidayGiftMailer`. To find it, select `Name` for
        the `FilterVariable` and any of these strings for the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

    :type sort_order: string
    :param sort_order: A two-value parameter that sets the sequence of
        the resulting list of `BatchPrediction`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

    :type next_token: string
    :param next_token: An ID of the page in the paginated results.

    :type limit: integer
    :param limit: The number of pages of information to include in the
        result. The range of acceptable values is 1 through 100. The
        default value is 100.

    """
    # Request key / argument pairs, in the order they are added.
    candidates = (
        ('FilterVariable', filter_variable),
        ('EQ', eq),
        ('GT', gt),
        ('LT', lt),
        ('GE', ge),
        ('LE', le),
        ('NE', ne),
        ('Prefix', prefix),
        ('SortOrder', sort_order),
        ('NextToken', next_token),
        ('Limit', limit),
    )
    params = {}
    for wire_name, value in candidates:
        # Falsy values such as 0 or '' are still sent; only None is not.
        if value is not None:
            params[wire_name] = value
    return self.make_request(action='DescribeBatchPredictions',
                             body=json.dumps(params))
|
||
|
|
||
|
def describe_data_sources(self, filter_variable=None, eq=None, gt=None,
                          lt=None, ge=None, le=None, ne=None,
                          prefix=None, sort_order=None, next_token=None,
                          limit=None):
    """
    Lists the `DataSource` objects that match the search criteria
    given in the request.

    Only arguments that are not None are included in the request.

    :type filter_variable: string
    :param filter_variable:
        The variable used to filter the list of `DataSource`. Use one
        of:

        + `CreatedAt` - Searches on `DataSource` creation dates.
        + `Status` - Searches on `DataSource` statuses.
        + `Name` - Searches on the contents of the `DataSource` `Name`.
        + `DataUri` - Searches on the URI of data files used to create
          the `DataSource`. The URI can identify either a file or an
          Amazon Simple Storage Service (Amazon S3) bucket or directory.
        + `IAMUser` - Searches on the user account that invoked the
          `DataSource` creation.

    :type eq: string
    :param eq: The equal to operator. Results have `FilterVariable`
        values that exactly match the value given with `EQ`.

    :type gt: string
    :param gt: The greater than operator. Results have
        `FilterVariable` values greater than the value given with `GT`.

    :type lt: string
    :param lt: The less than operator. Results have `FilterVariable`
        values less than the value given with `LT`.

    :type ge: string
    :param ge: The greater than or equal to operator. Results have
        `FilterVariable` values greater than or equal to the value
        given with `GE`.

    :type le: string
    :param le: The less than or equal to operator. Results have
        `FilterVariable` values less than or equal to the value given
        with `LE`.

    :type ne: string
    :param ne: The not equal to operator. Results have `FilterVariable`
        values not equal to the value given with `NE`.

    :type prefix: string
    :param prefix:
        A string found at the beginning of a variable, such as `Name`
        or `Id`.

        For example, a `DataSource` could have the `Name`
        `2014-09-09-HolidayGiftMailer`. To find it, select `Name` for
        the `FilterVariable` and any of these strings for the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

    :type sort_order: string
    :param sort_order: A two-value parameter that sets the sequence of
        the resulting list of `DataSource`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

    :type next_token: string
    :param next_token: The ID of the page in the paginated results.

    :type limit: integer
    :param limit: The maximum number of `DataSource` to include in the
        result.

    """
    # Request key / argument pairs, in the order they are added.
    candidates = (
        ('FilterVariable', filter_variable),
        ('EQ', eq),
        ('GT', gt),
        ('LT', lt),
        ('GE', ge),
        ('LE', le),
        ('NE', ne),
        ('Prefix', prefix),
        ('SortOrder', sort_order),
        ('NextToken', next_token),
        ('Limit', limit),
    )
    params = {}
    for wire_name, value in candidates:
        # Falsy values such as 0 or '' are still sent; only None is not.
        if value is not None:
            params[wire_name] = value
    return self.make_request(action='DescribeDataSources',
                             body=json.dumps(params))
|
||
|
|
||
|
def describe_evaluations(self, filter_variable=None, eq=None, gt=None,
                         lt=None, ge=None, le=None, ne=None, prefix=None,
                         sort_order=None, next_token=None, limit=None):
    """
    Lists the `Evaluation` objects that match the search criteria
    given in the request.

    Only arguments that are not None are included in the request.

    :type filter_variable: string
    :param filter_variable:
        The variable used to filter the list of `Evaluation` objects.
        Use one of:

        + `CreatedAt` - Searches on the `Evaluation` creation date.
        + `Status` - Searches on the `Evaluation` status.
        + `Name` - Searches on the contents of the `Evaluation` `Name`.
        + `IAMUser` - Searches on the user account that invoked an
          `Evaluation`.
        + `MLModelId` - Searches on the `MLModel` that was evaluated.
        + `DataSourceId` - Searches on the `DataSource` used in the
          `Evaluation`.
        + `DataUri` - Searches on the data file(s) used in the
          `Evaluation`. The URL can identify either a file or an Amazon
          Simple Storage Service (Amazon S3) bucket or directory.

    :type eq: string
    :param eq: The equal to operator. Results have `FilterVariable`
        values that exactly match the value given with `EQ`.

    :type gt: string
    :param gt: The greater than operator. Results have
        `FilterVariable` values greater than the value given with `GT`.

    :type lt: string
    :param lt: The less than operator. Results have `FilterVariable`
        values less than the value given with `LT`.

    :type ge: string
    :param ge: The greater than or equal to operator. Results have
        `FilterVariable` values greater than or equal to the value
        given with `GE`.

    :type le: string
    :param le: The less than or equal to operator. Results have
        `FilterVariable` values less than or equal to the value given
        with `LE`.

    :type ne: string
    :param ne: The not equal to operator. Results have `FilterVariable`
        values not equal to the value given with `NE`.

    :type prefix: string
    :param prefix:
        A string found at the beginning of a variable, such as `Name`
        or `Id`.

        For example, an `Evaluation` could have the `Name`
        `2014-09-09-HolidayGiftMailer`. To find it, select `Name` for
        the `FilterVariable` and any of these strings for the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

    :type sort_order: string
    :param sort_order: A two-value parameter that sets the sequence of
        the resulting list of `Evaluation`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

    :type next_token: string
    :param next_token: The ID of the page in the paginated results.

    :type limit: integer
    :param limit: The maximum number of `Evaluation` to include in the
        result.

    """
    # Request key / argument pairs, in the order they are added.
    candidates = (
        ('FilterVariable', filter_variable),
        ('EQ', eq),
        ('GT', gt),
        ('LT', lt),
        ('GE', ge),
        ('LE', le),
        ('NE', ne),
        ('Prefix', prefix),
        ('SortOrder', sort_order),
        ('NextToken', next_token),
        ('Limit', limit),
    )
    params = {}
    for wire_name, value in candidates:
        # Falsy values such as 0 or '' are still sent; only None is not.
        if value is not None:
            params[wire_name] = value
    return self.make_request(action='DescribeEvaluations',
                             body=json.dumps(params))
|
||
|
|
||
|
def describe_ml_models(self, filter_variable=None, eq=None, gt=None,
                       lt=None, ge=None, le=None, ne=None, prefix=None,
                       sort_order=None, next_token=None, limit=None):
    """
    Lists the `MLModel` objects that match the search criteria given
    in the request.

    Only arguments that are not None are included in the request.

    :type filter_variable: string
    :param filter_variable:
        The variable used to filter the list of `MLModel`. Use one of:

        + `CreatedAt` - Searches on the `MLModel` creation date.
        + `Status` - Searches on the `MLModel` status.
        + `Name` - Searches on the contents of the `MLModel` `Name`.
        + `IAMUser` - Searches on the user account that invoked the
          `MLModel` creation.
        + `TrainingDataSourceId` - Searches on the `DataSource` used to
          train one or more `MLModel`.
        + `RealtimeEndpointStatus` - Searches on the `MLModel`
          real-time endpoint status.
        + `MLModelType` - Searches on the `MLModel` type: binary,
          regression, or multi-class.
        + `Algorithm` - Searches on the algorithm that the `MLModel`
          uses.
        + `TrainingDataURI` - Searches on the data file(s) used in
          training a `MLModel`. The URL can identify either a file or
          an Amazon Simple Storage Service (Amazon S3) bucket or
          directory.

    :type eq: string
    :param eq: The equal to operator. Results have `FilterVariable`
        values that exactly match the value given with `EQ`.

    :type gt: string
    :param gt: The greater than operator. Results have
        `FilterVariable` values greater than the value given with `GT`.

    :type lt: string
    :param lt: The less than operator. Results have `FilterVariable`
        values less than the value given with `LT`.

    :type ge: string
    :param ge: The greater than or equal to operator. Results have
        `FilterVariable` values greater than or equal to the value
        given with `GE`.

    :type le: string
    :param le: The less than or equal to operator. Results have
        `FilterVariable` values less than or equal to the value given
        with `LE`.

    :type ne: string
    :param ne: The not equal to operator. Results have `FilterVariable`
        values not equal to the value given with `NE`.

    :type prefix: string
    :param prefix:
        A string found at the beginning of a variable, such as `Name`
        or `Id`.

        For example, an `MLModel` could have the `Name`
        `2014-09-09-HolidayGiftMailer`. To find it, select `Name` for
        the `FilterVariable` and any of these strings for the `Prefix`:

        + 2014-09
        + 2014-09-09
        + 2014-09-09-Holiday

    :type sort_order: string
    :param sort_order: A two-value parameter that sets the sequence of
        the resulting list of `MLModel`.

        + `asc` - Arranges the list in ascending order (A-Z, 0-9).
        + `dsc` - Arranges the list in descending order (Z-A, 9-0).

        Results are sorted by `FilterVariable`.

    :type next_token: string
    :param next_token: The ID of the page in the paginated results.

    :type limit: integer
    :param limit: The number of pages of information to include in the
        result. The range of acceptable values is 1 through 100. The
        default value is 100.

    """
    # Request key / argument pairs, in the order they are added.
    candidates = (
        ('FilterVariable', filter_variable),
        ('EQ', eq),
        ('GT', gt),
        ('LT', lt),
        ('GE', ge),
        ('LE', le),
        ('NE', ne),
        ('Prefix', prefix),
        ('SortOrder', sort_order),
        ('NextToken', next_token),
        ('Limit', limit),
    )
    params = {}
    for wire_name, value in candidates:
        # Falsy values such as 0 or '' are still sent; only None is not.
        if value is not None:
            params[wire_name] = value
    return self.make_request(action='DescribeMLModels',
                             body=json.dumps(params))
|
||
|
|
||
|
def get_batch_prediction(self, batch_prediction_id):
    """
    Fetches a `BatchPrediction`, including detailed metadata, status,
    and data file information for the `Batch Prediction` request.

    :type batch_prediction_id: string
    :param batch_prediction_id: The ID that was assigned to the
        `BatchPrediction` at creation.

    """
    payload = json.dumps({'BatchPredictionId': batch_prediction_id})
    return self.make_request(action='GetBatchPrediction', body=payload)
|
||
|
|
||
|
def get_data_source(self, data_source_id, verbose=None):
    """
    Fetches a `DataSource`, including its metadata and data file
    information as well as its current status.

    `GetDataSource` returns results in normal or verbose format. The
    verbose format extends the normal format with the schema
    description and the list of files pointed to by the DataSource.

    :type data_source_id: string
    :param data_source_id: The ID that was assigned to the `DataSource`
        at creation.

    :type verbose: boolean
    :param verbose: Controls whether the `GetDataSource` operation
        returns `DataSourceSchema`.
        If true, `DataSourceSchema` is returned.

        If false, `DataSourceSchema` is not returned.

        When None, `Verbose` is left out of the request.

    """
    request = {'DataSourceId': data_source_id}
    if verbose is not None:
        request.update({'Verbose': verbose})
    return self.make_request(action='GetDataSource',
                             body=json.dumps(request))
|
||
|
|
||
|
def get_evaluation(self, evaluation_id):
    """
    Fetches an `Evaluation`, including its metadata and its current
    status.

    :type evaluation_id: string
    :param evaluation_id: The ID of the `Evaluation` to retrieve. The
        evaluation of each `MLModel` is recorded and cataloged, and
        the ID is the means of accessing that information.

    """
    payload = json.dumps({'EvaluationId': evaluation_id})
    return self.make_request(action='GetEvaluation', body=payload)
|
||
|
|
||
|
def get_ml_model(self, ml_model_id, verbose=None):
    """
    Fetches an `MLModel`, including detailed metadata and data source
    information as well as its current status.

    `GetMLModel` returns results in normal or verbose format.

    :type ml_model_id: string
    :param ml_model_id: The ID that was assigned to the `MLModel` at
        creation.

    :type verbose: boolean
    :param verbose: Controls whether the `GetMLModel` operation returns
        `Recipe`.
        If true, `Recipe` is returned.

        If false, `Recipe` is not returned.

        When None, `Verbose` is left out of the request.

    """
    request = {'MLModelId': ml_model_id}
    if verbose is not None:
        request.update({'Verbose': verbose})
    return self.make_request(action='GetMLModel',
                             body=json.dumps(request))
|
||
|
|
||
|
def predict(self, ml_model_id, record, predict_endpoint):
    """
    Produces a prediction for the observation using the given
    `MLModel`.

    Not every response parameter is populated, because that depends on
    the type of the requested model.

    :type ml_model_id: string
    :param ml_model_id: A unique identifier of the `MLModel`.

    :type record: map
    :param record: A map of variable name-value pairs that represent
        an observation.

    :type predict_endpoint: string
    :param predict_endpoint: The endpoint to send the predict request
        to. When it parses as a URL with a hostname, only that hostname
        is used; otherwise the value is used unchanged.

    """
    parsed_host = urlsplit(predict_endpoint).hostname
    # Strictly "is None": any non-None hostname is used as parsed.
    target_host = predict_endpoint if parsed_host is None else parsed_host

    payload = json.dumps({
        'MLModelId': ml_model_id,
        'Record': record,
        'PredictEndpoint': target_host,
    })
    # The request itself is also sent to the prediction host.
    return self.make_request(action='Predict', body=payload,
                             host=target_host)
|
||
|
|
||
|
def update_batch_prediction(self, batch_prediction_id,
                            batch_prediction_name):
    """
    Changes the `BatchPredictionName` of a `BatchPrediction`.

    The GetBatchPrediction operation can be used to view the contents
    of the updated data element.

    :type batch_prediction_id: string
    :param batch_prediction_id: The ID that was assigned to the
        `BatchPrediction` during creation.

    :type batch_prediction_name: string
    :param batch_prediction_name: A new user-supplied name or
        description of the `BatchPrediction`.

    """
    request = {}
    request['BatchPredictionId'] = batch_prediction_id
    request['BatchPredictionName'] = batch_prediction_name
    return self.make_request(action='UpdateBatchPrediction',
                             body=json.dumps(request))
|
||
|
|
||
|
def update_data_source(self, data_source_id, data_source_name):
    """
    Changes the `DataSourceName` of a `DataSource`.

    The GetDataSource operation can be used to view the contents of
    the updated data element.

    :type data_source_id: string
    :param data_source_id: The ID that was assigned to the `DataSource`
        during creation.

    :type data_source_name: string
    :param data_source_name: A new user-supplied name or description of
        the `DataSource` that will replace the current description.

    """
    request = {}
    request['DataSourceId'] = data_source_id
    request['DataSourceName'] = data_source_name
    return self.make_request(action='UpdateDataSource',
                             body=json.dumps(request))
|
||
|
|
||
|
def update_evaluation(self, evaluation_id, evaluation_name):
    """
    Changes the `EvaluationName` of an `Evaluation`.

    The GetEvaluation operation can be used to view the contents of
    the updated data element.

    :type evaluation_id: string
    :param evaluation_id: The ID that was assigned to the `Evaluation`
        during creation.

    :type evaluation_name: string
    :param evaluation_name: A new user-supplied name or description of
        the `Evaluation` that will replace the current content.

    """
    request = {}
    request['EvaluationId'] = evaluation_id
    request['EvaluationName'] = evaluation_name
    return self.make_request(action='UpdateEvaluation',
                             body=json.dumps(request))
|
||
|
|
||
|
def update_ml_model(self, ml_model_id, ml_model_name=None,
                    score_threshold=None):
    """
    Changes the `MLModelName` and the `ScoreThreshold` of an
    `MLModel`.

    The GetMLModel operation can be used to view the contents of the
    updated data element.

    :type ml_model_id: string
    :param ml_model_id: The ID that was assigned to the `MLModel`
        during creation.

    :type ml_model_name: string
    :param ml_model_name: A user-supplied name or description of the
        `MLModel`. Left out of the request when None.

    :type score_threshold: float
    :param score_threshold: The `ScoreThreshold` used in binary
        classification `MLModel` that marks the boundary between a
        positive prediction and a negative prediction.
        Output values greater than or equal to the `ScoreThreshold`
        receive a positive result from the `MLModel`, such as `True`.
        Output values less than the `ScoreThreshold` receive a negative
        response from the `MLModel`, such as `False`.
        Left out of the request when None.

    """
    request = {'MLModelId': ml_model_id}
    optional_fields = (
        ('MLModelName', ml_model_name),
        ('ScoreThreshold', score_threshold),
    )
    for wire_name, value in optional_fields:
        # A threshold of 0 or an empty name is still sent; None is not.
        if value is not None:
            request[wire_name] = value
    return self.make_request(action='UpdateMLModel',
                             body=json.dumps(request))
|
||
|
|
||
|
def make_request(self, action, body, host=None):
    """
    Sends `body` as a JSON POST for `action` and returns the decoded
    response.

    :type action: string
    :param action: The operation name. It is joined to `TargetPrefix`
        to form the `X-Amz-Target` header.

    :type body: string
    :param body: The serialized JSON request payload.

    :type host: string
    :param host: Optional host. When given it replaces the region
        endpoint in the `Host` header and is passed to the request
        builder as `host`.

    :returns: The decoded JSON body of a 200 response, or None when
        that body is empty.

    :raises: For any non-200 response, the exception class registered
        in `_faults` under the response's `__type`, falling back to
        `ResponseError`. This also holds when the error body is empty
        or is not a JSON object; the exception then carries
        `body=None`.

    """
    headers = {
        'X-Amz-Target': '%s.%s' % (self.TargetPrefix, action),
        'Host': self.region.endpoint,
        'Content-Type': 'application/x-amz-json-1.1',
        'Content-Length': str(len(body)),
    }
    http_request_kwargs = {
        'method': 'POST', 'path': '/', 'auth_path': '/', 'params': {},
        'headers': headers, 'data': body
    }
    if host is not None:
        headers['Host'] = host
        http_request_kwargs['host'] = host
    http_request = self.build_base_http_request(**http_request_kwargs)
    response = self._mexe(http_request, sender=None,
                          override_num_retries=10)
    response_body = response.read().decode('utf-8')
    boto.log.debug(response_body)
    if response.status == 200:
        if response_body:
            return json.loads(response_body)
        return None

    # Error bodies are not guaranteed to be JSON objects. Decoding
    # defensively keeps a parsing error from masking the HTTP failure.
    try:
        json_body = json.loads(response_body)
    except ValueError:
        json_body = None
    if not isinstance(json_body, dict):
        # NOTE(review): assumes the exception classes accept body=None;
        # confirm against boto.exception.JSONResponseError.
        json_body = None
    fault_name = None
    if json_body is not None:
        fault_name = json_body.get('__type', None)
    exception_class = self._faults.get(fault_name, self.ResponseError)
    raise exception_class(response.status, response.reason,
                          body=json_body)
|
||
|
|