import glob
import io
import logging
import tempfile
import zipfile
from typing import List, Union
import pandas as pd
from modep_client.client import Client
from modep_client.tasks import INCOMPLETE_STATUSES, BaseTask
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class Frameworks:
    """
    Wrapper around the ``frameworks/tabular`` endpoints of the API.

    Every method sends one request through ``client.sess`` using the headers
    returned by ``client.auth_header()``. A response that is not OK is handed
    to ``client.response_exception`` (presumably this raises; if it returns,
    the method returns ``None``).
    """

    def __init__(self, client: "Client"):
        """
        Initialize the Frameworks class

        :param client: A :class:`modep_client.client.Client` object
        """
        self.client = client

    def info(self):
        """
        Get info about the AutoML frameworks available through the API

        :return: A :class:`pandas.DataFrame` with one row for each framework,
            indexed by ``framework_name``
        """
        url = self.client.url + "frameworks/tabular/info"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        records = resp.json()
        frame = pd.DataFrame(records)
        if len(frame) > 0:
            # keep column order same as json
            frame = frame[list(records[0].keys())].set_index("framework_name")
        return frame

    def list(self):
        """
        List all AutoML framework training runs

        :return: A :class:`pandas.DataFrame` with one row for each training
            run, indexed by ``id`` and sorted by ``created``, newest first
        """
        url = self.client.url + "frameworks/tabular"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        records = resp.json()
        frame = pd.DataFrame(records)
        if len(frame) > 0:
            # keep column order same as json
            frame = frame[list(records[0].keys())].set_index("id")
            frame = frame.sort_values(by="created", ascending=False)
        return frame

    def train(
        self,
        framework_name: str,
        train_ids: Union[str, List[str]],
        test_ids: Union[str, List[str]],
        target: str,
        max_runtime_seconds: int,
    ):
        """
        Train an AutoML framework

        :param str framework_name: The name of the framework (e.g. AutoGluon, AutoGluon_bestquality,
            autosklearn, autosklearn2, AutoWEKA, constantpredictor, DecisionTree, flaml, GAMA, H2OAutoML,
            hyperoptsklearn, mljarsupervised, mljarsupervised_compete, MLNet, RandomForest, TPOT, TunedRandomForest)
        :param train_ids: The id(s) of dataset(s) to train on (e.g. `1bc3d16b-6d67-43cd-af59-8d39d8cb2a02`)
        :param test_ids: The id(s) of dataset(s) to test on (e.g. `1bc3d16b-6d67-43cd-af59-8d39d8cb2a02`)
        :param str target: The name of the target column in the training dataset(s)
        :param int max_runtime_seconds: The maximum amount of time in seconds to train per dataset(s)
        :return: A :class:`modep_client.tasks.BaseTask` object built from the
            response JSON and :meth:`get`
        """
        url = self.client.url + "frameworks/tabular"
        # a single id is accepted as a convenience; the payload always carries lists
        train_ids = [train_ids] if isinstance(train_ids, str) else train_ids
        test_ids = [test_ids] if isinstance(test_ids, str) else test_ids
        data = dict(
            framework_name=framework_name,
            train_ids=train_ids,
            test_ids=test_ids,
            target=target,
            max_runtime_seconds=max_runtime_seconds,
            experiment_id="",
        )
        logger.info(data)
        resp = self.client.sess.post(url, json=data, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return BaseTask(resp.json(), self.get)

    def get(self, id: str):
        """
        Get an AutoML training run by id

        :param str id: The id of the training run
        :return: A dictionary containing the training run
        """
        url = self.client.url + "frameworks/tabular/" + str(id)
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def stop(self, id):
        """
        Stop an AutoML training run (sends a GET request to the ``stop`` endpoint)

        :param str id: The id of the training run to stop
        :return: A dictionary containing the training run
        """
        url = self.client.url + f"frameworks/tabular/{id}/stop"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def delete(self, id):
        """
        Delete an AutoML training run

        :param str id: The id of the training run to delete
        :return: A dictionary containing info about the deletion
        """
        url = self.client.url + "frameworks/tabular/" + str(id)
        resp = self.client.sess.delete(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def predict(self, framework_id, dataset_id):
        """
        Start a job to get predictions from an AutoML framework on a new dataset

        :param str framework_id: The id of the framework to use
        :param str dataset_id: The id of the dataset to predict on
        :return: A :class:`modep_client.tasks.BaseTask` object built from the
            response JSON and :meth:`get_predictions`
        """
        url = self.client.url + "frameworks/tabular/predict"
        data = dict(
            framework_id=framework_id,
            dataset_id=dataset_id,
        )
        resp = self.client.sess.post(url, json=data, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return BaseTask(resp.json(), self.get_predictions)

    def get_predictions(self, predictions_id):
        """
        Get the predictions created by an AutoML training or prediction job

        :param str predictions_id: The id of the predictions to get
        :return: A dictionary containing the predictions
        """
        url = self.client.url + f"frameworks/tabular/predictions/{predictions_id}"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def get_output(self, framework_id, target_dir=None):
        """
        Get the output files generated by the AutoML training run.

        The response body is treated as a zip archive and extracted into
        ``target_dir``.

        :param str framework_id: The id of the training run
        :param str target_dir: The local directory to download the files to. If None,
            the files are downloaded to a newly created temp directory.
        :return: The directory the files were extracted to
        """
        url = self.client.url + f"frameworks/tabular/{framework_id}/output"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        if target_dir is None:
            # mkdtemp creates the directory atomically; reusing the name of a
            # discarded NamedTemporaryFile would leave a window in which another
            # process could claim the path. Created only once there is
            # something to extract.
            target_dir = tempfile.mkdtemp(suffix="-modep-output")
        # download contents to directory
        with zipfile.ZipFile(io.BytesIO(resp.content)) as archive:
            archive.extractall(path=target_dir)
        return target_dir

    def print_log(self, framework_id, target_dir=None):
        """
        Print the logs generated by the AutoML training run.

        The output files are downloaded with :meth:`get_output`, then the file
        matching ``<target_dir>/*/logs/*.full.log`` is printed.

        :param str framework_id: The id of the training run
        :param str target_dir: The local directory to download the files to. If None,
            the files are downloaded to a temp directory.
        :return: A one-element list holding the path of the printed log file
        :raises Exception: If the number of matching log files is not exactly one
        """
        target_dir = self.get_output(framework_id, target_dir=target_dir)
        logfile = glob.glob(target_dir + "/*/logs/*.full.log")
        if len(logfile) != 1:
            raise Exception("Could not find log file")
        with open(logfile[0], "r") as f:
            logtxt = f.read()
        print(logtxt)
        return logfile
class FrameworkFlights:
    """
    Wrapper around the ``frameworks/tabular/flight`` endpoints of the API.

    A Flight is a set of AutoML frameworks trained on the same data for
    comparison purposes. A response that is not OK is handed to
    ``client.response_exception`` (presumably this raises; if it returns, the
    method returns ``None``).
    """

    # Columns removed from the member table before it is shown by wait() and
    # wait_terminal().
    _HIDDEN_COLUMNS = ["fold_results", "fold_leaderboard", "fold_model_txt"]

    def __init__(self, client: "Client"):
        """
        Initialize the FrameworkFlights class. A Flight is a set of AutoML
        frameworks trained on the same data for comparison purposes.

        :param client: A :class:`modep_client.client.Client` object
        """
        self.client = client

    def train(
        self,
        framework_names: List[str],
        train_ids: Union[str, List[str]],
        test_ids: Union[str, List[str]],
        target: str,
        max_runtime_seconds: int,
    ):
        """
        Start a job to train an AutoML framework flight.

        :param framework_names: A list of framework names to train. If empty, then all
            frameworks are trained. Use :class:`modep_client.frameworks.Frameworks.info()`
            to get a list of available frameworks. Available ones are AutoGluon, AutoGluon_bestquality,
            autosklearn, autosklearn2, AutoWEKA, constantpredictor, DecisionTree, flaml, GAMA, H2OAutoML,
            hyperoptsklearn, mljarsupervised, mljarsupervised_compete, MLNet, RandomForest, TPOT, TunedRandomForest.
            The value is sent to the API unchanged.
        :type framework_names: list of str
        :param train_ids: The id(s) of the dataset(s) to train on
        :type train_ids: str or list of str
        :param test_ids: The id(s) of the dataset(s) to test on
        :type test_ids: str or list of str
        :param str target: The name of the target column
        :param int max_runtime_seconds: The maximum amount of time in seconds to train per dataset(s)
        :return: A :class:`modep_client.tasks.BaseTask` object built from the
            response JSON and :meth:`get`
        """
        url = self.client.url + "frameworks/tabular/flight"
        # convert train_ids and test_ids to lists if they are not already
        train_ids = [train_ids] if isinstance(train_ids, str) else train_ids
        test_ids = [test_ids] if isinstance(test_ids, str) else test_ids
        data = dict(
            framework_names=framework_names,
            train_ids=train_ids,
            test_ids=test_ids,
            target=target,
            max_runtime_seconds=max_runtime_seconds,
        )
        resp = self.client.sess.post(url, json=data, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return BaseTask(resp.json(), self.get)

    def list(self):
        """
        List all flights

        :return: A :class:`pandas.DataFrame` containing the flights, indexed by
            ``id`` and sorted by ``created``, newest first
        """
        url = self.client.url + "frameworks/tabular/flight"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        records = resp.json()
        frame = pd.DataFrame(records)
        if len(frame) > 0:
            # keep column order same as json
            frame = frame[list(records[0].keys())].set_index("id")
            frame = frame.sort_values(by="created", ascending=False)
        return frame

    def get(self, id):
        """
        Get a flight by id

        :param str id: The id of the flight
        :return: A dictionary containing the flight
        """
        url = self.client.url + "frameworks/tabular/flight/" + str(id)
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def delete(self, id):
        """
        Delete a flight by id

        :param str id: The id of the flight to delete
        :return: A dictionary containing deletion information
        """
        url = self.client.url + "frameworks/tabular/flight/" + str(id)
        resp = self.client.sess.delete(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def stop(self, id):
        """
        Stop a flight by id (sends a GET request to the ``stop`` endpoint)

        :param str id: The id of the flight to stop
        :return: A dictionary containing stopping information
        """
        url = self.client.url + f"frameworks/tabular/flight/{id}/stop"
        resp = self.client.sess.get(url, headers=self.client.auth_header())
        if not resp.ok:
            self.client.response_exception(resp)
            return None
        return resp.json()

    def _member_table(self, flight):
        """
        Build the DataFrame of individual AutoML frameworks for a flight.

        Removes the ``frameworks`` entry from ``flight`` (the dictionary is
        modified in place) and drops the columns in ``_HIDDEN_COLUMNS``.

        :param dict flight: A flight dictionary as returned by :meth:`get`
        :return: A :class:`pandas.DataFrame` with one row per framework
        """
        members = pd.DataFrame(flight.pop("frameworks", []))
        # ``columns=`` replaces the positional axis argument, which pandas >= 2.0
        # rejects; ``errors="ignore"`` keeps a flight with no members yet (an
        # empty frame without these columns) from raising KeyError.
        return members.drop(columns=self._HIDDEN_COLUMNS, errors="ignore")

    def wait(self, id):
        """
        Wait for a flight to finish while printing out a DataFrame of the results.
        This version is for running in a Jupyter notebook, for the terminal version,
        see :func:`wait_terminal()`.

        The flight is fetched every 5 seconds until its status is no longer
        in ``INCOMPLETE_STATUSES``.

        :param str id: The id of the flight to wait for
        """
        import time

        # imported here so IPython is only needed when this method is called
        from IPython.display import HTML, clear_output, display

        while True:
            flight = self.get(id)
            # make a DataFrame for the individual AutoML frameworks for this flight
            frameworks = self._member_table(flight)
            print(f"flight status: {flight['status']}")
            print("flight members:")
            display(HTML(frameworks.to_html()))
            if flight["status"] not in INCOMPLETE_STATUSES:
                break
            time.sleep(5)
            clear_output(wait=True)

    def wait_terminal(self, id):
        """
        Wait for a flight to finish while printing out a DataFrame of the results.
        This version is for running in a terminal. Use :func:`wait()` if you are
        running in a Jupyter notebook.

        The flight is fetched every 5 seconds until its status is no longer
        in ``INCOMPLETE_STATUSES``.

        :param str id: The id of the flight to wait for
        """
        import time

        while True:
            flight = self.get(id)
            # make a DataFrame for the individual AutoML frameworks for this flight
            frameworks = self._member_table(flight)
            print(f"flight status: {flight['status']}")
            print("flight members:")
            print(frameworks)
            if flight["status"] not in INCOMPLETE_STATUSES:
                break
            time.sleep(5)