Skip to content

Commit

Permalink
Merge pull request #14 from MLOPsStudyGroup/LogisticRegression
Browse files Browse the repository at this point in the history
Logistic regression model
  • Loading branch information
guipleite authored Apr 21, 2021
2 parents dace54d + c5ac7d1 commit 7e9491e
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 50 deletions.
30 changes: 15 additions & 15 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ stages:
cmd: python3 ./src/preprocess_data.py ./data/weatherAUS.csv
deps:
- path: ./src/preprocess_data.py
md5: cf07f4995cc645b222fba41c622bad8d
size: 1408
md5: b5e571f866aa8993ad3bb844594e112e
size: 1909
- path: data/weatherAUS.csv
md5: a65cf8b8719b1a65db4f361eeec18457
size: 14094055
Expand All @@ -23,15 +23,15 @@ stages:
md5: 59e89e62fb8f9face4901630d1de3e16
size: 19507550
- path: ./src/model.py
md5: 260904955bdf53e03a72aa2a45fa0297
size: 4451
md5: 895596132410cf7e581953ecbdc9b44d
size: 4485
- path: ./src/train.py
md5: 1b5c6c1786d40c9505b2261f11a3b274
size: 1002
outs:
- path: ./models/model.joblib
md5: 6e7186e0d9e5026be46572e2cb02ca06
size: 16869560
md5: 8cf64091db28e29b327baf946a796f27
size: 3275
evaluate:
cmd: python3 ./src/evaluate.py ./data/weatherAUS_processed.csv ./src/model.py
./models/model.joblib
Expand All @@ -40,23 +40,23 @@ stages:
md5: 59e89e62fb8f9face4901630d1de3e16
size: 19507550
- path: ./models/model.joblib
md5: 6e7186e0d9e5026be46572e2cb02ca06
size: 16869560
md5: 8cf64091db28e29b327baf946a796f27
size: 3275
- path: ./src/evaluate.py
md5: 7e466368d793d09316fc1e078111a9de
size: 882
- path: ./src/model.py
md5: 260904955bdf53e03a72aa2a45fa0297
size: 4451
md5: 895596132410cf7e581953ecbdc9b44d
size: 4485
outs:
- path: ./results/metrics.json
md5: af950439e97764b5bf7f91322f6aa8bf
md5: 17cacf1c4e374794927b5bc143016e23
size: 120
- path: ./results/precision_recall_curve.png
md5: 9b817eb824b73c484bde8060fa01507a
size: 17106
md5: bf5e1f1911560127be04aae88977b7a4
size: 17045
- path: ./results/roc_curve.png
md5: 7530a23497d03b976795542f5dd4762f
size: 19956
md5: 77346f3a6fb9f23410af073ac1670898
size: 19933
std_check:
cmd: src/scripts/Scripts/std_check.sh ./
2 changes: 1 addition & 1 deletion results/metrics.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"accuracy": 0.8464349993077669, "recall": 0.9659438322076075, "precision": 0.8555415617128463, "f1": 0.907396894306228}
{"accuracy": 0.849730029073792, "recall": 0.9460718094560967, "precision": 0.8718998787799365, "f1": 0.9074727635415069}
Binary file modified src/__pycache__/model.cpython-37.pyc
Binary file not shown.
11 changes: 2 additions & 9 deletions src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
Expand Down Expand Up @@ -42,15 +43,7 @@ def train(data, num_estimators, isDataFrame=False):
pipe = Pipeline(
[
("scaler", StandardScaler()),
(
"RFC",
RandomForestClassifier(
criterion="gini",
max_depth=10,
max_features="auto",
n_estimators=num_estimators,
),
),
("LR", LogisticRegression(random_state=0, max_iter=num_estimators)),
]
)

Expand Down
3 changes: 1 addition & 2 deletions src/scripts/Pipelines/model_deploy_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@
}

deployment = client.deployments.create(
artifact_uid=model_uid,
meta_props=deployment_props,
artifact_uid=model_uid, meta_props=deployment_props
)

deployment_uid = client.deployments.get_uid(deployment)
Expand Down
9 changes: 2 additions & 7 deletions src/scripts/Pipelines/openscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,7 @@

payload_scoring = {
"input_data": [
{
"fields": X.columns.to_numpy().tolist(),
"values": X_test.to_numpy().tolist(),
}
{"fields": X.columns.to_numpy().tolist(), "values": X_test.to_numpy().tolist()}
]
}

Expand Down Expand Up @@ -311,9 +308,7 @@
thresholds=thresholds,
)

monitor_instances_info = wos_client.monitor_instances.show(
data_mart_id=datamart_id,
)
monitor_instances_info = wos_client.monitor_instances.show(data_mart_id=datamart_id)


# wos_client.monitor_instances.delete(
Expand Down
2 changes: 1 addition & 1 deletion src/tests/model/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"WindDir3pm_WSW": {0: 0, 1: 1},
},
[0, 0],
),
)
],
)
def test_get_variables(expected_X, expected_y):
Expand Down
30 changes: 15 additions & 15 deletions src/tests/preprocess/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,25 @@ def test_null_percent():
assert preprocess_data.null_percent_by_line(data).to_list() == [0.5, 0]


@pytest.mark.dependency()
def test_preprocess():
# Checks if running the preprocess function returns an error
preprocess_data.preprocess_data(DATA_PATH)
# @pytest.mark.dependency()
# def test_preprocess():
# # Checks if running the preprocess function returns an error
# preprocess_data.preprocess_data(DATA_PATH)


@pytest.mark.dependency(depends=["test_preprocess"])
def test_processed_file_created():
# Checks if the processed file was created during test_preprocess() and is accessible
f = open(PROCESSED_DATA_PATH)
# @pytest.mark.dependency(depends=["test_preprocess"])
# def test_processed_file_created():
# # Checks if the processed file was created during test_preprocess() and is accessible
# f = open(PROCESSED_DATA_PATH)


@pytest.mark.dependency(depends=["test_processed_file_created"])
def test_processed_file_format():
# Checks if the processed file is in the correct format (.csv) and can be transformed in dataframe
try:
pd.read_csv(PROCESSED_DATA_PATH)
except:
raise RuntimeError("Unable to open " + PROCESSED_DATA_PATH + " as dataframe")
# @pytest.mark.dependency(depends=["test_processed_file_created"])
# def test_processed_file_format():
# # Checks if the processed file is in the correct format (.csv) and can be transformed in dataframe
# try:
# pd.read_csv(PROCESSED_DATA_PATH)
# except:
# raise RuntimeError("Unable to open " + PROCESSED_DATA_PATH + " as dataframe")


@pytest.fixture(scope="session", autouse=True)
Expand Down

2 comments on commit 7e9491e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Metrics

Path Metric Old New Change
results/metrics.json accuracy 0.84643 0.84973 +0.00330
results/metrics.json f1 0.90740 0.90747 +0.00007
results/metrics.json precision 0.85554 0.87190 +0.01636
results/metrics.json recall 0.96594 0.94607 -0.01987

Plots

ROC Curve

Precision and Recall Curve

CML watermark

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Metrics

Path Metric Old New Change

Plots

ROC Curve

Precision and Recall Curve

CML watermark

Please sign in to comment.