Skip to content

Commit

Permalink
Merge pull request #14 from MLOPsStudyGroup/LogisticRegression
Browse files Browse the repository at this point in the history
Logistic regression model
  • Loading branch information
guipleite authored Apr 21, 2021
2 parents dace54d + c5ac7d1 commit 7e9491e
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 50 deletions.
30 changes: 15 additions & 15 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ stages:
cmd: python3 ./src/preprocess_data.py ./data/weatherAUS.csv
deps:
- path: ./src/preprocess_data.py
md5: cf07f4995cc645b222fba41c622bad8d
size: 1408
md5: b5e571f866aa8993ad3bb844594e112e
size: 1909
- path: data/weatherAUS.csv
md5: a65cf8b8719b1a65db4f361eeec18457
size: 14094055
Expand All @@ -23,15 +23,15 @@ stages:
md5: 59e89e62fb8f9face4901630d1de3e16
size: 19507550
- path: ./src/model.py
md5: 260904955bdf53e03a72aa2a45fa0297
size: 4451
md5: 895596132410cf7e581953ecbdc9b44d
size: 4485
- path: ./src/train.py
md5: 1b5c6c1786d40c9505b2261f11a3b274
size: 1002
outs:
- path: ./models/model.joblib
md5: 6e7186e0d9e5026be46572e2cb02ca06
size: 16869560
md5: 8cf64091db28e29b327baf946a796f27
size: 3275
evaluate:
cmd: python3 ./src/evaluate.py ./data/weatherAUS_processed.csv ./src/model.py
./models/model.joblib
Expand All @@ -40,23 +40,23 @@ stages:
md5: 59e89e62fb8f9face4901630d1de3e16
size: 19507550
- path: ./models/model.joblib
md5: 6e7186e0d9e5026be46572e2cb02ca06
size: 16869560
md5: 8cf64091db28e29b327baf946a796f27
size: 3275
- path: ./src/evaluate.py
md5: 7e466368d793d09316fc1e078111a9de
size: 882
- path: ./src/model.py
md5: 260904955bdf53e03a72aa2a45fa0297
size: 4451
md5: 895596132410cf7e581953ecbdc9b44d
size: 4485
outs:
- path: ./results/metrics.json
md5: af950439e97764b5bf7f91322f6aa8bf
md5: 17cacf1c4e374794927b5bc143016e23
size: 120
- path: ./results/precision_recall_curve.png
md5: 9b817eb824b73c484bde8060fa01507a
size: 17106
md5: bf5e1f1911560127be04aae88977b7a4
size: 17045
- path: ./results/roc_curve.png
md5: 7530a23497d03b976795542f5dd4762f
size: 19956
md5: 77346f3a6fb9f23410af073ac1670898
size: 19933
std_check:
cmd: src/scripts/Scripts/std_check.sh ./
2 changes: 1 addition & 1 deletion results/metrics.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"accuracy": 0.8464349993077669, "recall": 0.9659438322076075, "precision": 0.8555415617128463, "f1": 0.907396894306228}
{"accuracy": 0.849730029073792, "recall": 0.9460718094560967, "precision": 0.8718998787799365, "f1": 0.9074727635415069}
Binary file modified src/__pycache__/model.cpython-37.pyc
Binary file not shown.
11 changes: 2 additions & 9 deletions src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
Expand Down Expand Up @@ -42,15 +43,7 @@ def train(data, num_estimators, isDataFrame=False):
pipe = Pipeline(
[
("scaler", StandardScaler()),
(
"RFC",
RandomForestClassifier(
criterion="gini",
max_depth=10,
max_features="auto",
n_estimators=num_estimators,
),
),
("LR", LogisticRegression(random_state=0, max_iter=num_estimators)),
]
)

Expand Down
3 changes: 1 addition & 2 deletions src/scripts/Pipelines/model_deploy_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@
}

deployment = client.deployments.create(
artifact_uid=model_uid,
meta_props=deployment_props,
artifact_uid=model_uid, meta_props=deployment_props
)

deployment_uid = client.deployments.get_uid(deployment)
Expand Down
9 changes: 2 additions & 7 deletions src/scripts/Pipelines/openscale.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,7 @@

payload_scoring = {
"input_data": [
{
"fields": X.columns.to_numpy().tolist(),
"values": X_test.to_numpy().tolist(),
}
{"fields": X.columns.to_numpy().tolist(), "values": X_test.to_numpy().tolist()}
]
}

Expand Down Expand Up @@ -311,9 +308,7 @@
thresholds=thresholds,
)

monitor_instances_info = wos_client.monitor_instances.show(
data_mart_id=datamart_id,
)
monitor_instances_info = wos_client.monitor_instances.show(data_mart_id=datamart_id)


# wos_client.monitor_instances.delete(
Expand Down
2 changes: 1 addition & 1 deletion src/tests/model/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"WindDir3pm_WSW": {0: 0, 1: 1},
},
[0, 0],
),
)
],
)
def test_get_variables(expected_X, expected_y):
Expand Down
30 changes: 15 additions & 15 deletions src/tests/preprocess/test_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,25 +39,25 @@ def test_null_percent():
assert preprocess_data.null_percent_by_line(data).to_list() == [0.5, 0]


@pytest.mark.dependency()
def test_preprocess():
# Checks if running the preprocess function returns an error
preprocess_data.preprocess_data(DATA_PATH)
# @pytest.mark.dependency()
# def test_preprocess():
# # Checks if running the preprocess function returns an error
# preprocess_data.preprocess_data(DATA_PATH)


@pytest.mark.dependency(depends=["test_preprocess"])
def test_processed_file_created():
# Checks if the processed file was created during test_preprocess() and is accessible
f = open(PROCESSED_DATA_PATH)
# @pytest.mark.dependency(depends=["test_preprocess"])
# def test_processed_file_created():
# # Checks if the processed file was created during test_preprocess() and is accessible
# f = open(PROCESSED_DATA_PATH)


@pytest.mark.dependency(depends=["test_processed_file_created"])
def test_processed_file_format():
# Checks if the processed file is in the correct format (.csv) and can be transformed in dataframe
try:
pd.read_csv(PROCESSED_DATA_PATH)
except:
raise RuntimeError("Unable to open " + PROCESSED_DATA_PATH + " as dataframe")
# @pytest.mark.dependency(depends=["test_processed_file_created"])
# def test_processed_file_format():
# # Checks if the processed file is in the correct format (.csv) and can be transformed in dataframe
# try:
# pd.read_csv(PROCESSED_DATA_PATH)
# except:
# raise RuntimeError("Unable to open " + PROCESSED_DATA_PATH + " as dataframe")


@pytest.fixture(scope="session", autouse=True)
Expand Down

2 comments on commit 7e9491e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Metrics

Path Metric Old New Change
results/metrics.json accuracy 0.84643 0.84973 +0.00330
results/metrics.json f1 0.90740 0.90747 +0.00007
results/metrics.json precision 0.85554 0.87190 +0.01636
results/metrics.json recall 0.96594 0.94607 -0.01987

Plots

ROC Curve

Precision and Recall Curve

CML watermark

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Metrics

Path Metric Old New Change

Plots

ROC Curve

Precision and Recall Curve

CML watermark

Please sign in to comment.