Commit b1452e9

update docs and vernum

RJbalikian committed Jan 10, 2024
1 parent 92806d2 commit b1452e9
Showing 11 changed files with 780 additions and 471 deletions.
38 changes: 27 additions & 11 deletions docs/classify.html
@@ -32,7 +32,7 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
import pandas as pd
import numpy as np

from w4h import logger_function
from w4h import logger_function, verbose_print
#The following flags are used to mark the classification method:
#- 0: Not classified
#- 1: Specific Search Term Match
@@ -66,6 +66,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing the well descriptions and their matched classifications.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(specific_define, locals(), exclude_params=['df', 'terms_df'])

if description_col != terms_col:
terms_df.rename(columns={terms_col:description_col}, inplace=True)
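The `verbose_print` calls added throughout this commit follow one pattern: pass the function object, its `locals()`, and an `exclude_params` list to keep bulky arguments such as dataframes out of the output. A minimal sketch of what such a helper might look like, assuming it simply echoes the call's parameters (the actual `w4h.verbose_print` implementation may differ):

```python
import inspect

def verbose_print(func, local_variables, exclude_params=None):
    """Hypothetical sketch: print the parameters a function was called with,
    skipping any listed in exclude_params (e.g., large dataframes)."""
    exclude_params = exclude_params or []
    print(f"Running {func.__name__} with the following parameters:")
    for name in inspect.signature(func).parameters:
        if name in local_variables and name not in exclude_params:
            print(f"  {name} = {local_variables[name]!r}")
```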
@@ -82,7 +84,7 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
terms_df.drop_duplicates(subset=terms_col, keep='last', inplace=True)
terms_df.reset_index(drop=True, inplace=True)

df_Interps = pd.merge(df, terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps = pd.merge(left=df, right=terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps.rename(columns={description_col:'FORMATION'}, inplace=True)
df_Interps['BEDROCK_FLAG'] = df_Interps['LITHOLOGY'] == 'BEDROCK'

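The keyword-argument form above makes the join explicit: every record in `df` is kept, and a classification attaches only where the description matches a term. A toy illustration with invented data (the `FORMATION` and `LITHOLOGY` column names come from the surrounding code):

```python
import pandas as pd

# Toy data illustrating the left join used above; values are invented.
df = pd.DataFrame({'WELL_ID': [1, 2, 3],
                   'FORMATION': ['sand', 'dolomite', 'mystery gravel']})
terms_df = pd.DataFrame({'FORMATION': ['sand', 'dolomite'],
                         'LITHOLOGY': ['COARSE', 'BEDROCK']})

merged = pd.merge(left=df, right=terms_df.set_index('FORMATION'),
                  on='FORMATION', how='left')
# 'mystery gravel' has no match, so its LITHOLOGY is NaN and it is left
# for the later start/wildcard/depth classifiers to pick up.
print(merged)
```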
@@ -148,7 +150,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(start_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
@@ -197,7 +200,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(wildcard_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
@@ -264,7 +268,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing intervals classified as bedrock due to depth
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(depth_define, locals(), exclude_params=['df'])
df = df.copy()
df['CLASS_FLAG'].mask(df[top_col]>thresh, 3, inplace=True) #Add a Classification Flag of 3 (bedrock b/c it's deeper than 550') to all records where the top of the interval is >550'
df['BEDROCK_FLAG'].mask(df[top_col]>thresh, True, inplace=True)
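`Series.mask` overwrites values wherever its condition is True, which is how intervals topping out below the threshold get reflagged here. A small illustration with invented depths, reusing the 550-ft threshold from the comment above:

```python
import pandas as pd

# Toy intervals; the 550-ft threshold mirrors the comment above.
df = pd.DataFrame({'TOP': [40.0, 300.0, 620.0],
                   'CLASS_FLAG': [1, 1, 0],
                   'BEDROCK_FLAG': [False, False, False]})
thresh = 550

# mask() replaces values wherever the condition is True.
df['CLASS_FLAG'] = df['CLASS_FLAG'].mask(df['TOP'] > thresh, 3)
df['BEDROCK_FLAG'] = df['BEDROCK_FLAG'].mask(df['TOP'] > thresh, True)
print(df)  # only the 620-ft interval is reflagged as bedrock
```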
@@ -303,6 +308,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing only unclassified terms, and the number of times they occur
"""
import pathlib


if isinstance(outdir, pathlib.PurePath):
if not outdir.is_dir() or not outdir.exists():
print('Please specify a valid directory for export. Filename is generated automatically.')
@@ -396,7 +403,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
DataFrame containing only the unique well IDs
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(get_unique_wells, locals(), exclude_params=['df'])
#Get Unique well APIs
uniqueWells = df[wellid_col].unique()
wellsDF = pd.DataFrame(uniqueWells)
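`unique()` collapses the interval-level table to one entry per well before wrapping the result back into a dataframe. A quick illustration (the `API_NUMBER` column name is invented):

```python
import pandas as pd

# Toy records: three intervals from two wells; the ID column name is invented.
df = pd.DataFrame({'API_NUMBER': [101, 101, 202],
                   'FORMATION': ['sand', 'clay', 'dolomite']})

uniqueWells = df['API_NUMBER'].unique()   # array([101, 202])
wellsDF = pd.DataFrame(uniqueWells, columns=['API_NUMBER'])
print(wellsDF)  # one row per well rather than one per interval
```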
@@ -488,7 +496,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing intervals classified as bedrock due to depth
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(depth_define, locals(), exclude_params=['df'])
df = df.copy()
df['CLASS_FLAG'].mask(df[top_col]>thresh, 3, inplace=True) #Add a Classification Flag of 3 (bedrock b/c it's deeper than 550') to all records where the top of the interval is >550'
df['BEDROCK_FLAG'].mask(df[top_col]>thresh, True, inplace=True)
@@ -548,6 +557,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing only unclassified terms, and the number of times they occur
"""
import pathlib


if isinstance(outdir, pathlib.PurePath):
if not outdir.is_dir() or not outdir.exists():
print('Please specify a valid directory for export. Filename is generated automatically.')
@@ -647,7 +658,8 @@ <h2 id="returns">Returns</h2>
DataFrame containing only the unique well IDs
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(get_unique_wells, locals(), exclude_params=['df'])
#Get Unique well APIs
uniqueWells = df[wellid_col].unique()
wellsDF = pd.DataFrame(uniqueWells)
@@ -855,6 +867,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing the well descriptions and their matched classifications.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(specific_define, locals(), exclude_params=['df', 'terms_df'])

if description_col != terms_col:
terms_df.rename(columns={terms_col:description_col}, inplace=True)
@@ -871,7 +885,7 @@ <h2 id="returns">Returns</h2>
terms_df.drop_duplicates(subset=terms_col, keep='last', inplace=True)
terms_df.reset_index(drop=True, inplace=True)

df_Interps = pd.merge(df, terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps = pd.merge(left=df, right=terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps.rename(columns={description_col:'FORMATION'}, inplace=True)
df_Interps['BEDROCK_FLAG'] = df_Interps['LITHOLOGY'] == 'BEDROCK'

@@ -991,7 +1005,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(start_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
@@ -1069,7 +1084,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(wildcard_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
28 changes: 25 additions & 3 deletions docs/clean.html
@@ -31,7 +31,7 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
import numpy as np
import pandas as pd

from w4h import logger_function
from w4h import logger_function, verbose_print

#This function removes all data from the downholeData table where there is no location information (in the headerData table). This includes elevation info too
def remove_nonlocated(df_with_locations, xcol='LONGITUDE', ycol='LATITUDE', no_data_val_table='', verbose=False, log=False):
@@ -52,6 +52,8 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
Pandas dataframe containing only data with location information
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_nonlocated, locals(), exclude_params=['df_with_locations'])

before = df_with_locations.shape[0] #Extract length of data before this process
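Only the top of `remove_nonlocated` appears in this hunk. A minimal sketch of the idea it implements, under the assumption that the function swaps the table's no-data value for NaN and drops rows missing either coordinate (an assumption, since the body is not shown here):

```python
import numpy as np
import pandas as pd

# Sketch of the remove-nonlocated idea; the real function body is not
# shown in this hunk, so treat the details as assumptions.
df = pd.DataFrame({'LONGITUDE': [-88.2, '', -89.1],
                   'LATITUDE': [40.1, 41.0, np.nan]})
before = df.shape[0]

df = df.replace('', np.nan).dropna(subset=['LONGITUDE', 'LATITUDE'])
print(f"Removed {before - df.shape[0]} of {before} records without locations")
```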

Expand Down Expand Up @@ -97,6 +99,9 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_topo, locals(), exclude_params=['df_with_topo'])

before = df_with_topo.shape[0]

df_with_topo[zcol].replace(no_data_val_table, np.nan, inplace=True)
@@ -137,6 +142,9 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_depth, locals(), exclude_params=['df_with_depth'])

#Replace empty cells in top and bottom columns with nan
df_with_depth[top_col] = df_with_depth[top_col].replace(no_data_val_table, np.nan)
df_with_depth[bottom_col] = df_with_depth[bottom_col].replace(no_data_val_table, np.nan)
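The same sentinel-to-NaN-then-drop pattern recurs in `remove_no_topo` and `remove_no_description`, so one toy sketch covers it (the column names and the `''` sentinel are for illustration only):

```python
import numpy as np
import pandas as pd

# The shared no-data pattern: swap the table's sentinel for NaN, then drop.
df = pd.DataFrame({'TOP': [0.0, '', 35.0], 'BOTTOM': [20.0, 30.0, '']})
no_data_val_table = ''

df['TOP'] = df['TOP'].replace(no_data_val_table, np.nan)
df['BOTTOM'] = df['BOTTOM'].replace(no_data_val_table, np.nan)
df = df.dropna(subset=['TOP', 'BOTTOM'])  # keep only fully-depthed intervals
print(df)
```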
@@ -183,6 +191,8 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
Pandas dataframe with the records removed where the top is indicated to be below the bottom.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_bad_depth, locals(), exclude_params=['df_with_depth'])

if depth_type.lower() == 'depth':
    df_with_depth['THICKNESS'] = df_with_depth[bottom_col] - df_with_depth[top_col] #Calculate interval thickness
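With depth-referenced logs, thickness is bottom minus top; with elevation-referenced logs the subtraction flips. A sketch of both branches (the `elevation` branch is an assumption based on the `depth_type` parameter, since only the `depth` branch appears in this hunk):

```python
import pandas as pd

# Interval thickness under the two reference conventions.
df = pd.DataFrame({'TOP': [10.0, 25.0], 'BOTTOM': [25.0, 60.0]})
depth_type = 'depth'

if depth_type.lower() == 'depth':
    # Depths increase downward, so bottom minus top is positive thickness.
    df['THICKNESS'] = df['BOTTOM'] - df['TOP']
else:
    # Assumed elevation branch: elevations decrease downward, so reversed.
    df['THICKNESS'] = df['TOP'] - df['BOTTOM']
print(df)
```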
@@ -224,7 +234,8 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
Pandas dataframe with records with no description removed.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_description, locals(), exclude_params=['df_with_descriptions'])
#Replace empty cells in formation column with nans
df_with_descriptions[description_col] = df_with_descriptions[description_col].replace(no_data_val_table, np.nan)
before = df_with_descriptions.shape[0] #Calculate number of rows before dropping
@@ -303,6 +314,8 @@ <h2 id="returns">Returns</h2>
Pandas dataframe with the records removed where the top is indicated to be below the bottom.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_bad_depth, locals(), exclude_params=['df_with_depth'])

if depth_type.lower() == 'depth':
    df_with_depth['THICKNESS'] = df_with_depth[bottom_col] - df_with_depth[top_col] #Calculate interval thickness
@@ -376,6 +389,9 @@ <h2 id="returns">Returns</h2>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_depth, locals(), exclude_params=['df_with_depth'])

#Replace empty cells in top and bottom columns with nan
df_with_depth[top_col] = df_with_depth[top_col].replace(no_data_val_table, np.nan)
df_with_depth[bottom_col] = df_with_depth[bottom_col].replace(no_data_val_table, np.nan)
@@ -447,7 +463,8 @@ <h2 id="returns">Returns</h2>
Pandas dataframe with records with no description removed.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_description, locals(), exclude_params=['df_with_descriptions'])
#Replace empty cells in formation column with nans
df_with_descriptions[description_col] = df_with_descriptions[description_col].replace(no_data_val_table, np.nan)
before = df_with_descriptions.shape[0] #Calculate number of rows before dropping
@@ -519,6 +536,9 @@ <h2 id="returns">Returns</h2>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_topo, locals(), exclude_params=['df_with_topo'])

before = df_with_topo.shape[0]

df_with_topo[zcol].replace(no_data_val_table, np.nan, inplace=True)
@@ -575,6 +595,8 @@ <h2 id="returns">Returns</h2>
Pandas dataframe containing only data with location information
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_nonlocated, locals(), exclude_params=['df_with_locations'])

before = df_with_locations.shape[0] #Extract length of data before this process

