Commit b1452e9

update docs and vernum

RJbalikian committed Jan 10, 2024
1 parent 92806d2 commit b1452e9
Showing 11 changed files with 780 additions and 471 deletions.
38 changes: 27 additions & 11 deletions docs/classify.html
@@ -32,7 +32,7 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
import pandas as pd
import numpy as np

from w4h import logger_function
from w4h import logger_function, verbose_print
#The following flags are used to mark the classification method:
#- 0: Not classified
#- 1: Specific Search Term Match
@@ -66,6 +66,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing the well descriptions and their matched classifications.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(specific_define, locals(), exclude_params=['df', 'terms_df'])

if description_col != terms_col:
terms_df.rename(columns={terms_col:description_col}, inplace=True)
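The `verbose_print` calls added throughout this commit follow one pattern: pass the function object, its `locals()`, and an `exclude_params` list to keep bulky arguments such as dataframes out of the output. A minimal sketch of what such a helper might look like, assuming it simply echoes the call's parameters (the actual `w4h.verbose_print` implementation may differ):

```python
import inspect

def verbose_print(func, local_variables, exclude_params=None):
    """Hypothetical sketch: print the parameters a function was called with,
    skipping any listed in exclude_params (e.g., large dataframes)."""
    exclude_params = exclude_params or []
    print(f"Running {func.__name__} with the following parameters:")
    for name in inspect.signature(func).parameters:
        if name in local_variables and name not in exclude_params:
            print(f"  {name} = {local_variables[name]!r}")
```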
@@ -82,7 +84,7 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
terms_df.drop_duplicates(subset=terms_col, keep='last', inplace=True)
terms_df.reset_index(drop=True, inplace=True)

df_Interps = pd.merge(df, terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps = pd.merge(left=df, right=terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps.rename(columns={description_col:'FORMATION'}, inplace=True)
df_Interps['BEDROCK_FLAG'] = df_Interps['LITHOLOGY'] == 'BEDROCK'

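The keyword-argument form above makes the join explicit: every record in `df` is kept, and a classification attaches only where the description matches a term. A toy illustration with invented data (the `FORMATION` and `LITHOLOGY` column names come from the surrounding code):

```python
import pandas as pd

# Toy data illustrating the left join used above; values are invented.
df = pd.DataFrame({'WELL_ID': [1, 2, 3],
                   'FORMATION': ['sand', 'dolomite', 'mystery gravel']})
terms_df = pd.DataFrame({'FORMATION': ['sand', 'dolomite'],
                         'LITHOLOGY': ['COARSE', 'BEDROCK']})

merged = pd.merge(left=df, right=terms_df.set_index('FORMATION'),
                  on='FORMATION', how='left')
# 'mystery gravel' has no match, so its LITHOLOGY is NaN and it is left
# for the later start/wildcard/depth classifiers to pick up.
print(merged)
```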
@@ -148,7 +150,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(start_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
@@ -197,7 +200,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(wildcard_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
@@ -264,7 +268,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing intervals classified as bedrock due to depth
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(depth_define, locals(), exclude_params=['df'])
df = df.copy()
df['CLASS_FLAG'].mask(df[top_col]>thresh, 3, inplace=True) #Add a Classification Flag of 3 (bedrock b/c it's deeper than 550') to all records where the top of the interval is >550'
df['BEDROCK_FLAG'].mask(df[top_col]>thresh, True, inplace=True)
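`Series.mask` overwrites values wherever its condition is True, which is how intervals topping out below the threshold get reflagged here. A small illustration with invented depths, reusing the 550-ft threshold from the comment above:

```python
import pandas as pd

# Toy intervals; the 550-ft threshold mirrors the comment above.
df = pd.DataFrame({'TOP': [40.0, 300.0, 620.0],
                   'CLASS_FLAG': [1, 1, 0],
                   'BEDROCK_FLAG': [False, False, False]})
thresh = 550

# mask() replaces values wherever the condition is True.
df['CLASS_FLAG'] = df['CLASS_FLAG'].mask(df['TOP'] > thresh, 3)
df['BEDROCK_FLAG'] = df['BEDROCK_FLAG'].mask(df['TOP'] > thresh, True)
print(df)  # only the 620-ft interval is reflagged as bedrock
```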
@@ -303,6 +308,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
Dataframe containing only unclassified terms, and the number of times they occur
"""
import pathlib


if isinstance(outdir, pathlib.PurePath):
if not outdir.is_dir() or not outdir.exists():
print('Please specify a valid directory for export. Filename is generated automatically.')
@@ -396,7 +403,8 @@ <h1 class="title">Module <code>w4h.classify</code></h1>
DataFrame containing only the unique well IDs
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(get_unique_wells, locals(), exclude_params=['df'])
#Get Unique well APIs
uniqueWells = df[wellid_col].unique()
wellsDF = pd.DataFrame(uniqueWells)
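`unique()` collapses the interval-level table to one entry per well before wrapping the result back into a dataframe. A quick illustration (the `API_NUMBER` column name is invented):

```python
import pandas as pd

# Toy records: three intervals from two wells; the ID column name is invented.
df = pd.DataFrame({'API_NUMBER': [101, 101, 202],
                   'FORMATION': ['sand', 'clay', 'dolomite']})

uniqueWells = df['API_NUMBER'].unique()   # array([101, 202])
wellsDF = pd.DataFrame(uniqueWells, columns=['API_NUMBER'])
print(wellsDF)  # one row per well rather than one per interval
```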
@@ -488,7 +496,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing intervals classified as bedrock due to depth
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(depth_define, locals(), exclude_params=['df'])
df = df.copy()
df['CLASS_FLAG'].mask(df[top_col]>thresh, 3, inplace=True) #Add a Classification Flag of 3 (bedrock b/c it's deeper than 550') to all records where the top of the interval is >550'
df['BEDROCK_FLAG'].mask(df[top_col]>thresh, True, inplace=True)
@@ -548,6 +557,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing only unclassified terms, and the number of times they occur
"""
import pathlib


if isinstance(outdir, pathlib.PurePath):
if not outdir.is_dir() or not outdir.exists():
print('Please specify a valid directory for export. Filename is generated automatically.')
@@ -647,7 +658,8 @@ <h2 id="returns">Returns</h2>
DataFrame containing only the unique well IDs
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(get_unique_wells, locals(), exclude_params=['df'])
#Get Unique well APIs
uniqueWells = df[wellid_col].unique()
wellsDF = pd.DataFrame(uniqueWells)
@@ -855,6 +867,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing the well descriptions and their matched classifications.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(specific_define, locals(), exclude_params=['df', 'terms_df'])

if description_col != terms_col:
terms_df.rename(columns={terms_col:description_col}, inplace=True)
@@ -871,7 +885,7 @@ <h2 id="returns">Returns</h2>
terms_df.drop_duplicates(subset=terms_col, keep='last', inplace=True)
terms_df.reset_index(drop=True, inplace=True)

df_Interps = pd.merge(df, terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps = pd.merge(left=df, right=terms_df.set_index(terms_col), on=description_col, how='left')
df_Interps.rename(columns={description_col:'FORMATION'}, inplace=True)
df_Interps['BEDROCK_FLAG'] = df_Interps['LITHOLOGY'] == 'BEDROCK'

@@ -991,7 +1005,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(start_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
@@ -1069,7 +1084,8 @@ <h2 id="returns">Returns</h2>
Dataframe containing the original data and new classifications
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(wildcard_define, locals(), exclude_params=['df', 'terms_df'])
#if verbose:
# #Estimate when it will end, based on test run
# estTime = df.shape[0]/3054409 * 6 #It took about 6 minutes to classify data with entire dataframe. This estimates the fraction of that it will take
28 changes: 25 additions & 3 deletions docs/clean.html
@@ -31,7 +31,7 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
import numpy as np
import pandas as pd

from w4h import logger_function
from w4h import logger_function, verbose_print

#This function removes all data from the downholeData table where there is no location information (in the headerData table). This includes elevation info too
def remove_nonlocated(df_with_locations, xcol='LONGITUDE', ycol='LATITUDE', no_data_val_table='', verbose=False, log=False):
@@ -52,6 +52,8 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
Pandas dataframe containing only data with location information
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_nonlocated, locals(), exclude_params=['df_with_locations'])

before = df_with_locations.shape[0] #Extract length of data before this process
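Only the top of `remove_nonlocated` appears in this hunk. A minimal sketch of the idea it implements, under the assumption that the function swaps the table's no-data value for NaN and drops rows missing either coordinate (an assumption, since the body is not shown here):

```python
import numpy as np
import pandas as pd

# Sketch of the remove-nonlocated idea; the real function body is not
# shown in this hunk, so treat the details as assumptions.
df = pd.DataFrame({'LONGITUDE': [-88.2, '', -89.1],
                   'LATITUDE': [40.1, 41.0, np.nan]})
before = df.shape[0]

df = df.replace('', np.nan).dropna(subset=['LONGITUDE', 'LATITUDE'])
print(f"Removed {before - df.shape[0]} of {before} records without locations")
```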

Expand Down Expand Up @@ -97,6 +99,9 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_topo, locals(), exclude_params=['df_with_topo'])

before = df_with_topo.shape[0]

df_with_topo[zcol].replace(no_data_val_table, np.nan, inplace=True)
@@ -137,6 +142,9 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_depth, locals(), exclude_params=['df_with_depth'])

#Replace empty cells in top and bottom columns with nan
df_with_depth[top_col] = df_with_depth[top_col].replace(no_data_val_table, np.nan)
df_with_depth[bottom_col] = df_with_depth[bottom_col].replace(no_data_val_table, np.nan)
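The same sentinel-to-NaN-then-drop pattern recurs in `remove_no_topo` and `remove_no_description`, so one toy sketch covers it (the column names and the `''` sentinel are for illustration only):

```python
import numpy as np
import pandas as pd

# The shared no-data pattern: swap the table's sentinel for NaN, then drop.
df = pd.DataFrame({'TOP': [0.0, '', 35.0], 'BOTTOM': [20.0, 30.0, '']})
no_data_val_table = ''

df['TOP'] = df['TOP'].replace(no_data_val_table, np.nan)
df['BOTTOM'] = df['BOTTOM'].replace(no_data_val_table, np.nan)
df = df.dropna(subset=['TOP', 'BOTTOM'])  # keep only fully-depthed intervals
print(df)
```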
@@ -183,6 +191,8 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
Pandas dataframe with the records removed where the top is indicated to be below the bottom.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_bad_depth, locals(), exclude_params=['df_with_depth'])

if depth_type.lower() == 'depth':
    df_with_depth['THICKNESS'] = df_with_depth[bottom_col] - df_with_depth[top_col] #Calculate interval thickness
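With depth-referenced logs, thickness is bottom minus top; with elevation-referenced logs the subtraction flips. A sketch of both branches (the `elevation` branch is an assumption based on the `depth_type` parameter, since only the `depth` branch appears in this hunk):

```python
import pandas as pd

# Interval thickness under the two reference conventions.
df = pd.DataFrame({'TOP': [10.0, 25.0], 'BOTTOM': [25.0, 60.0]})
depth_type = 'depth'

if depth_type.lower() == 'depth':
    # Depths increase downward, so bottom minus top is positive thickness.
    df['THICKNESS'] = df['BOTTOM'] - df['TOP']
else:
    # Assumed elevation branch: elevations decrease downward, so reversed.
    df['THICKNESS'] = df['TOP'] - df['BOTTOM']
print(df)
```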
@@ -224,7 +234,8 @@ <h1 class="title">Module <code>w4h.clean</code></h1>
Pandas dataframe with records with no description removed.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_description, locals(), exclude_params=['df_with_descriptions'])
#Replace empty cells in formation column with nans
df_with_descriptions[description_col] = df_with_descriptions[description_col].replace(no_data_val_table, np.nan)
before = df_with_descriptions.shape[0] #Calculate number of rows before dropping
@@ -303,6 +314,8 @@ <h2 id="returns">Returns</h2>
Pandas dataframe with the records removed where the top is indicated to be below the bottom.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_bad_depth, locals(), exclude_params=['df_with_depth'])

if depth_type.lower() == 'depth':
    df_with_depth['THICKNESS'] = df_with_depth[bottom_col] - df_with_depth[top_col] #Calculate interval thickness
@@ -376,6 +389,9 @@ <h2 id="returns">Returns</h2>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_depth, locals(), exclude_params=['df_with_depth'])

#Replace empty cells in top and bottom columns with nan
df_with_depth[top_col] = df_with_depth[top_col].replace(no_data_val_table, np.nan)
df_with_depth[bottom_col] = df_with_depth[bottom_col].replace(no_data_val_table, np.nan)
@@ -447,7 +463,8 @@ <h2 id="returns">Returns</h2>
Pandas dataframe with records with no description removed.
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_description, locals(), exclude_params=['df_with_descriptions'])
#Replace empty cells in formation column with nans
df_with_descriptions[description_col] = df_with_descriptions[description_col].replace(no_data_val_table, np.nan)
before = df_with_descriptions.shape[0] #Calculate number of rows before dropping
@@ -519,6 +536,9 @@ <h2 id="returns">Returns</h2>
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)

if verbose:
    verbose_print(remove_no_topo, locals(), exclude_params=['df_with_topo'])

before = df_with_topo.shape[0]

df_with_topo[zcol].replace(no_data_val_table, np.nan, inplace=True)
@@ -575,6 +595,8 @@ <h2 id="returns">Returns</h2>
Pandas dataframe containing only data with location information
"""
logger_function(log, locals(), inspect.currentframe().f_code.co_name)
if verbose:
    verbose_print(remove_nonlocated, locals(), exclude_params=['df_with_locations'])

before = df_with_locations.shape[0] #Extract length of data before this process

