Skip to content

Commit

Permalink
changed preproc in pos samples to apply to individual rows instead of …
Browse files Browse the repository at this point in the history
…pairs
  • Loading branch information
sonalgoyal committed Jan 22, 2025
1 parent 749e69d commit b8c2a9f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ public void execute() throws ZinggClientException {


if (posPairs == null || posPairs.count() <= 5) {
ZFrame<D,R,C> posSamplesOriginal = getPositiveSamples(data);
ZFrame<D,R,C> posSamples = preprocess(posSamplesOriginal);
ZFrame<D,R,C> posSamples = getPositiveSamples(data);
//ZFrame<D,R,C> posSamples = preprocess(posSamplesOriginal);
//posSamples.printSchema();
if (posPairs != null) {
//posPairs.printSchema();
Expand Down Expand Up @@ -182,7 +182,7 @@ public ZFrame<D,R,C> getUncertain(ZFrame<D,R,C> dupes) {
return pos.union(neg);
}

public ZFrame<D,R,C> getPositiveSamples(ZFrame<D,R,C> data) throws Exception {
public ZFrame<D,R,C> getPositiveSamples(ZFrame<D,R,C> data) throws Exception, ZinggClientException {
if (LOG.isDebugEnabled()) {
long count = data.count();
LOG.debug("Total count is " + count);
Expand All @@ -195,6 +195,7 @@ public ZFrame<D,R,C> getPositiveSamples(ZFrame<D,R,C> data) throws Exception {
LOG.debug("Sampled " + posSample.count());
}
posSample = posSample.cache();
posSample = preprocess(posSample);
ZFrame<D,R,C> posPairs = getDSUtil().joinWithItself(posSample, ColName.ID_COL, false);

LOG.info("Created positive sample pairs ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@ default ZFrame<D,R,C> preprocess(ZFrame<D,R,C> df) throws ZinggClientException {
for(IPreprocType o: getPreprocOrder().getOrder()){
//creating new instance of the class
IPreprocessor<S,D,R,C,T> ip = getPreprocMap().get(o).getDeclaredConstructor().newInstance();
LOG.info("tryibng preproc " + ip);
LOG.info("trying preproc " + ip);
//setting context and field defn
ip.setContext(getContext());
ip.init();
ip.setFieldDefinition(def);
dfp = ip.preprocess(dfp);
LOG.info("after preproc ");
dfp.show();
}
}
}
Expand Down

0 comments on commit b8c2a9f

Please sign in to comment.