From bc7621841b67e76eb980dc3a2cb9f53c2dcbdaf5 Mon Sep 17 00:00:00 2001 From: Tim Bernhard Date: Fri, 29 Nov 2019 11:22:38 +0100 Subject: [PATCH] Refactor Job(Single|All)ImageFileScan to use same simplified base code Note that this might have led to a changed behaviour of "Check for Barcodes in Image File" and has even more potential to be simplified. --- .../mcz/imagecapture/CandidateImageFile.java | 6 +- .../imagecapture/PositionTemplateEditor.java | 15 +- .../mcz/imagecapture/entity/Specimen.java | 8 +- .../{struct => entity/fixed}/CountValue.java | 2 +- .../fixed}/GenusSpeciesCount.java | 2 +- .../fixed}/VerbatimCount.java | 2 +- .../jobs/AbstractFileScanJob.java | 567 ++++++++++++ .../mcz/imagecapture/jobs/Counter.java | 30 +- .../jobs/JobAllImageFilesScan.java | 831 +++--------------- .../mcz/imagecapture/jobs/JobRepeatOCR.java | 2 +- .../jobs/JobSingleBarcodeScan.java | 561 +----------- .../lifecycle/SpecimenLifeCycle.java | 6 +- .../loader/JobVerbatimFieldLoad.java | 8 +- .../mcz/imagecapture/ui/ButtonEditor.java | 4 +- .../ui/dialog/VerbatimClassifyDialog.java | 4 +- .../ui/dialog/VerbatimListDialog.java | 4 +- .../ui/dialog/VerbatimToTranscribeDialog.java | 4 +- .../tablemodel}/CountValueTableModel.java | 3 +- .../GenusSpeciesCountTableModel.java | 3 +- .../tablemodel}/VerbatimCountTableModel.java | 3 +- .../mcz/imagecapture/utility/FileUtility.java | 38 + 21 files changed, 811 insertions(+), 1292 deletions(-) rename src/main/java/edu/harvard/mcz/imagecapture/{struct => entity/fixed}/CountValue.java (97%) rename src/main/java/edu/harvard/mcz/imagecapture/{struct => entity/fixed}/GenusSpeciesCount.java (97%) rename src/main/java/edu/harvard/mcz/imagecapture/{struct => entity/fixed}/VerbatimCount.java (98%) create mode 100644 src/main/java/edu/harvard/mcz/imagecapture/jobs/AbstractFileScanJob.java rename src/main/java/edu/harvard/mcz/imagecapture/{struct => ui/tablemodel}/CountValueTableModel.java (96%) rename 
src/main/java/edu/harvard/mcz/imagecapture/{struct => ui/tablemodel}/GenusSpeciesCountTableModel.java (96%) rename src/main/java/edu/harvard/mcz/imagecapture/{struct => ui/tablemodel}/VerbatimCountTableModel.java (97%) create mode 100644 src/main/java/edu/harvard/mcz/imagecapture/utility/FileUtility.java diff --git a/src/main/java/edu/harvard/mcz/imagecapture/CandidateImageFile.java b/src/main/java/edu/harvard/mcz/imagecapture/CandidateImageFile.java index 6a8a5e0d..ec289e65 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/CandidateImageFile.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/CandidateImageFile.java @@ -131,7 +131,7 @@ public CandidateImageFile(File aFile) throws UnreadableFileException { if (!template.getTemplateId().equals(PositionTemplate.TEMPLATE_NO_COMPONENT_PARTS)) { try { if (getTaxonLabelQRText(template) == null) { - getLabelOCRText(template); + getTaxonLabelOCRText(template); } } catch (OCRReadException e) { log.error("Unable to OCR file: " + candidateFile.getName() + " " + e.getMessage()); @@ -801,7 +801,7 @@ public void setFile(File aFile, PositionTemplate aTemplate) throws UnreadableFil if (!aTemplate.getTemplateId().equals(PositionTemplate.TEMPLATE_NO_COMPONENT_PARTS)) { try { if (getTaxonLabelQRText(aTemplate) == null) { - getLabelOCRText(aTemplate); + getTaxonLabelOCRText(aTemplate); } } catch (OCRReadException e) { log.error("Unable to OCR file: " + candidateFile.getName() + " " + e.getMessage()); @@ -1014,7 +1014,7 @@ public UnitTrayLabel getTaxonLabelQRText(PositionTemplate positionTemplate) { * @return a string * @throws OCRReadException */ - public String getLabelOCRText(PositionTemplate aTemplate) throws OCRReadException { + public String getTaxonLabelOCRText(PositionTemplate aTemplate) throws OCRReadException { // Actual read attempt is only invoked once, // subsequent calls return cached value. 
log.debug("in CandidateImageFile.getLabelOCRText() 1 labelText is " + labelText); diff --git a/src/main/java/edu/harvard/mcz/imagecapture/PositionTemplateEditor.java b/src/main/java/edu/harvard/mcz/imagecapture/PositionTemplateEditor.java index 6ca4272c..816398f6 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/PositionTemplateEditor.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/PositionTemplateEditor.java @@ -28,12 +28,12 @@ import edu.harvard.mcz.imagecapture.ui.dialog.PositionTemplateBoxDialog; import edu.harvard.mcz.imagecapture.ui.frame.ImagePanelForDrawing; import edu.harvard.mcz.imagecapture.ui.tablemodel.PositionTemplateTableModel; +import edu.harvard.mcz.imagecapture.utility.FileUtility; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import javax.imageio.ImageIO; import javax.swing.*; -import javax.swing.filechooser.FileNameExtensionFilter; import java.awt.*; import java.awt.event.KeyEvent; import java.awt.image.BufferedImage; @@ -622,18 +622,11 @@ private JMenuItem getJMenuItemLoadImage() { jMenuItem1.addActionListener(new java.awt.event.ActionListener() { public void actionPerformed(java.awt.event.ActionEvent e) { jLabelFeedback.setText(""); - final JFileChooser fileChooser = new JFileChooser(); - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH) != null) { - fileChooser.setCurrentDirectory(new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH))); - } - //FileNameExtensionFilter filter = new FileNameExtensionFilter("TIFF Images", "tif", "tiff"); - FileNameExtensionFilter filter = new FileNameExtensionFilter("Image files", "tif", "tiff", "jpg", "jpeg", "png"); - fileChooser.setFileFilter(filter); - int returnValue = fileChooser.showOpenDialog(Singleton.getSingletonInstance().getMainFrame()); - if (returnValue == JFileChooser.APPROVE_OPTION) { + File imageFile = 
FileUtility.askForImageFile(new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH))); + if (imageFile != null) { jLabelFeedback.setText("Loading..."); try { - setImageFile(fileChooser.getSelectedFile()); + setImageFile(imageFile); jLabelFeedback.setText(""); drawLayers(); } catch (IOException e1) { diff --git a/src/main/java/edu/harvard/mcz/imagecapture/entity/Specimen.java b/src/main/java/edu/harvard/mcz/imagecapture/entity/Specimen.java index fb61e584..da0a7151 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/entity/Specimen.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/entity/Specimen.java @@ -1088,16 +1088,16 @@ public boolean isStateDone() { public String getLoadFlags() { String result = "Unexpected State"; - if (flagInBulkloader == false && flagInMCZbase == false && flagAncilaryAlsoInMCZbase == false) { + if (!flagInBulkloader && !flagInMCZbase && !flagAncilaryAlsoInMCZbase) { result = "In DataShot"; } - if (flagInBulkloader == true && flagInMCZbase == false && flagAncilaryAlsoInMCZbase == false) { + if (flagInBulkloader && !flagInMCZbase && !flagAncilaryAlsoInMCZbase) { result = "In Bulkloader"; } - if (flagInBulkloader == true && flagInMCZbase == true && flagAncilaryAlsoInMCZbase == false) { + if (flagInBulkloader && flagInMCZbase && !flagAncilaryAlsoInMCZbase) { result = "Adding Image and Ids."; } - if (flagInBulkloader == true && flagInMCZbase == true && flagAncilaryAlsoInMCZbase == true) { + if (flagInBulkloader && flagInMCZbase && flagAncilaryAlsoInMCZbase) { result = WorkFlowStatus.STAGE_DONE; } return result; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/struct/CountValue.java b/src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/CountValue.java similarity index 97% rename from src/main/java/edu/harvard/mcz/imagecapture/struct/CountValue.java rename to src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/CountValue.java index c0a22e54..8edfab42 100644 
--- a/src/main/java/edu/harvard/mcz/imagecapture/struct/CountValue.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/CountValue.java @@ -16,7 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. *

*/ -package edu.harvard.mcz.imagecapture.struct; +package edu.harvard.mcz.imagecapture.entity.fixed; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/struct/GenusSpeciesCount.java b/src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/GenusSpeciesCount.java similarity index 97% rename from src/main/java/edu/harvard/mcz/imagecapture/struct/GenusSpeciesCount.java rename to src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/GenusSpeciesCount.java index 1d796830..d684f726 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/struct/GenusSpeciesCount.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/GenusSpeciesCount.java @@ -16,7 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. *

*/ -package edu.harvard.mcz.imagecapture.struct; +package edu.harvard.mcz.imagecapture.entity.fixed; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/struct/VerbatimCount.java b/src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/VerbatimCount.java similarity index 98% rename from src/main/java/edu/harvard/mcz/imagecapture/struct/VerbatimCount.java rename to src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/VerbatimCount.java index 025d9160..8593da24 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/struct/VerbatimCount.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/entity/fixed/VerbatimCount.java @@ -16,7 +16,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. *

*/ -package edu.harvard.mcz.imagecapture.struct; +package edu.harvard.mcz.imagecapture.entity.fixed; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/jobs/AbstractFileScanJob.java b/src/main/java/edu/harvard/mcz/imagecapture/jobs/AbstractFileScanJob.java new file mode 100644 index 00000000..dae5fa8f --- /dev/null +++ b/src/main/java/edu/harvard/mcz/imagecapture/jobs/AbstractFileScanJob.java @@ -0,0 +1,567 @@ +package edu.harvard.mcz.imagecapture.jobs; + +import edu.harvard.mcz.imagecapture.*; +import edu.harvard.mcz.imagecapture.data.MetadataRetriever; +import edu.harvard.mcz.imagecapture.entity.ICImage; +import edu.harvard.mcz.imagecapture.entity.Specimen; +import edu.harvard.mcz.imagecapture.entity.UnitTrayLabel; +import edu.harvard.mcz.imagecapture.entity.fixed.WorkFlowStatus; +import edu.harvard.mcz.imagecapture.exceptions.OCRReadException; +import edu.harvard.mcz.imagecapture.exceptions.SaveFailedException; +import edu.harvard.mcz.imagecapture.exceptions.SpecimenExistsException; +import edu.harvard.mcz.imagecapture.exceptions.UnreadableFileException; +import edu.harvard.mcz.imagecapture.interfaces.*; +import edu.harvard.mcz.imagecapture.lifecycle.HigherTaxonLifeCycle; +import edu.harvard.mcz.imagecapture.lifecycle.ICImageLifeCycle; +import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; + +abstract public class AbstractFileScanJob implements RunnableJob, Runnable { + + + private static final Log log = LogFactory.getLog(AbstractFileScanJob.class); + protected int percentComplete = 0; + protected 
ArrayList<RunnerListener> listeners = new ArrayList<>(); + + public AbstractFileScanJob() { + } + + /** + * Log errors associated with confusion between exifComment & detected barcode + * + * @param counter to apply the error to + * @param filename the errored file + * @param barcode the barcode as detected + * @param exifComment the comment, exif as read from file + * @param parser the parser used + * @param barcodeInImageMetadata whether the barcode was extracted from metadata + * @param log the log to write the mismatch to + */ + static void logMismatch(Counter counter, String filename, String barcode, String exifComment, TaxonNameReturner parser, boolean barcodeInImageMetadata, Log log) { + if (barcodeInImageMetadata || Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { + // If so configured, or if image metadata contains a barcode that doesn't match the barcode in the image + // report on barcode/comment mismatch as an error condition. + try { + RunnableJobError error = new RunnableJobError(filename, barcode, + barcode, exifComment, "Barcode/Comment mismatch.", + parser, (DrawerNameReturner) parser, + null, RunnableJobError.TYPE_MISMATCH); + counter.appendError(error); + } catch (Exception e) { + // we don't want an exception to stop processing + log.error(e); + } + } else { + // Just write into debug log + // This would normally be the case where the image metadata doesn't contain a barcode but the image does, and reporting of this state as an error has been turned off. 
+ log.debug("Barcode/Comment mismatch: [" + barcode + "]!=[" + exifComment + "]"); + } + } + + /** + * Do actual processing of one file + * + * @param containedFile the file to process + * @param counter the counter to keep track of sucess/failure + */ + protected void checkFile(File containedFile, Counter counter) { + Singleton.getSingletonInstance().getProperties().getProperties().setProperty(ImageCaptureProperties.KEY_LASTPATH, containedFile.getPath()); + String filename = containedFile.getName(); + counter.incrementFilesSeen(); + log.debug("Checking image file: " + filename); + CandidateImageFile.debugCheckHeightWidth(containedFile); + // scan file for barcodes and ocr of unit tray label text + CandidateImageFile scannableFile = null; + try { + // PositionTemplateDetector detector = new ConfiguredBarcodePositionTemplateDetector(); + boolean isSpecimenImage = false; + boolean isDrawerImage = false; + boolean reattach = false; // image is detached instance and should be reattached instead of persisted denovo. + // try { + // Check for an existing image record. + ICImageLifeCycle imageLifeCycle = new ICImageLifeCycle(); + ICImage existingTemplate = new ICImage(); + existingTemplate.setFilename(filename); + String path = ImageCaptureProperties.getPathBelowBase(containedFile); + existingTemplate.setPath(path); + List matches = imageLifeCycle.findBy(new HashMap() {{ + put("path", path); + put("filename", filename); + }}); + log.debug(matches != null ? matches.size() : "no matches found"); + if (matches != null && matches.size() == 1 + && matches.get(0).getRawBarcode() == null + && matches.get(0).getRawExifBarcode() == null + && (matches.get(0).getDrawerNumber() == null || matches.get(0).getDrawerNumber().trim().length() == 0) + ) { + // likely case for a failure to read data out of the image file + // try to update the image file record. 
+ try { + existingTemplate = imageLifeCycle.merge(matches.get(0)); + matches.remove(0); + reattach = true; + log.debug(existingTemplate); + } catch (SaveFailedException e) { + log.error(e.getMessage(), e); + } + } else if (matches != null && matches.size() == 1 && matches.get(0).getSpecimen() == null) { + // likely case for a failure to create a specimen record in a previous run + // try to update the image file record + try { + existingTemplate = imageLifeCycle.merge(matches.get(0)); + matches.remove(0); + reattach = true; + log.debug(existingTemplate); + } catch (SaveFailedException e) { + log.error(e.getMessage(), e); + } + } + if (matches == null || matches.size() == 0) { + createDatabaseRecordForFile(containedFile, counter, reattach, imageLifeCycle, existingTemplate); + } else { + // found an already databased file (where we have barcode/specimen or drawer number data). + log.debug("Record exists, skipping file " + filename); + counter.incrementFilesExisting(); + } + // } catch (NoSuchTemplateException e) { + // log.error("Detected Template for image doesn't exist. " + e.getMessage()); + //} + + + } catch (UnreadableFileException e) { + counter.incrementFilesFailed(); + counter.appendError(new RunnableJobError(containedFile.getName(), "", "Could not read file", new UnreadableFileException(), RunnableJobError.TYPE_FILE_READ)); + log.error("Couldn't read file." + e.getMessage()); + //} catch (OCRReadException e) { + // counter.incrementFilesFailed(); + // log.error("Couldn't OCR file." 
+ e.getMessage()); + } + } + + /** + * Create a new image database record + * + * @param containedFile the file path relative to the start + * @param counter to count errors + * @param reattach whether there is already a database record existing, to be overridden + * @param imageLifeCycle the repository to sage to + * @param image providing access to path & filename + * @throws UnreadableFileException + */ + protected void createDatabaseRecordForFile(File containedFile, Counter counter, boolean reattach, ICImageLifeCycle imageLifeCycle, ICImage image) throws UnreadableFileException { + boolean isSpecimenImage = false, isDrawerImage = false; + BarcodeMatcher matcher = Singleton.getSingletonInstance().getBarcodeMatcher(); + // ** Identify the template. + // String templateId = detector.detectTemplateForImage(fileToCheck); + // log.debug("Detected Template: " + templateId); + // PositionTemplate template = new PositionTemplate(templateId); + // // Found a barcode in a templated position in the image. + // // ** Scan the file based on this template. 
+ // scannableFile = new CandidateImageFile(fileToCheck, template); + + // Construct a CandidateImageFile with constructor that self detects template + CandidateImageFile scannableFile = new CandidateImageFile(containedFile); + PositionTemplate template = scannableFile.getTemplateUsed(); + String templateId = template.getName(); + log.debug("Detected Template: " + templateId); + log.debug(scannableFile.getCatalogNumberBarcodeStatus()); + String barcode = scannableFile.getBarcodeTextAtFoundTemplate(); + if (scannableFile.getCatalogNumberBarcodeStatus() != CandidateImageFile.RESULT_BARCODE_SCANNED) { + log.error("Error scanning for barcode: " + barcode); + barcode = ""; + } + log.debug("Barcode: " + barcode); + String exifComment = scannableFile.getExifUserCommentText(); + log.debug("ExifComment: " + exifComment); + UnitTrayLabel labelRead = scannableFile.getTaxonLabelQRText(template); + TaxonNameReturner parser; + String rawOCR; + String state; + + if (labelRead != null) { + rawOCR = labelRead.toJSONString(); + log.debug("UnitTrayLabel: " + rawOCR); + state = WorkFlowStatus.STAGE_1; + parser = labelRead; + } else { + try { + rawOCR = scannableFile.getTaxonLabelOCRText(template); + } catch (OCRReadException e) { + log.error(e); + rawOCR = ""; + log.error("Couldn't OCR file." 
+ e.getMessage()); + RunnableJobError error = new RunnableJobError(image.getFilename(), "OCR Failed", + barcode, exifComment, "Couldn't find text to OCR", + null, null, + e, RunnableJobError.TYPE_NO_TEMPLATE); + counter.appendError(error); + } + if (rawOCR == null) { + rawOCR = ""; + } + state = WorkFlowStatus.STAGE_0; + parser = new UnitTrayLabelParser(rawOCR); + // Provide error message to distinguish between entirely OCR or + if (((UnitTrayLabelParser) parser).isParsedFromJSON()) { + RunnableJobError error = new RunnableJobError(image.getFilename(), "OCR Failover found barcode.", + barcode, exifComment, "Couldn't read Taxon barcode, failed over to OCR, but OCR found taxon barcode.", + parser, null, + null, RunnableJobError.TYPE_FAILOVER_TO_OCR); + counter.appendError(error); + } else { + RunnableJobError error = new RunnableJobError(image.getFilename(), "TaxonLabel read failed.", + barcode, exifComment, "Couldn't read Taxon barcode, failed over to OCR only.", + parser, null, + null, RunnableJobError.TYPE_FAILOVER_TO_OCR); + counter.appendError(error); + } + } + + // Test: is exifComment a barcode: + + // Case 1: This is an image of papers associated with a container (a unit tray or a box). + // This case can be identified by there being no barcode data associated with the image. + // Action: + // A) Check the exifComment to see what metadata is there, if blank, User needs to fix. + // exifComment may contain a drawer number, identifying this as a drawer image. Save as such. + // Options: A drawer, for which number is captured. A unit tray, capture ?????????. A specimen + // where barcode wasn't read, allow capture of barcode and treat as Case 2. + // B) Create an image record and store the image metadata (with a null specimen_id). + + // Case 2: This is an image of a specimen and associated labels or an image assocated with + // a specimen with the specimen's barcode label in the image. 
+ // This case can be identified by there being a barcode in a templated position or there + // being a barcode in the exif comment tag. + // Action: + // A) Check if a specimen record exists, if not, create one from the barcode and OCR data. + // B) Create an image record and store the image metadata. + + if (matcher.matchesPattern(exifComment) + || matcher.matchesPattern(barcode)) { + isSpecimenImage = true; + System.out.println("Specimen Image"); + } else { + if (exifComment != null && exifComment.matches(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REGEX_DRAWERNUMBER))) { + isDrawerImage = true; + System.out.println("Drawer Image"); + } else { + if (templateId.equals(PositionTemplate.TEMPLATE_NO_COMPONENT_PARTS)) { + log.debug("Image doesn't appear to contain a barcode in a templated position."); + RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + barcode, exifComment, "Image doesn't appear to contain a barcode in a templated position.", + null, null, + null, RunnableJobError.TYPE_NO_TEMPLATE); + counter.appendError(error); + counter.incrementFilesFailed(); + } else { + // Nothing found. Need to ask. 
+ RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + barcode, exifComment, "Image doesn't appear to contain a barcode in a templated position.", + null, null, + null, RunnableJobError.TYPE_UNKNOWN); + counter.appendError(error); + counter.incrementFilesFailed(); + } + } + } + + image.setRawBarcode(barcode); + if (isSpecimenImage) { + createDatabaseRecordForSpecimen(containedFile, counter, image, barcode, exifComment, parser, labelRead, state); + } + if (isDrawerImage) { + image.setDrawerNumber(exifComment); + } else { + image.setRawExifBarcode(exifComment); + image.setDrawerNumber(((DrawerNameReturner) parser).getDrawerNumber()); + } + image.setRawOcr(rawOCR); + image.setTemplateId(template.getTemplateId()); + image.setPath(image.getPath()); + // Create md5hash of image file, persist with image + if (image.getMd5sum() == null || image.getMd5sum().length() == 0) { + try { + image.setMd5sum(DigestUtils.md5Hex(new FileInputStream(containedFile))); + } catch (FileNotFoundException e) { + log.error(e.getMessage()); + } catch (IOException e) { + log.error(e.getMessage()); + } + } + try { + if (reattach) { + // Update image file record + imageLifeCycle.attachDirty(image); + log.debug("Updated " + image.toString()); + counter.incrementFilesUpdated(); + } else { + // *** Save a database record of the image file. + imageLifeCycle.persist(image); + log.debug("Saved " + image.toString()); + counter.incrementFilesDatabased(); + } + } catch (SaveFailedException e) { + log.error(e.getMessage(), e); + counter.incrementFilesFailed(); + String failureMessage = "Failed to save image record. 
" + e.getMessage(); + RunnableJobError error = new RunnableJobError(image.getFilename(), "Save Failed", + image.getFilename(), image.getPath(), failureMessage, + null, null, + null, RunnableJobError.TYPE_SAVE_FAILED); + counter.appendError(error); + } + if (isSpecimenImage) { + if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { + // If so configured, log as error + if (!image.getRawBarcode().equals(image.getRawExifBarcode())) { + log.error("Warning: Scanned Image has missmatch between barcode and comment."); + } + } + } + } + + /** + * Create or update an existing Specimen record to associate it with the ICImage + * + * @param containedFile the file of the image + * @param counter counter to add errors + * @param image the image to associate with the specimen + * @param barcode the barcode associated with the specimen + * @param exifComment the exif metadata comment to enable additional data extraction + * @param unitTrayLabel the unit tray label of the Specimen + * @param state the workflow state we are in with the specimen/image + */ + protected void createDatabaseRecordForSpecimen(File containedFile, Counter counter, ICImage image, String barcode, final String exifComment, TaxonNameReturner parser, UnitTrayLabel unitTrayLabel, String state) { + BarcodeMatcher matcher = Singleton.getSingletonInstance().getBarcodeMatcher(); + final String rawBarcode = barcode; + if (!rawBarcode.equals(exifComment)) { + // Log the missmatch + logMismatch(counter, image.getFilename(), barcode, exifComment, parser, matcher.matchesPattern(exifComment), log); + } + + Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Creating new specimen record."); + Specimen s = new Specimen(); + if ((!matcher.matchesPattern(barcode)) + && matcher.matchesPattern(exifComment)) { + // special case: couldn't read QR code barcode from image, but it was present in exif comment. 
+ s.setBarcode(exifComment); + barcode = exifComment; + } else { + if (!matcher.matchesPattern(barcode)) { + // Won't be able to save the specimen record if we end up here. + log.error("Neither exifComment nor QR Code barcode match the expected pattern for a barcode, but isSpecimenImage got set to true."); + } + s.setBarcode(barcode); + } + + // check if there already exists a specimen to add the image to + SpecimenLifeCycle specimenLifeCycle = new SpecimenLifeCycle(); + List<Specimen> existingSpecimens = specimenLifeCycle.findByBarcode(s.getBarcode()); + if (existingSpecimens != null && existingSpecimens.size() > 0) { + counter.incrementSpecimenExisting(); + assert (existingSpecimens.size() == 1); + for (Specimen specimen : existingSpecimens) { + image.setSpecimen(specimen); + specimen.getICImages().add(image); + try { + specimenLifeCycle.attachDirty(specimen); + } catch (SaveFailedException e) { + counter.appendError(new RunnableJobError(containedFile.getName(), barcode, specimen.getSpecimenId().toString(), "Failed adding image to existing Specimen", e, RunnableJobError.TYPE_SAVE_FAILED)); + } + } + return; + } + + s.setWorkFlowStatus(state); + + if (unitTrayLabel != null) { + // We got json data from a barcode. + s.setFamily(parser.getFamily()); + s.setSubfamily(parser.getSubfamily()); + s.setTribe(parser.getTribe()); + } else { + // We failed over to OCR, try lookup in DB. + s.setFamily(""); // make sure there's a non-null value in family. + if (parser.getTribe().trim().equals("")) { + HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); + if (hls.isMatched(parser.getFamily(), parser.getSubfamily())) { + // If there is a match, use it. + String[] higher = hls.findMatch(parser.getFamily(), parser.getSubfamily()); + s.setFamily(higher[0]); + s.setSubfamily(higher[1]); + } else { + // otherwise use the raw OCR output. 
+ s.setFamily(parser.getFamily()); + s.setSubfamily(parser.getSubfamily()); + } + s.setTribe(""); + } else { + HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); + if (hls.isMatched(parser.getFamily(), parser.getSubfamily(), parser.getTribe())) { + String[] higher = hls.findMatch(parser.getFamily(), parser.getSubfamily(), parser.getTribe()); + s.setFamily(higher[0]); + s.setSubfamily(higher[1]); + s.setTribe(higher[2]); + } else { + s.setFamily(parser.getFamily()); + s.setSubfamily(parser.getSubfamily()); + s.setTribe(parser.getTribe()); + } + } + } + if (state.equals(WorkFlowStatus.STAGE_0)) { + // Look up likely matches for the OCR of the higher taxa in the HigherTaxon authority file. + if (!parser.getFamily().equals("")) { + // check family against database (with a soundex match) + HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); + String match = hls.findMatch(parser.getFamily()); + if (match != null && !match.trim().equals("")) { + s.setFamily(match); + } + } + } + // trim family to fit (in case multiple parts of taxon name weren't parsed + // and got concatenated into family field. + JobSingleBarcodeScan.setBasicSpecimenFromParser(parser, s); + s.setCreatingPath(ImageCaptureProperties.getPathBelowBase(containedFile)); + s.setCreatingFilename(containedFile.getName()); + if (parser.getIdentifiedBy() != null && parser.getIdentifiedBy().length() > 0) { + s.setIdentifiedBy(parser.getIdentifiedBy()); + } + log.debug(s.getCollection()); + + s.setCreatedBy(ImageCaptureApp.APP_NAME + " " + ImageCaptureApp.getAppVersion()); + s.setDateCreated(new Date()); + try { + // *** Save a database record of the specimen. + specimenLifeCycle.persist(s); + counter.incrementSpecimenDatabased(); + s.attachNewPart(); + } catch (SpecimenExistsException e) { + log.debug(e.getMessage()); + // Expected case on scanning a second image for a specimen. + // Doesn't need to be reported as a parsing error. + // + // Look up the existing record to link this specimen to it. 
+ try { + List checkResult = specimenLifeCycle.findByBarcode(barcode); + if (checkResult.size() == 1) { + s = checkResult.get(0); + } + } catch (Exception e2) { + s = null; // so that saving the image record doesn't fail on trying to save linked transient specimen record. + String errorMessage = "Linking Error: \nFailed to link image to existing specimen record.\n"; + RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + rawBarcode, exifComment, errorMessage, + parser, (DrawerNameReturner) parser, + e2, RunnableJobError.TYPE_SAVE_FAILED); + counter.appendError(error); + } + } catch (SaveFailedException e) { + // Couldn't save for some reason other than the + // specimen record already existing. Check for possible + // save problems resulting from parsing errors. + log.error(e); + try { + List checkResult = specimenLifeCycle.findByBarcode(barcode); + if (checkResult.size() == 1) { + s = checkResult.get(0); + } + // Drawer number with length limit (and specimen that fails to save at over this length makes + // a good canary for labels that parse very badly. + if (((DrawerNameReturner) parser).getDrawerNumber().length() > MetadataRetriever.getFieldLength(Specimen.class, "DrawerNumber")) { + String badParse = ""; + badParse = "Parsing problem. 
\nDrawer number is too long: " + s.getDrawerNumber() + "\n"; + RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + rawBarcode, exifComment, badParse, + parser, (DrawerNameReturner) parser, + e, RunnableJobError.TYPE_BAD_PARSE); + counter.appendError(error); + } else { + RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + rawBarcode, exifComment, e.getMessage(), + parser, (DrawerNameReturner) parser, + e, RunnableJobError.TYPE_SAVE_FAILED); + counter.appendError(error); + + } + } catch (Exception err) { + log.error(err); + + String badParse = ""; + // Drawer number with length limit (and specimen that fails to save at over this length makes + // a good canary for labels that parse very badly. + if (s.getDrawerNumber() == null) { + badParse = "Parsing problem. \nDrawer number is null: \n"; + } else { + if (s.getDrawerNumber().length() > MetadataRetriever.getFieldLength(Specimen.class, "DrawerNumber")) { + // This was an OK test for testing OCR, but in production ends up in records not being + // created for files, which ends up being a larger quality control problem than records + // with bad OCR. + + // Won't fail this way anymore - drawer number is now enforced in Specimen.setDrawerNumber() + badParse = "Parsing problem. 
\nDrawer number is too long: " + s.getDrawerNumber() + "\n"; + } + } + RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + rawBarcode, exifComment, badParse, + parser, (DrawerNameReturner) parser, + err, RunnableJobError.TYPE_SAVE_FAILED); + counter.appendError(error); + counter.incrementFilesFailed(); + s = null; + } + } catch (Exception ex) { + log.error(ex); + RunnableJobError error = new RunnableJobError(image.getFilename(), barcode, + rawBarcode, exifComment, ex.getMessage(), + parser, (DrawerNameReturner) parser, + ex, RunnableJobError.TYPE_SAVE_FAILED); + counter.appendError(error); + } + if (s != null) { + image.setSpecimen(s); + } + } + + /** + * Set the completeness percentage in main frame & notify listeners + * + * @param aPercentage + */ + protected void setPercentComplete(final int aPercentage) { + //set value + percentComplete = aPercentage; + //notify listeners + Singleton.getSingletonInstance().getMainFrame().notifyListener(percentComplete, this); + for (RunnerListener listener : listeners) { + listener.notifyListener(percentComplete, this); + } + } + + /** + * @see java.lang.Runnable#run() + */ + @Override + public void run() { + start(); + } + + /** + * Cleanup when job is complete. 
+ */ + protected void done() { + Singleton.getSingletonInstance().getJobList().removeJob(this); + } +} diff --git a/src/main/java/edu/harvard/mcz/imagecapture/jobs/Counter.java b/src/main/java/edu/harvard/mcz/imagecapture/jobs/Counter.java index 7282a86c..84ca02ba 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/jobs/Counter.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/jobs/Counter.java @@ -31,10 +31,11 @@ public class Counter { private int filesDatabased = 0; private int filesExisting = 0; private int filesFailed = 0; - private int specimensDatabased = 0; private int directories = 0; private int directoriesFailed = 0; + private int specimensExisting = 0; private int specimensUpdated = 0; + private int specimensDatabased = 0; private int filesUpdated = 0; private StringBuffer errorReport; private List errors = null; @@ -65,10 +66,14 @@ public String getErrorReport() { return errorReport.toString(); } - public void incrementSpecimens() { + public void incrementSpecimenDatabased() { specimensDatabased++; } + public void incrementSpecimenExisting() { + specimensExisting++; + } + public void incrementTotal() { totalCount++; } @@ -160,7 +165,7 @@ public int getSpecimensUpdated() { } /** - * @param specimensUpdated the specimensUpdated to set + * */ public void incrementSpecimensUpdated() { this.specimensUpdated++; @@ -174,7 +179,7 @@ public int getFilesUpdated() { } /** - * @param filesUpdated the filesUpdated to set + * */ public void incrementFilesUpdated() { this.filesUpdated++; @@ -184,4 +189,21 @@ public List getErrors() { return errors; } + @Override + public String toString() { + String report = "Scanned " + this.getDirectories() + " directories.\n"; + report += "Scanned " + this.getFilesSeen() + " files.\n"; + report += "Created " + this.getFilesDatabased() + " new image records.\n"; + if (this.getFilesUpdated() > 0) { + report += "Updated " + this.getFilesUpdated() + " image records.\n"; + + } + report += "Created " + this.getSpecimens() + " new 
specimen records.\n"; + if (this.getSpecimensUpdated() > 0) { + report += "Updated " + this.getSpecimensUpdated() + " specimen records.\n"; + + } + report += "Found " + this.getFilesFailed() + " files with problems.\n"; + return report; + } } diff --git a/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobAllImageFilesScan.java b/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobAllImageFilesScan.java index 1d79ec09..9e846650 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobAllImageFilesScan.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobAllImageFilesScan.java @@ -18,20 +18,15 @@ */ package edu.harvard.mcz.imagecapture.jobs; -import edu.harvard.mcz.imagecapture.*; -import edu.harvard.mcz.imagecapture.data.LocationInCollection; -import edu.harvard.mcz.imagecapture.data.MetadataRetriever; -import edu.harvard.mcz.imagecapture.entity.ICImage; -import edu.harvard.mcz.imagecapture.entity.Specimen; -import edu.harvard.mcz.imagecapture.entity.UnitTrayLabel; -import edu.harvard.mcz.imagecapture.entity.fixed.WorkFlowStatus; -import edu.harvard.mcz.imagecapture.exceptions.*; -import edu.harvard.mcz.imagecapture.interfaces.*; -import edu.harvard.mcz.imagecapture.lifecycle.HigherTaxonLifeCycle; -import edu.harvard.mcz.imagecapture.lifecycle.ICImageLifeCycle; +import edu.harvard.mcz.imagecapture.ImageCaptureProperties; +import edu.harvard.mcz.imagecapture.Singleton; +import edu.harvard.mcz.imagecapture.exceptions.UnreadableFileException; +import edu.harvard.mcz.imagecapture.interfaces.RunStatus; +import edu.harvard.mcz.imagecapture.interfaces.RunnableJob; +import edu.harvard.mcz.imagecapture.interfaces.RunnerListener; import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; import edu.harvard.mcz.imagecapture.ui.dialog.RunnableJobReportDialog; -import org.apache.commons.codec.digest.DigestUtils; +import edu.harvard.mcz.imagecapture.utility.FileUtility; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ 
-48,7 +43,7 @@ * and add records for files that aren't yet known to the database that contain barcode * information and add corresponding specimen records for new specimens. */ -public class JobAllImageFilesScan implements RunnableJob, Runnable { +public class JobAllImageFilesScan extends AbstractFileScanJob { /** * Scan all of image base directory tree. @@ -66,20 +61,16 @@ public class JobAllImageFilesScan implements RunnableJob, Runnable { private int scan = SCAN_ALL; // default scan all private File startPointSpecific = null; // place to start for scan_specific private File startPoint = null; // start point used. - private String firstFile = null; // for scan_specific, the first file seen - private String lastFile = null; // for scan_specific, the last file seen private int runStatus = RunStatus.STATUS_NEW; private int thumbnailCounter = 0; - private int percentComplete = 0; private Date startTime = null; - private ArrayList listeners = null; - /** * Default constructor, creates a job to scan all of imagebase, unless imagebase is * unreadable or undefined, in which case a directory chooser dialog is launched. */ public JobAllImageFilesScan() { + super(); scan = SCAN_ALL; startPointSpecific = null; runStatus = RunStatus.STATUS_NEW; @@ -100,6 +91,7 @@ public JobAllImageFilesScan() { * @param startAt null or a directory starting point. 
*/ public JobAllImageFilesScan(int whatToScan, File startAt) { + super(); scan = SCAN_SELECT; // store startPoint as base for dialog if SCAN_SELECT, or directory to scan if SCAN_SPECIFIC if (startAt != null && startAt.canRead()) { @@ -109,8 +101,7 @@ public JobAllImageFilesScan(int whatToScan, File startAt) { // equivalent to default constructor scan = SCAN_ALL; startPointSpecific = null; - } - if (whatToScan == SCAN_SPECIFIC) { + } else if (whatToScan == SCAN_SPECIFIC) { if ((startAt != null) && startAt.canRead()) { scan = SCAN_SPECIFIC; } else { @@ -121,33 +112,12 @@ public JobAllImageFilesScan(int whatToScan, File startAt) { init(); } - static void logMismatch(Counter counter, String filename, String barcode, String exifComment, TaxonNameReturner parser, boolean barcodeInImageMetadata, Log log) { - if (barcodeInImageMetadata || Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { - // If so configured, or if image metadata contains a barcode that doesn't match the barcode in the image - // report on barcode/comment missmatch as an error condition. - try { - RunnableJobError error = new RunnableJobError(filename, barcode, - barcode, exifComment, "Barcode/Comment mismatch.", - parser, (DrawerNameReturner) parser, - null, RunnableJobError.TYPE_MISMATCH); - counter.appendError(error); - } catch (Exception e) { - // we don't want an exception to stop processing - log.error(e); - } - } else { - // Just write into debug log - // This would normally the case where the image metadata doesn't contain a barcode but the image does, and reporting of this state as an error has been turned off. 
- log.debug("Barcode/Comment mismatch: [" + barcode + "]!=[" + exifComment + "]"); - } - } - protected void init() { listeners = new ArrayList(); } - /* (non-Javadoc) - * @see edu.harvard.mcz.imagecapture.Runnable#cancel() + /** + * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#cancel() */ @Override public boolean cancel() { @@ -155,16 +125,16 @@ public boolean cancel() { return false; } - /* (non-Javadoc) - * @see edu.harvard.mcz.imagecapture.Runnable#getStatus() + /** + * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#getStatus() */ @Override public int getStatus() { return runStatus; } - /* (non-Javadoc) - * @see edu.harvard.mcz.imagecapture.Runnable#registerListener(edu.harvard.mcz.imagecapture.RunnerListener) + /** + * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#registerListener */ @Override public boolean registerListener(RunnerListener jobListener) { @@ -174,130 +144,96 @@ public boolean registerListener(RunnerListener jobListener) { return listeners.add(jobListener); } - /* (non-Javadoc) - * @see edu.harvard.mcz.imagecapture.Runnable#start() + /** + * Start this job by setting up the directories, making sure all are readable and start pointers are set correctly + * + * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#start() */ @Override public void start() { startTime = new Date(); Singleton.getSingletonInstance().getJobList().addJob(this); runStatus = RunStatus.STATUS_RUNNING; - File imagebase = null; // place to start the scan from, imagebase directory for SCAN_ALL startPoint = null; + // place to start the scan from, imageBase directory for SCAN_ALL + String imageBasePath = Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEBASE); + File imageBase = null; + if (imageBasePath != null) { + imageBase = new File(imageBasePath); + } + // If it isn't null, retrieve the image base directory from properties, and test for read access. 
- if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEBASE) == null) { + if (imageBase == null || !imageBase.canRead()) { JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), "Can't start scan. Don't know where images are stored. Set imagbase property.", "Can't Scan.", JOptionPane.ERROR_MESSAGE); - } else { - imagebase = new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEBASE)); - if (imagebase != null) { - if (imagebase.canRead()) { - startPoint = imagebase; - } else { - // If it can't be read, null out imagebase - imagebase = null; + return; + } + startPoint = imageBase; + + // setup directory scan in case of SCAN_SPECIFIC + if (scan == SCAN_SPECIFIC && startPointSpecific != null && startPointSpecific.canRead()) { + // A scan start point has been provided, don't launch a dialog. + startPoint = startPointSpecific; + } + // ask for file directory to scan in case of SCAN_SELECT + if (scan == SCAN_SELECT) { + // launch a file chooser dialog to select the directory to scan + File searchStartPoint = null; + if (startPointSpecific != null && startPointSpecific.canRead()) { + searchStartPoint = startPointSpecific; + } else { + if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH) != null) { + searchStartPoint = new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH)); } } - if (scan == SCAN_SPECIFIC && startPointSpecific != null && startPointSpecific.canRead()) { - // A scan start point has been provided, don't launch a dialog. 
- startPoint = startPointSpecific; + startPoint = FileUtility.askForDirectory(searchStartPoint); + if (startPoint == null) { + JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), "Can't scan without a directory.", "Error: No scanning possible.", JOptionPane.ERROR_MESSAGE); + return; } - if (imagebase == null || scan == SCAN_SELECT) { - // launch a file chooser dialog to select the directory to scan - final JFileChooser fileChooser = new JFileChooser(); - fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); - if (scan == SCAN_SELECT && startPointSpecific != null && startPointSpecific.canRead()) { - fileChooser.setCurrentDirectory(startPointSpecific); - } else { - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH) != null) { - fileChooser.setCurrentDirectory(new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH))); - } - } - int returnValue = fileChooser.showOpenDialog(Singleton.getSingletonInstance().getMainFrame()); - if (returnValue == JFileChooser.APPROVE_OPTION) { - File file = fileChooser.getSelectedFile(); - log.debug("Selected base directory: '" + file.getName() + "'."); - startPoint = file; - } else { - //TODO: handle error condition - log.error("Directory selection cancelled by user."); - } - //TODO: Filechooser to pick path, then save (if SCAN_ALL) imagebase property. - //Perhaps. Might be undesirable behavior. - //Probably better to warn that imagebase is null; - } - - // TODO: Check that startPoint is or is within imagebase. - // Check that fileToCheck is within imagebase. 
- if (!ImageCaptureProperties.isInPathBelowBase(startPoint)) { - String base = Singleton.getSingletonInstance().getProperties().getProperties().getProperty( - ImageCaptureProperties.KEY_IMAGEBASE); - log.error("Tried to scan directory (" + startPoint.getPath() + ") outside of base image directory (" + base + ")"); - String message = "Can't scan and database files outside of base image directory (" + base + ")"; - // TODO: handle YES/NO ?!? - JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), message, "Can't scan outside image base directory.", JOptionPane.YES_NO_OPTION); - } else { - - // run in separate thread and allow cancellation and status reporting - - // walk through directory tree + } - if (!startPoint.canRead()) { - JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), "Can't start scan. Unable to read selected directory: " + startPoint.getPath(), "Can't Scan.", JOptionPane.YES_NO_OPTION); - } else { - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanning path: " + startPoint.getPath()); - Counter counter = new Counter(); - // count files to scan - countFiles(startPoint, counter); - setPercentComplete(0); - Singleton.getSingletonInstance().getMainFrame().notifyListener(runStatus, this); - counter.incrementDirectories(); - // scan - if (runStatus != RunStatus.STATUS_TERMINATED) { - checkFiles(startPoint, counter); - } - // report - String report = "Scanned " + counter.getDirectories() + " directories.\n"; - report += "Created thumbnails in " + thumbnailCounter + " directories"; - if (thumbnailCounter == 0) { - report += " (May still be in progress)"; - } - report += ".\n"; - if (startPointSpecific == null) { - report += "Starting with the base image directory (Preprocess All).\n"; - } else { - report += "Starting with " + startPoint.getName() + " (" + startPoint.getPath() + ")\n"; - report += "First file: " + firstFile + " Last File: " + lastFile + "\n"; - } - report += "Scanned " + 
counter.getFilesSeen() + " files.\n"; - report += "Created " + counter.getFilesDatabased() + " new image records.\n"; - if (counter.getFilesUpdated() > 0) { - report += "Updated " + counter.getFilesUpdated() + " image records.\n"; + // Check that fileToCheck is within imageBase. + if (!ImageCaptureProperties.isInPathBelowBase(startPoint)) { + String base = Singleton.getSingletonInstance().getProperties().getProperties().getProperty( + ImageCaptureProperties.KEY_IMAGEBASE); + log.error("Tried to scan directory (" + startPoint.getPath() + ") outside of base image directory (" + base + ")"); + String message = "Should and will not scan and database files outside of base image directory (" + base + ")"; + JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), message, "Won't scan outside image base directory.", JOptionPane.ERROR_MESSAGE); + return; + } else { + if (!startPoint.canRead()) { + JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), "Can't start scan. 
Unable to read selected directory: " + startPoint.getPath(), "Can't Scan.", JOptionPane.ERROR_MESSAGE); + return; + } + // walk through directory tree + Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanning path: " + startPoint.getPath()); + Counter counter = new Counter(); + // count files to scan + countFiles(startPoint, counter); + setPercentComplete(0); + Singleton.getSingletonInstance().getMainFrame().notifyListener(runStatus, this); + counter.incrementDirectories(); + // scan + if (runStatus != RunStatus.STATUS_TERMINATED) { + checkFiles(startPoint, counter); + } + // report - } - report += "Created " + counter.getSpecimens() + " new specimen records.\n"; - if (counter.getSpecimensUpdated() > 0) { - report += "Updated " + counter.getSpecimensUpdated() + " specimen records.\n"; + Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Preprocess scan complete"); + setPercentComplete(100); + Singleton.getSingletonInstance().getMainFrame().notifyListener(runStatus, this); + RunnableJobReportDialog errorReportDialog = new RunnableJobReportDialog(Singleton.getSingletonInstance().getMainFrame(), counter.toString(), counter.getErrors(), "Preprocess Results"); + errorReportDialog.setVisible(true); + } - } - report += "Found " + counter.getFilesFailed() + " files with problems.\n"; - //report += counter.getErrors(); - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Preprocess scan complete"); - setPercentComplete(100); - Singleton.getSingletonInstance().getMainFrame().notifyListener(runStatus, this); - RunnableJobReportDialog errorReportDialog = new RunnableJobReportDialog(Singleton.getSingletonInstance().getMainFrame(), report, counter.getErrors(), "Preprocess Results"); - errorReportDialog.setVisible(true); - //JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), report, "Preprocess complete", JOptionPane.ERROR_MESSAGE); - } // can read directory - } + SpecimenLifeCycle sls = new 
SpecimenLifeCycle(); + Singleton.getSingletonInstance().getMainFrame().setCount(sls.findSpecimenCount()); - SpecimenLifeCycle sls = new SpecimenLifeCycle(); - Singleton.getSingletonInstance().getMainFrame().setCount(sls.findSpecimenCount()); - } // Imagebase isn't null done(); } - /* (non-Javadoc) - * @see edu.harvard.mcz.imagecapture.Runnable#stop() + /** + * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#stop() */ @Override public boolean stop() { @@ -310,12 +246,17 @@ public int percentComplete() { return percentComplete; } + /** + * Count the files in a directory, recursively + * + * @param startPoint the path to start + * @param counter the counter to increment + */ private void countFiles(File startPoint, Counter counter) { // count files to preprocess File[] containedFiles = startPoint.listFiles(); if (containedFiles != null) { - for (int i = 0; i < containedFiles.length; i++) { - File fileToCheck = containedFiles[i]; + for (File fileToCheck : containedFiles) { if (fileToCheck.isDirectory()) { if (fileToCheck.canRead()) { countFiles(fileToCheck, counter); @@ -327,537 +268,50 @@ private void countFiles(File startPoint, Counter counter) { } } + /** + * Do the actual processing of each files + * + * @param startPoint the directory with all files to handle + * @param counter the counter to increment + */ private void checkFiles(File startPoint, Counter counter) { // pick jpeg files // for each file check name against database, if not found, check barcodes, scan and parse text, create records. 
- log.debug("Scanning directory: " + startPoint.getPath()); File[] containedFiles = startPoint.listFiles(); - log.debug("Directory contains " + containedFiles.length + " entries."); - if (containedFiles.length > 0) { - // create thumbnails in a separate thread - (new Thread(new ThumbnailBuilderInternal(startPoint))).start(); + if (containedFiles == null) { + log.error("Directory " + startPoint.getPath() + " contains 0 entries."); + return; } - for (int i = 0; i < containedFiles.length; i++) { + log.debug("Scanning directory: " + startPoint.getPath() + " containing " + containedFiles.length + " files."); + // create thumbnails in a separate thread + (new Thread(new ThumbnailBuilderInternal(startPoint))).start(); + + for (File containedFile : containedFiles) { if (runStatus != RunStatus.STATUS_TERMINATED) { - log.debug("Scanning directory: " + startPoint.getPath()); - File fileToCheck = containedFiles[i]; - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanning: " + fileToCheck.getName()); - log.debug("Scanning: " + fileToCheck.getName()); - if (fileToCheck.isDirectory()) { + Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanning: " + containedFile.getName()); + log.debug("Scanning: " + containedFile.getName()); + if (containedFile.isDirectory()) { // recursive read for all files: start anew for directories - if (fileToCheck.canRead()) { + if (containedFile.canRead()) { // Skip thumbs directories - if (!fileToCheck.getName().equals("thumbs")) { - checkFiles(fileToCheck, counter); + if (!containedFile.getName().equals("thumbs")) { + checkFiles(containedFile, counter); counter.incrementDirectories(); } } else { - counter.appendError(new RunnableJobError(fileToCheck.getName(), "", "Could not read directory", new UnreadableFileException(), RunnableJobError.TYPE_FILE_READ)); + counter.appendError(new RunnableJobError(containedFile.getName(), "", "Could not read directory", new UnreadableFileException(), 
RunnableJobError.TYPE_FILE_READ)); counter.incrementDirectoriesFailed(); } } else { // check JPEG files for barcodes - if (!fileToCheck.getName().matches(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEREGEX))) { - log.debug("Skipping file [" + fileToCheck.getName() + "], doesn't match expected filename pattern " + Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEREGEX)); + if (!containedFile.getName().matches(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEREGEX))) { + log.debug("Skipping file [" + containedFile.getName() + "], doesn't match expected filename pattern " + Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_IMAGEREGEX)); } else { - if (firstFile == null) { - firstFile = fileToCheck.getName(); - } - lastFile = fileToCheck.getName(); - Singleton.getSingletonInstance().getProperties().getProperties().setProperty(ImageCaptureProperties.KEY_LASTPATH, fileToCheck.getPath()); - String filename = fileToCheck.getName(); - counter.incrementFilesSeen(); - log.debug("Checking image file: " + filename); - CandidateImageFile.debugCheckHeightWidth(fileToCheck); - // scan file for barcodes and ocr of unit tray label text - CandidateImageFile scannableFile = null; - try { - // PositionTemplateDetector detector = new ConfiguredBarcodePositionTemplateDetector(); - boolean isSpecimenImage = false; - boolean isDrawerImage = false; - boolean reattach = false; // image is detached instance and should be reattached instead of persisted denovo. - // try { - // Check for an existing image record. 
- ICImageLifeCycle imageLifeCycle = new ICImageLifeCycle(); - ICImage tryMe = new ICImage(); - tryMe.setFilename(filename); - String path = ImageCaptureProperties.getPathBelowBase(fileToCheck); - tryMe.setPath(path); - List matches = imageLifeCycle.findBy(new HashMap() {{ - put("path", path); - put("filename", filename); - }}); - log.debug(matches != null ? matches.size() : "no matches found"); - if (matches != null && matches.size() == 1 - && matches.get(0).getRawBarcode() == null - && matches.get(0).getRawExifBarcode() == null - && (matches.get(0).getDrawerNumber() == null || matches.get(0).getDrawerNumber().trim().length() == 0) - ) { - // likely case for a failure to read data out of the image file - // try to update the image file record. - try { - tryMe = imageLifeCycle.merge(matches.get(0)); - matches.remove(0); - reattach = true; - log.debug(tryMe); - } catch (SaveFailedException e) { - log.error(e.getMessage(), e); - } - } else if (matches != null && matches.size() == 1 && matches.get(0).getSpecimen() == null) { - // likely case for a failure to create a specimen record in a previous run - // try to update the image file record - try { - tryMe = imageLifeCycle.merge(matches.get(0)); - matches.remove(0); - reattach = true; - log.debug(tryMe); - } catch (SaveFailedException e) { - log.error(e.getMessage(), e); - } - } - if (matches != null && matches.size() == 0) { - // No database record for this file. - - // ** Identify the template. - // String templateId = detector.detectTemplateForImage(fileToCheck); - // log.debug("Detected Template: " + templateId); - // PositionTemplate template = new PositionTemplate(templateId); - // // Found a barcode in a templated position in the image. - // // ** Scan the file based on this template. 
- // scannableFile = new CandidateImageFile(fileToCheck, template); - - // Construct a CandidateImageFile with constructor that self detects template - scannableFile = new CandidateImageFile(fileToCheck); - PositionTemplate template = scannableFile.getTemplateUsed(); - String templateId = template.getName(); - log.debug("Detected Template: " + templateId); - log.debug(scannableFile.getCatalogNumberBarcodeStatus()); - String barcode = scannableFile.getBarcodeTextAtFoundTemplate(); - if (scannableFile.getCatalogNumberBarcodeStatus() != CandidateImageFile.RESULT_BARCODE_SCANNED) { - log.error("Error scanning for barcode: " + barcode); - barcode = ""; - } - log.debug(barcode); - System.out.println("Barcode=" + barcode); - String exifComment = scannableFile.getExifUserCommentText(); - log.debug(exifComment); - TaxonNameReturner parser = null; - String rawOCR = ""; - UnitTrayLabel labelRead = null; - String state = WorkFlowStatus.STAGE_0; - labelRead = scannableFile.getTaxonLabelQRText(template); - if (labelRead == null) { - try { - labelRead = scannableFile.getTaxonLabelQRText(new PositionTemplate("Test template 2")); - } catch (NoSuchTemplateException e) { - try { - labelRead = scannableFile.getTaxonLabelQRText(new PositionTemplate("Small template 2")); - } catch (NoSuchTemplateException e1) { - log.error("Neither Test template 2 nor Small template 2 found"); - } - } - } else { - log.debug(labelRead.toJSONString()); - } - if (labelRead != null) { - rawOCR = labelRead.toJSONString(); - state = WorkFlowStatus.STAGE_1; - parser = labelRead; - } else { - PositionTemplate shifted = null; - try { - shifted = new PositionTemplate("Test template 2"); - } catch (NoSuchTemplateException e) { - try { - shifted = new PositionTemplate("Small template 2"); - } catch (NoSuchTemplateException e1) { - log.error("Neither Test template 2 nor Small template 2 found"); - } - } - if (shifted != null) { - int x = 5; - int xmax = 9; - Dimension utpos = shifted.getUtBarcodePosition(); - while 
(x < xmax) { - utpos.setSize(new Dimension(utpos.width + x, utpos.height)); - shifted.setUtBarcodePosition(utpos); - labelRead = scannableFile.getTaxonLabelQRText(shifted); - x++; - if (labelRead != null) { - x = xmax; - log.debug("Failover found: " + labelRead.getFamily() + " " + labelRead.getSubfamily() + " " + labelRead.getGenus()); - } - } - } - try { - rawOCR = scannableFile.getLabelOCRText(template); - } catch (OCRReadException e) { - log.error(e); - rawOCR = ""; - log.error("Couldn't OCR file." + e.getMessage()); - RunnableJobError error = new RunnableJobError(filename, "OCR Failed", - barcode, exifComment, "Couldn't find text to OCR", - null, null, - e, RunnableJobError.TYPE_NO_TEMPLATE); - counter.appendError(error); - } - if (labelRead == null) { - if (rawOCR == null) { - rawOCR = ""; - } - state = WorkFlowStatus.STAGE_0; - parser = new UnitTrayLabelParser(rawOCR); - // Provide error message to distinguish between entirely OCR or - if (((UnitTrayLabelParser) parser).isParsedFromJSON()) { - RunnableJobError error = new RunnableJobError(filename, "OCR Failover found barcode.", - barcode, exifComment, "Couldn't read Taxon barcode, failed over to OCR, but OCR found taxon barcode.", - parser, null, - null, RunnableJobError.TYPE_FAILOVER_TO_OCR); - counter.appendError(error); - } else { - RunnableJobError error = new RunnableJobError(filename, "Failover to OCR.", - barcode, exifComment, "Couldn't read Taxon barcode, failed over to OCR only.", - parser, null, - null, RunnableJobError.TYPE_FAILOVER_TO_OCR); - counter.appendError(error); - } - } else { - state = WorkFlowStatus.STAGE_1; - parser = labelRead; - } - } - - // Test: is exifComment a barcode: - - // Case 1: This is an image of papers associated with a container (a unit tray or a box). - // This case can be identified by there being no barcode data associated with the image. - // Action: - // A) Check the exifComment to see what metadata is there, if blank, User needs to fix. 
- // exifComment may contain a drawer number, identifying this as a drawer image. Save as such. - // Options: A drawer, for which number is captured. A unit tray, capture ?????????. A specimen - // where barcode wasn't read, allow capture of barcode and treat as Case 2. - // B) Create an image record and store the image metadata (with a null specimen_id). - - // Case 2: This is an image of a specimen and associated labels or an image assocated with - // a specimen with the specimen's barcode label in the image. - // This case can be identified by there being a barcode in a templated position or there - // being a barcode in the exif comment tag. - // Action: - // A) Check if a specimen record exists, if not, create one from the barcode and OCR data. - // B) Create an image record and store the image metadata. - - if (Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(exifComment) - || Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(barcode)) { - isSpecimenImage = true; - System.out.println("Specimen Image"); - } else { - if (exifComment != null && exifComment.matches(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REGEX_DRAWERNUMBER))) { - isDrawerImage = true; - System.out.println("Drawer Image"); - } else { - if (templateId.equals(PositionTemplate.TEMPLATE_NO_COMPONENT_PARTS)) { - log.debug("Image doesn't appear to contain a barcode in a templated position."); - counter.incrementFilesFailed(); - RunnableJobError error = new RunnableJobError(filename, barcode, - barcode, exifComment, "Image doesn't appear to contain a barcode in a templated position.", - null, null, - null, RunnableJobError.TYPE_NO_TEMPLATE); - counter.appendError(error); - } else { - // Nothing found. Need to ask. 
- RunnableJobError error = new RunnableJobError(filename, barcode, - barcode, exifComment, "Image doesn't appear to contain a barcode in a templated position.", - null, null, - null, RunnableJobError.TYPE_UNKNOWN); - counter.appendError(error); - counter.incrementFilesFailed(); - } - } - } - - String rawBarcode = barcode; - if (isSpecimenImage) { - if (!rawBarcode.equals(exifComment)) { - // Use the exifComment if it is a barcode - boolean barcodeInImageMetadata = false; - if (Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(exifComment)) { - barcodeInImageMetadata = true; - } - // Log the missmatch - logMismatch(counter, filename, barcode, exifComment, parser, barcodeInImageMetadata, log); - } - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Creating new specimen record."); - // TODO: check if Specimen with barcode already exists - Specimen s = new Specimen(); - if ((!Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(barcode)) - && Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(exifComment)) { - // special case: couldn't read QR code barcode from image, but it was present in exif comment. - s.setBarcode(exifComment); - barcode = exifComment; - } else { - if (!Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(barcode)) { - // Won't be able to save the specimen record if we end up here. - log.error("Neither exifComment nor QR Code barcode match the expected pattern for a barcode, but isSpecimenImage got set to true."); - } - s.setBarcode(barcode); - } - s.setWorkFlowStatus(state); - - if (labelRead != null) { - // We got json data from a barcode. - s.setFamily(parser.getFamily()); - s.setSubfamily(parser.getSubfamily()); - s.setTribe(parser.getTribe()); - } else { - // We failed over to OCR, try lookup in DB. - s.setFamily(""); // make sure there's a a non-null value in family. 
- if (parser.getTribe().trim().equals("")) { - HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); - if (hls.isMatched(parser.getFamily(), parser.getSubfamily())) { - // If there is a match, use it. - String[] higher = hls.findMatch(parser.getFamily(), parser.getSubfamily()); - s.setFamily(higher[0]); - s.setSubfamily(higher[1]); - } else { - // otherwise use the raw OCR output. - s.setFamily(parser.getFamily()); - s.setSubfamily(parser.getSubfamily()); - } - s.setTribe(""); - } else { - HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); - if (hls.isMatched(parser.getFamily(), parser.getSubfamily(), parser.getTribe())) { - String[] higher = hls.findMatch(parser.getFamily(), parser.getSubfamily(), parser.getTribe()); - s.setFamily(higher[0]); - s.setSubfamily(higher[1]); - s.setTribe(higher[2]); - } else { - s.setFamily(parser.getFamily()); - s.setSubfamily(parser.getSubfamily()); - s.setTribe(parser.getTribe()); - } - } - } - if (state.equals(WorkFlowStatus.STAGE_0)) { - // Look up likely matches for the OCR of the higher taxa in the HigherTaxon authority file. - - if (!parser.getFamily().equals("")) { - // check family against database (with a soundex match) - HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); - String match = hls.findMatch(parser.getFamily()); - if (match != null && !match.trim().equals("")) { - s.setFamily(match); - } - } - } - // trim family to fit (in case multiple parts of taxon name weren't parsed - // and got concatenated into family field. 
- JobSingleBarcodeScan.setBasicSpecimenFromParser(parser, s); - s.setCreatingPath(ImageCaptureProperties.getPathBelowBase(fileToCheck)); - s.setCreatingFilename(fileToCheck.getName()); - if (parser.getIdentifiedBy() != null && parser.getIdentifiedBy().length() > 0) { - s.setIdentifiedBy(parser.getIdentifiedBy()); - } - log.debug(s.getCollection()); - - // TODO: non-general workflows - - // TODO: Refactor special case handling of non-general workflows - - // ********* Special Cases ********** - if (s.getWorkFlowStatus().equals(WorkFlowStatus.STAGE_0)) { - // ***** Special case, images in ent-formicidae - // get family set to Formicidae if in state OCR. - if (path.contains("formicidae")) { - s.setFamily("Formicidae"); - } - } - s.setLocationInCollection(LocationInCollection.getDefaultLocation()); - if (s.getFamily().equals("Formicidae")) { - // ***** Special case, families in Formicidae are in Ant collection - s.setLocationInCollection(LocationInCollection.GENERALANT); - } - // ********* End Special Cases ********** - - - s.setCreatedBy(ImageCaptureApp.APP_NAME + " " + ImageCaptureApp.getAppVersion()); - s.setDateCreated(new Date()); - SpecimenLifeCycle sh = new SpecimenLifeCycle(); - try { - // *** Save a database record of the specimen. - sh.persist(s); - counter.incrementSpecimens(); - s.attachNewPart(); - } catch (SpecimenExistsException e) { - log.debug(e.getMessage()); - // Expected case on scanning a second image for a specimen. - // Doesn't need to be reported as a parsing error. - // - // Look up the existing record to link this specimen to it. - try { - List checkResult = sh.findByBarcode(barcode); - if (checkResult.size() == 1) { - s = checkResult.get(0); - } - } catch (Exception e2) { - s = null; // so that saving the image record doesn't fail on trying to save linked transient specimen record. 
- String errorMessage = "Linking Error: \nFailed to link image to existing specimen record.\n"; - RunnableJobError error = new RunnableJobError(filename, barcode, - rawBarcode, exifComment, errorMessage, - parser, (DrawerNameReturner) parser, - e2, RunnableJobError.TYPE_SAVE_FAILED); - counter.appendError(error); - } - } catch (SaveFailedException e) { - // Couldn't save for some reason other than the - // specimen record already existing. Check for possible - // save problems resulting from parsing errors. - log.debug(e.getMessage()); - try { - List checkResult = sh.findByBarcode(barcode); - if (checkResult.size() == 1) { - s = checkResult.get(0); - } - // Drawer number with length limit (and specimen that fails to save at over this length makes - // a good canary for labels that parse very badly. - if (((DrawerNameReturner) parser).getDrawerNumber().length() > MetadataRetriever.getFieldLength(Specimen.class, "DrawerNumber")) { - String badParse = ""; - badParse = "Parsing problem. \nDrawer number is too long: " + s.getDrawerNumber() + "\n"; - RunnableJobError error = new RunnableJobError(filename, barcode, - rawBarcode, exifComment, badParse, - parser, (DrawerNameReturner) parser, - e, RunnableJobError.TYPE_BAD_PARSE); - counter.appendError(error); - } else { - RunnableJobError error = new RunnableJobError(filename, barcode, - rawBarcode, exifComment, e.getMessage(), - parser, (DrawerNameReturner) parser, - e, RunnableJobError.TYPE_SAVE_FAILED); - counter.appendError(error); - - } - } catch (Exception err) { - log.error(e); - log.error(err); - // TODO: Add a general error handling/inform user class. - - String badParse = ""; - // Drawer number with length limit (and specimen that fails to save at over this length makes - // a good canary for labels that parse very badly. - if (s.getDrawerNumber() == null) { - badParse = "Parsing problem. 
\nDrawer number is null: \n"; - } else { - if (s.getDrawerNumber().length() > MetadataRetriever.getFieldLength(Specimen.class, "DrawerNumber")) { - // This was an OK test for testing OCR, but in production ends up in records not being - // created for files, which ends up being a larger quality control problem than records - // with bad OCR. - - // Won't fail this way anymore - drawer number is now enforced in Specimen.setDrawerNumber() - badParse = "Parsing problem. \nDrawer number is too long: " + s.getDrawerNumber() + "\n"; - } - } - RunnableJobError error = new RunnableJobError(filename, barcode, - rawBarcode, exifComment, badParse, - parser, (DrawerNameReturner) parser, - err, RunnableJobError.TYPE_SAVE_FAILED); - counter.appendError(error); - counter.incrementFilesFailed(); - s = null; - } - } catch (Exception ex) { - log.error(ex); - RunnableJobError error = new RunnableJobError(filename, barcode, - rawBarcode, exifComment, ex.getMessage(), - parser, (DrawerNameReturner) parser, - ex, RunnableJobError.TYPE_SAVE_FAILED); - counter.appendError(error); - } - if (s != null) { - tryMe.setSpecimen(s); - } - } - tryMe.setRawBarcode(rawBarcode); - if (isDrawerImage) { - tryMe.setDrawerNumber(exifComment); - } else { - tryMe.setRawExifBarcode(exifComment); - tryMe.setDrawerNumber(((DrawerNameReturner) parser).getDrawerNumber()); - } - tryMe.setRawOcr(rawOCR); - tryMe.setTemplateId(template.getTemplateId()); - tryMe.setPath(path); - // Create md5hash of image file, persist with image - if (tryMe.getMd5sum() == null || tryMe.getMd5sum().length() == 0) { - try { - tryMe.setMd5sum(DigestUtils.md5Hex(new FileInputStream(fileToCheck))); - } catch (FileNotFoundException e) { - log.error(e.getMessage()); - } catch (IOException e) { - log.error(e.getMessage()); - } - } - try { - if (reattach) { - // Update image file record - imageLifeCycle.attachDirty(tryMe); - log.debug("Updated " + tryMe.toString()); - counter.incrementFilesUpdated(); - } else { - // *** Save a database 
record of the image file. - imageLifeCycle.persist(tryMe); - log.debug("Saved " + tryMe.toString()); - counter.incrementFilesDatabased(); - } - } catch (SaveFailedException e) { - log.error(e.getMessage(), e); - counter.incrementFilesFailed(); - String failureMessage = "Failed to save image record. " + e.getMessage(); - RunnableJobError error = new RunnableJobError(filename, "Save Failed", - tryMe.getFilename(), tryMe.getPath(), failureMessage, - null, null, - null, RunnableJobError.TYPE_SAVE_FAILED); - counter.appendError(error); - } - if (isSpecimenImage) { - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { - // If so configured, log as error - if (!tryMe.getRawBarcode().equals(tryMe.getRawExifBarcode())) { - log.error("Warning: Scanned Image has missmatch between barcode and comment."); - } - } - } - } else { - if (matches == null) { - counter.incrementFilesFailed(); - String failureMessage = "Probable bad data in database. Null match searching for image file. Notify the database administrator."; - RunnableJobError error = new RunnableJobError(filename, "Bad Data", - tryMe.getFilename(), tryMe.getPath(), failureMessage, - null, null, - null, RunnableJobError.TYPE_SAVE_FAILED); - counter.appendError(error); - } else { - // found an already databased file (where we have barcode/specimen or drawer number data). - log.debug("Record exists, skipping file " + filename); - counter.incrementFilesExisting(); - } - } - // } catch (NoSuchTemplateException e) { - // log.error("Detected Template for image doesn't exist. " + e.getMessage()); - //} - - - } catch (UnreadableFileException e) { - counter.incrementFilesFailed(); - counter.appendError(new RunnableJobError(fileToCheck.getName(), "", "Could not read file", new UnreadableFileException(), RunnableJobError.TYPE_FILE_READ)); - log.error("Couldn't read file." 
+ e.getMessage()); - //} catch (OCRReadException e) { - // counter.incrementFilesFailed(); - // log.error("Couldn't OCR file." + e.getMessage()); - } + checkFile(containedFile, counter); } } // report progress - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanned: " + fileToCheck.getName()); + Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanned: " + containedFile.getName()); Float seen = 0.0f + counter.getFilesSeen(); Float total = 0.0f + counter.getTotal(); // thumbPercentComplete = (int) ((seen/total)*100); @@ -867,34 +321,7 @@ private void checkFiles(File startPoint, Counter counter) { } } - private void setPercentComplete(int aPercentage) { - //set value - percentComplete = aPercentage; - log.debug(percentComplete); - //notify listeners - Singleton.getSingletonInstance().getMainFrame().notifyListener(percentComplete, this); - Iterator i = listeners.iterator(); - while (i.hasNext()) { - i.next().notifyListener(percentComplete, this); - } - } - - /* (non-Javadoc) - * @see java.lang.Runnable#run() - */ - @Override - public void run() { - start(); - } - /** - * Cleanup when job is complete. 
- */ - private void done() { - Singleton.getSingletonInstance().getJobList().removeJob(this); - } - - /* (non-Javadoc) * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#getName() */ @Override @@ -910,7 +337,7 @@ public String getName() { } } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#getStartTime() */ @Override @@ -1075,7 +502,7 @@ public void run() { Singleton.getSingletonInstance().getJobList().removeJob(this); } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#start() */ @Override @@ -1083,7 +510,7 @@ public void start() { run(); } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#stop() */ @Override @@ -1092,7 +519,7 @@ public boolean stop() { return false; } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#cancel() */ @Override @@ -1101,7 +528,7 @@ public boolean cancel() { return false; } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#getStatus() */ @Override @@ -1109,7 +536,7 @@ public int getStatus() { return thumbRunStatus; } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#percentComplete() */ @Override @@ -1128,7 +555,7 @@ protected void setThumbPercentComplete(int aPercentage) { } } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#registerListener(edu.harvard.mcz.imagecapture.interfaces.RunnerListener) */ @Override @@ -1139,7 +566,7 @@ public boolean registerListener(RunnerListener aJobListener) { return thumbListeners.add(aJobListener); } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#getName() */ @Override @@ -1147,7 +574,7 @@ public String getName() { return "Thumbnail Generation in: " + startPoint; } - /* (non-Javadoc) + /** * @see edu.harvard.mcz.imagecapture.interfaces.RunnableJob#getStartTime() */ @Override diff --git 
a/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobRepeatOCR.java b/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobRepeatOCR.java index b69a8e66..e7bfa9be 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobRepeatOCR.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobRepeatOCR.java @@ -308,7 +308,7 @@ private void redoOCR(File file) { parser = labelRead; } else { log.debug("Failing over to OCR with tesseract"); - rawOCR = scannableFile.getLabelOCRText(templateToUse); + rawOCR = scannableFile.getTaxonLabelOCRText(templateToUse); parser = new UnitTrayLabelParser(rawOCR); foundQRText = ((UnitTrayLabelParser) parser).isParsedFromJSON(); } diff --git a/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobSingleBarcodeScan.java b/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobSingleBarcodeScan.java index a5af5c5a..1e46432d 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobSingleBarcodeScan.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/jobs/JobSingleBarcodeScan.java @@ -18,35 +18,19 @@ */ package edu.harvard.mcz.imagecapture.jobs; -import edu.harvard.mcz.imagecapture.*; -import edu.harvard.mcz.imagecapture.data.LocationInCollection; -import edu.harvard.mcz.imagecapture.data.MetadataRetriever; -import edu.harvard.mcz.imagecapture.entity.ICImage; +import edu.harvard.mcz.imagecapture.ImageCaptureProperties; +import edu.harvard.mcz.imagecapture.Singleton; import edu.harvard.mcz.imagecapture.entity.Specimen; -import edu.harvard.mcz.imagecapture.entity.UnitTrayLabel; -import edu.harvard.mcz.imagecapture.entity.fixed.WorkFlowStatus; -import edu.harvard.mcz.imagecapture.exceptions.*; import edu.harvard.mcz.imagecapture.interfaces.*; -import edu.harvard.mcz.imagecapture.lifecycle.HigherTaxonLifeCycle; -import edu.harvard.mcz.imagecapture.lifecycle.ICImageLifeCycle; import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; -import edu.harvard.mcz.imagecapture.ui.dialog.WhatsThisImageDialog; -import 
edu.harvard.mcz.imagecapture.ui.frame.ImageDisplayFrame; -import edu.harvard.mcz.imagecapture.ui.frame.SpecimenDetailsViewPane; -import org.apache.commons.codec.digest.DigestUtils; +import edu.harvard.mcz.imagecapture.ui.dialog.RunnableJobReportDialog; +import edu.harvard.mcz.imagecapture.utility.FileUtility; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import javax.swing.*; -import javax.swing.filechooser.FileNameExtensionFilter; import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; import java.util.ArrayList; import java.util.Date; -import java.util.Iterator; -import java.util.List; /** * Examines a single image file attempts to determine the correct template, tries to parse data from the image, launches @@ -60,22 +44,20 @@ * s.start(); * */ -public class JobSingleBarcodeScan implements RunnableJob, Runnable { +public class JobSingleBarcodeScan extends AbstractFileScanJob { private static final Log log = LogFactory.getLog(JobSingleBarcodeScan.class); private boolean persist = false; private Date startDate = null; - private int percentComplete = 0; private int runStatus = RunStatus.STATUS_NEW; - private ArrayList listeners = null; - /** * Default constructor, creates a single image job with persist=false, allows examination of image * extracted barcode, and OCR of label data without making a database connection. */ public JobSingleBarcodeScan() { + super(); init(); } @@ -87,6 +69,7 @@ public JobSingleBarcodeScan() { * the image and specimen records if needed. 
*/ public JobSingleBarcodeScan(boolean persistResult) { + super(); persist = persistResult; init(); } @@ -113,17 +96,6 @@ private void init() { listeners = new ArrayList(); } - private void setPercentComplete(int aPercentage) { - //set value - percentComplete = aPercentage; - //notify listeners - Singleton.getSingletonInstance().getMainFrame().notifyListener(percentComplete, this); - Iterator i = listeners.iterator(); - while (i.hasNext()) { - i.next().notifyListener(percentComplete, this); - } - } - /* (non-Javadoc) * @see edu.harvard.mcz.imagecapture.Runnable#cancel() */ @@ -161,523 +133,20 @@ public void start() { Singleton.getSingletonInstance().getJobList().addJob(this); setPercentComplete(0); Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Selecting file to check."); - String rawOCR = ""; // to hold unparsed ocr output from unit tray label - //Create a file chooser - final JFileChooser fileChooser = new JFileChooser(); - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH) != null) { - fileChooser.setCurrentDirectory(new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH))); - } - //FileNameExtensionFilter filter = new FileNameExtensionFilter("TIFF Images", "tif", "tiff"); - FileNameExtensionFilter filter = new FileNameExtensionFilter("Image files", "tif", "tiff", "jpg", "jpeg", "png"); - fileChooser.setFileFilter(filter); - int returnValue = fileChooser.showOpenDialog(Singleton.getSingletonInstance().getMainFrame()); + // ask for the file to parse + File fileToCheck = FileUtility.askForImageFile(new File(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_LASTPATH))); setPercentComplete(10); Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Scanning file for barcode."); - if (returnValue == JFileChooser.APPROVE_OPTION) { - File fileToCheck = 
fileChooser.getSelectedFile(); + if (fileToCheck != null) { Singleton.getSingletonInstance().getProperties().getProperties().setProperty(ImageCaptureProperties.KEY_LASTPATH, fileToCheck.getPath()); String filename = fileToCheck.getName(); log.debug("Selected file " + filename + " to scan for barcodes"); - CandidateImageFile.debugCheckHeightWidth(fileToCheck); - - // scan selected file - PositionTemplate defaultTemplate = null; - // Figure out which template to use. - DefaultPositionTemplateDetector detector = new DefaultPositionTemplateDetector(); - String template = ""; - try { - template = detector.detectTemplateForImage(fileToCheck); - } catch (UnreadableFileException e3) { - // TODO Auto-generated catch block - e3.printStackTrace(); - } - setPercentComplete(20); - try { - defaultTemplate = new PositionTemplate(template); - log.debug("Set template to: " + defaultTemplate.getTemplateId()); - } catch (NoSuchTemplateException e1) { - try { - defaultTemplate = new PositionTemplate(PositionTemplate.TEMPLATE_DEFAULT); - log.error("Template not recongised, reset template to: " + defaultTemplate.getTemplateId()); - } catch (Exception e2) { - // We shouldn't end up here - we just asked for the default template by its constant. - log.fatal("PositionTemplate doesn't recognize TEMPLATE_DEFAULT"); - log.trace(e2); - ImageCaptureApp.exit(ImageCaptureApp.EXIT_ERROR); - } - } - // TODO: Store the template id for this image with the other image metadata so - // that we don't have to check again. 
- - CandidateImageFile scannableFile; - try { - scannableFile = new CandidateImageFile(fileToCheck, defaultTemplate); - - String barcode = scannableFile.getBarcodeTextAtFoundTemplate(); - if (scannableFile.getCatalogNumberBarcodeStatus() != CandidateImageFile.RESULT_BARCODE_SCANNED) { - log.error("Error scanning for barcode: " + barcode); - barcode = ""; - } - String exifComment = scannableFile.getExifUserCommentText(); - if (barcode.equals("") && Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(exifComment)) { - // There should be a template for this image, and it shouldn't be the TEMPLATE_NO_COMPONENT_PARTS - if (defaultTemplate.getTemplateId().equals(PositionTemplate.TEMPLATE_NO_COMPONENT_PARTS)) { - try { - // This will give us a shot at OCR of the text and display of the image parts. - defaultTemplate = new PositionTemplate(PositionTemplate.TEMPLATE_DEFAULT); - log.error("Barcode not recongised, but exif contains barcode, reset template to: " + defaultTemplate.getTemplateId()); - } catch (Exception e2) { - // We shouldn't end up here - we just asked for the default template by its constant. 
- log.fatal("PositionTemplate doesn't recognize TEMPLATE_DEFAULT"); - log.trace(e2); - ImageCaptureApp.exit(ImageCaptureApp.EXIT_ERROR); - } - } - } - - log.debug("With template:" + defaultTemplate.getTemplateId()); - log.debug("Barcode=" + barcode); - - setPercentComplete(30); - - - String warning = ""; - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { - if (!barcode.equals(exifComment)) { - warning = "Warning: non-matching QR code barcode and exif Comment"; - System.out.println(warning); - } - } - - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Loading image."); - - ImageDisplayFrame resultFrame = new ImageDisplayFrame(); - - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { - resultFrame.setBarcode("QR=" + barcode + " Comment=" + exifComment + " " + warning); - } else { - resultFrame.setBarcode("QR=" + barcode); - } - - try { - resultFrame.loadImagesFromFileSingle(fileToCheck, defaultTemplate, null); - } catch (ImageLoadException e2) { - System.out.println("Error loading image file."); - System.out.println(e2.getMessage()); - } catch (BadTemplateException e2) { - System.out.println("Template doesn't match image file."); - System.out.println(e2.getMessage()); - try { - try { - try { - template = detector.detectTemplateForImage(fileToCheck); - } catch (UnreadableFileException e3) { - // TODO Auto-generated catch block - e3.printStackTrace(); - } - defaultTemplate = new PositionTemplate(template); - } catch (NoSuchTemplateException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - resultFrame.loadImagesFromFileSingle(fileToCheck, defaultTemplate, null); - } catch (ImageLoadException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (BadTemplateException e) { - System.out.println("Template doesn't 
match image file."); - System.out.println(e2.getMessage()); - } - } - - UnitTrayLabel labelRead = null; - try { - // Read unitTrayLabelBarcode, failover to OCR and parse UnitTray Label - rawOCR = ""; - labelRead = scannableFile.getTaxonLabelQRText(defaultTemplate); - if (labelRead == null) { - try { - labelRead = scannableFile.getTaxonLabelQRText(new PositionTemplate("Test template 2")); - } catch (NoSuchTemplateException e1) { - try { - labelRead = scannableFile.getTaxonLabelQRText(new PositionTemplate("Small template 2")); - } catch (NoSuchTemplateException e2) { - log.error("None of " + defaultTemplate.getName() + " Test template 2 or Small template 2 were found"); - } - } - } else { - log.debug("Translated UnitTrayBarcode to: " + labelRead.toJSONString()); - } - if (labelRead != null) { - rawOCR = labelRead.toJSONString(); - } else { - log.debug("Failing over to OCR with tesseract"); - rawOCR = scannableFile.getLabelOCRText(defaultTemplate); - } - log.debug(rawOCR); - resultFrame.setRawOCRLabel(rawOCR); - setPercentComplete(40); - } catch (Exception ex) { - System.out.println(ex.getMessage()); - } - - setPercentComplete(50); - - resultFrame.pack(); - resultFrame.setVisible(true); - resultFrame.centerSpecimen(); - resultFrame.center(); - setPercentComplete(60); - - if (persist) { - // Check that fileToCheck is within imagebase. 
- if (!ImageCaptureProperties.isInPathBelowBase(fileToCheck)) { - String base = Singleton.getSingletonInstance().getProperties().getProperties().getProperty( - ImageCaptureProperties.KEY_IMAGEBASE); - log.error("Tried to scan file (" + fileToCheck.getPath() + ") outside of base image directory (" + base + ")"); - throw new UnreadableFileException("Can't scan and database files outside of base image directory (" + base + ")"); - } - - String state = WorkFlowStatus.STAGE_0; - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Connecting to database."); - // try to parse the raw OCR - TaxonNameReturner parser = null; - if (labelRead != null) { - rawOCR = labelRead.toJSONString(); - state = WorkFlowStatus.STAGE_1; - parser = labelRead; - } else { - log.debug("Failing over to OCR with tesseract"); - rawOCR = scannableFile.getLabelOCRText(defaultTemplate); - state = WorkFlowStatus.STAGE_0; - parser = new UnitTrayLabelParser(rawOCR); - } - - - // Case 1: This is an image of papers associated with a container (a unit tray or a box). - // This case can be identified by there being no barcode data associated with the image. - // Action: - // A) Check the exifComment to see what metadata is there, if blank, bring up a dialog. - // Options: A drawer, for which number is captured. A unit tray, capture ?????????. A specimen - // where barcode wasn't read, allow capture of barcode and treat as Case 2. - // B) Create an image record and store the image metadata (with a null specimen_id). 
- - boolean isSpecimenImage = false; - boolean isDrawerImage = false; - // Test: is exifComment a barcode: - if (Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(exifComment) || Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(barcode)) { - isSpecimenImage = true; - System.out.println("Specimen Image"); - } else { - if (exifComment.matches(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REGEX_DRAWERNUMBER))) { - isDrawerImage = true; - System.out.println("Drawer Image"); - } else { - // Ask. - System.out.println("Need to ask."); - WhatsThisImageDialog askDialog = new WhatsThisImageDialog(resultFrame, fileToCheck); - askDialog.setVisible(true); - if (askDialog.isSpecimen()) { - isSpecimenImage = true; - exifComment = askDialog.getBarcode(); - } - if (askDialog.isDrawerImage()) { - isDrawerImage = true; - exifComment = askDialog.getDrawerNumber(); - } - } - } - - // applies to both cases. - ICImageLifeCycle imageCont = new ICImageLifeCycle(); - ICImage tryMe = new ICImage(); - tryMe.setFilename(filename); - //String path = fileToCheck.getParentFile().getPath(); - String path = ImageCaptureProperties.getPathBelowBase(fileToCheck); - //String[] bits = rawOCR.split(":"); - List matches = imageCont.findByExample(tryMe); - - - // Case 2: This is an image of a specimen and associated labels or an image assocated with - // a specimen with the specimen's barcode label in the image. - // This case can be identified by there being a barcode in a templated position or there - // being a barcode in the exif comment tag. - // Action: - // A) Check if a specimen record exists, if not, create one from the barcode and OCR data. - // B) Create an image record and store the image metadata. - - // Handle a potential failure case, existing image record without a linked specimen, but which - // should have one. 
- if (matches.size() == 1 && isSpecimenImage) { - ICImage existing = imageCont.findById(matches.get(0).getImageId()); - if (existing.getSpecimen() == null) { - // If the existing image record has no attached specimen, delete it. - // We will create it again from tryMe. - try { - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Removing existing unlinked image record."); - imageCont.delete(existing); - matches.remove(0); - } catch (SaveFailedException e) { - log.error(e.getMessage(), e); - } - } - } - - if (matches.size() == 0) { - String rawBarcode = barcode; - if (isSpecimenImage) { - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Creating new specimen record."); - Specimen s = new Specimen(); - if ((!Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(barcode)) - && Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(exifComment)) { - s.setBarcode(exifComment); - barcode = exifComment; - } else { - if (!Singleton.getSingletonInstance().getBarcodeMatcher().matchesPattern(barcode)) { - // Won't be able to save the specimen record if we end up here. - log.error("Neither exifComment nor QR Code barcode match the expected pattern for a barcode, but isSpecimenImage got set to true."); - } - s.setBarcode(barcode); - } - s.setWorkFlowStatus(state); - if (!state.equals(WorkFlowStatus.STAGE_0)) { - s.setFamily(parser.getFamily()); - s.setSubfamily(parser.getSubfamily()); - s.setTribe(parser.getTribe()); - } else { - s.setFamily(""); - // Look up likely matches for the OCR of the higher taxa in the HigherTaxon authority file. - HigherTaxonLifeCycle hls = new HigherTaxonLifeCycle(); - - if (parser != null) { //allie edit one line - - if (parser.getTribe().trim().equals("")) { - if (hls.isMatched(parser.getFamily(), parser.getSubfamily())) { - // If there is a match, use it. 
- String[] higher = hls.findMatch(parser.getFamily(), parser.getSubfamily()); - s.setFamily(higher[0]); - s.setSubfamily(higher[1]); - } else { - // otherwise use the raw OCR output. - s.setFamily(parser.getFamily()); - s.setSubfamily(parser.getSubfamily()); - } - s.setTribe(""); - } else { - if (hls.isMatched(parser.getFamily(), parser.getSubfamily(), parser.getTribe())) { - String[] higher = hls.findMatch(parser.getFamily(), parser.getSubfamily(), parser.getTribe()); - s.setFamily(higher[0]); - s.setSubfamily(higher[1]); - s.setTribe(higher[2]); - } else { - s.setFamily(parser.getFamily()); - s.setSubfamily(parser.getSubfamily()); - s.setTribe(parser.getTribe()); - } - } - if (!parser.getFamily().equals("")) { - // check family against database (with a soundex match) - String match = hls.findMatch(parser.getFamily()); - if (match != null && !match.trim().equals("")) { - s.setFamily(match); - } - } - - } //end if allie edit - } - // trim family to fit (in case multiple parts of taxon name weren't parsed - // and got concatenated into family field. - s = setBasicSpecimenFromParser(parser, s); - - log.debug(s.getCollection()); - - // TODO: non-general workflows - - // ********* Special Cases ********** - if (s.getWorkFlowStatus().equals(WorkFlowStatus.STAGE_0)) { - // ***** Special case, images in ent-formicidae - // get family set to Formicidae if in state OCR. 
- if (path.contains("formicidae")) { - s.setFamily("Formicidae"); - } - } - s.setLocationInCollection(LocationInCollection.getDefaultLocation()); - if (s.getFamily().equals("Formicidae")) { - // ***** Special case, families in Formicidae are in Ant collection - s.setLocationInCollection(LocationInCollection.GENERALANT); - } - // ********* End Special Cases ********** - - s.setCreatedBy(ImageCaptureApp.APP_NAME + " " + ImageCaptureApp.getAppVersion()); - SpecimenLifeCycle sh = new SpecimenLifeCycle(); - try { - sh.persist(s); - s.attachNewPart(); - } catch (SpecimenExistsException e) { - log.debug(e); - JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), - filename + " " + barcode + " \n" + e.getMessage(), - "Specimen Exists, linking Image to existing record.", - JOptionPane.ERROR_MESSAGE); - List checkResult = sh.findByBarcode(barcode); - if (checkResult.size() == 1) { - s = checkResult.get(0); - } - } catch (SaveFailedException e) { - // Couldn't save, but for some reason other than the - // specimen record already existing. - log.debug(e); - try { - List checkResult = sh.findByBarcode(barcode); - if (checkResult.size() == 1) { - s = checkResult.get(0); - } - // Drawer number with length limit (and specimen that fails to save at over this length makes - // a good canary for labels that parse very badly. - String badParse = ""; - if (parser != null && ((DrawerNameReturner) parser).getDrawerNumber().length() > MetadataRetriever.getFieldLength(Specimen.class, "DrawerNumber")) { - badParse = "Parsing problem. \nDrawer number is too long: " + s.getDrawerNumber() + "\n"; - } - JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), - filename + " " + barcode + " \n" + badParse + e.getMessage(), - "Badly parsed OCR", - JOptionPane.ERROR_MESSAGE); - } catch (Exception err) { - log.error(e); - log.error(err); - // TODO: Add a general error handling/inform user class. 
- // Cause of exception is not likely to be drawer number now that drawer number - // length is enforced in Specimen.setDrawerNumber, but the text returned by the parser - // might indicate very poor OCR as a cause. - if (parser != null) { - String badParse = ((DrawerNameReturner) parser).getDrawerNumber(); - JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), - filename + " " + barcode + "\n" + badParse + e.getMessage(), - "Save Failed", - JOptionPane.ERROR_MESSAGE); - } - s = null; - } - } - setPercentComplete(70); - if (s != null) { - tryMe.setSpecimen(s); - } - } - tryMe.setRawBarcode(rawBarcode); - if (isDrawerImage) { - tryMe.setDrawerNumber(exifComment); - } else { - tryMe.setRawExifBarcode(exifComment); - if (parser != null) { - tryMe.setDrawerNumber(((DrawerNameReturner) parser).getDrawerNumber()); - } - } - tryMe.setRawOcr(rawOCR); - tryMe.setTemplateId(defaultTemplate.getTemplateId()); - tryMe.setPath(path); - if (tryMe.getMd5sum() == null || tryMe.getMd5sum().length() == 0) { - try { - tryMe.setMd5sum(DigestUtils.md5Hex(new FileInputStream(fileToCheck))); - } catch (FileNotFoundException e) { - log.error(e.getMessage()); - } catch (IOException e) { - log.error(e.getMessage()); - } - } - try { - imageCont.persist(tryMe); - } catch (SaveFailedException e) { - // TODO Auto-generated catch block - log.error(e.getMessage()); - e.printStackTrace(); - } - - setPercentComplete(80); - if (isSpecimenImage) { - SpecimenController controler = null; - try { - controler = new SpecimenController(tryMe.getSpecimen()); - controler.setTargetFrame(resultFrame); - } catch (NoSuchRecordException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - SpecimenDetailsViewPane sPane = new SpecimenDetailsViewPane(tryMe.getSpecimen(), controler); - resultFrame.setWest(sPane); - if (!tryMe.getRawBarcode().equals(tryMe.getRawExifBarcode())) { - if 
(Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { - // If so configured, warn about missmatch - sPane.setWarning("Warning: Scanned Image has missmatch between barcode and comment."); - } - } - } - resultFrame.center(); - } else { - // found one or more matching image records. - setPercentComplete(80); - Singleton.getSingletonInstance().getMainFrame().setStatusMessage("Loading existing image record."); - ICImage existing = imageCont.findById(matches.get(0).getImageId()); - System.out.println(existing.getRawBarcode()); - existing.setRawBarcode(barcode); - if (isDrawerImage) { - existing.setDrawerNumber(exifComment); - } else { - existing.setRawExifBarcode(exifComment); - } - existing.setTemplateId(defaultTemplate.getTemplateId()); - if (existing.getPath() == null || existing.getPath().equals("")) { - existing.setPath(path); - } - if (existing.getDrawerNumber() == null || existing.getDrawerNumber().equals("")) { - if (parser != null) { - existing.setDrawerNumber(((DrawerNameReturner) parser).getDrawerNumber()); - } - } - try { - imageCont.attachDirty(existing); - } catch (SaveFailedException e) { - // TODO Auto-generated catch block - log.error(e.getMessage()); - e.printStackTrace(); - } - if (isSpecimenImage) { - SpecimenController controler = null; - try { - controler = new SpecimenController(existing.getSpecimen()); - controler.setTargetFrame(resultFrame); - System.out.println(existing.getSpecimen().getBarcode()); - } catch (NullPointerException e1) { - log.debug("Specimen barcode not set"); - } catch (NoSuchRecordException e) { - // Failure case - log.error(e.getMessage(), e); - JOptionPane.showMessageDialog(Singleton.getSingletonInstance().getMainFrame(), - filename + " " + barcode + "\n" + "Existing Image record with no Specimen Record. 
" + e.getMessage(), - "Save Failed.", - JOptionPane.ERROR_MESSAGE); - } - SpecimenDetailsViewPane sPane = new SpecimenDetailsViewPane(existing.getSpecimen(), controler); - resultFrame.setWest(sPane); - resultFrame.center(); - resultFrame.setActiveTab(ImageDisplayFrame.TAB_LABELS); - resultFrame.fitPinLabels(); - if (!existing.getRawBarcode().equals(existing.getRawExifBarcode())) { - if (Singleton.getSingletonInstance().getProperties().getProperties().getProperty(ImageCaptureProperties.KEY_REDUNDANT_COMMENT_BARCODE).equals("true")) { - sPane.setWarning("Warning: Scanned Image has missmatch between barcode and comment."); - } - } - } - setPercentComplete(90); - } - } - } catch (UnreadableFileException e1) { - log.error("Unable to read selected file." + e1.getMessage()); - } catch (OCRReadException e) { - log.error("Failed to OCR file." + e.getMessage()); - } + Counter counter = new Counter(); + checkFile(fileToCheck, counter); + RunnableJobReportDialog errorReportDialog = new RunnableJobReportDialog(Singleton.getSingletonInstance().getMainFrame(), counter.toString(), counter.getErrors(), "Preprocess Result"); + errorReportDialog.setVisible(true); } else { - System.out.println("No file selected from dialog."); + log.error("No file selected from dialog."); } setPercentComplete(100); Singleton.getSingletonInstance().getMainFrame().setStatusMessage(""); diff --git a/src/main/java/edu/harvard/mcz/imagecapture/lifecycle/SpecimenLifeCycle.java b/src/main/java/edu/harvard/mcz/imagecapture/lifecycle/SpecimenLifeCycle.java index ce4b30c4..c9b8dfee 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/lifecycle/SpecimenLifeCycle.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/lifecycle/SpecimenLifeCycle.java @@ -9,14 +9,14 @@ import edu.harvard.mcz.imagecapture.entity.Specimen; import edu.harvard.mcz.imagecapture.entity.SpecimenPart; import edu.harvard.mcz.imagecapture.entity.Tracking; +import edu.harvard.mcz.imagecapture.entity.fixed.CountValue; +import 
edu.harvard.mcz.imagecapture.entity.fixed.GenusSpeciesCount; +import edu.harvard.mcz.imagecapture.entity.fixed.VerbatimCount; import edu.harvard.mcz.imagecapture.entity.fixed.WorkFlowStatus; import edu.harvard.mcz.imagecapture.exceptions.ConnectionException; import edu.harvard.mcz.imagecapture.exceptions.SaveFailedException; import edu.harvard.mcz.imagecapture.exceptions.SpecimenExistsException; import edu.harvard.mcz.imagecapture.interfaces.BarcodeBuilder; -import edu.harvard.mcz.imagecapture.struct.CountValue; -import edu.harvard.mcz.imagecapture.struct.GenusSpeciesCount; -import edu.harvard.mcz.imagecapture.struct.VerbatimCount; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.hibernate.*; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/loader/JobVerbatimFieldLoad.java b/src/main/java/edu/harvard/mcz/imagecapture/loader/JobVerbatimFieldLoad.java index 23707a46..c7e438da 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/loader/JobVerbatimFieldLoad.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/loader/JobVerbatimFieldLoad.java @@ -183,7 +183,7 @@ public void start() { int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; - counter.incrementSpecimens(); + counter.incrementSpecimenDatabased(); CSVRecord record = iterator.next(); try { String verbatimUnclassifiedText = record.get("verbatimUnclassifiedText"); @@ -224,7 +224,7 @@ public void start() { int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; - counter.incrementSpecimens(); + counter.incrementSpecimenDatabased(); CSVRecord record = iterator.next(); try { String verbatimUnclassifiedText = record.get("verbatimUnclassifiedText"); @@ -271,7 +271,7 @@ public void start() { int lineNumber = 0; while (iterator.hasNext()) { lineNumber++; - counter.incrementSpecimens(); + counter.incrementSpecimenDatabased(); CSVRecord record = iterator.next(); try { String verbatimLocality = record.get("verbatimLocality"); @@ -365,7 +365,7 @@ 
public void start() { } else { updated = fl.loadFromMap(barcode, data, WorkFlowStatus.STAGE_VERBATIM, true); } - counter.incrementSpecimens(); + counter.incrementSpecimenDatabased(); if (updated) { counter.incrementSpecimensUpdated(); } diff --git a/src/main/java/edu/harvard/mcz/imagecapture/ui/ButtonEditor.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/ButtonEditor.java index 558aa510..e1c34412 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/ui/ButtonEditor.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/ButtonEditor.java @@ -22,13 +22,13 @@ import edu.harvard.mcz.imagecapture.entity.Specimen; import edu.harvard.mcz.imagecapture.entity.SpecimenPart; import edu.harvard.mcz.imagecapture.entity.Users; +import edu.harvard.mcz.imagecapture.entity.fixed.GenusSpeciesCount; +import edu.harvard.mcz.imagecapture.entity.fixed.VerbatimCount; import edu.harvard.mcz.imagecapture.entity.fixed.WorkFlowStatus; import edu.harvard.mcz.imagecapture.exceptions.NoSuchRecordException; import edu.harvard.mcz.imagecapture.exceptions.NoSuchTemplateException; import edu.harvard.mcz.imagecapture.interfaces.DataChangeListener; import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; -import edu.harvard.mcz.imagecapture.struct.GenusSpeciesCount; -import edu.harvard.mcz.imagecapture.struct.VerbatimCount; import edu.harvard.mcz.imagecapture.ui.dialog.SpecimenPartAttributeDialog; import edu.harvard.mcz.imagecapture.ui.dialog.VerbatimCaptureDialog; import edu.harvard.mcz.imagecapture.ui.dialog.VerbatimClassifyDialog; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimClassifyDialog.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimClassifyDialog.java index a93de18f..23494eaf 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimClassifyDialog.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimClassifyDialog.java @@ -22,16 +22,16 @@ import edu.harvard.mcz.imagecapture.entity.Collector; 
import edu.harvard.mcz.imagecapture.entity.Number; import edu.harvard.mcz.imagecapture.entity.Specimen; +import edu.harvard.mcz.imagecapture.entity.fixed.VerbatimCount; import edu.harvard.mcz.imagecapture.entity.fixed.WorkFlowStatus; import edu.harvard.mcz.imagecapture.exceptions.SaveFailedException; import edu.harvard.mcz.imagecapture.lifecycle.MCZbaseGeogAuthRecLifeCycle; import edu.harvard.mcz.imagecapture.lifecycle.NumberLifeCycle; import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; -import edu.harvard.mcz.imagecapture.struct.CountValueTableModel; -import edu.harvard.mcz.imagecapture.struct.VerbatimCount; import edu.harvard.mcz.imagecapture.ui.field.FilteringAgentJComboBox; import edu.harvard.mcz.imagecapture.ui.field.FilteringGeogJComboBox; import edu.harvard.mcz.imagecapture.ui.tablemodel.CollectorTableModel; +import edu.harvard.mcz.imagecapture.ui.tablemodel.CountValueTableModel; import edu.harvard.mcz.imagecapture.ui.tablemodel.HigherGeographyComboBoxModel; import edu.harvard.mcz.imagecapture.ui.tablemodel.NumberTableModel; import org.apache.commons.logging.Log; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimListDialog.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimListDialog.java index 3a2a4e53..28934181 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimListDialog.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimListDialog.java @@ -18,12 +18,12 @@ */ package edu.harvard.mcz.imagecapture.ui.dialog; +import edu.harvard.mcz.imagecapture.entity.fixed.VerbatimCount; import edu.harvard.mcz.imagecapture.interfaces.DataChangeListener; import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; -import edu.harvard.mcz.imagecapture.struct.VerbatimCount; -import edu.harvard.mcz.imagecapture.struct.VerbatimCountTableModel; import edu.harvard.mcz.imagecapture.ui.ButtonEditor; import edu.harvard.mcz.imagecapture.ui.ButtonRenderer; +import 
edu.harvard.mcz.imagecapture.ui.tablemodel.VerbatimCountTableModel; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimToTranscribeDialog.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimToTranscribeDialog.java index 638e8683..8efcfe91 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimToTranscribeDialog.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/dialog/VerbatimToTranscribeDialog.java @@ -18,11 +18,11 @@ */ package edu.harvard.mcz.imagecapture.ui.dialog; +import edu.harvard.mcz.imagecapture.entity.fixed.GenusSpeciesCount; import edu.harvard.mcz.imagecapture.lifecycle.SpecimenLifeCycle; -import edu.harvard.mcz.imagecapture.struct.GenusSpeciesCount; -import edu.harvard.mcz.imagecapture.struct.GenusSpeciesCountTableModel; import edu.harvard.mcz.imagecapture.ui.ButtonEditor; import edu.harvard.mcz.imagecapture.ui.ButtonRenderer; +import edu.harvard.mcz.imagecapture.ui.tablemodel.GenusSpeciesCountTableModel; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/struct/CountValueTableModel.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/CountValueTableModel.java similarity index 96% rename from src/main/java/edu/harvard/mcz/imagecapture/struct/CountValueTableModel.java rename to src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/CountValueTableModel.java index 83ebbc6e..4e3c0415 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/struct/CountValueTableModel.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/CountValueTableModel.java @@ -16,8 +16,9 @@ * along with this program. If not, see . *

*/ -package edu.harvard.mcz.imagecapture.struct; +package edu.harvard.mcz.imagecapture.ui.tablemodel; +import edu.harvard.mcz.imagecapture.entity.fixed.CountValue; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/struct/GenusSpeciesCountTableModel.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/GenusSpeciesCountTableModel.java similarity index 96% rename from src/main/java/edu/harvard/mcz/imagecapture/struct/GenusSpeciesCountTableModel.java rename to src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/GenusSpeciesCountTableModel.java index 18686957..a533ee4a 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/struct/GenusSpeciesCountTableModel.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/GenusSpeciesCountTableModel.java @@ -16,8 +16,9 @@ * along with this program. If not, see . *

*/ -package edu.harvard.mcz.imagecapture.struct; +package edu.harvard.mcz.imagecapture.ui.tablemodel; +import edu.harvard.mcz.imagecapture.entity.fixed.GenusSpeciesCount; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/struct/VerbatimCountTableModel.java b/src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/VerbatimCountTableModel.java similarity index 97% rename from src/main/java/edu/harvard/mcz/imagecapture/struct/VerbatimCountTableModel.java rename to src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/VerbatimCountTableModel.java index a5a7224f..7691e71c 100644 --- a/src/main/java/edu/harvard/mcz/imagecapture/struct/VerbatimCountTableModel.java +++ b/src/main/java/edu/harvard/mcz/imagecapture/ui/tablemodel/VerbatimCountTableModel.java @@ -16,8 +16,9 @@ * along with this program. If not, see . *

*/ -package edu.harvard.mcz.imagecapture.struct; +package edu.harvard.mcz.imagecapture.ui.tablemodel; +import edu.harvard.mcz.imagecapture.entity.fixed.VerbatimCount; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; diff --git a/src/main/java/edu/harvard/mcz/imagecapture/utility/FileUtility.java b/src/main/java/edu/harvard/mcz/imagecapture/utility/FileUtility.java new file mode 100644 index 00000000..bfa7a15e --- /dev/null +++ b/src/main/java/edu/harvard/mcz/imagecapture/utility/FileUtility.java @@ -0,0 +1,38 @@ +package edu.harvard.mcz.imagecapture.utility; + +import edu.harvard.mcz.imagecapture.Singleton; + +import javax.swing.*; +import javax.swing.filechooser.FileNameExtensionFilter; +import java.io.File; + +public class FileUtility { + public static File askForDirectory(File startpoint) { + final JFileChooser fileChooser = new JFileChooser(); + fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); + if (startpoint != null && startpoint.canRead()) { + fileChooser.setCurrentDirectory(startpoint); + } + int returnValue = fileChooser.showOpenDialog(Singleton.getSingletonInstance().getMainFrame()); + if (returnValue == JFileChooser.APPROVE_OPTION) { + File file = fileChooser.getSelectedFile(); + return file; + } else { + return null; + } + } + + public static File askForImageFile(File startpoint) { + final JFileChooser fileChooser = new JFileChooser(); + fileChooser.setCurrentDirectory(startpoint); + //FileNameExtensionFilter filter = new FileNameExtensionFilter("TIFF Images", "tif", "tiff"); + FileNameExtensionFilter filter = new FileNameExtensionFilter("Image files", "tif", "tiff", "jpg", "jpeg", "png"); + fileChooser.setFileFilter(filter); + int returnValue = fileChooser.showOpenDialog(Singleton.getSingletonInstance().getMainFrame()); + if (returnValue == JFileChooser.APPROVE_OPTION) { + return fileChooser.getSelectedFile(); + } + return null; + } + +}