Skip to content

Commit

Permalink
[qa] Add tests covering acquisition of USPTO PDF documents and drawings
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Apr 15, 2022
1 parent c5bb789 commit b0d8825
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Development
- [qa] Improve Python test harness
- [mw] Fix getting PDF documents and drawings from USPTO. Thanks, @aghster!
- [qa] Add software test infrastructure for data acquisition subsystems
- [qa] Add tests covering the acquisition of USPTO PDF documents and drawings


2019-11-01 0.169.3
Expand Down
113 changes: 113 additions & 0 deletions tests/access/test_uspto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
# (c) 2022 Andreas Motl <andreas.motl@ip-tools.org>
"""
Validate PDF document acquisition from USPTO servers.
"""
import pytest
from pyramid.httpexceptions import HTTPNotFound

from patzilla.access.uspto.image import fetch_first_drawing
from patzilla.access.uspto.pdf import fetch_pdf, document_viewer_url, pdf_index, pdf_url, fetch_url
from patzilla.util.numbers.common import split_patent_number


class TestFetchResourceValid:
    """
    Acquire PDF documents and first drawings for existing documents
    and verify the leading bytes of the returned payload.

    All tests in this class are marked ``slow``.
    """

    @pytest.mark.slow
    def test_full_pdf_application(self):
        """The full PDF for an application number starts with the PDF marker."""
        # TODO: Find a smaller application document.
        pdf = fetch_pdf("US2022110447A1")
        # Bytes literal, in line with the drawing checks below. On Python 2
        # this is the very same object type as the plain "%PDF" literal.
        # NOTE(review): assumes `fetch_pdf` returns the raw payload rather
        # than decoded text -- confirm against its implementation.
        assert pdf.startswith(b"%PDF")

    @pytest.mark.slow
    def test_full_pdf_publication(self):
        """The full PDF for a publication number starts with the PDF marker."""
        # US2548918 has only 240k, which is great for testing.
        pdf = fetch_pdf("US2548918")
        # Bytes literal for the same reason as in the application test.
        assert pdf.startswith(b"%PDF")

    @pytest.mark.slow
    def test_first_drawing_application(self):
        """The first drawing for an application number starts with ``II*\\x00``."""
        # TODO: Find a smaller application document.
        drawing = fetch_first_drawing("US2022110447A1")
        # b"\x49\x49\x2a\x00" reads "II*\x00"; this looks like the
        # little-endian TIFF signature.
        assert drawing.startswith(b"\x49\x49\x2a\x00")

    @pytest.mark.slow
    def test_first_drawing_publication(self):
        """The first drawing for a publication number starts with ``II*\\x00``."""
        # US2548918 has only 240k, which is great for testing.
        # NOTE(review): unlike the application test, the number is passed
        # through `split_patent_number` first. Presumably both input forms
        # are meant to be exercised -- confirm.
        drawing = fetch_first_drawing(split_patent_number("US2548918"))
        assert drawing.startswith(b"\x49\x49\x2a\x00")


class TestFetchResourceInvalid:
    """
    Request PDF documents for all-zero document numbers and expect
    `HTTPNotFound` carrying a "not found" message.

    All tests in this class are marked ``slow``.
    """

    @pytest.mark.slow
    def test_full_pdf_application_notfound(self):
        """An all-zero, ten-digit number makes `fetch_pdf` raise `HTTPNotFound`."""
        expected_message = "Resource at .+ not found"
        with pytest.raises(HTTPNotFound) as excinfo:
            fetch_pdf("US0000000000")
        # Checked after the context manager exits, once the exception is captured.
        assert excinfo.match(expected_message)

    @pytest.mark.slow
    def test_full_pdf_publication_notfound(self):
        """An all-zero, seven-digit number makes `fetch_pdf` raise `HTTPNotFound`."""
        expected_message = "Resource at .+ not found"
        with pytest.raises(HTTPNotFound) as excinfo:
            fetch_pdf("US0000000")
        # Checked after the context manager exits, once the exception is captured.
        assert excinfo.match(expected_message)


class TestDocumentViewerUrlValid:
    """
    Compute document viewer URLs for existing documents, both with the
    default arguments and with ``validate=False``.

    Only the tests using the default arguments are marked ``slow``.
    """

    @pytest.mark.slow
    def test_application_validated(self):
        """Application number, default arguments: the `.aiw` viewer location is returned."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfaiw.uspto.gov/.aiw?docid=20220110447'}
        outcome = document_viewer_url("US2022110447A1")
        assert outcome == expected

    @pytest.mark.slow
    def test_publication_validated(self):
        """Publication number, default arguments: the `.piw` viewer location is returned."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfpiw.uspto.gov/.piw?docid=02548918'}
        outcome = document_viewer_url("US2548918")
        assert outcome == expected

    def test_application_unvalidated(self):
        """Application number, ``validate=False``: the `.aiw` viewer location is returned."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfaiw.uspto.gov/.aiw?docid=20220110447'}
        outcome = document_viewer_url("US2022110447A1", validate=False)
        assert outcome == expected

    def test_publication_unvalidated(self):
        """Publication number, ``validate=False``: the `.piw` viewer location is returned."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfpiw.uspto.gov/.piw?docid=02548918'}
        outcome = document_viewer_url("US2548918", validate=False)
        assert outcome == expected


class TestDocumentViewerUrlInvalid:
    """
    Compute document viewer URLs for all-zero document numbers.

    With the default arguments, `HTTPNotFound` is expected. With
    ``validate=False``, a URL is expected to be returned nevertheless.
    Only the tests using the default arguments are marked ``slow``.
    """

    @pytest.mark.slow
    def test_application_validated(self):
        """All-zero, ten-digit number with default arguments raises `HTTPNotFound`."""
        expected_message = "Resource at .+ not found"
        with pytest.raises(HTTPNotFound) as excinfo:
            document_viewer_url("US0000000000")
        # `match` itself fails the test when the pattern is not found.
        excinfo.match(expected_message)

    @pytest.mark.slow
    def test_publication_validated(self):
        """All-zero, seven-digit number with default arguments raises `HTTPNotFound`."""
        expected_message = "Resource at .+ not found"
        with pytest.raises(HTTPNotFound) as excinfo:
            document_viewer_url("US0000000")
        # `match` itself fails the test when the pattern is not found.
        excinfo.match(expected_message)

    def test_application_unvalidated(self):
        """All-zero, ten-digit number with ``validate=False`` yields an `.aiw` location."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfaiw.uspto.gov/.aiw?docid=00000000000'}
        outcome = document_viewer_url("US0000000000", validate=False)
        assert outcome == expected

    def test_publication_unvalidated(self):
        """All-zero, seven-digit number with ``validate=False`` yields a `.piw` location."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfpiw.uspto.gov/.piw?docid=00000000'}
        outcome = document_viewer_url("US0000000", validate=False)
        assert outcome == expected


def test_pdf_index_unknown_application(caplog):
    """
    `pdf_index` with ``include=-99`` for an application number returns
    a falsy result and emits an "Unable to compute" log message.
    """
    expected_log = "Unable to compute PDF section map for document US2022110447A1"
    result = pdf_index("US2022110447A1", include=-99)
    assert not result
    assert expected_log in caplog.messages


def test_pdf_index_unknown_publication(caplog):
    """
    `pdf_index` with ``include=-99`` for a publication number returns
    a falsy result and emits an "Unable to compute" log message.
    """
    expected_log = "Unable to compute PDF section map for document US2548918"
    result = pdf_index("US2548918", include=-99)
    assert not result
    assert expected_log in caplog.messages


def test_pdf_url_invalid():
    """`pdf_url` returns `None` when it is handed `None`."""
    outcome = pdf_url(None)
    assert outcome is None


def test_fetch_url_failure():
    """
    `fetch_url` raises `HTTPNotFound` for a URL requesting status 500,
    and the exception message reports ``status=500``.
    """
    # The call is expected to raise, so there is no return value to assert
    # on. If it returns normally, `pytest.raises` fails the test on its own.
    with pytest.raises(HTTPNotFound) as ex:
        fetch_url("https://httpbin.org/status/500")
    # `match` itself fails the test when the pattern is not found.
    ex.match("Accessing resource at .+ failed. status=500, response=")

0 comments on commit b0d8825

Please sign in to comment.