-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[qa] Add tests covering acquisition of USPTO PDF documents and drawings
- Loading branch information
Showing
2 changed files
with
114 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# -*- coding: utf-8 -*- | ||
# (c) 2022 Andreas Motl <andreas.motl@ip-tools.org> | ||
""" | ||
Validate PDF document acquisition from USPTO servers. | ||
""" | ||
import pytest | ||
from pyramid.httpexceptions import HTTPNotFound | ||
|
||
from patzilla.access.uspto.image import fetch_first_drawing | ||
from patzilla.access.uspto.pdf import fetch_pdf, document_viewer_url, pdf_index, pdf_url, fetch_url | ||
from patzilla.util.numbers.common import split_patent_number | ||
|
||
|
||
class TestFetchResourceValid:
    """
    Acquire full PDF documents and first drawings for existing documents.

    Each test only inspects the leading bytes of the returned payload.
    """

    @pytest.mark.slow
    def test_full_pdf_application(self):
        """The payload fetched for US2022110447A1 begins with ``%PDF``."""
        # TODO: Find a smaller application document.
        assert fetch_pdf("US2022110447A1").startswith("%PDF")

    @pytest.mark.slow
    def test_full_pdf_publication(self):
        """The payload fetched for US2548918 begins with ``%PDF``."""
        # US2548918 has only 240k, which is great for testing.
        assert fetch_pdf("US2548918").startswith("%PDF")

    @pytest.mark.slow
    def test_first_drawing_application(self):
        """The first drawing of US2022110447A1, requested by plain number string."""
        # TODO: Find a smaller application document.
        image = fetch_first_drawing("US2022110447A1")
        # Presumably the little-endian TIFF signature ("II*\0") -- confirm.
        assert image.startswith(b"\x49\x49\x2a\x00")

    @pytest.mark.slow
    def test_first_drawing_publication(self):
        """The first drawing of US2548918, requested with a pre-split number."""
        # US2548918 has only 240k, which is great for testing.
        number = split_patent_number("US2548918")
        image = fetch_first_drawing(number)
        # Presumably the little-endian TIFF signature ("II*\0") -- confirm.
        assert image.startswith(b"\x49\x49\x2a\x00")
|
||
|
||
class TestFetchResourceInvalid:
    """
    Fetching full PDF documents for all-zero document numbers.

    Both tests expect ``HTTPNotFound`` with a "Resource at ... not found"
    message.
    """

    @pytest.mark.slow
    def test_full_pdf_application_notfound(self):
        """``fetch_pdf("US0000000000")`` raises ``HTTPNotFound``."""
        with pytest.raises(HTTPNotFound) as excinfo:
            fetch_pdf("US0000000000")
        assert excinfo.match("Resource at .+ not found")

    @pytest.mark.slow
    def test_full_pdf_publication_notfound(self):
        """``fetch_pdf("US0000000")`` raises ``HTTPNotFound``."""
        with pytest.raises(HTTPNotFound) as excinfo:
            fetch_pdf("US0000000")
        assert excinfo.match("Resource at .+ not found")
|
||
|
||
class TestDocumentViewerUrlValid:
    """
    Compute document viewer URLs for existing documents.

    The ``*_validated`` tests call ``document_viewer_url`` with its default
    arguments and are marked slow; the ``*_unvalidated`` tests pass
    ``validate=False``. The expected result is the same in both modes.
    """

    @pytest.mark.slow
    def test_application_validated(self):
        """Application number, default arguments."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfaiw.uspto.gov/.aiw?docid=20220110447'}
        assert document_viewer_url("US2022110447A1") == expected

    @pytest.mark.slow
    def test_publication_validated(self):
        """Publication number, default arguments."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfpiw.uspto.gov/.piw?docid=02548918'}
        assert document_viewer_url("US2548918") == expected

    def test_application_unvalidated(self):
        """Application number, ``validate=False``."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfaiw.uspto.gov/.aiw?docid=20220110447'}
        assert document_viewer_url("US2022110447A1", validate=False) == expected

    def test_publication_unvalidated(self):
        """Publication number, ``validate=False``."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfpiw.uspto.gov/.piw?docid=02548918'}
        assert document_viewer_url("US2548918", validate=False) == expected
|
||
|
||
class TestDocumentViewerUrlInvalid:
    """
    Compute document viewer URLs for all-zero document numbers.

    With default arguments, ``document_viewer_url`` is expected to raise
    ``HTTPNotFound``. With ``validate=False``, a URL is still returned.
    """

    @pytest.mark.slow
    def test_application_validated(self):
        """``document_viewer_url("US0000000000")`` raises ``HTTPNotFound``."""
        with pytest.raises(HTTPNotFound) as excinfo:
            document_viewer_url("US0000000000")
        # Assert the match result explicitly, so the check does not read
        # like a discarded expression.
        assert excinfo.match("Resource at .+ not found")

    @pytest.mark.slow
    def test_publication_validated(self):
        """``document_viewer_url("US0000000")`` raises ``HTTPNotFound``."""
        with pytest.raises(HTTPNotFound) as excinfo:
            document_viewer_url("US0000000")
        # Assert the match result explicitly, so the check does not read
        # like a discarded expression.
        assert excinfo.match("Resource at .+ not found")

    def test_application_unvalidated(self):
        """With ``validate=False``, a URL is returned for "US0000000000"."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfaiw.uspto.gov/.aiw?docid=00000000000'}
        assert document_viewer_url("US0000000000", validate=False) == expected

    def test_publication_unvalidated(self):
        """With ``validate=False``, a URL is returned for "US0000000"."""
        expected = {'origin': 'USPTO', 'location': 'https://pdfpiw.uspto.gov/.piw?docid=00000000'}
        assert document_viewer_url("US0000000", validate=False) == expected
|
||
|
||
def test_pdf_index_unknown_application(caplog):
    """
    ``pdf_index`` with ``include=-99`` on an application number.

    Expects a falsy section map and a matching entry in the captured log
    messages.
    """
    expected_message = "Unable to compute PDF section map for document US2022110447A1"
    sections = pdf_index("US2022110447A1", include=-99)
    assert not sections
    assert expected_message in caplog.messages
|
||
|
||
def test_pdf_index_unknown_publication(caplog):
    """
    ``pdf_index`` with ``include=-99`` on a publication number.

    Expects a falsy section map and a matching entry in the captured log
    messages.
    """
    expected_message = "Unable to compute PDF section map for document US2548918"
    sections = pdf_index("US2548918", include=-99)
    assert not sections
    assert expected_message in caplog.messages
|
||
|
||
def test_pdf_url_invalid():
    """``pdf_url`` returns ``None`` when handed ``None``."""
    outcome = pdf_url(None)
    assert outcome is None
|
||
|
||
def test_fetch_url_failure():
    """
    ``fetch_url`` on an endpoint answering with HTTP status 500.

    Expects ``HTTPNotFound`` whose message reports the failed access together
    with ``status=500``.
    """
    # NOTE(review): this test talks to httpbin.org but, unlike the other
    # network tests in this module, carries no ``slow`` marker -- confirm
    # whether that is intended.
    with pytest.raises(HTTPNotFound) as excinfo:
        # The call is expected to raise, so its return value is not compared.
        fetch_url("https://httpbin.org/status/500")
    assert excinfo.match("Accessing resource at .+ failed. status=500, response=")