Skip to content

Commit

Permalink
Summary: Fixed #2
Browse files Browse the repository at this point in the history
Changesets:
- When a page has no stream, there is still text to extract from the document; be more lenient and skip that page instead of failing
  • Loading branch information
leowmjw committed Jul 12, 2019
1 parent d7c2ad3 commit 0ba7c44
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 0 deletions.
129 changes: 129 additions & 0 deletions data/Lisan/JWP_DR_151018/split.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
parliamentsession: par1sesi1
hansardtype: 0
hansardquestions:
- questionnum: "1"
pagenumstart: 2
pagenumend: 7
- questionnum: "2"
pagenumstart: 8
pagenumend: 10
# NOTE(review): the previous entry (question "2") ends at page 10 and this one
# starts at page 12, so page 11 is not assigned to any question — confirm this
# gap is intentional (e.g. a blank or non-question page).
- questionnum: "3"
pagenumstart: 12
pagenumend: 20
- questionnum: "6"
pagenumstart: 21
pagenumend: 26
- questionnum: "8"
pagenumstart: 27
pagenumend: 28
- questionnum: "9"
pagenumstart: 29
pagenumend: 33
- questionnum: "11"
pagenumstart: 34
pagenumend: 35
- questionnum: "12"
pagenumstart: 36
pagenumend: 37
- questionnum: "13"
pagenumstart: 38
pagenumend: 39
- questionnum: "14"
pagenumstart: 40
pagenumend: 45
- questionnum: "16"
pagenumstart: 46
pagenumend: 48
- questionnum: "17"
pagenumstart: 49
pagenumend: 51
- questionnum: "18"
pagenumstart: 52
pagenumend: 53
- questionnum: "19"
pagenumstart: 54
pagenumend: 56
- questionnum: "20"
pagenumstart: 57
pagenumend: 62
- questionnum: "22"
pagenumstart: 63
pagenumend: 65
- questionnum: "23"
pagenumstart: 66
pagenumend: 67
- questionnum: "24"
pagenumstart: 68
pagenumend: 70
- questionnum: "25"
pagenumstart: 71
pagenumend: 73
- questionnum: "26"
pagenumstart: 74
pagenumend: 75
- questionnum: "27"
pagenumstart: 76
pagenumend: 83
- questionnum: "30"
pagenumstart: 84
pagenumend: 86
- questionnum: "31"
pagenumstart: 87
pagenumend: 88
- questionnum: "32"
pagenumstart: 89
pagenumend: 95
- questionnum: "35"
pagenumstart: 96
pagenumend: 99
- questionnum: "37"
pagenumstart: 100
pagenumend: 101
- questionnum: "38"
pagenumstart: 102
pagenumend: 104
- questionnum: "39"
pagenumstart: 105
pagenumend: 106
- questionnum: "40"
pagenumstart: 107
pagenumend: 109
- questionnum: "41"
pagenumstart: 110
pagenumend: 111
- questionnum: "42"
pagenumstart: 112
pagenumend: 113
- questionnum: "43"
pagenumstart: 114
pagenumend: 117
- questionnum: "44"
pagenumstart: 118
pagenumend: 119
- questionnum: "45"
pagenumstart: 120
pagenumend: 125
- questionnum: "47"
pagenumstart: 126
pagenumend: 128
- questionnum: "48"
pagenumstart: 129
pagenumend: 130
- questionnum: "49"
pagenumstart: 131
pagenumend: 133
- questionnum: "50"
pagenumstart: 134
pagenumend: 136
- questionnum: "51"
pagenumstart: 137
pagenumend: 137
- questionnum: "52"
pagenumstart: 138
pagenumend: 139
- questionnum: "53"
pagenumstart: 140
pagenumend: 141
- questionnum: "54"
pagenumstart: 142
pagenumend: 144
4 changes: 4 additions & 0 deletions internal/hansard/pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ func (pdfDoc *PDFDocument) extractPDF() error {
// copy over plain text; short form
pt, pterr := p.GetPlainText(nil)
if pterr != nil {
	// A "stream not present" failure is tolerated: log it and move on to the
	// next iteration of the enclosing loop (not visible in this hunk) so the
	// rest of the document can still be processed.
	// NOTE(review): matching on the exact message text is brittle; if the PDF
	// library exposes a sentinel or typed error for this case, prefer
	// errors.Is / errors.As — TODO confirm.
	if pterr.Error() == "malformed PDF: reading at offset 0: stream not present" {
		fmt.Println("**WILL IGNORE!!!! *****")
		continue
	}
	// Wrap the error value (pterr), not the plain-text result (pt): %w needs
	// an error operand, otherwise the underlying cause is dropped from the
	// returned error and cannot be unwrapped by callers.
	return xerrors.Errorf(" GetPlainText ERROR: %w", pterr)
}
pdfPage.PDFPlainText = pt
Expand Down

0 comments on commit 0ba7c44

Please sign in to comment.