-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathextract_measures_links.php
63 lines (43 loc) · 1.28 KB
/
extract_measures_links.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
<?php
require_once('go_wayback.function.php');
// Work inside the mirrored site directory so that glob() below (and anything
// later that uses relative paths) resolves against it. Abort instead of
// silently scanning whatever the current directory happens to be.
if (!chdir('./www.qualitymeasures.ahrq.gov')) {
    echo "Error: failed to change directory to ./www.qualitymeasures.ahrq.gov\n";
    exit(1);
}

// One entry per result link found: ['anchor' => link text, 'href' => link target].
$data = [];
$errors = [];

// The saved pages are messy HTML. Let libxml collect its parse complaints
// internally rather than blanket-silencing the call with the @ operator,
// which would also hide unrelated errors.
$previous_libxml_setting = libxml_use_internal_errors(true);

// glob() returns false on error; treat that the same as "no files".
foreach (glob('*.html') ?: [] as $file_name) {
    $html_text = file_get_contents($file_name);
    if ($html_text === false) {
        echo "Error: failed to open $file_name\n";
        exit(1); // non-zero status so callers can detect the failure
    }

    // An empty file contains no links, and DOMDocument::loadHTML('') throws
    // a ValueError on PHP 8 -- skip it.
    if ($html_text === '') {
        continue;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($html_text);
    libxml_clear_errors(); // discard the parse warnings buffered for this file

    // Every search-result title is an <a> inside <h3 class="results-list-item-title">.
    $xpath = new DOMXPath($doc);
    $xpath_query = '//h3[@class="results-list-item-title"]/a';
    foreach ($xpath->query($xpath_query) as $result_title_link) {
        $data[] = [
            'anchor' => $result_title_link->nodeValue,
            'href' => $result_title_link->getAttribute('href'),
        ];
    }
}

// Restore whatever libxml error mode was active before this section.
libxml_use_internal_errors($previous_libxml_setting);
// Fetch each collected link through go_wayback() (saving into the current
// directory) and record on the same entry where the copy was stored and the
// timestamp reported for it.
foreach (array_keys($data) as $index) {
    $wayback = go_wayback($data[$index]['href'], '.');

    $data[$index]['mirror_file'] = $wayback['saved_to_file'];
    $data[$index]['timestamp'] = $wayback['timestamp'];
}
// Go back to the main (parent) directory before writing the output file.
if (!chdir('..')) {
    echo "Error: failed to change directory to ..\n";
    exit(1);
}

// Save the collected links as a CSV file: one header row, then one row per link.
$fp = fopen('measures_links.csv', 'w');
if ($fp === false) {
    // Without this check fputcsv() would be handed false instead of a stream.
    echo "Error: failed to open measures_links.csv for writing\n";
    exit(1);
}

fputcsv($fp, ['guideline anchor', 'guideline url', 'mirror_file', 'timestamp']);
foreach ($data as $row) {
    // Each row's values are written in the order its keys were added.
    fputcsv($fp, $row);
}

// Flush and release the handle explicitly instead of relying on script shutdown.
fclose($fp);
// Opening of the Markdown text: a blank line, the title, its "=" underline,
// and a trailing newline.
$Measures_MD_text = implode("\n", [
    '',
    'AHRQ Measures',
    '================',
    '',
]);