-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgetCovid-19Data.php
More file actions
68 lines (59 loc) · 3.46 KB
/
Copy pathgetCovid-19Data.php
File metadata and controls
68 lines (59 loc) · 3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
<?php
/**
 * Scrapes the per-province statistics table (table#example2) from covid-19.pk.
 *
 * When this file is included, it returns a list with one entry per table row:
 *   [
 *     'name'            => string, // text of the 1st cell
 *     'total_cases'     => int,    // number parsed from the 2nd cell
 *     'total_recovered' => int,    // number parsed from the 3rd cell
 *     'total_deaths'    => int,    // number parsed from the 4th cell
 *   ]
 *
 * If the page cannot be fetched or parsed, an E_USER_WARNING is raised and an
 * empty list is returned instead of crashing on a non-object.
 */

// Simple HTML DOM parsing library. require_once avoids "cannot redeclare"
// fatals when this script is included more than once in the same request.
require_once 'simple_html_dom.php';

// URL of the website whose data is scraped
$websiteUrl = "https://www.covid-19.pk/";

// Collected data of all provinces
$all_provinces = [];

// Fetch and parse the raw HTML of the page; file_get_html() yields false on failure
$html = file_get_html($websiteUrl);
if (!is_object($html)) {
    trigger_error("Unable to fetch or parse {$websiteUrl}", E_USER_WARNING);
    return $all_provinces;
}

// Column index => result key for the numeric cells (index 0 is the province name)
$countFields = [
    1 => 'total_cases',
    2 => 'total_recovered',
    3 => 'total_deaths',
];

// Turns the inner HTML of a numeric cell into an integer.
// The "badge-warning" / "badge-info" pill spans are dropped together with their
// content so that only the main figure remains, then all remaining tags are
// stripped. Thousands separators are tolerated ("1,234" => 1234); a cell without
// a leading number yields 0, matching a plain (int) cast.
$parseCount = static function ($cellHtml) {
    $withoutBadges = preg_replace(
        '#<span class="badge badge-pill badge-(?:warning|info)">.*?</span>#s',
        '',
        $cellHtml
    );
    $text = strip_tags((string) $withoutBadges);

    if (!preg_match('/^\s*([+-]?\d[\d,]*)/', $text, $match)) {
        return 0;
    }

    return (int) str_replace(',', '', $match[1]);
};

// Walk every row of the table body of the table with ID "example2"
foreach ($html->find('table#example2 tbody tr') as $tr) {
    $province = [];

    // Only the first four cells are used; any further cells are ignored
    foreach ($tr->find('td') as $key => $td) {
        if ($key === 0) {
            // First cell: province name as plain text, without surrounding whitespace
            $province['name'] = trim($td->plaintext);
        } elseif (isset($countFields[$key])) {
            $province[$countFields[$key]] = $parseCount($td->innertext);
        }
    }

    // Rows without any <td> cell carry no data; do not emit empty records
    if ($province !== []) {
        $all_provinces[] = $province;
    }
}

// Release the DOM tree; the library recommends clear() to avoid memory leaks
$html->clear();

return $all_provinces;