Files
automatic-dispose/server/parse_html.php
2018-11-24 14:43:59 +01:00

113 lines
3.2 KiB
PHP
Executable File

<?php
// Report every error class except notices and warnings.
error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
// Every relative path used by this script (the *.html inputs and the *.json
// outputs) is resolved against ./missions/. chdir() only returns false on
// failure, so stop here instead of reading and writing in the wrong directory.
if (!chdir('./missions/')) {
    throw new RuntimeException('Unable to change into the ./missions/ directory.');
}
/**
 * Collapse line breaks, tabs and repeated blanks into single spaces.
 *
 * CRLF, CR, LF and tab characters are replaced by a space first; afterwards
 * every run of spaces/tabs is squeezed into exactly one space. Leading and
 * trailing blanks are squeezed, not stripped - use trim() for that.
 *
 * @param string $string Text to normalise.
 * @return string|null Normalised text, or null if preg_replace() fails.
 */
function trimremove($string)
{
    // An earlier extra pass with /\s*$^\s*/m ran after every newline had
    // already been replaced, so it could only match an empty subject and
    // turned '' into "\n". It is dropped so that an empty string stays empty.
    $flattened = str_replace(array("\r\n", "\r", "\n", "\t"), ' ', $string);

    return preg_replace('/[ \t]+/', ' ', $flattened);
}
/**
 * Recursively remove empty strings and empty arrays from an array.
 *
 * Nested arrays are cleaned first, so an array that ends up empty after
 * cleaning is removed as well. Keys are preserved (the result is not
 * re-indexed). 0, 0.0, '0', false and null are kept.
 *
 * @param array $haystack Array to clean.
 * @return array The cleaned array.
 */
function array_remove_empty($haystack)
{
    foreach ($haystack as $key => $value) {
        if (is_array($value)) {
            $value = array_remove_empty($value);
            $haystack[$key] = $value;
        }
        // Strict checks replace `empty($v) && $v != 0`: the loose `!= 0`
        // treats '' as equal to 0 before PHP 8 (so empty strings were never
        // removed there) but not from PHP 8 on. This pins the PHP 8 result
        // on every version.
        if ($value === '' || $value === array()) {
            unset($haystack[$key]);
        }
    }

    return $haystack;
}
// Accumulator that every file's key/value pairs are merged into.
$allTables = [];
// Zero-padded 'Wert' of the "Credits im Durchschnitt" row => list of file
// names (without extension) that carry this value.
$priceTable = [];

// glob() returns false on error; treat that like "no files".
$htmlFiles = glob("*.html");
if ($htmlFiles === false) {
    $htmlFiles = [];
}

foreach ($htmlFiles as $filename) {
    // File name without its trailing extension. pathinfo() strips only the
    // extension, not every ".html" occurrence inside the name.
    $missionId = pathinfo($filename, PATHINFO_FILENAME);

    $dom = new DOMDocument();
    // An unreadable or unparsable file has nothing to extract; skip it
    // instead of writing a JSON file that only contains a null name.
    if ($dom->loadHTMLFile($filename) === false) {
        continue;
    }
    // NOTE(review): set after loading, exactly as before; presumably this has
    // no influence on the already parsed document - confirm before relying on it.
    $dom->preserveWhiteSpace = false;

    // Collect the rows of every <table> in the document into one flat list.
    $table = [];
    foreach ($dom->getElementsByTagName('table') as $tab) {
        $rows = $tab->getElementsByTagName('tr');
        // item(0) is null for a table without rows; calling a method on it
        // would abort the whole run.
        if ($rows->length === 0) {
            continue;
        }

        // The <th> cells of the first row provide the column names.
        $row_headers = [];
        foreach ($rows->item(0)->getElementsByTagName('th') as $node) {
            $row_headers[] = trimremove(trim($node->nodeValue));
        }

        foreach ($rows as $tr) {
            $row = [];
            $i = 0;
            foreach ($tr->getElementsByTagName('td') as $node) {
                $cell = trimremove(trim($node->nodeValue));
                if (empty($row_headers)) {
                    // Table without header cells: plain positional list.
                    $row[] = $cell;
                } else {
                    // Cells beyond the last header keep their column index
                    // instead of all collapsing onto one empty-string key.
                    $key = isset($row_headers[$i]) ? $row_headers[$i] : $i;
                    $row[$key] = $cell;
                }
                $i++;
            }
            // Rows without <td> cells (e.g. the header row) are added as
            // empty arrays and removed by array_remove_empty() below.
            $table[] = $row;
        }
    }

    $table_data = array_remove_empty($table);

    // Turn the rows into one map: 'Beschreibung' cell => 'Wert' cell.
    $new_data = [];
    foreach ($table_data as $value) {
        // Rows without a 'Beschreibung' column cannot be keyed; previously
        // they produced a junk "" => null entry.
        if (!isset($value['Beschreibung'])) {
            continue;
        }
        $description = $value['Beschreibung'];
        $worth = isset($value['Wert']) ? $value['Wert'] : null;

        if ($description === "Credits im Durchschnitt") {
            // Left-pad to 5 characters so the keys line up for ksort().
            $pkey = str_pad((string) $worth, 5, "0", STR_PAD_LEFT);
            if (!isset($priceTable[$pkey])) {
                $priceTable[$pkey] = [];
            }
            $priceTable[$pkey][] = $missionId;
        }
        $new_data[$description] = $worth;
    }

    // The name is taken from the second <li> of the <ol> inside the element
    // with id "iframe-inside-container"; it stays null when nothing matches.
    $xpath = new DOMXPath($dom);
    $elements = $xpath->query('//*[@id="iframe-inside-container"]/ol/li[2]');
    $nameNode = ($elements !== false) ? $elements->item(0) : null;
    $new_data["Name"] = ($nameNode !== null) ? $nameNode->nodeValue : null;

    // NOTE(review): array_merge() overwrites equal string keys, so entries
    // such as "Name" from earlier files are replaced by later files. If
    // allTables.json is meant to hold one record per file, this should
    // probably be `$allTables[$missionId] = $new_data;` - left unchanged to
    // keep the output format stable; confirm with the consumer of that file.
    $allTables = array_merge($allTables, $new_data);

    // One JSON file per input file, next to it.
    $putJson = json_encode($new_data, JSON_UNESCAPED_UNICODE);
    file_put_contents($missionId . '.json', $putJson);
}

ksort($priceTable);
file_put_contents("prices.json", json_encode($priceTable, JSON_UNESCAPED_UNICODE));
file_put_contents("allTables.json", json_encode($allTables, JSON_UNESCAPED_UNICODE));