<?


	// Report everything except notices; set first so it already applies
	// to the bootstrap calls below.
	error_reporting( E_ALL & ~E_NOTICE );
	ini_set("memory_limit", "512M");
	set_time_limit( 3600 );

	// collect libxml parse errors internally instead of emitting warnings
	libxml_use_internal_errors( 1 );

	// response headers meant to keep proxies from buffering / compressing,
	// so progress lines can reach the browser while the script is running
	header("X-Accel-Buffering: no");
	header("Content-Encoding: none");

	// Close PHP's own output buffer only when one is active: calling
	// ob_end_flush() without a buffer raises a notice, and that output
	// would send the headers and break the redirect done by re_cache().
	if( ob_get_level() > 0 )
	  ob_end_flush();
	ob_implicit_flush( 1 );


	// The first query-string key selects the action (?cache, ?re-cache,
	// ?generate). Strict comparison: an integer key such as "?0" must not
	// loosely match an action name. Every handler ends the request itself.
	$key= key( $_GET );

	if( $key==="cache" )
	  do_cache();

	if( $key==="re-cache" )
	  re_cache();

	if( $key==="generate" )
	  generate();
  
  
?>




<a href="?cache"> Download </a>
<br><br>

<a href="?re-cache"> Re-download </a>
<b style="color:red"> !!! </b>
<br><br>

<a href="?generate"> Generate list </a>
<br><br>

<a href="list.xlsx"> list.xlsx </a>
&nbsp;
<?
	// Show the date of the last generated list only when list.xlsx exists;
	// filemtime() on a missing file warns and the page would print 1970-01-01.
	echo is_file("list.xlsx") ? date( "Y-m-d", filemtime("list.xlsx") ) : "";
?>




<?  


	// download page (html)
	//
	// Fetches one result page from the remote search service.
	//
	//   $status  value sent as the "status" query parameter
	//   $p       value sent as the "p" (page) query parameter
	//
	// Returns the response body, or false when the request fails
	// (the result of file_get_contents() is passed through unchanged).
	function download( $status, $p ) {

		$url = "https://ouny.magyarugyvedikamara.hu/licoms/common/service/requestparser";

		// Build the query string with proper URL encoding instead of raw
		// interpolation. The separator is passed explicitly so the
		// arg_separator.output ini setting cannot alter the URL.
		$url.= "?".http_build_query( array(
		  "name"   => "pubsearcher",
		  "action" => "search",
		  "type"   => "ugyved",
		  "status" => $status,
		  "p"      => $p
		), "", "&" );

		return file_get_contents( $url );
	}

	
	// get number of pages
	//
	// Downloads page 1 for $status, takes the second space-separated word
	// of the first <p> element as the total number of hits and divides it
	// by 25 (presumably the number of hits per page - confirm).
	//
	// Returns the page count as produced by ceil() (a float).
	// Ends the request with a short message when the page cannot be
	// downloaded or parsed, or does not contain the expected paragraph.
	function pages( $status="aktiv" ) {

		$file= download( $status, 1 ) or die("DL Error");

		$doc= new DOMDocument();
		$doc->loadHTML( $file, LIBXML_NOERROR ) or die("HTML Error");

		// first paragraph; item() returns null when there is none
		$first= $doc->getElementsByTagName("p")->item( 0 );
		$words= $first ? explode( " ", $first->nodeValue ) : array();

		if( !isset( $words[1] ) )
		  die("Count Error");

		// the cast keeps the leading digits and drops trailing punctuation
		$hits= (int) $words[1];

		return ceil( $hits / 25 );

	}


	// cache page (xml)
	//
	// Stores the first <div> of one downloaded result page as
	// cache/<status>-<p>.xml. Pages that are already cached are skipped,
	// so an interrupted run can be resumed.
	//
	//   $status  status value, passed on to download()
	//   $p       page number, passed on to download()
	//
	// Ends the request with a short message on any failure.
	function cache( $status, $p ) {

		$path= "cache/$status-$p.xml";

		// already cached
		if( is_file($path) )
		  return;

		// without the directory every write below would fail
		if( !is_dir("cache") && !mkdir("cache") )
		  die("Cache Error");

		$file= download( $status, $p ) or die("DL Error");

		$doc= new DOMDocument();
		$doc->loadHTML( $file, LIBXML_NOERROR ) or die("HTML Error");

		// Only the first <div> is kept. Without this check a page that has
		// no <div> would be saved as a whole document and, because cached
		// files are never downloaded again, stay in the cache.
		$div= $doc->getElementsByTagName( "div" )->item( 0 );
		if( !$div )
		  die("HTML Error");

		$xml= $doc->saveXML( $div ) or die("XML Error");

		if( file_put_contents( $path, $xml ) === false )
		  die("Cache Error");

	}
	


	// fill cache
	//
	// For each of the two statuses: print the status name, ask pages()
	// how many pages exist, then cache page 1..N and report progress
	// after every page. Ends the request with "DONE".
	function do_cache() {

		$statuses= [ "aktiv", "offline" ];

		foreach( $statuses as $status ) {

		  echo "$status <br>";

		  $last= pages( $status );
		  $page= 1;

		  while( $page <= $last ) {
			cache( $status, $page );
			progress( $page, $last );
			$page++;
		  }

		}

		die( "DONE" );

	}
	
	
	// log progress
	//
	// Prints "<percent>% <br>" and pushes it to the client, but only when
	// $i is a multiple of $step.
	//
	//   $i      index of the current item
	//   $count  total number of items
	//   $step   report only every $step-th item
	function progress( $i, $count, $step=10 ) {
		if( $i % $step ) { return; }

		// nothing to report for an empty set (and no division by zero)
		if( $count <= 0 ) { return; }

		$i= round( $i / $count * 100 );
		echo "$i% <br>";

		// PHP's own buffer has to be flushed before the system buffer,
		// and ob_flush() raises a notice when no buffer is active
		if( ob_get_level() > 0 ) { ob_flush(); }
		flush();
	}
	
	
	
	
	// empty cache & redo
	//
	// Deletes every file in cache/ and redirects to "?cache", which
	// fills the cache again.
	function re_cache() {

		// glob() may return false on error; directories cannot be unlinked
		$files= glob("cache/*");
		if( $files ) {
		  foreach( $files as $file ) {
			if( is_file( $file ) ) { unlink( $file ); }
		  }
		}

		header( "Location: ?cache" );
		exit;
	}

	
	
	// generate list
	//
	// Reads every cached page (cache/*.xml), builds one record per <div>
	// child of the page's root element and writes all records into
	// list.xlsx, which is created as a fresh copy of template.xlsx.
	// Ends the request with "DONE".
	//
	// Field values come from the <label> elements of a record: the label
	// text, normalised by clean(), is the field name and the first child
	// of the label's next sibling holds the value. Fields are grouped
	// under the text of the <legend> that is the first child of the
	// label's grandparent, or under "main" when there is no legend there.
	function generate() {
		// row 1 gets no values, so records start in row 2
		// (row 1 presumably is the header of template.xlsx - confirm)
		$data[1]= array();

		// read one collected field; null when the page did not have it
		$field= function( $arr, $set, $name ) {
			return isset( $arr[$set][$name] ) ? $arr[$set][$name] : null;
		};

		// all files (glob() may return false on error)
		$cache= glob("cache/*.xml");
		if( !$cache ) { $cache= array(); }
		$total= count( $cache );

		foreach( $cache as $i=>$xml ) {

		  // load
		  $doc= new DOMDocument();
		  $doc->load( $xml, LIBXML_NOBLANKS ) or die("XML Error $xml");

		  // media blocks
		  foreach( $doc->documentElement->childNodes as $media ) {

			// garbage
			if( $media->nodeName != "div" ) { continue; }

			// every record starts with empty containers
			$arr= array();
			$tel= array();
			$mob= array();

			// all values
			foreach( $media->getElementsByTagName("label") as $label ) {
				$legend= $label->parentNode->parentNode->firstChild;
				$set= $legend->nodeName=="legend" ? clean( $legend->nodeValue ) : "main";
				$name= clean( $label->nodeValue );

				// the value node may be missing; treat that as an empty value
				$holder= $label->nextSibling;
				$value= ( $holder && $holder->firstChild ) ? $holder->firstChild->nodeValue : "";

				$arr[ $set ][ $name ]= ucf( $value );
			}

			// phone numbers: "iroda" (office) first, "székhely" (seat) as fallback
			$num1= tel( (string) ( $field( $arr, "iroda", "telefonszáma" )  ?: $field( $arr, "székhely", "telefonszáma" ) ) );
			$num2= tel( (string) ( $field( $arr, "iroda", "telefonszáma2" ) ?: $field( $arr, "székhely", "telefonszáma2" ) ) );

			// sort the numbers into mobile and landline, keeping their order
			foreach( array( $num1, $num2 ) as $num ) {
			  if( !$num ) { continue; }
			  if( is_mobie( $num ) ) { $mob[]= $num; } else { $tel[]= $num; }
			}

			// what we need (keys are the 1-based column numbers)
			$data[]= [
				1 => $field( $arr, "main", "kamarainév" ),
				2 => $field( $arr, "main", "státusz" ),
				3 => $field( $arr, "main", "kamara" ),
				4 => mb_strtolower( (string) $field( $arr, "main", "email" ) ),
				5 => $field( $arr, "main", "jogterület" ),
				6 => $field( $arr, "iroda", "neve" ),
				7 => $field( $arr, "iroda", "címe" ) ?: $field( $arr, "székhely", "címe" ),
				8 => isset( $tel[0] ) ? $tel[0] : null,
				9 => isset( $tel[1] ) ? $tel[1] : null,
			   10 => isset( $mob[0] ) ? $mob[0] : null,
			   11 => isset( $mob[1] ) ? $mob[1] : null
			];

		  }
		  progress( $i, $total );

		}

		echo "Saving XLSX <br>";

		// without a fresh copy of the template there is nothing to fill in
		copy( "template.xlsx", "list.xlsx" ) or die("XLSX Error");
		mod_xlsx( "list.xlsx", $data );

		die( "DONE" );
	}
	


	// clean key
	//
	// Turns a label / legend text into an array key: removes all
	// whitespace, "-" and "*" characters and lower-cases the rest.
	// A null input yields an empty string.
	function clean( $str ) {
		// the replacement has to be a string: passing null is deprecated
		// since PHP 8.1
		$str= preg_replace( "@[\s\-\*]@", "", (string) $str );
		return mb_strtolower( (string) $str );
	}
	
	
	// clean value
	//
	// Removes the "NINCS ADAT" placeholder (Hungarian for "no data") and
	// converts the rest to title case. A null input yields an empty string.
	function ucf( $str ) {
		// the replacement has to be a string: passing null is deprecated
		// since PHP 8.1
		$str= str_replace( "NINCS ADAT", "", (string) $str );
		return mb_convert_case( $str, MB_CASE_TITLE );
	}
	
	
	// format
	//
	// Normalises a phone number. The rules below are applied in order,
	// each one to the result of the previous one:
	//   1. drop everything that is not a digit
	//   2. strip a leading 06 / 36 in front of an area code ("1" or two
	//      digits) followed by 6-7 digits
	//   3. put "1" in front of a bare 7-digit number
	//   4. put "+36" in front of an 8-9 digit number
	// Input that matches none of rules 2-4 is returned as plain digits.
	function tel( $str ) {
		$rules= [
		  "@\D@"                           => "",
		  "@^(06|36)?(1|\d{2})(\d{6,7})$@" => "$2$3",
		  "@^(\d{7})$@"                    => "1$1",
		  "@^(\d{8,9})$@"                  => "+36$1"
		];

		return preg_replace( array_keys( $rules ), array_values( $rules ), $str );
	}
	
	
	// test 4 mobile
	//
	// Tells whether a number already normalised by tel() is a mobile
	// number: "+36", one of the prefixes 20 / 30 / 50 / 70, then exactly
	// seven digits. Returns the preg_match() result (1 on match, 0 if not).
	// NOTE(review): any other prefix is treated as a landline - confirm
	// that the prefix list is complete.
	function is_mobie( $tel ) {
		$mobile= "@^\+36(20|30|50|70)(\d{7})$@";
		$hit= preg_match( $mobile, $tel );

		return $hit;
	}
	
	
	

	// modify xlsx values
	//
	// Writes values into the first worksheet (xl/worksheets/sheet1.xml)
	// of an existing .xlsx file.
	//
	//   $file  path of the .xlsx file, modified in place
	//   $arr   [ row number => [ column number => value ] ], both 1-based.
	//          Column numbers are turned into a single letter, so only
	//          columns 1-26 (A-Z) are supported.
	//
	// Integers are stored as numeric cells, everything else as inline
	// strings. Rows and cells that do not exist yet are appended.
	// Ends the request with "XLSX Error" when the archive cannot be
	// opened, read or saved.
	function mod_xlsx( $file, $arr ) {

		// open zip (open() returns true or an error code)
		$zip= new ZipArchive();
		if( $zip->open( $file ) !== true )
		  die("XLSX Error");

		// open sheet1
		$sheet= $zip->getFromName("xl/worksheets/sheet1.xml");
		if( $sheet === false ) {
		  $zip->close();
		  die("XLSX Error");
		}

		// rename the default namespace declaration so that un-prefixed
		// xpath queries and child access work; restored before saving
		$sheet= str_replace( "xmlns=", "oldns=", $sheet );
		$sheet= new SimpleXMLElement( $sheet );

		// Index the existing rows once by their "r" attribute (first one
		// wins) instead of searching the whole, growing sheet for every
		// single cell.
		$rows= array();
		foreach( $sheet->sheetData->row as $rowData ) {
		  $r= (string) $rowData['r'];
		  if( !isset( $rows[$r] ) ) { $rows[$r]= $rowData; }
		}

		// add data (in order)
		ksort( $arr );
		$total= count( $arr );
		foreach( $arr as $r=>$row ) {
		  ksort( $row );
		  $rowData= null;
		  foreach( $row as $c=>$val ) {

			// missing row - looked up / created on the first value only,
			// so a row without values leaves the sheet untouched
			if( $rowData === null ) {
			  if( isset( $rows[$r] ) ) {
				$rowData= $rows[$r];
			  } else {
				$rowData= $sheet->sheetData->addChild("row");
				$rowData['r']= $r;
				$rows[$r]= $rowData;
			  }
			}

			// missing cell (column 1 => "A", 2 => "B", ...)
			$id= chr( $c+64 ).$r;
			$found= $rowData->xpath("c[@r='$id']");
			if( $found ) {
			  $cellData= $found[0];
			} else {
			  $cellData= $rowData->addChild("c");
			  $cellData['r']= $id;
			}

			// int or inline-string
			if( is_int($val) ) {
			  $cellData['t']= "n";
			  $cellData->v= $val;
			} else {
			  $cellData['t']= "inlineStr";
			  $cellData->is->t= (string) $val;
			}

		  }

		  // log
		  progress( $r, $total, 200 );

		}

		// save; only the renamed attribute is restored, cell text that
		// happens to contain "oldns" stays as it is
		$sheet= $sheet->asXML();
		$sheet= str_replace( "oldns=", "xmlns=", $sheet );
		$zip->addFromString( "xl/worksheets/sheet1.xml", $sheet );

		$zip->close() or die("XLSX Error");
	}





	