'http://toolserver.org/~magnus/catscan_rewrite.php?language=fi&depth=30&categories=urheilu&comb%5Blist%5D=1',
    'viihde'=>'http://toolserver.org/~magnus/catscan_rewrite.php?language=fi&depth=30&categories=Televisio-ohjelmat%0D%0AElokuvat%0D%0AYhtyeet%0D%0AMusiikkiteokset%0D%0AVideopelien+teemasivun+luokat&comb%5Blist%5D=1',
    'biologia'=>'http://toolserver.org/~magnus/catscan_rewrite.php?language=fi&depth=9&categories=Biologia&negcats=Filosofian_osa-alueet%0D%0ARuoka+ja+juoma%0D%0AUfot%0D%0AHomoseksuaalisuus%0D%0ASeksuaalipolitiikka%0D%0ASeksiteollisuus%0D%0ASeksuaali-+ja+sukupuoliv%C3%A4hemmist%C3%B6t%0D%0ASeksiv%C3%A4lineet%0D%0ATeollisuusyritykset%0D%0AKuvitteelliset_el%C3%A4imet%0D%0ASodank%C3%A4ynti%0D%0ALiikunta%0D%0AJumaluudet%0D%0AMaan_ulkopuolinen_el%C3%A4m%C3%A4%0D%0AKuolema%0D%0AP%C3%A4ihdepolitiikka%0D%0ANainen%0D%0AInformaatio%0D%0ASysteemit%0D%0AIhmisen+toiminta%0D%0AElokuvat+el%C3%A4imist%C3%A4%0D%0AOngelmat%0D%0AK%C3%A4yt%C3%B6s-+ja+tapatieto%0D%0ATaidot%0D%0AKuvitteellinen%0D%0AK%C3%A4sitteet&ns%5B14%5D=1',
    'arvioidut'=>'http://toolserver.org/~magnus/catscan_rewrite.php?language=fi&categories=Arvioidut+artikkelit&depth=5&ns%5B14%5D=1'
);

/**
 * Send the query-string part of a URL as a POST body to the URL's base address.
 *
 * Everything before the first "?" becomes the request URL and everything
 * after it is sent verbatim as the POST fields.
 *
 * @param string $fullurl URL of the form "base?field=value&...".
 * @return string|bool Response body, or false when the transfer failed.
 */
function curl_get_contents_post($fullurl)
{
    // split() was removed in PHP 7. explode() with a limit of 2 keeps the
    // whole query string together even if it contains a further "?".
    $parts = explode('?', $fullurl, 2);
    $url = $parts[0];
    $postfields = isset($parts[1]) ? $parts[1] : '';

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $postfields);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    $file = curl_exec($curl);
    curl_close($curl);

    return $file;
}

/**
 * Fetch a URL with a GET request, identifying as ZacheBot.
 *
 * @param string $url
 * @return string|bool Response body, or false when the transfer failed.
 */
function curl_get_contents($url)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // Make curl_exec() return the body instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // NOTE(review): peer verification is switched off. It has no effect on the
    // http:// URLs used in this file but would weaken any https:// request.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_USERAGENT, 'ZacheBot, updating flagged revision status info for finnish Wikipedia');

    $response = curl_exec($ch);
    curl_close($ch);

    return $response;
}

/**
 * Open a PDO MySQL connection using the global $cnf settings
 * (keys: host, database, user, password).
 *
 * @return PDO
 */
function db_connect()
{
    global $cnf;

    $dbh = new PDO(
        "mysql:host=".$cnf['host'].";dbname=".$cnf['database'],
        $cnf['user'],
        $cnf['password']
    );

    return $dbh;
}

/**
 * Remove every category row that belongs to a group.
 *
 * @param PDO    $dbh
 * @param string $group_title
 */
function db_delete_group($dbh, $group_title)
{
    $query = "DELETE FROM flaggedrevs_categories WHERE group_title=:group_title";
    $stmt = $dbh->prepare($query);
    // A DELETE yields no result set, so nothing is fetched afterwards.
    $stmt->execute(array(":group_title" => $group_title));
}

/**
 * Record that a category belongs to a group.
 *
 * @param PDO    $dbh
 * @param string $group_title
 * @param string $category_title
 */
function db_add_category_to_group($dbh, $group_title, $category_title)
{
    $query = "INSERT INTO flaggedrevs_categories (group_title, category_title) VALUES (:group_title, :category_title)";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(
        ":group_title" => $group_title,
        ":category_title" => $category_title,
    ));
}

/**
 * Record that an article belongs to a group; existing rows are left alone
 * (INSERT IGNORE).
 *
 * @param PDO    $dbh
 * @param string $group_title
 * @param string $article_title
 */
function db_add_article_to_group($dbh, $group_title, $article_title)
{
    $query = "INSERT IGNORE INTO flaggedrevs_articles (group_title, article_title) VALUES (:group_title, :article_title)";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(
        ":group_title" => $group_title,
        ":article_title" => $article_title,
    ));
}

/**
 * List the groups a category has been assigned to.
 *
 * @param PDO    $dbh
 * @param string $category_title
 * @return array Group titles; empty when the category is in no group.
 */
function db_get_fr_groups_by_category($dbh, $category_title)
{
    $query = "SELECT group_title FROM flaggedrevs_categories WHERE category_title = :category_title";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(":category_title" => $category_title));

    $groups = $stmt->fetchAll(PDO::FETCH_COLUMN);

    return $groups ? $groups : array();
}

/**
 * Empty both the article and the category tables.
 *
 * @param PDO $dbh
 */
function db_purge($dbh)
{
    // The statements contain no placeholders, so they are executed directly
    // rather than prepared with a parameter that does not exist in them.
    $dbh->exec("TRUNCATE flaggedrevs_articles");
    $dbh->exec("TRUNCATE flaggedrevs_categories");
}

/**
 * Tell whether an article is stored under any group.
 *
 * @param PDO    $dbh
 * @param string $article_title
 * @return int 1 when a row exists, 0 otherwise.
 */
function db_is_known($dbh, $article_title)
{
    $query = "SELECT 1 FROM flaggedrevs_articles WHERE article_title = :article_title LIMIT 1";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(":article_title" => $article_title));

    // rowCount() is not dependable for SELECT statements; fetch the row instead.
    return $stmt->fetchColumn() !== false ? 1 : 0;
}

/**
 * Tell whether an article is stored under the given group.
 *
 * @param PDO    $dbh
 * @param string $group_title
 * @param string $article_title
 * @return int 1 when a row exists, 0 otherwise.
 */
function db_in_group($dbh, $group_title, $article_title)
{
    $query = "SELECT 1 FROM flaggedrevs_articles WHERE group_title=:group_title AND article_title = :article_title LIMIT 1";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(
        ":article_title" => $article_title,
        ":group_title" => $group_title,
    ));

    return $stmt->fetchColumn() !== false ? 1 : 0;
}

/**
 * Return one group (other than 'all') that an article is stored under.
 *
 * @param PDO    $dbh
 * @param string $article_title
 * @return string|null Group title, or null when the article has no such group.
 */
function db_get_group($dbh, $article_title)
{
    $query = "SELECT group_title FROM flaggedrevs_articles WHERE group_title!='all' AND article_title = :article_title LIMIT 1";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(":article_title" => $article_title));

    $result = $stmt->fetch();

    // fetch() returns false when nothing matched; do not index into it.
    return $result ? $result['group_title'] : null;
}

/**
 * Tell whether an article is stored under any of the named groups
 * (urheilu, viihde, arvioidut, biologia). Articles outside them are what the
 * request handler reports for the 'muut' group.
 *
 * @param PDO    $dbh
 * @param string $article_title
 * @return int 1 when a row exists, 0 otherwise.
 */
function db_in_skip_groups($dbh, $article_title)
{
    $query = "SELECT 1 FROM flaggedrevs_articles WHERE group_title IN ('urheilu', 'viihde', 'arvioidut', 'biologia') AND article_title = :article_title LIMIT 1";
    $stmt = $dbh->prepare($query);
    $stmt->execute(array(":article_title" => $article_title));

    return $stmt->fetchColumn() !== false ? 1 : 0;
}

/**
 * Truncate both tables and reload the category list of every configured
 * group, pausing five seconds after each remote query.
 */
function update_categories()
{
    global $use_groups;

    $dbh = db_connect();
    db_purge($dbh);

    foreach ($use_groups as $group => $url) {
        update_category($dbh, $group, $url ."&doit=1&format=tsv");
        sleep(5);
    }
}

/**
 * Download the category listing for one group and store its categories.
 *
 * @param PDO    $dbh
 * @param string $group_title
 * @param string $url Listing URL; its query string is sent as a POST body.
 */
function update_category($dbh, $group_title, $url)
{
    echo "Group: $group_title \n";

    $file = curl_get_contents_post($url);
    if ($file === false) {
        // Nothing to parse; keep whatever is already stored for the group.
        echo "Fetch failed for group: $group_title \n";
        return;
    }

    $rows = explode("\n", $file);

    // Only drop the stored categories when the response has more than three rows.
    if (count($rows)>3) {
        db_delete_group($dbh, $group_title);
    }

    foreach ($rows as $k => $row) {
        // The first row is never imported (presumably the TSV header - confirm).
        if ($k==0) continue;
        // Rows containing a pipe are skipped.
        if (strpos($row, "|") !== false) continue;

        $category_title = str_replace("_", " ", trim(str_replace("\"", "", $row)));
        // When the row has a "Luokka:<title>" column followed by a tab, keep only the title.
        if (preg_match("/Luokka:(.*?)\t/", $category_title, $m)) {
            $category_title = trim($m[1]);
        }

        if ($category_title!="") {
            db_add_category_to_group($dbh, $group_title, $category_title);
        }
    }
}

/**
 * Ask the fi.wikipedia API for main-namespace pages with pending changes.
 * Terminates the script when the response has no "query" member.
 *
 * @return array Page titles (at most 500, the limit requested from the API).
 */
function get_oldreviewedpages()
{
    $url = "http://fi.wikipedia.org/w/api.php?action=query&list=oldreviewedpages&ornamespace=0&orlimit=500&format=json&maxlag=5";
    $response = json_decode(curl_get_contents($url));
    if (!isset($response->query)) die("No old reviewed pages\n");

    $query = get_object_vars($response->query);
    $pages = isset($query['oldreviewedpages']) ? $query['oldreviewedpages'] : array();

    $ret = array();
    foreach ($pages as $page) {
        $ret[] = $page->title;
    }

    return $ret;
}

/**
 * Look up the categories of the given articles and store each article under
 * the 'all' group and under every group one of its categories belongs to.
 * Terminates the script when an API response has no "query" member.
 *
 * @param PDO   $dbh
 * @param array $update_titles Article titles that are not yet in the database.
 */
function update_titles($dbh, $update_titles)
{
    // An empty title cannot be looked up.
    $wanted = array();
    foreach ($update_titles as $title) {
        if (trim($title)!="") $wanted[] = $title;
    }

    // The API accepts at most 50 titles per request for ordinary clients.
    // NOTE(review): cllimit=500 is a per-request total and continuation is not
    // followed, so pages with very many categories may be reported incompletely.
    foreach (array_chunk($wanted, 50) as $chunk) {
        $titles = implode("|", array_map('urlencode', $chunk));
        $url = "http://fi.wikipedia.org/w/api.php?action=query&prop=categories&format=json&cllimit=500&titles=".$titles;

        $categories = json_decode(curl_get_contents($url));
        if (!isset($categories->query)) die("No categories\n");

        $categories = get_object_vars($categories->query);
        $pages = isset($categories['pages']) ? $categories['pages'] : array();

        foreach ($pages as $page) {
            if (!isset($page->title)) continue;

            db_add_article_to_group($dbh, "all", $page->title);

            // Pages without categories have no "categories" member.
            $page_categories = isset($page->categories) ? $page->categories : array();
            foreach ($page_categories as $cat) {
                $category_title = trim(str_replace("Luokka:", "", $cat->title));
                foreach (db_get_fr_groups_by_category($dbh, $category_title) as $group) {
                    db_add_article_to_group($dbh, $group, $page->title);
                }
            }
        }
    }
}

/**
 * Filter the requested titles down to those that belong to the selected groups.
 *
 * The pseudo-group 'muut' selects articles that are in none of the named
 * groups. A title is returned once for every selected group it matches.
 * Terminates the script when no group was selected.
 *
 * @param PDO   $dbh
 * @param array $requested_titles
 * @param array $selected_groups
 * @return array Matching titles.
 */
function handle_get_requested_titles($dbh, $requested_titles, $selected_groups)
{
    if (count($selected_groups)==0) die("Error: Group is not set\n");

    $ret = array();
    foreach ($requested_titles as $r) {
        if (trim($r)=="") continue;

        foreach ($selected_groups as $group) {
            if ($group=='muut' && !db_in_skip_groups($dbh, $r)) {
                $ret[] = $r;
            } else if ($group!='skipgroup' && db_in_group($dbh, $group, $r)) {
                // The original compared the bare word `group`, i.e. an
                // undefined constant, which is a fatal error on PHP 8.
                $ret[] = $r;
            }
        }
    }

    return $ret;
}

/**
 * Sort the pages with pending changes into buckets keyed by capitalised
 * group name; pages without a group go to 'Muut'.
 *
 * @param PDO        $dbh
 * @param array|null $requested_titles Titles to sort; fetched from the API
 *                                     when omitted.
 * @return array Map of group name => list of titles.
 */
function handle_get_grouped_oldreviewedpages($dbh, $requested_titles = null)
{
    $ret = array(
        'Arvioidut' => array(),
        'Biologia' => array(),
        'Urheilu' => array(),
        'Viihde' => array(),
        'Muut' => array(),
    );

    if ($requested_titles === null) {
        $requested_titles = get_oldreviewedpages();
    }

    foreach ($requested_titles as $r) {
        $group = db_get_group($dbh, $r);
        if (!isset($group)) $group = "muut";
        $group = ucfirst($group);

        if (!isset($ret[$group])) $ret[$group] = array();
        $ret[$group][] = $r;
    }

    return $ret;
}

// Command-line maintenance mode: rebuild the category tables and stop.
// $argv is not defined for ordinary web requests, so test for it first.
if (isset($argv[1]) && $argv[1]=="purge") {
    update_categories();
    die("Purging...");
}

// Read every request value once; absent or non-string values count as "".
$action = (isset($_GET['action']) && is_string($_GET['action'])) ? $_GET['action'] : "";
$group_param = (isset($_GET['group']) && is_string($_GET['group'])) ? $_GET['group'] : "";
$titles_param = (isset($_GET['titles']) && is_string($_GET['titles'])) ? $_GET['titles'] : "";
$query_string = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : "";

$requested_titles = array();
$selected_groups = array();
$update_titles = array();
$cache = 0;

$dbh = db_connect();

if ($action=="show_config") {
    // NOTE(review): the empty strings below look like markup that is missing
    // from this copy of the file - confirm against the deployed version.
    echo "";
    foreach ($use_groups as $k => $v) {
        echo "\n\n" . $k ."\n\n";
        echo "" . $v ."";
    }
    echo "";
    die(0);
}

if (trim($group_param) != "") {
    $selected_groups = explode("|", trim(strtolower($group_param)));
    foreach ($selected_groups as $group) {
        if (!array_key_exists($group, $use_groups) && $group!="muut") {
            // Escape the name: it comes straight from the request.
            die("Unknown group '" . htmlspecialchars($group, ENT_QUOTES) . "'");
        }
    }
}

if (trim($titles_param)=="oldreviewedpages" || trim($action)=="oldreviewedpages") {
    $requested_titles = get_oldreviewedpages();
} else if (trim($titles_param)!="") {
    $requested_titles = explode("|", $titles_param);
} else if (preg_match("/titles=(.*?)\z/ism", $query_string, $m)) {
    // Fall back to everything after "titles=" in the raw query string.
    $requested_titles = explode("|", urldecode($m[1]));
}

// Normalise underscores to spaces and collect the titles the database lacks.
foreach ($requested_titles as $k => $r) {
    $r = str_replace("_", " ", $r);
    $requested_titles[$k] = $r;
    if (!db_is_known($dbh, $r)) $update_titles[] = $r;
}
update_titles($dbh, $update_titles);

$ret = array();
switch ($action) {
    case 'show_config':
        break;
    case 'purge':
        break;
    case 'oldreviewedpages':
        // Reuse the list fetched above instead of querying the API again.
        $ret = handle_get_grouped_oldreviewedpages($dbh, $requested_titles);
        $cache = 0;
        break;
    default:
        $ret = handle_get_requested_titles($dbh, $requested_titles, $selected_groups);
        $cache = 1;
        break;
}

if ($cache==1) {
    // Let clients cache per-title answers for two weeks.
    $expires = 60*60*24*14;
    header("Pragma: public");
    // The directive is spelled "max-age"; "maxage" is ignored by caches.
    header("Cache-Control: max-age=".$expires);
    header('Expires: ' . gmdate('D, d M Y H:i:s', time()+$expires) . ' GMT');
}

header('Access-Control-Allow-Origin: *');
header("Content-Type: application/json");
echo json_encode($ret);
?>