<?php
/* SearchEngine version 2.0 (customised for Logan version 3.0) is a metadata site search class. It uses text searches
as well as direct DB searches.

The class does not extend the DatabaseCall class; instead, an instantiated database object is passed in on
initialisation and is used for access to the site-specific stored procedures.

Contains a PHP5 implementation of the Porter Stemmer algorithm, used to reduce English words to their stem.
For example, "connections" would be reduced to "connect".
Usage: $stem = $oSearchEngine->Stem($word);
*/

class SearchEngine {
	// Injected database access object. Every query made by this class is delegated to it.
	public $oDbCall;
	// Authorisation group forwarded to the database calls.
	public $AuthGroup;
	// Rows returned by the most recent GetPhotoStream() call.
	public $aResults = array();
	// Copy of the database object's iResultsCount after the most recent Search()/GetPhotoStream().
	public $iResultsCount = 0;
	// Page size used by Search() and GetPhotoStream().
	public $iResultsPerPage;
	// Page size used by SearchLatest().
	public $iHomeResultsPerPage;
	// Legacy alias of $iResultsCount. Earlier versions created this property on the fly inside Search();
	// it is declared and kept in sync so external readers of $obj->ResultsCount keep working.
	public $ResultsCount = 0;
	// Copy of the database object's aDistinctFilters after the most recent Search().
	public $DistinctFilters = array();

	// Porter stemmer building blocks: a single consonant / a single vowel ("y" counts as either, by position).
	public $sRegexConsonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';
	public $sRegexVowel = '(?:[aeiou]|(?<![aeiou])y)';
	// Characters (and the &#039; entity) replaced by a space when a search query is prepared.
	public $sSymbols = array('&#039;','/','\\','\'','"',',','.','<','>','?',';',':','[',']','{','}','|','=','+','-','_',')','(','*','&','^','%','$','#','@','!','~','`'	);
	// Stem cache used by Stem() when caching is requested: input word => stem.
	public $cache = array();
	// Region names indexed by region id. Index 0 is the empty "no region" entry.
	public $aRegions = array("", "Asia", "Middle East, North Africa, and Greater Arabia", "Europe", "North America", "Central America and the Caribbean", "South America", "Sub-Saharan Africa", "Australia and Oceania");

	// Porter step 2 rules, keyed by the penultimate letter of the word. Each rule is array(suffix, replacement).
	private static $aStep2Rules = array(
		'a' => array(array('ational', 'ate'), array('tional', 'tion')),
		'c' => array(array('enci', 'ence'), array('anci', 'ance')),
		'e' => array(array('izer', 'ize')),
		'g' => array(array('logi', 'log')),
		'l' => array(array('entli', 'ent'), array('ousli', 'ous'), array('alli', 'al'), array('bli', 'ble'), array('eli', 'e')),
		'o' => array(array('ization', 'ize'), array('ation', 'ate'), array('ator', 'ate')),
		's' => array(array('iveness', 'ive'), array('fulness', 'ful'), array('ousness', 'ous'), array('alism', 'al')),
		't' => array(array('biliti', 'ble'), array('aliti', 'al'), array('iviti', 'ive')),
	);

	// Porter step 3 rules, same layout as step 2.
	private static $aStep3Rules = array(
		'a' => array(array('ical', 'ic')),
		's' => array(array('alise', 'al'), array('ness', '')),
		't' => array(array('icate', 'ic'), array('iciti', 'ic')),
		'u' => array(array('ful', '')),
		'v' => array(array('ative', '')),
		'z' => array(array('alize', 'al')),
	);

	// Porter step 4 rules (suffix removal). The penultimate letter 'o' is handled separately in _step4().
	private static $aStep4Rules = array(
		'a' => array(array('al', '')),
		'c' => array(array('ance', ''), array('ence', '')),
		'e' => array(array('er', '')),
		'i' => array(array('ic', '')),
		'l' => array(array('able', ''), array('ible', '')),
		'n' => array(array('ant', ''), array('ement', ''), array('ment', ''), array('ent', '')),
		's' => array(array('ism', '')),
		't' => array(array('ate', ''), array('iti', '')),
		'u' => array(array('ous', '')),
		'v' => array(array('ive', '')),
		'z' => array(array('ize', '')),
	);

	// Store the database object, the auth group and the two page sizes.
	public function __construct($oDbCall, $AuthGroup, $iResultsPerPage, $iHomeResultsPerPage) {
		$this->oDbCall = $oDbCall;
		$this->AuthGroup = $AuthGroup;
		$this->iResultsPerPage = $iResultsPerPage;
		$this->iHomeResultsPerPage = $iHomeResultsPerPage;
	}

	// Delegates to the database object's GetUniqueCountry() for the current auth group.
	public function GetDistinctCountries() {
		return $this->oDbCall->GetUniqueCountry($this->AuthGroup);
	}

	// Delegates to the database object's GetUniqueRegions() for the current auth group.
	public function GetDistinctRegions() {
		return $this->oDbCall->GetUniqueRegions($this->AuthGroup);
	}

	// Delegates to the database object's GetUniqueYears() for the current auth group.
	public function GetDistinctYears() {
		return $this->oDbCall->GetUniqueYears($this->AuthGroup);
	}

	// Region name for a region id, or null when the id is not in $aRegions.
	public function GetRegion($i) {
		return isset($this->aRegions[$i]) ? $this->aRegions[$i] : null;
	}

	// Region id for a region name, or false when the name is unknown.
	// Id 0 (the empty name) is a valid result, so callers must compare against false with ===.
	public function GetRegionId($name) {
		if(!is_scalar($name) && !is_null($name)) {
			return false;
		}
		return array_search((string) $name, $this->aRegions, true);
	}

	// Result count recorded by the most recent Search() or GetPhotoStream() call.
	public function GetResultCount() {
		return $this->iResultsCount;
	}

	// Distinct filters recorded by the most recent Search() call.
	public function GetDistinctFilters() {
		return $this->DistinctFilters;
	}

	// Page size used by Search() and GetPhotoStream().
	public function GetResultsPerPage() {
		return $this->iResultsPerPage;
	}

	// Get study site details by name.
	// This class defines no GetMetadataSectionName() of its own, so unless a subclass provides one the
	// call is forwarded to the database object.
	// NOTE(review): assumes the database object exposes GetMetadataSectionName() - confirm.
	public function GetSiteDetailsByName($sName) {
		if(method_exists($this, 'GetMetadataSectionName')) {
			return $this->GetMetadataSectionName($sName);
		}
		return $this->oDbCall->GetMetadataSectionName($sName);
	}

	// Get study site details by ID.
	// This class defines no GetMetadata() of its own, so unless a subclass provides one the call is
	// forwarded to the database object.
	// NOTE(review): assumes the database object exposes GetMetadata() - confirm.
	public function GetSiteDetails($iId) {
		if(method_exists($this, 'GetMetadata')) {
			return $this->GetMetadata($iId);
		}
		return $this->oDbCall->GetMetadata($iId);
	}

	// Fetch one page of the photo stream (page numbers start at 0) and record the result count.
	// NOTE(review): the upper bound passed here is offset + page size, whereas Search() and SearchLatest()
	// pass the page size alone. Left unchanged because the contract of SearchPhotoStream() is not visible
	// here - confirm which form the database layer expects.
	public function GetPhotoStream($pagination = 0) {
		$limit_from = $pagination * $this->iResultsPerPage;
		$limit_to = $limit_from + $this->iResultsPerPage;

		$this->aResults = $this->oDbCall->SearchPhotoStream($limit_from, $limit_to, $this->AuthGroup);
		$this->iResultsCount = $this->ResultsCount = $this->oDbCall->iResultsCount;
		return $this->aResults;
	}

	// Fetch all tag cloud content for the current auth group.
	public function TagCloudFetchAllContent() {
		return $this->oDbCall->TagCloudFetchAllContent($this->AuthGroup);
	}

	// Delete all cloud_tag rows.
	public function TagCloudDeleteAll() {
		return $this->oDbCall->TagCloudDeleteAll();
	}

	// Insert a word into cloud_tag.
	public function TagCloudInsertWord($weight, $tagname, $url, $auth) {
		return $this->oDbCall->TagCloudInsertWord($weight, $tagname, $url, $auth);
	}

	// Fetch all cloud tags for the given auth.
	public function TagCloudFetchTags($auth) {
		return $this->oDbCall->TagCloudFetchTags($auth);
	}

	// Search the site.
	// $search_areas  Comma separated list handed to the database layer (default: image folders, images, blogs).
	// $keywords      Free text. It is lower-cased, stripped of symbols and reduced to the words longer than
	//                three characters before being passed on. False/empty means no keyword filter.
	// $year, $country, $region  Optional filters, passed through untouched.
	// $pagination    Page number, first page = 0.
	// Returns whatever the database object's Search() returns, and records the result count and the
	// distinct filters for GetResultCount() / GetDistinctFilters().
	public function Search($search_areas = "image_folders,images,blogs", $keywords = false, $year = false, $country = false, $region = false, $pagination = 0) {
		$limit_from = $pagination * $this->iResultsPerPage;
		$limit_to = $this->iResultsPerPage;

		if($keywords) {
			$keywords = $this->_PrepareKeywords($keywords);
		}

		$results = $this->oDbCall->Search($search_areas, $keywords, $year, $country, $region, $limit_from, $limit_to, $this->AuthGroup);
		$this->iResultsCount = $this->ResultsCount = $this->oDbCall->iResultsCount;
		$this->DistinctFilters = $this->oDbCall->aDistinctFilters;
		return $results;
	}

	// Home page - fetch the latest content from the given search areas, one page at a time (first page = 0).
	public function SearchLatest($search_areas = "image_folders,blogs", $pagination = 0) {
		$limit_from = $pagination * $this->iHomeResultsPerPage;
		$limit_to = $this->iHomeResultsPerPage;

		return $this->oDbCall->SearchLatest($search_areas, $limit_from, $limit_to, $this->AuthGroup);
	}

	// Find and highlight search terms in a result field. Each hit is wrapped in
	// <span class="search-high-word">...</span> for CSS highlighting.
	//
	// A hit must be delimited by a space, comma, hyphen or the start/end of the content. Multi-word phrases
	// built from the keywords are tried before the single words.
	//
	// $iCharRange == 0: the whole content is returned, with every occurrence of the first term that occurs
	//                   at all highlighted.
	// $iCharRange != 0: a summary is returned, made of up to $iNumResults snippets joined by " ... ". Each
	//                   snippet is a hit with round($iCharRange / 2) bytes of context on either side.
	//                   $iCharTotal is the target overall length: if no snippet was found the content is
	//                   truncated to it, and if the snippets are shorter than it the start of the content
	//                   is prepended.
	// Lengths are counted in bytes, so multi-byte characters may be cut at snippet edges.
	public function ResultsHighlight($sKeywords, $sContent, $iCharRange = 0, $iCharTotal = 0, $iNumResults = 0) {
		$sContent = (string) $sContent;
		$aTerms = $this->_BuildHighlightTerms(trim(strtolower((string) $sKeywords)));

		if(!$iCharRange) {
			return $this->_HighlightInline($aTerms, $sContent);
		}

		$iCharSeg = max(0, (int) round($iCharRange / 2));
		return $this->_HighlightSummary($aTerms, $sContent, $iCharSeg, (int) $iCharTotal, (int) $iNumResults);
	}

	// Stemmer entry point: reduce an English word to its Porter stem.
	// $cache  When true, the result is looked up in / stored to $this->cache under the word as passed in.
	public function Stem($word, $cache = false) {
		$word = (string) $word;
		if(strlen($word) <= 2) {
			return $word;
		}

		if($cache && isset($this->cache[$word])) {
			return $this->cache[$word];
		}

		// Remove: 've, n't, 'd
		$stem = preg_replace("/('ve|n't|'d)$/", '', $word);
		$stem = $this->_step1ab($stem);
		$stem = $this->_step1c($stem);
		$stem = $this->_step2($stem);
		$stem = $this->_step3($stem);
		$stem = $this->_step4($stem);
		$stem = $this->_step5($stem);

		// Keyed by the untouched input so that the lookup above can actually hit for words such as "they've".
		if($cache) {
			$this->cache[$word] = $stem;
		}
		return $stem;
	}

	// Porter step 1a (plurals) and 1b (-eed, -ed, -ing).
	public function _step1ab($word) {
		// Part a: the first suffix that matches wins, so the longer endings are listed first.
		if(substr($word, -1) == 's') {
			$aPlurals = array(array('sses', 'ss'), array('ies', 'i'), array('ss', 'ss'), array('s', ''));
			foreach($aPlurals as $aRule) {
				if($this->_replace($word, $aRule[0], $aRule[1])) {
					break;
				}
			}
		}

		// Part b, first rule: a word ending in -eed is fully handled here, whether or not it was shortened.
		if(substr($word, -2, 1) == 'e' && $this->_replace($word, 'eed', 'ee', 0)) {
			return $word;
		}

		// -ing / -ed are only removed when the remaining stem contains a vowel.
		$sVowelRun = '#' . $this->sRegexVowel . '+#';
		$bStripped = (preg_match($sVowelRun, substr($word, 0, -3)) && $this->_replace($word, 'ing', ''))
			|| (preg_match($sVowelRun, substr($word, 0, -2)) && $this->_replace($word, 'ed', ''));
		if(!$bStripped) {
			return $word;
		}

		// Tidy up the stem that is left after removing -ing / -ed.
		if($this->_replace($word, 'at', 'ate') || $this->_replace($word, 'bl', 'ble') || $this->_replace($word, 'iz', 'ize')) {
			return $word;
		}

		$sTail = substr($word, -2);
		if($this->_doubleConsonant($word) && $sTail != 'll' && $sTail != 'ss' && $sTail != 'zz') {
			// Double consonant ending: drop one of the pair.
			$word = substr($word, 0, -1);
		} elseif($this->_m($word) == 1 && $this->_cvc($word)) {
			$word .= 'e';
		}
		return $word;
	}

	// Porter step 1c: a final "y" becomes "i" when the rest of the word contains a vowel.
	public function _step1c($word) {
		$sVowelRun = '#' . $this->sRegexVowel . '+#';
		if(substr($word, -1) == 'y' && preg_match($sVowelRun, substr($word, 0, -1))) {
			$this->_replace($word, 'y', 'i');
		}
		return $word;
	}

	// Porter step 2: map double suffixes to single ones (requires m > 0 for the remaining stem).
	public function _step2($word) {
		return $this->_applySuffixRules($word, self::$aStep2Rules, 0);
	}

	// Porter step 3: further suffix simplification (requires m > 0 for the remaining stem).
	public function _step3($word) {
		return $this->_applySuffixRules($word, self::$aStep3Rules, 0);
	}

	// Porter step 4: remove suffixes (requires m > 1 for the remaining stem).
	public function _step4($word) {
		if(substr($word, -2, 1) == 'o') {
			// -ion is only removed after "s" or "t"; otherwise the only candidate is -ou.
			$sTail = substr($word, -4);
			if($sTail == 'tion' || $sTail == 'sion') {
				$this->_replace($word, 'ion', '', 1);
			} else {
				$this->_replace($word, 'ou', '', 1);
			}
			return $word;
		}
		return $this->_applySuffixRules($word, self::$aStep4Rules, 1);
	}

	// Porter step 5: final "e" and final double "l".
	public function _step5($word) {
		// Part a: a final "e" is removed only when the rest of the word has m > 1.
		// NOTE(review): the original Porter rule that also removes the "e" when m == 1 and the stem does not
		// end consonant-vowel-consonant was already disabled in this file; it is deliberately left disabled.
		if(substr($word, -1) == 'e' && $this->_m(substr($word, 0, -1)) > 1) {
			$this->_replace($word, 'e', '');
		}

		// Part b: "ll" becomes "l" when m > 1.
		if($this->_m($word) > 1 && $this->_doubleConsonant($word) && substr($word, -1) == 'l') {
			$word = substr($word, 0, -1);
		}
		return $word;
	}

	/*
	Replaces the first string with the second, at the end of the string. If the fourth
	arg is given, then the preceding string must have an m count greater than it.
	$str   String to check (modified in place)
	$check Ending to check for
	$repl  Replacement string
	$m     Optional m() value the preceding string must exceed
	return bool Whether the $check string was at the end of the $str string. True does not necessarily mean that it was replaced.
	*/
	public function _replace(&$str, $check, $repl, $m = null) {
		$iCut = 0 - strlen($check);
		if(substr($str, $iCut) != $check) {
			return false;
		}
		$sHead = substr($str, 0, $iCut);
		if(is_null($m) || $this->_m($sHead) > $m) {
			$str = $sHead . $repl;
		}
		return true;
	}

	/*
	m() measures the number of consonant sequences in $str. if c is a consonant sequence and v a vowel sequence, and <..> indicates arbitrary presence,
	<c><v>       gives 0
	<c>vc<v>     gives 1
	<c>vcvc<v>   gives 2
	<c>vcvcvc<v> gives 3
	$str The string to return the m count for
	return int	The m count
	*/
	public function _m($str) {
		$c = $this->sRegexConsonant;
		$v = $this->sRegexVowel;
		// Drop the optional leading consonants and trailing vowels, then count the vowel+consonant groups left.
		$str = preg_replace('#^' . $c . '+#', '', $str);
		$str = preg_replace('#' . $v . '+$#', '', $str);
		preg_match_all('#(' . $v . '+' . $c . '+)#', $str, $aGroups);
		return count($aGroups[1]);
	}

	/*
	Returns true/false as to whether the given string contains two of the same consonant next to each other at the end of the string.
	$str String to check
	return bool	Result
	*/
	public function _doubleConsonant($str) {
		// The pattern is assembled by concatenation on purpose: inside a double-quoted string "$c[2]" is read
		// by PHP as a string offset of $c, which silently turned the pattern into "#:$#".
		$sPattern = '#' . $this->sRegexConsonant . '{2}$#';
		return preg_match($sPattern, $str, $aPair) && $aPair[0][0] == $aPair[0][1];
	}

	/*
	Checks for ending CVC sequence where second C is not W, X or Y
	$str String to check
	return bool	Result
	*/
	public function _cvc($str) {
		$c = $this->sRegexConsonant;
		$v = $this->sRegexVowel;
		if(!preg_match('#(' . $c . $v . $c . ')$#', $str, $aTail) || strlen($aTail[1]) != 3) {
			return false;
		}
		$sLast = $aTail[1][2];
		return $sLast != 'w' && $sLast != 'x' && $sLast != 'y';
	}

	// Remove symbols from string. Used to prepare a search query.
	// Every entry of $sSymbols is replaced by a space, in list order, and the result is trimmed.
	public function _RemoveSymbols($string) {
		return trim(str_replace($this->sSymbols, ' ', $string));
	}

	// Turn raw search box input into the keyword string handed to the database layer:
	// lower-case, symbols replaced by spaces, and only the words longer than three characters kept.
	// Commas are among the removed symbols, so splitting on spaces is all that is needed.
	// Stemming was knocked out in Nov 2014 and may be added back later.
	private function _PrepareKeywords($keywords) {
		$sClean = $this->_RemoveSymbols(strtolower((string) $keywords));
		$aKept = array();
		foreach(explode(" ", $sClean) as $sWord) {
			if(strlen($sWord) > 3) {
				$aKept[] = $sWord;
			}
		}
		return trim(implode(" ", $aKept));
	}

	// Try each array(suffix, replacement) rule registered for the word's penultimate letter and stop at the
	// first suffix that is present, whether or not the m condition allowed the replacement.
	private function _applySuffixRules($word, $aRulesByLetter, $m) {
		$sKey = substr($word, -2, 1);
		if(!is_string($sKey) || !isset($aRulesByLetter[$sKey])) {
			return $word;
		}
		foreach($aRulesByLetter[$sKey] as $aRule) {
			if($this->_replace($word, $aRule[0], $aRule[1], $m)) {
				break;
			}
		}
		return $word;
	}

	// Build the list of regex-safe search terms for ResultsHighlight(): every multi-word phrase first
	// (for each start word, longest phrase first), then the single words in their original order.
	// Returns an empty list when there are no keywords.
	private function _BuildHighlightTerms($sKeywords) {
		$aWords = preg_split('/\s+/', $sKeywords, -1, PREG_SPLIT_NO_EMPTY);
		if(!$aWords) {
			return array();
		}

		// Keywords are user input and must not be able to change the pattern.
		$aQuoted = array();
		foreach($aWords as $sWord) {
			$aQuoted[] = preg_quote($sWord, '/');
		}

		$iWords = count($aQuoted);
		$aPhrases = array();
		for($iFirst = 0; $iFirst < $iWords - 1; $iFirst++) {
			for($iLast = $iWords - 1; $iLast > $iFirst; $iLast--) {
				$aPhrases[] = implode(' ', array_slice($aQuoted, $iFirst, $iLast - $iFirst + 1));
			}
		}
		return array_merge($aPhrases, $aQuoted);
	}

	// Pattern matching one term delimited by space, comma, hyphen or the start/end of the string.
	// Lookarounds are used so the delimiters are not part of the match.
	private function _TermPattern($sTerm) {
		return '/(?<![^ ,\-])' . $sTerm . '(?![^ ,\-])/i';
	}

	// Whole-content mode: highlight every occurrence of the first term that occurs at all.
	private function _HighlightInline($aTerms, $sContent) {
		foreach($aTerms as $sTerm) {
			$iHits = 0;
			$sMarked = preg_replace($this->_TermPattern($sTerm), '<span class="search-high-word">$0</span>', $sContent, -1, $iHits);
			if($sMarked !== null && $iHits > 0) {
				return $sMarked;
			}
		}
		return $sContent;
	}

	// Summary mode: collect up to $iNumResults highlighted snippets and assemble the summary string.
	private function _HighlightSummary($aTerms, $sContent, $iCharSeg, $iCharTotal, $iNumResults) {
		$iContentLen = strlen($sContent);
		$aSegments = array();
		// Same-length working copy of the content. Hits that have been dealt with are overwritten with NUL
		// bytes so that the single words of an already matched phrase are not reported a second time, while
		// byte offsets keep lining up with the real content.
		$sWork = $sContent;

		foreach($aTerms as $sTerm) {
			if(count($aSegments) >= $iNumResults) {
				break;
			}
			if(!preg_match_all($this->_TermPattern($sTerm), $sWork, $aFound, PREG_OFFSET_CAPTURE)) {
				continue;
			}
			$aSpans = $aFound[0];

			// End offset of the snippet emitted last for this term; hits inside it are already on display.
			$iCovered = -1;
			foreach($aSpans as $aSpan) {
				if(count($aSegments) >= $iNumResults) {
					break;
				}
				$iPos = $aSpan[1];
				$iLen = strlen($aSpan[0]);
				if($iPos + $iLen <= $iCovered) {
					continue;
				}
				$iFrom = max(0, $iPos - $iCharSeg);
				$iTo = min($iContentLen, $iPos + $iLen + $iCharSeg);
				$aSegments[] = $this->_MarkSpans(substr($sContent, $iFrom, $iTo - $iFrom), $iFrom, $aSpans);
				$iCovered = $iTo;
			}

			foreach($aSpans as $aSpan) {
				$iLen = strlen($aSpan[0]);
				$sWork = substr_replace($sWork, str_repeat("\0", $iLen), $aSpan[1], $iLen);
			}
		}

		if(!$aSegments) {
			// Nothing to highlight: fall back to the (possibly truncated) start of the content.
			if($iCharTotal > 0 && $iContentLen > $iCharTotal) {
				return substr($sContent, 0, max(0, $iCharTotal - 3)) . "...";
			}
			return $sContent;
		}

		$sSummary = " ... " . implode(" ... ", $aSegments) . " ... ";

		// If the snippets leave room within the target length, lead in with the start of the content.
		$iSpare = min($iContentLen, $iCharTotal) - strlen($sSummary);
		if(strlen($sSummary) < $iCharTotal && $iSpare > 10) {
			$sSummary = substr($sContent, 0, $iSpare) . $sSummary;
		}
		return $sSummary;
	}

	// Wrap every hit that lies completely inside the snippet in the highlight span.
	// $sSegment  Snippet text
	// $iOffset   Byte offset of the snippet within the full content
	// $aSpans    Hits as array(text, byte offset within the full content), in ascending offset order
	private function _MarkSpans($sSegment, $iOffset, $aSpans) {
		$iEnd = $iOffset + strlen($sSegment);
		// Work backwards so earlier offsets stay valid while markup is inserted.
		for($i = count($aSpans) - 1; $i >= 0; $i--) {
			$iPos = $aSpans[$i][1];
			$iLen = strlen($aSpans[$i][0]);
			if($iPos >= $iOffset && $iPos + $iLen <= $iEnd) {
				$sSegment = substr_replace($sSegment, '<span class="search-high-word">' . $aSpans[$i][0] . '</span>', $iPos - $iOffset, $iLen);
			}
		}
		return $sSegment;
	}
}
?>