-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNetSearcher.php
More file actions
162 lines (114 loc) · 4.06 KB
/
NetSearcher.php
File metadata and controls
162 lines (114 loc) · 4.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
<?php
include('vendor/autoload.php');
use Google\Client;
/**
 * Finds web pages that may contain fragments of a given text.
 *
 * The text is cut into short word windows (queries); every query is sent to
 * the Google Custom Search API twice (once as an exact phrase, once as a
 * regular query) and the resulting links are collected.
 */
class NetSearcher
{
    const GOOGLE_API_KEY = "";
    const GOOGLE_APP_NAME = "Plagiat";
    const GCSE_SEARCH_ENGINE_ID = "";

    // Number of words taken into a single query, per search type.
    const QUERY_WORDS_LENGTH_HARD = 5;
    const QUERY_WORDS_LENGTH_MIDDLE = 8;
    const QUERY_WORDS_LENGTH_EASY = 10;

    // Stride multiplier: consecutive queries start (words * step) words apart.
    const SEARCH_STEP_HARD = 1;
    const SEARCH_STEP_MIDDLE = 3;
    const SEARCH_STEP_EASY = 4;

    const SEARCH_RESULT_PAGES = 1;

    // Expected page size. It is never sent to the API (no `num` parameter is
    // set), so changing it does not change how many items come back; it is
    // only used to compute page offsets and to detect the last page.
    const SEARCH_RESULT_ITEMS = 10;

    /** @var mixed Search type selector: 1 = easy, 2 = middle, 3 = hard; anything else = easy. */
    private $searchType;

    /** @var object|null Lazily created Custom Search service, reused across queries. */
    private $service = null;

    /**
     * @param mixed $searchType 1 (easy), 2 (middle) or 3 (hard); other values fall back to easy.
     */
    public function __construct($searchType)
    {
        $this->searchType = $searchType;
    }

    /**
     * Runs the search for every query derived from the text.
     *
     * Results of different queries are concatenated as-is, so the same link
     * may appear more than once if several queries return it.
     *
     * @param string $text Text to look up.
     * @return array List of stdClass objects with `link` and `title` properties.
     */
    public function perform($text)
    {
        $links = array();

        foreach ($this->getSearchQueries($text) as $query) {
            foreach ($this->performSearchRequest($query) as $link) {
                $links[] = $link;
            }
        }

        return $links;
    }

    /**
     * Splits the text into search queries according to the search type.
     *
     * Each query holds up to N consecutive words; consecutive queries start
     * N * step words apart, so with step > 1 part of the text is skipped.
     *
     * @param string $text Source text.
     * @return array List of query strings; empty when the text has no words.
     */
    public function getSearchQueries($text)
    {
        // Split on any whitespace run and drop empty tokens, so blank input
        // produces no queries and repeated spaces do not count as words.
        $words = preg_split('/\s+/', $this->clearTextFromSymbols($text), -1, PREG_SPLIT_NO_EMPTY);
        if ($words === false) {
            $words = array();
        }

        switch ($this->searchType) {
            // `case 1` stays first so loose matching picks the same branch
            // as before; unknown types share the easy settings.
            case 1:
            default:
                $step = self::SEARCH_STEP_EASY;
                $wordsCount = self::QUERY_WORDS_LENGTH_EASY;
                break;
            case 2:
                $step = self::SEARCH_STEP_MIDDLE;
                $wordsCount = self::QUERY_WORDS_LENGTH_MIDDLE;
                break;
            case 3:
                $step = self::SEARCH_STEP_HARD;
                $wordsCount = self::QUERY_WORDS_LENGTH_HARD;
                break;
        }

        $queries = array();
        $stride = $wordsCount * $step;
        $total = count($words);

        for ($offset = 0; $offset < $total; $offset += $stride) {
            $queries[] = implode(' ', array_slice($words, $offset, $wordsCount));
        }

        return $queries;
    }

    /**
     * Replaces line breaks with spaces so the text can be split into words.
     *
     * Handles "\r\n" and lone "\r" as well as "\n", so Windows line endings
     * do not leave carriage returns glued to words.
     *
     * @param string $text Raw text.
     * @return string Text with every line break replaced by a space.
     */
    public function clearTextFromSymbols($text)
    {
        return str_replace(array("\r\n", "\r", "\n"), ' ', $text);
    }

    /**
     * Tells whether a link is already present in a list of results.
     *
     * @param string $link  URL to look for.
     * @param array  $array Results (objects or arrays exposing a `link` field).
     * @return bool True when some entry has exactly this link.
     */
    public function isLinkInResult($link, $array)
    {
        // in_array() instead of `array_search(...) != null`: a hit at index 0
        // compared loosely equal to null and was reported as "not found".
        return in_array($link, array_column($array, 'link'), true);
    }

    /**
     * Fetches up to SEARCH_RESULT_PAGES pages of results for one query.
     *
     * @param object $service    Custom Search service; must expose `cse->listCse()`.
     * @param string $query      Query text.
     * @param bool   $exactTerms Send the query as `exactTerms` instead of `q`.
     * @return array Unique results as stdClass objects with `link` and `title`.
     */
    private function performRequest($service, $query, $exactTerms = false)
    {
        $found = array();

        for ($page = 0; $page < self::SEARCH_RESULT_PAGES; $page++) {
            $optParams = array(
                'cx' => self::GCSE_SEARCH_ENGINE_ID,
                // `start` is a 1-based result index: page 0 begins at 1,
                // page 1 at 11, and so on.
                'start' => $page * self::SEARCH_RESULT_ITEMS + 1,
            );
            $optParams[$exactTerms ? 'exactTerms' : 'q'] = $query;

            $items = $service->cse->listCse($optParams)->getItems();

            // Defensive: treat a missing/empty item list as "no more results"
            // instead of iterating or counting a non-array value.
            if (empty($items)) {
                break;
            }

            foreach ($items as $item) {
                if ($this->isLinkInResult($item->link, $found)) {
                    continue;
                }
                $entry = new stdClass();
                $entry->link = $item->link;
                $entry->title = $item->title;
                $found[] = $entry;
            }

            // A short page means there is nothing further to fetch.
            if (count($items) != self::SEARCH_RESULT_ITEMS) {
                break;
            }
        }

        return $found;
    }

    /**
     * Searches for one query, exact-phrase matches first, then regular
     * matches whose links were not already returned by the exact search.
     *
     * @param string $query Query text.
     * @return array Unique results as stdClass objects with `link` and `title`.
     */
    public function performSearchRequest($query)
    {
        $service = $this->getService();
        $results = $this->performRequest($service, $query, true);

        foreach ($this->performRequest($service, $query, false) as $item) {
            if (!$this->isLinkInResult($item->link, $results)) {
                $results[] = $item;
            }
        }

        return $results;
    }

    /**
     * Returns the Custom Search service, creating the client on first use so
     * it is not rebuilt for every query.
     *
     * @return object Custom Search service instance.
     */
    private function getService()
    {
        if ($this->service === null) {
            $client = new Client();
            $client->setApplicationName(self::GOOGLE_APP_NAME);
            $client->setDeveloperKey(self::GOOGLE_API_KEY);
            $this->service = new Google_Service_Customsearch($client);
        }

        return $this->service;
    }
}