mirror of
https://git.bwaaa.monster/omnisearch
synced 2026-03-25 17:19:02 +02:00
feat: enabling/disabling specific engines
This commit is contained in:
@@ -25,3 +25,9 @@ domain = https://search.example.com
|
|||||||
|
|
||||||
# Cache TTL for infobox data in seconds (default: 86400 = 24 hours)
|
# Cache TTL for infobox data in seconds (default: 86400 = 24 hours)
|
||||||
#ttl_infobox = 86400
|
#ttl_infobox = 86400
|
||||||
|
|
||||||
|
[engines]
|
||||||
|
# Use * for all engines, or specify a comma-separated list (e.g., ddg,yahoo)
|
||||||
|
# Use *,-engine to exclude specific engines (e.g., *,-startpage)
|
||||||
|
# Available engines: ddg, startpage, yahoo, mojeek
|
||||||
|
engines="*"
|
||||||
|
|||||||
@@ -54,6 +54,8 @@ int load_config(const char *filename, Config *config) {
|
|||||||
value_end--;
|
value_end--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while (*value == ' ' || *value == '\t')
|
||||||
|
value++;
|
||||||
while (*value == '"' || *value == '\'')
|
while (*value == '"' || *value == '\'')
|
||||||
value++;
|
value++;
|
||||||
|
|
||||||
@@ -91,6 +93,11 @@ int load_config(const char *filename, Config *config) {
|
|||||||
} else if (strcmp(key, "ttl_infobox") == 0) {
|
} else if (strcmp(key, "ttl_infobox") == 0) {
|
||||||
config->cache_ttl_infobox = atoi(value);
|
config->cache_ttl_infobox = atoi(value);
|
||||||
}
|
}
|
||||||
|
} else if (strcmp(section, "engines") == 0) {
|
||||||
|
if (strcmp(key, "engines") == 0) {
|
||||||
|
strncpy(config->engines, value, sizeof(config->engines) - 1);
|
||||||
|
config->engines[sizeof(config->engines) - 1] = '\0';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ typedef struct {
|
|||||||
char cache_dir[512];
|
char cache_dir[512];
|
||||||
int cache_ttl_search;
|
int cache_ttl_search;
|
||||||
int cache_ttl_infobox;
|
int cache_ttl_infobox;
|
||||||
|
char engines[512];
|
||||||
} Config;
|
} Config;
|
||||||
|
|
||||||
int load_config(const char *filename, Config *config);
|
int load_config(const char *filename, Config *config);
|
||||||
|
|||||||
@@ -51,7 +51,8 @@ int main() {
|
|||||||
.randomize_password = 0,
|
.randomize_password = 0,
|
||||||
.cache_dir = DEFAULT_CACHE_DIR,
|
.cache_dir = DEFAULT_CACHE_DIR,
|
||||||
.cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
|
.cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
|
||||||
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX};
|
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX,
|
||||||
|
.engines = ""};
|
||||||
|
|
||||||
if (load_config("config.ini", &cfg) != 0) {
|
if (load_config("config.ini", &cfg) != 0) {
|
||||||
fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
|
fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
|
||||||
@@ -59,6 +60,8 @@ int main() {
|
|||||||
|
|
||||||
global_config = cfg;
|
global_config = cfg;
|
||||||
|
|
||||||
|
apply_engines_config(cfg.engines);
|
||||||
|
|
||||||
if (cache_init(cfg.cache_dir) != 0) {
|
if (cache_init(cfg.cache_dir) != 0) {
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"[WARN] Failed to initialize cache, continuing without caching\n");
|
"[WARN] Failed to initialize cache, continuing without caching\n");
|
||||||
|
|||||||
@@ -273,26 +273,39 @@ int results_handler(UrlParams *params) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int enabled_engine_count = 0;
|
||||||
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
|
if (ENGINE_REGISTRY[i].enabled) {
|
||||||
|
enabled_engine_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ScrapeJob jobs[ENGINE_COUNT];
|
ScrapeJob jobs[ENGINE_COUNT];
|
||||||
SearchResult *all_results[ENGINE_COUNT];
|
SearchResult *all_results[ENGINE_COUNT];
|
||||||
|
|
||||||
|
int engine_idx = 0;
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
all_results[i] = NULL;
|
if (ENGINE_REGISTRY[i].enabled) {
|
||||||
jobs[i].engine = &ENGINE_REGISTRY[i];
|
all_results[engine_idx] = NULL;
|
||||||
jobs[i].query = raw_query;
|
jobs[engine_idx].engine = &ENGINE_REGISTRY[i];
|
||||||
jobs[i].out_results = &all_results[i];
|
jobs[engine_idx].query = raw_query;
|
||||||
jobs[i].max_results = MAX_RESULTS_PER_ENGINE;
|
jobs[engine_idx].out_results = &all_results[engine_idx];
|
||||||
jobs[i].results_count = 0;
|
jobs[engine_idx].max_results = MAX_RESULTS_PER_ENGINE;
|
||||||
jobs[i].page = page;
|
jobs[engine_idx].results_count = 0;
|
||||||
jobs[i].handle = NULL;
|
jobs[engine_idx].page = page;
|
||||||
jobs[i].response.memory = NULL;
|
jobs[engine_idx].handle = NULL;
|
||||||
jobs[i].response.size = 0;
|
jobs[engine_idx].response.memory = NULL;
|
||||||
jobs[i].response.capacity = 0;
|
jobs[engine_idx].response.size = 0;
|
||||||
jobs[i].http_status = 0;
|
jobs[engine_idx].response.capacity = 0;
|
||||||
jobs[i].status = SCRAPE_STATUS_PENDING;
|
jobs[engine_idx].http_status = 0;
|
||||||
|
jobs[engine_idx].status = SCRAPE_STATUS_PENDING;
|
||||||
|
engine_idx++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
scrape_engines_parallel(jobs, ENGINE_COUNT);
|
if (enabled_engine_count > 0) {
|
||||||
|
scrape_engines_parallel(jobs, enabled_engine_count);
|
||||||
|
}
|
||||||
|
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
for (int i = 0; i < HANDLER_COUNT; i++) {
|
for (int i = 0; i < HANDLER_COUNT; i++) {
|
||||||
@@ -301,10 +314,10 @@ int results_handler(UrlParams *params) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (btnI) {
|
if (btnI) {
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
if (jobs[i].results_count > 0 && all_results[i][0].url) {
|
if (jobs[i].results_count > 0 && all_results[i][0].url) {
|
||||||
char *redirect_url = strdup(all_results[i][0].url);
|
char *redirect_url = strdup(all_results[i][0].url);
|
||||||
for (int j = 0; j < ENGINE_COUNT; j++) {
|
for (int j = 0; j < enabled_engine_count; j++) {
|
||||||
for (int k = 0; k < jobs[j].results_count; k++) {
|
for (int k = 0; k < jobs[j].results_count; k++) {
|
||||||
free(all_results[j][k].url);
|
free(all_results[j][k].url);
|
||||||
free(all_results[j][k].title);
|
free(all_results[j][k].title);
|
||||||
@@ -327,7 +340,7 @@ int results_handler(UrlParams *params) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
free(all_results[i]);
|
free(all_results[i]);
|
||||||
}
|
}
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
@@ -369,7 +382,7 @@ int results_handler(UrlParams *params) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int warning_count = 0;
|
int warning_count = 0;
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
if (warning_message_for_job(&jobs[i]))
|
if (warning_message_for_job(&jobs[i]))
|
||||||
warning_count++;
|
warning_count++;
|
||||||
}
|
}
|
||||||
@@ -379,7 +392,7 @@ int results_handler(UrlParams *params) {
|
|||||||
int *warning_inner_counts = NULL;
|
int *warning_inner_counts = NULL;
|
||||||
int warning_index = 0;
|
int warning_index = 0;
|
||||||
|
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
const char *warning_message = warning_message_for_job(&jobs[i]);
|
const char *warning_message = warning_message_for_job(&jobs[i]);
|
||||||
if (!warning_message)
|
if (!warning_message)
|
||||||
continue;
|
continue;
|
||||||
@@ -407,7 +420,7 @@ int results_handler(UrlParams *params) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int total_results = 0;
|
int total_results = 0;
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
total_results += jobs[i].results_count;
|
total_results += jobs[i].results_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -427,7 +440,7 @@ int results_handler(UrlParams *params) {
|
|||||||
send_response(html);
|
send_response(html);
|
||||||
free(html);
|
free(html);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++)
|
for (int i = 0; i < enabled_engine_count; i++)
|
||||||
free(all_results[i]);
|
free(all_results[i]);
|
||||||
if (page == 1) {
|
if (page == 1) {
|
||||||
for (int i = 0; i < HANDLER_COUNT; i++) {
|
for (int i = 0; i < HANDLER_COUNT; i++) {
|
||||||
@@ -441,7 +454,7 @@ int results_handler(UrlParams *params) {
|
|||||||
}
|
}
|
||||||
int unique_count = 0;
|
int unique_count = 0;
|
||||||
|
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
for (int j = 0; j < jobs[i].results_count; j++) {
|
for (int j = 0; j < jobs[i].results_count; j++) {
|
||||||
char *display_url = all_results[i][j].url;
|
char *display_url = all_results[i][j].url;
|
||||||
|
|
||||||
@@ -524,7 +537,7 @@ int results_handler(UrlParams *params) {
|
|||||||
free(html);
|
free(html);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
for (int i = 0; i < enabled_engine_count; i++) {
|
||||||
free(all_results[i]);
|
free(all_results[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
|
|||||||
SearchResult **out_results, int max_results);
|
SearchResult **out_results, int max_results);
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
const char *id;
|
||||||
const char *name;
|
const char *name;
|
||||||
const char *base_url;
|
const char *base_url;
|
||||||
const char *host_header;
|
const char *host_header;
|
||||||
@@ -24,6 +25,7 @@ typedef struct {
|
|||||||
int page_multiplier;
|
int page_multiplier;
|
||||||
int page_base;
|
int page_base;
|
||||||
ParserFunc parser;
|
ParserFunc parser;
|
||||||
|
int enabled;
|
||||||
} SearchEngine;
|
} SearchEngine;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@@ -54,8 +56,9 @@ typedef struct {
|
|||||||
ScrapeStatus status;
|
ScrapeStatus status;
|
||||||
} ScrapeJob;
|
} ScrapeJob;
|
||||||
|
|
||||||
extern const SearchEngine ENGINE_REGISTRY[];
|
extern SearchEngine ENGINE_REGISTRY[];
|
||||||
extern const int ENGINE_COUNT;
|
extern const int ENGINE_COUNT;
|
||||||
|
void apply_engines_config(const char *engines_str);
|
||||||
|
|
||||||
size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
|
size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
|
||||||
void *userp);
|
void *userp);
|
||||||
|
|||||||
@@ -310,38 +310,122 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
|
|||||||
static int parse_mojeek(const char *engine_name, xmlDocPtr doc,
|
static int parse_mojeek(const char *engine_name, xmlDocPtr doc,
|
||||||
SearchResult **out_results, int max_results);
|
SearchResult **out_results, int max_results);
|
||||||
|
|
||||||
const SearchEngine ENGINE_REGISTRY[] = {
|
SearchEngine ENGINE_REGISTRY[] = {
|
||||||
{.name = "DuckDuckGo Lite",
|
{.id = "ddg",
|
||||||
|
.name = "DuckDuckGo Lite",
|
||||||
.base_url = "https://lite.duckduckgo.com/lite/?q=",
|
.base_url = "https://lite.duckduckgo.com/lite/?q=",
|
||||||
.host_header = "lite.duckduckgo.com",
|
.host_header = "lite.duckduckgo.com",
|
||||||
.referer = "https://lite.duckduckgo.com/",
|
.referer = "https://lite.duckduckgo.com/",
|
||||||
.page_param = "s",
|
.page_param = "s",
|
||||||
.page_multiplier = 30,
|
.page_multiplier = 30,
|
||||||
.page_base = 0,
|
.page_base = 0,
|
||||||
.parser = parse_ddg_lite},
|
.parser = parse_ddg_lite,
|
||||||
{.name = "Startpage",
|
.enabled = 1},
|
||||||
|
{.id = "startpage",
|
||||||
|
.name = "Startpage",
|
||||||
.base_url = "https://www.startpage.com/sp/search?q=",
|
.base_url = "https://www.startpage.com/sp/search?q=",
|
||||||
.host_header = "www.startpage.com",
|
.host_header = "www.startpage.com",
|
||||||
.referer = "https://www.startpage.com/",
|
.referer = "https://www.startpage.com/",
|
||||||
.page_param = "page",
|
.page_param = "page",
|
||||||
.page_multiplier = 1,
|
.page_multiplier = 1,
|
||||||
.page_base = 1,
|
.page_base = 1,
|
||||||
.parser = parse_startpage},
|
.parser = parse_startpage,
|
||||||
{.name = "Yahoo",
|
.enabled = 1},
|
||||||
|
{.id = "yahoo",
|
||||||
|
.name = "Yahoo",
|
||||||
.base_url = "https://search.yahoo.com/search?p=",
|
.base_url = "https://search.yahoo.com/search?p=",
|
||||||
.host_header = "search.yahoo.com",
|
.host_header = "search.yahoo.com",
|
||||||
.referer = "https://search.yahoo.com/",
|
.referer = "https://search.yahoo.com/",
|
||||||
.page_param = "b",
|
.page_param = "b",
|
||||||
.page_multiplier = 10,
|
.page_multiplier = 10,
|
||||||
.page_base = 1,
|
.page_base = 1,
|
||||||
.parser = parse_yahoo},
|
.parser = parse_yahoo,
|
||||||
{.name = "Mojeek",
|
.enabled = 1},
|
||||||
|
{.id = "mojeek",
|
||||||
|
.name = "Mojeek",
|
||||||
.base_url = "https://www.mojeek.com/search?q=",
|
.base_url = "https://www.mojeek.com/search?q=",
|
||||||
.host_header = "www.mojeek.com",
|
.host_header = "www.mojeek.com",
|
||||||
.referer = "https://www.mojeek.com/",
|
.referer = "https://www.mojeek.com/",
|
||||||
.page_param = "s",
|
.page_param = "s",
|
||||||
.page_multiplier = 10,
|
.page_multiplier = 10,
|
||||||
.page_base = 1,
|
.page_base = 1,
|
||||||
.parser = parse_mojeek}};
|
.parser = parse_mojeek,
|
||||||
|
.enabled = 1}};
|
||||||
|
|
||||||
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
|
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
|
||||||
|
|
||||||
|
/*
 * Case-insensitive (ASCII only) equality test between a registry engine id
 * and an id taken from the configuration.
 *
 * engine_id: id stored in the engine registry; may be NULL.
 * config_id: id parsed from the config string; may be NULL.
 *
 * Returns 1 when both strings have the same length and match after folding
 * 'A'-'Z' to lowercase, 0 otherwise. A NULL argument never matches, so an
 * engine registered without an id cannot crash config parsing.
 */
static int engine_id_compare(const char *engine_id, const char *config_id) {
  if (!engine_id || !config_id)
    return 0;

  for (; *engine_id && *config_id; engine_id++, config_id++) {
    char e = *engine_id;
    char c = *config_id;

    /* Manual folding keeps the comparison independent of the C locale. */
    if (e >= 'A' && e <= 'Z')
      e = (char)(e - 'A' + 'a');
    if (c >= 'A' && c <= 'Z')
      c = (char)(c - 'A' + 'a');

    if (e != c)
      return 0;
  }

  /* At least one string ended; they are equal only if both ended together. */
  return *engine_id == *config_id;
}
|
||||||
|
|
||||||
|
void apply_engines_config(const char *engines_str) {
|
||||||
|
if (!engines_str || engines_str[0] == '\0') {
|
||||||
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
|
ENGINE_REGISTRY[i].enabled = 1;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
|
ENGINE_REGISTRY[i].enabled = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *copy = strdup(engines_str);
|
||||||
|
if (!copy)
|
||||||
|
return;
|
||||||
|
|
||||||
|
char *saveptr;
|
||||||
|
char *token = strtok_r(copy, ",", &saveptr);
|
||||||
|
|
||||||
|
while (token) {
|
||||||
|
while (*token == ' ' || *token == '\t')
|
||||||
|
token++;
|
||||||
|
|
||||||
|
if (strcmp(token, "*") == 0) {
|
||||||
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
|
ENGINE_REGISTRY[i].enabled = 1;
|
||||||
|
}
|
||||||
|
} else if (token[0] == '-' && token[1] != '\0') {
|
||||||
|
char *engine_id = token + 1;
|
||||||
|
int found = 0;
|
||||||
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
|
if (engine_id_compare(ENGINE_REGISTRY[i].id, engine_id)) {
|
||||||
|
ENGINE_REGISTRY[i].enabled = 0;
|
||||||
|
found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
fprintf(stderr, "[WARN] Unknown engine: %s\n", engine_id);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int found = 0;
|
||||||
|
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||||
|
if (engine_id_compare(ENGINE_REGISTRY[i].id, token)) {
|
||||||
|
ENGINE_REGISTRY[i].enabled = 1;
|
||||||
|
found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
fprintf(stderr, "[WARN] Unknown engine: %s\n", token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
token = strtok_r(NULL, ",", &saveptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(copy);
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user