mirror of
https://git.bwaaa.monster/omnisearch
synced 2026-03-25 17:19:02 +02:00
feat: enabling/disabling specific engines
This commit is contained in:
@@ -25,3 +25,9 @@ domain = https://search.example.com
|
||||
|
||||
# Cache TTL for infobox data in seconds (default: 86400 = 24 hours)
|
||||
#ttl_infobox = 86400
|
||||
|
||||
[engines]
|
||||
# Use * for all engines, or specify a comma-separated list (e.g., ddg,yahoo)
|
||||
# Use *,-engine to exclude specific engines (e.g., *,-startpage)
|
||||
# Available engines: ddg, startpage, yahoo, mojeek
|
||||
engines="*"
|
||||
|
||||
@@ -54,6 +54,8 @@ int load_config(const char *filename, Config *config) {
|
||||
value_end--;
|
||||
}
|
||||
|
||||
while (*value == ' ' || *value == '\t')
|
||||
value++;
|
||||
while (*value == '"' || *value == '\'')
|
||||
value++;
|
||||
|
||||
@@ -91,6 +93,11 @@ int load_config(const char *filename, Config *config) {
|
||||
} else if (strcmp(key, "ttl_infobox") == 0) {
|
||||
config->cache_ttl_infobox = atoi(value);
|
||||
}
|
||||
} else if (strcmp(section, "engines") == 0) {
|
||||
if (strcmp(key, "engines") == 0) {
|
||||
strncpy(config->engines, value, sizeof(config->engines) - 1);
|
||||
config->engines[sizeof(config->engines) - 1] = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,7 @@ typedef struct {
|
||||
char cache_dir[512];
|
||||
int cache_ttl_search;
|
||||
int cache_ttl_infobox;
|
||||
char engines[512];
|
||||
} Config;
|
||||
|
||||
int load_config(const char *filename, Config *config);
|
||||
|
||||
@@ -51,7 +51,8 @@ int main() {
|
||||
.randomize_password = 0,
|
||||
.cache_dir = DEFAULT_CACHE_DIR,
|
||||
.cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
|
||||
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX};
|
||||
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX,
|
||||
.engines = ""};
|
||||
|
||||
if (load_config("config.ini", &cfg) != 0) {
|
||||
fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
|
||||
@@ -59,6 +60,8 @@ int main() {
|
||||
|
||||
global_config = cfg;
|
||||
|
||||
apply_engines_config(cfg.engines);
|
||||
|
||||
if (cache_init(cfg.cache_dir) != 0) {
|
||||
fprintf(stderr,
|
||||
"[WARN] Failed to initialize cache, continuing without caching\n");
|
||||
|
||||
@@ -273,26 +273,39 @@ int results_handler(UrlParams *params) {
|
||||
}
|
||||
}
|
||||
|
||||
int enabled_engine_count = 0;
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
if (ENGINE_REGISTRY[i].enabled) {
|
||||
enabled_engine_count++;
|
||||
}
|
||||
}
|
||||
|
||||
ScrapeJob jobs[ENGINE_COUNT];
|
||||
SearchResult *all_results[ENGINE_COUNT];
|
||||
|
||||
int engine_idx = 0;
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
all_results[i] = NULL;
|
||||
jobs[i].engine = &ENGINE_REGISTRY[i];
|
||||
jobs[i].query = raw_query;
|
||||
jobs[i].out_results = &all_results[i];
|
||||
jobs[i].max_results = MAX_RESULTS_PER_ENGINE;
|
||||
jobs[i].results_count = 0;
|
||||
jobs[i].page = page;
|
||||
jobs[i].handle = NULL;
|
||||
jobs[i].response.memory = NULL;
|
||||
jobs[i].response.size = 0;
|
||||
jobs[i].response.capacity = 0;
|
||||
jobs[i].http_status = 0;
|
||||
jobs[i].status = SCRAPE_STATUS_PENDING;
|
||||
if (ENGINE_REGISTRY[i].enabled) {
|
||||
all_results[engine_idx] = NULL;
|
||||
jobs[engine_idx].engine = &ENGINE_REGISTRY[i];
|
||||
jobs[engine_idx].query = raw_query;
|
||||
jobs[engine_idx].out_results = &all_results[engine_idx];
|
||||
jobs[engine_idx].max_results = MAX_RESULTS_PER_ENGINE;
|
||||
jobs[engine_idx].results_count = 0;
|
||||
jobs[engine_idx].page = page;
|
||||
jobs[engine_idx].handle = NULL;
|
||||
jobs[engine_idx].response.memory = NULL;
|
||||
jobs[engine_idx].response.size = 0;
|
||||
jobs[engine_idx].response.capacity = 0;
|
||||
jobs[engine_idx].http_status = 0;
|
||||
jobs[engine_idx].status = SCRAPE_STATUS_PENDING;
|
||||
engine_idx++;
|
||||
}
|
||||
}
|
||||
|
||||
scrape_engines_parallel(jobs, ENGINE_COUNT);
|
||||
if (enabled_engine_count > 0) {
|
||||
scrape_engines_parallel(jobs, enabled_engine_count);
|
||||
}
|
||||
|
||||
if (page == 1) {
|
||||
for (int i = 0; i < HANDLER_COUNT; i++) {
|
||||
@@ -301,10 +314,10 @@ int results_handler(UrlParams *params) {
|
||||
}
|
||||
|
||||
if (btnI) {
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
if (jobs[i].results_count > 0 && all_results[i][0].url) {
|
||||
char *redirect_url = strdup(all_results[i][0].url);
|
||||
for (int j = 0; j < ENGINE_COUNT; j++) {
|
||||
for (int j = 0; j < enabled_engine_count; j++) {
|
||||
for (int k = 0; k < jobs[j].results_count; k++) {
|
||||
free(all_results[j][k].url);
|
||||
free(all_results[j][k].title);
|
||||
@@ -327,7 +340,7 @@ int results_handler(UrlParams *params) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
free(all_results[i]);
|
||||
}
|
||||
if (page == 1) {
|
||||
@@ -369,7 +382,7 @@ int results_handler(UrlParams *params) {
|
||||
}
|
||||
|
||||
int warning_count = 0;
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
if (warning_message_for_job(&jobs[i]))
|
||||
warning_count++;
|
||||
}
|
||||
@@ -379,7 +392,7 @@ int results_handler(UrlParams *params) {
|
||||
int *warning_inner_counts = NULL;
|
||||
int warning_index = 0;
|
||||
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
const char *warning_message = warning_message_for_job(&jobs[i]);
|
||||
if (!warning_message)
|
||||
continue;
|
||||
@@ -407,7 +420,7 @@ int results_handler(UrlParams *params) {
|
||||
}
|
||||
|
||||
int total_results = 0;
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
total_results += jobs[i].results_count;
|
||||
}
|
||||
|
||||
@@ -427,7 +440,7 @@ int results_handler(UrlParams *params) {
|
||||
send_response(html);
|
||||
free(html);
|
||||
}
|
||||
for (int i = 0; i < ENGINE_COUNT; i++)
|
||||
for (int i = 0; i < enabled_engine_count; i++)
|
||||
free(all_results[i]);
|
||||
if (page == 1) {
|
||||
for (int i = 0; i < HANDLER_COUNT; i++) {
|
||||
@@ -441,7 +454,7 @@ int results_handler(UrlParams *params) {
|
||||
}
|
||||
int unique_count = 0;
|
||||
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
for (int j = 0; j < jobs[i].results_count; j++) {
|
||||
char *display_url = all_results[i][j].url;
|
||||
|
||||
@@ -524,7 +537,7 @@ int results_handler(UrlParams *params) {
|
||||
free(html);
|
||||
}
|
||||
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
for (int i = 0; i < enabled_engine_count; i++) {
|
||||
free(all_results[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
|
||||
SearchResult **out_results, int max_results);
|
||||
|
||||
typedef struct {
|
||||
const char *id;
|
||||
const char *name;
|
||||
const char *base_url;
|
||||
const char *host_header;
|
||||
@@ -24,6 +25,7 @@ typedef struct {
|
||||
int page_multiplier;
|
||||
int page_base;
|
||||
ParserFunc parser;
|
||||
int enabled;
|
||||
} SearchEngine;
|
||||
|
||||
typedef struct {
|
||||
@@ -54,8 +56,9 @@ typedef struct {
|
||||
ScrapeStatus status;
|
||||
} ScrapeJob;
|
||||
|
||||
extern const SearchEngine ENGINE_REGISTRY[];
|
||||
extern SearchEngine ENGINE_REGISTRY[];
|
||||
extern const int ENGINE_COUNT;
|
||||
void apply_engines_config(const char *engines_str);
|
||||
|
||||
size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
|
||||
void *userp);
|
||||
|
||||
@@ -310,38 +310,122 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
|
||||
static int parse_mojeek(const char *engine_name, xmlDocPtr doc,
|
||||
SearchResult **out_results, int max_results);
|
||||
|
||||
const SearchEngine ENGINE_REGISTRY[] = {
|
||||
{.name = "DuckDuckGo Lite",
|
||||
SearchEngine ENGINE_REGISTRY[] = {
|
||||
{.id = "ddg",
|
||||
.name = "DuckDuckGo Lite",
|
||||
.base_url = "https://lite.duckduckgo.com/lite/?q=",
|
||||
.host_header = "lite.duckduckgo.com",
|
||||
.referer = "https://lite.duckduckgo.com/",
|
||||
.page_param = "s",
|
||||
.page_multiplier = 30,
|
||||
.page_base = 0,
|
||||
.parser = parse_ddg_lite},
|
||||
{.name = "Startpage",
|
||||
.parser = parse_ddg_lite,
|
||||
.enabled = 1},
|
||||
{.id = "startpage",
|
||||
.name = "Startpage",
|
||||
.base_url = "https://www.startpage.com/sp/search?q=",
|
||||
.host_header = "www.startpage.com",
|
||||
.referer = "https://www.startpage.com/",
|
||||
.page_param = "page",
|
||||
.page_multiplier = 1,
|
||||
.page_base = 1,
|
||||
.parser = parse_startpage},
|
||||
{.name = "Yahoo",
|
||||
.parser = parse_startpage,
|
||||
.enabled = 1},
|
||||
{.id = "yahoo",
|
||||
.name = "Yahoo",
|
||||
.base_url = "https://search.yahoo.com/search?p=",
|
||||
.host_header = "search.yahoo.com",
|
||||
.referer = "https://search.yahoo.com/",
|
||||
.page_param = "b",
|
||||
.page_multiplier = 10,
|
||||
.page_base = 1,
|
||||
.parser = parse_yahoo},
|
||||
{.name = "Mojeek",
|
||||
.parser = parse_yahoo,
|
||||
.enabled = 1},
|
||||
{.id = "mojeek",
|
||||
.name = "Mojeek",
|
||||
.base_url = "https://www.mojeek.com/search?q=",
|
||||
.host_header = "www.mojeek.com",
|
||||
.referer = "https://www.mojeek.com/",
|
||||
.page_param = "s",
|
||||
.page_multiplier = 10,
|
||||
.page_base = 1,
|
||||
.parser = parse_mojeek}};
|
||||
.parser = parse_mojeek,
|
||||
.enabled = 1}};
|
||||
|
||||
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
|
||||
|
||||
/*
 * Case-insensitively compare an engine id with an id taken from the
 * configuration.
 *
 * Only the ASCII letters 'A'-'Z' are folded to lower case; every other byte
 * has to match exactly.
 *
 * Returns 1 when both strings have the same length and the same folded
 * contents, 0 otherwise. A NULL argument never matches anything (including
 * another NULL), so callers may pass an unset id without crashing.
 */
static int engine_id_compare(const char *engine_id, const char *config_id) {
  if (!engine_id || !config_id) {
    return 0;
  }

  while (*engine_id && *config_id) {
    char e = *engine_id;
    char c = *config_id;

    if (e >= 'A' && e <= 'Z') {
      e = (char)(e - 'A' + 'a');
    }
    if (c >= 'A' && c <= 'Z') {
      c = (char)(c - 'A' + 'a');
    }
    if (e != c) {
      return 0;
    }

    engine_id++;
    config_id++;
  }

  /* At least one string has ended here; they are equal only if both did. */
  return *engine_id == *config_id;
}
|
||||
|
||||
void apply_engines_config(const char *engines_str) {
|
||||
if (!engines_str || engines_str[0] == '\0') {
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
ENGINE_REGISTRY[i].enabled = 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
ENGINE_REGISTRY[i].enabled = 0;
|
||||
}
|
||||
|
||||
char *copy = strdup(engines_str);
|
||||
if (!copy)
|
||||
return;
|
||||
|
||||
char *saveptr;
|
||||
char *token = strtok_r(copy, ",", &saveptr);
|
||||
|
||||
while (token) {
|
||||
while (*token == ' ' || *token == '\t')
|
||||
token++;
|
||||
|
||||
if (strcmp(token, "*") == 0) {
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
ENGINE_REGISTRY[i].enabled = 1;
|
||||
}
|
||||
} else if (token[0] == '-' && token[1] != '\0') {
|
||||
char *engine_id = token + 1;
|
||||
int found = 0;
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
if (engine_id_compare(ENGINE_REGISTRY[i].id, engine_id)) {
|
||||
ENGINE_REGISTRY[i].enabled = 0;
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
fprintf(stderr, "[WARN] Unknown engine: %s\n", engine_id);
|
||||
}
|
||||
} else {
|
||||
int found = 0;
|
||||
for (int i = 0; i < ENGINE_COUNT; i++) {
|
||||
if (engine_id_compare(ENGINE_REGISTRY[i].id, token)) {
|
||||
ENGINE_REGISTRY[i].enabled = 1;
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
fprintf(stderr, "[WARN] Unknown engine: %s\n", token);
|
||||
}
|
||||
}
|
||||
|
||||
token = strtok_r(NULL, ",", &saveptr);
|
||||
}
|
||||
|
||||
free(copy);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user