feat: enabling/disabling specific engines

This commit is contained in:
frosty
2026-03-21 21:47:45 -04:00
parent 8229aaa489
commit 6b90877869
7 changed files with 152 additions and 35 deletions

View File

@@ -25,3 +25,9 @@ domain = https://search.example.com
# Cache TTL for infobox data in seconds (default: 86400 = 24 hours)
#ttl_infobox = 86400
[engines]
# Use * for all engines, or specify a comma-separated list (e.g., ddg,yahoo)
# Use *,-engine to exclude specific engines (e.g., *,-startpage)
# Available engines: ddg, startpage, yahoo, mojeek
engines="*"

View File

@@ -54,6 +54,8 @@ int load_config(const char *filename, Config *config) {
value_end--;
}
while (*value == ' ' || *value == '\t')
value++;
while (*value == '"' || *value == '\'')
value++;
@@ -91,6 +93,11 @@ int load_config(const char *filename, Config *config) {
} else if (strcmp(key, "ttl_infobox") == 0) {
config->cache_ttl_infobox = atoi(value);
}
} else if (strcmp(section, "engines") == 0) {
if (strcmp(key, "engines") == 0) {
strncpy(config->engines, value, sizeof(config->engines) - 1);
config->engines[sizeof(config->engines) - 1] = '\0';
}
}
}
}

View File

@@ -42,6 +42,7 @@ typedef struct {
char cache_dir[512];
int cache_ttl_search;
int cache_ttl_infobox;
char engines[512];
} Config;
int load_config(const char *filename, Config *config);

View File

@@ -51,7 +51,8 @@ int main() {
.randomize_password = 0,
.cache_dir = DEFAULT_CACHE_DIR,
.cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX};
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX,
.engines = ""};
if (load_config("config.ini", &cfg) != 0) {
fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
@@ -59,6 +60,8 @@ int main() {
global_config = cfg;
apply_engines_config(cfg.engines);
if (cache_init(cfg.cache_dir) != 0) {
fprintf(stderr,
"[WARN] Failed to initialize cache, continuing without caching\n");

View File

@@ -273,26 +273,39 @@ int results_handler(UrlParams *params) {
}
}
int enabled_engine_count = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
if (ENGINE_REGISTRY[i].enabled) {
enabled_engine_count++;
}
}
ScrapeJob jobs[ENGINE_COUNT];
SearchResult *all_results[ENGINE_COUNT];
int engine_idx = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
all_results[i] = NULL;
jobs[i].engine = &ENGINE_REGISTRY[i];
jobs[i].query = raw_query;
jobs[i].out_results = &all_results[i];
jobs[i].max_results = MAX_RESULTS_PER_ENGINE;
jobs[i].results_count = 0;
jobs[i].page = page;
jobs[i].handle = NULL;
jobs[i].response.memory = NULL;
jobs[i].response.size = 0;
jobs[i].response.capacity = 0;
jobs[i].http_status = 0;
jobs[i].status = SCRAPE_STATUS_PENDING;
if (ENGINE_REGISTRY[i].enabled) {
all_results[engine_idx] = NULL;
jobs[engine_idx].engine = &ENGINE_REGISTRY[i];
jobs[engine_idx].query = raw_query;
jobs[engine_idx].out_results = &all_results[engine_idx];
jobs[engine_idx].max_results = MAX_RESULTS_PER_ENGINE;
jobs[engine_idx].results_count = 0;
jobs[engine_idx].page = page;
jobs[engine_idx].handle = NULL;
jobs[engine_idx].response.memory = NULL;
jobs[engine_idx].response.size = 0;
jobs[engine_idx].response.capacity = 0;
jobs[engine_idx].http_status = 0;
jobs[engine_idx].status = SCRAPE_STATUS_PENDING;
engine_idx++;
}
}
scrape_engines_parallel(jobs, ENGINE_COUNT);
if (enabled_engine_count > 0) {
scrape_engines_parallel(jobs, enabled_engine_count);
}
if (page == 1) {
for (int i = 0; i < HANDLER_COUNT; i++) {
@@ -301,10 +314,10 @@ int results_handler(UrlParams *params) {
}
if (btnI) {
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
if (jobs[i].results_count > 0 && all_results[i][0].url) {
char *redirect_url = strdup(all_results[i][0].url);
for (int j = 0; j < ENGINE_COUNT; j++) {
for (int j = 0; j < enabled_engine_count; j++) {
for (int k = 0; k < jobs[j].results_count; k++) {
free(all_results[j][k].url);
free(all_results[j][k].title);
@@ -327,7 +340,7 @@ int results_handler(UrlParams *params) {
return 0;
}
}
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
free(all_results[i]);
}
if (page == 1) {
@@ -369,7 +382,7 @@ int results_handler(UrlParams *params) {
}
int warning_count = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
if (warning_message_for_job(&jobs[i]))
warning_count++;
}
@@ -379,7 +392,7 @@ int results_handler(UrlParams *params) {
int *warning_inner_counts = NULL;
int warning_index = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
const char *warning_message = warning_message_for_job(&jobs[i]);
if (!warning_message)
continue;
@@ -407,7 +420,7 @@ int results_handler(UrlParams *params) {
}
int total_results = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
total_results += jobs[i].results_count;
}
@@ -427,7 +440,7 @@ int results_handler(UrlParams *params) {
send_response(html);
free(html);
}
for (int i = 0; i < ENGINE_COUNT; i++)
for (int i = 0; i < enabled_engine_count; i++)
free(all_results[i]);
if (page == 1) {
for (int i = 0; i < HANDLER_COUNT; i++) {
@@ -441,7 +454,7 @@ int results_handler(UrlParams *params) {
}
int unique_count = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
for (int j = 0; j < jobs[i].results_count; j++) {
char *display_url = all_results[i][j].url;
@@ -524,7 +537,7 @@ int results_handler(UrlParams *params) {
free(html);
}
for (int i = 0; i < ENGINE_COUNT; i++) {
for (int i = 0; i < enabled_engine_count; i++) {
free(all_results[i]);
}
}

View File

@@ -15,6 +15,7 @@ typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results);
typedef struct {
const char *id;
const char *name;
const char *base_url;
const char *host_header;
@@ -24,6 +25,7 @@ typedef struct {
int page_multiplier;
int page_base;
ParserFunc parser;
int enabled;
} SearchEngine;
typedef struct {
@@ -54,8 +56,9 @@ typedef struct {
ScrapeStatus status;
} ScrapeJob;
extern const SearchEngine ENGINE_REGISTRY[];
extern SearchEngine ENGINE_REGISTRY[];
extern const int ENGINE_COUNT;
void apply_engines_config(const char *engines_str);
size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
void *userp);

View File

@@ -310,38 +310,122 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
static int parse_mojeek(const char *engine_name, xmlDocPtr doc,
SearchResult **out_results, int max_results);
const SearchEngine ENGINE_REGISTRY[] = {
{.name = "DuckDuckGo Lite",
SearchEngine ENGINE_REGISTRY[] = {
{.id = "ddg",
.name = "DuckDuckGo Lite",
.base_url = "https://lite.duckduckgo.com/lite/?q=",
.host_header = "lite.duckduckgo.com",
.referer = "https://lite.duckduckgo.com/",
.page_param = "s",
.page_multiplier = 30,
.page_base = 0,
.parser = parse_ddg_lite},
{.name = "Startpage",
.parser = parse_ddg_lite,
.enabled = 1},
{.id = "startpage",
.name = "Startpage",
.base_url = "https://www.startpage.com/sp/search?q=",
.host_header = "www.startpage.com",
.referer = "https://www.startpage.com/",
.page_param = "page",
.page_multiplier = 1,
.page_base = 1,
.parser = parse_startpage},
{.name = "Yahoo",
.parser = parse_startpage,
.enabled = 1},
{.id = "yahoo",
.name = "Yahoo",
.base_url = "https://search.yahoo.com/search?p=",
.host_header = "search.yahoo.com",
.referer = "https://search.yahoo.com/",
.page_param = "b",
.page_multiplier = 10,
.page_base = 1,
.parser = parse_yahoo},
{.name = "Mojeek",
.parser = parse_yahoo,
.enabled = 1},
{.id = "mojeek",
.name = "Mojeek",
.base_url = "https://www.mojeek.com/search?q=",
.host_header = "www.mojeek.com",
.referer = "https://www.mojeek.com/",
.page_param = "s",
.page_multiplier = 10,
.page_base = 1,
.parser = parse_mojeek}};
.parser = parse_mojeek,
.enabled = 1}};
/* Number of entries in ENGINE_REGISTRY, derived from the array's own size. */
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(ENGINE_REGISTRY[0]);
/*
 * Compare two engine identifiers for equality, ignoring ASCII letter case.
 *
 * Only 'A'..'Z' are folded to lower case; every other byte must match
 * exactly. Both strings must have the same length to be considered equal.
 *
 * Returns 1 when the identifiers match, 0 otherwise.
 */
static int engine_id_compare(const char *engine_id, const char *config_id) {
  for (; *engine_id != '\0' && *config_id != '\0'; engine_id++, config_id++) {
    char lhs = *engine_id;
    char rhs = *config_id;

    /* Fold upper-case ASCII letters onto their lower-case counterparts. */
    lhs = (lhs >= 'A' && lhs <= 'Z') ? (char)(lhs - 'A' + 'a') : lhs;
    rhs = (rhs >= 'A' && rhs <= 'Z') ? (char)(rhs - 'A' + 'a') : rhs;

    if (lhs != rhs) {
      return 0;
    }
  }

  /* At least one string is exhausted here; equal only if both are. */
  return *engine_id == '\0' && *config_id == '\0';
}
void apply_engines_config(const char *engines_str) {
if (!engines_str || engines_str[0] == '\0') {
for (int i = 0; i < ENGINE_COUNT; i++) {
ENGINE_REGISTRY[i].enabled = 1;
}
return;
}
for (int i = 0; i < ENGINE_COUNT; i++) {
ENGINE_REGISTRY[i].enabled = 0;
}
char *copy = strdup(engines_str);
if (!copy)
return;
char *saveptr;
char *token = strtok_r(copy, ",", &saveptr);
while (token) {
while (*token == ' ' || *token == '\t')
token++;
if (strcmp(token, "*") == 0) {
for (int i = 0; i < ENGINE_COUNT; i++) {
ENGINE_REGISTRY[i].enabled = 1;
}
} else if (token[0] == '-' && token[1] != '\0') {
char *engine_id = token + 1;
int found = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
if (engine_id_compare(ENGINE_REGISTRY[i].id, engine_id)) {
ENGINE_REGISTRY[i].enabled = 0;
found = 1;
break;
}
}
if (!found) {
fprintf(stderr, "[WARN] Unknown engine: %s\n", engine_id);
}
} else {
int found = 0;
for (int i = 0; i < ENGINE_COUNT; i++) {
if (engine_id_compare(ENGINE_REGISTRY[i].id, token)) {
ENGINE_REGISTRY[i].enabled = 1;
found = 1;
break;
}
}
if (!found) {
fprintf(stderr, "[WARN] Unknown engine: %s\n", token);
}
}
token = strtok_r(NULL, ",", &saveptr);
}
free(copy);
}