diff --git a/example-config.ini b/example-config.ini index e0f1f02..fc6ea8d 100644 --- a/example-config.ini +++ b/example-config.ini @@ -25,3 +25,9 @@ domain = https://search.example.com # Cache TTL for infobox data in seconds (default: 86400 = 24 hours) #ttl_infobox = 86400 + +[engines] +# Use * for all engines, or specify comma-separated list (e.g., ddg,yahoo) +# Use *,-engine to exclude specific engines (e.g., *,-startpage) +# Available engines: ddg, startpage, yahoo, mojeek +engines="*" diff --git a/src/Config.c b/src/Config.c index b4a0f21..0c243bd 100644 --- a/src/Config.c +++ b/src/Config.c @@ -54,6 +54,8 @@ int load_config(const char *filename, Config *config) { value_end--; } + while (*value == ' ' || *value == '\t') + value++; while (*value == '"' || *value == '\'') value++; @@ -91,6 +93,11 @@ int load_config(const char *filename, Config *config) { } else if (strcmp(key, "ttl_infobox") == 0) { config->cache_ttl_infobox = atoi(value); } + } else if (strcmp(section, "engines") == 0) { + if (strcmp(key, "engines") == 0) { + strncpy(config->engines, value, sizeof(config->engines) - 1); + config->engines[sizeof(config->engines) - 1] = '\0'; + } } } } diff --git a/src/Config.h b/src/Config.h index 33ff527..4143bbd 100644 --- a/src/Config.h +++ b/src/Config.h @@ -42,6 +42,7 @@ typedef struct { char cache_dir[512]; int cache_ttl_search; int cache_ttl_infobox; + char engines[512]; } Config; int load_config(const char *filename, Config *config); diff --git a/src/Main.c b/src/Main.c index be5080b..8aa161d 100644 --- a/src/Main.c +++ b/src/Main.c @@ -16,7 +16,7 @@ #include "Scraping/Scraping.h" Config global_config; - + int handle_opensearch(UrlParams *params) { (void)params; extern Config global_config; @@ -51,7 +51,8 @@ int main() { .randomize_password = 0, .cache_dir = DEFAULT_CACHE_DIR, .cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH, - .cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX}; + .cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX, + .engines = ""}; if (load_config("config.ini", &cfg) != 0) { fprintf(stderr, "[WARN] Could not load config file, using defaults\n"); @@ -59,6 +60,8 @@ int main() { global_config = cfg; + apply_engines_config(cfg.engines); + if (cache_init(cfg.cache_dir) != 0) { fprintf(stderr, "[WARN] Failed to initialize cache, continuing without caching\n"); diff --git a/src/Routes/Search.c b/src/Routes/Search.c index ad167fb..bc35fb6 100644 --- a/src/Routes/Search.c +++ b/src/Routes/Search.c @@ -273,26 +273,39 @@ int results_handler(UrlParams *params) { } } + int enabled_engine_count = 0; + for (int i = 0; i < ENGINE_COUNT; i++) { + if (ENGINE_REGISTRY[i].enabled) { + enabled_engine_count++; + } + } + ScrapeJob jobs[ENGINE_COUNT]; SearchResult *all_results[ENGINE_COUNT]; + int engine_idx = 0; for (int i = 0; i < ENGINE_COUNT; i++) { - all_results[i] = NULL; - jobs[i].engine = &ENGINE_REGISTRY[i]; - jobs[i].query = raw_query; - jobs[i].out_results = &all_results[i]; - jobs[i].max_results = MAX_RESULTS_PER_ENGINE; - jobs[i].results_count = 0; - jobs[i].page = page; - jobs[i].handle = NULL; - jobs[i].response.memory = NULL; - jobs[i].response.size = 0; - jobs[i].response.capacity = 0; - jobs[i].http_status = 0; - jobs[i].status = SCRAPE_STATUS_PENDING; + if (ENGINE_REGISTRY[i].enabled) { + all_results[engine_idx] = NULL; + jobs[engine_idx].engine = &ENGINE_REGISTRY[i]; + jobs[engine_idx].query = raw_query; + jobs[engine_idx].out_results = &all_results[engine_idx]; + jobs[engine_idx].max_results = MAX_RESULTS_PER_ENGINE; + jobs[engine_idx].results_count = 0; + jobs[engine_idx].page = page; + jobs[engine_idx].handle = NULL; + jobs[engine_idx].response.memory = NULL; + jobs[engine_idx].response.size = 0; + jobs[engine_idx].response.capacity = 0; + jobs[engine_idx].http_status = 0; + jobs[engine_idx].status = SCRAPE_STATUS_PENDING; + engine_idx++; + } } - scrape_engines_parallel(jobs, ENGINE_COUNT); + if (enabled_engine_count > 0) { + scrape_engines_parallel(jobs, enabled_engine_count); + } if (page == 1) { for (int i = 0; i < HANDLER_COUNT; i++) { @@ -301,10 +314,10 @@ int results_handler(UrlParams *params) { } if (btnI) { - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { if (jobs[i].results_count > 0 && all_results[i][0].url) { char *redirect_url = strdup(all_results[i][0].url); - for (int j = 0; j < ENGINE_COUNT; j++) { + for (int j = 0; j < enabled_engine_count; j++) { for (int k = 0; k < jobs[j].results_count; k++) { free(all_results[j][k].url); free(all_results[j][k].title); @@ -327,7 +340,7 @@ int results_handler(UrlParams *params) { return 0; } } - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { free(all_results[i]); } if (page == 1) { @@ -369,7 +382,7 @@ int results_handler(UrlParams *params) { } int warning_count = 0; - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { if (warning_message_for_job(&jobs[i])) warning_count++; } @@ -379,7 +392,7 @@ int results_handler(UrlParams *params) { int *warning_inner_counts = NULL; int warning_index = 0; - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { const char *warning_message = warning_message_for_job(&jobs[i]); if (!warning_message) continue; @@ -407,7 +420,7 @@ int results_handler(UrlParams *params) { } int total_results = 0; - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { total_results += jobs[i].results_count; } @@ -427,7 +440,7 @@ int results_handler(UrlParams *params) { send_response(html); free(html); } - for (int i = 0; i < ENGINE_COUNT; i++) + for (int i = 0; i < enabled_engine_count; i++) free(all_results[i]); if (page == 1) { for (int i = 0; i < HANDLER_COUNT; i++) { @@ -441,7 +454,7 @@ int results_handler(UrlParams *params) { } int unique_count = 0; - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { for (int j = 0; j < jobs[i].results_count; j++) { char *display_url = all_results[i][j].url; @@ -524,7 +537,7 @@ int results_handler(UrlParams *params) { free(html); } - for (int i = 0; i < ENGINE_COUNT; i++) { + for (int i = 0; i < enabled_engine_count; i++) { free(all_results[i]); } } diff --git a/src/Scraping/Scraping.h b/src/Scraping/Scraping.h index 014285f..be65e5a 100644 --- a/src/Scraping/Scraping.h +++ b/src/Scraping/Scraping.h @@ -15,6 +15,7 @@ typedef int (*ParserFunc)(const char *engine_name, xmlDocPtr doc, SearchResult **out_results, int max_results); typedef struct { + const char *id; const char *name; const char *base_url; const char *host_header; @@ -24,6 +25,7 @@ typedef struct { int page_multiplier; int page_base; ParserFunc parser; + int enabled; } SearchEngine; typedef struct { @@ -54,8 +56,9 @@ typedef struct { ScrapeStatus status; } ScrapeJob; -extern const SearchEngine ENGINE_REGISTRY[]; +extern SearchEngine ENGINE_REGISTRY[]; extern const int ENGINE_COUNT; +void apply_engines_config(const char *engines_str); size_t write_memory_callback(void *contents, size_t size, size_t nmemb, void *userp); diff --git a/src/Scraping/ScrapingParsers.c b/src/Scraping/ScrapingParsers.c index 874cf54..96aaded 100644 --- a/src/Scraping/ScrapingParsers.c +++ b/src/Scraping/ScrapingParsers.c @@ -310,38 +310,122 @@ static int parse_yahoo(const char *engine_name, xmlDocPtr doc, static int parse_mojeek(const char *engine_name, xmlDocPtr doc, SearchResult **out_results, int max_results); -const SearchEngine ENGINE_REGISTRY[] = { - {.name = "DuckDuckGo Lite", +SearchEngine ENGINE_REGISTRY[] = { + {.id = "ddg", + .name = "DuckDuckGo Lite", .base_url = "https://lite.duckduckgo.com/lite/?q=", .host_header = "lite.duckduckgo.com", .referer = "https://lite.duckduckgo.com/", .page_param = "s", .page_multiplier = 30, .page_base = 0, - .parser = parse_ddg_lite}, - {.name = "Startpage", + .parser = parse_ddg_lite, + .enabled = 1}, + {.id = "startpage", + .name = "Startpage", .base_url = "https://www.startpage.com/sp/search?q=", .host_header = "www.startpage.com", .referer = "https://www.startpage.com/", .page_param = "page", .page_multiplier = 1, .page_base = 1, - .parser = parse_startpage}, - {.name = "Yahoo", + .parser = parse_startpage, + .enabled = 1}, + {.id = "yahoo", + .name = "Yahoo", .base_url = "https://search.yahoo.com/search?p=", .host_header = "search.yahoo.com", .referer = "https://search.yahoo.com/", .page_param = "b", .page_multiplier = 10, .page_base = 1, - .parser = parse_yahoo}, - {.name = "Mojeek", + .parser = parse_yahoo, + .enabled = 1}, + {.id = "mojeek", + .name = "Mojeek", .base_url = "https://www.mojeek.com/search?q=", .host_header = "www.mojeek.com", .referer = "https://www.mojeek.com/", .page_param = "s", .page_multiplier = 10, .page_base = 1, - .parser = parse_mojeek}}; + .parser = parse_mojeek, + .enabled = 1}}; const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine); + +static int engine_id_compare(const char *engine_id, const char *config_id) { + while (*engine_id && *config_id) { + char e = *engine_id; + char c = *config_id; + if (e >= 'A' && e <= 'Z') + e = e - 'A' + 'a'; + if (c >= 'A' && c <= 'Z') + c = c - 'A' + 'a'; + if (e != c) + return 0; + engine_id++; + config_id++; + } + return *engine_id == *config_id; +} + +void apply_engines_config(const char *engines_str) { + if (!engines_str || engines_str[0] == '\0') { + for (int i = 0; i < ENGINE_COUNT; i++) { + ENGINE_REGISTRY[i].enabled = 1; + } + return; + } + + for (int i = 0; i < ENGINE_COUNT; i++) { + ENGINE_REGISTRY[i].enabled = 0; + } + + char *copy = strdup(engines_str); + if (!copy) + return; + + char *saveptr; + char *token = strtok_r(copy, ",", &saveptr); + + while (token) { + while (*token == ' ' || *token == '\t') + token++; + + if (strcmp(token, "*") == 0) { + for (int i = 0; i < ENGINE_COUNT; i++) { + ENGINE_REGISTRY[i].enabled = 1; + } + } else if (token[0] == '-' && token[1] != '\0') { + char *engine_id = token + 1; + int found = 0; + for (int i = 0; i < ENGINE_COUNT; i++) { + if (engine_id_compare(ENGINE_REGISTRY[i].id, engine_id)) { + ENGINE_REGISTRY[i].enabled = 0; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "[WARN] Unknown engine: %s\n", engine_id); + } + } else { + int found = 0; + for (int i = 0; i < ENGINE_COUNT; i++) { + if (engine_id_compare(ENGINE_REGISTRY[i].id, token)) { + ENGINE_REGISTRY[i].enabled = 1; + found = 1; + break; + } + } + if (!found) { + fprintf(stderr, "[WARN] Unknown engine: %s\n", token); + } + } + + token = strtok_r(NULL, ",", &saveptr); + } + + free(copy); +}