From 8c6632502ff992e80051910451421c55894ed9d8 Mon Sep 17 00:00:00 2001 From: frosty Date: Tue, 17 Mar 2026 12:07:07 -0400 Subject: [PATCH] fix: general readability improvements --- src/Cache/Cache.c | 41 +++++++++++++------------ src/Config.c | 1 - src/Config.h | 22 ++++++++++++++ src/Infobox/Wikipedia.c | 3 +- src/Main.c | 65 +++++++++++++++++++--------------------- src/Routes/Search.c | 4 +-- src/Scraping/Scraping.c | 26 +++++++++------- src/Utility/Display.c | 3 +- src/Utility/HttpClient.c | 53 ++++++++++++++++++++++++++++++-- src/Utility/HttpClient.h | 16 ++++++++++ 10 files changed, 161 insertions(+), 73 deletions(-) diff --git a/src/Cache/Cache.c b/src/Cache/Cache.c index 59d0ed4..d739277 100644 --- a/src/Cache/Cache.c +++ b/src/Cache/Cache.c @@ -1,4 +1,5 @@ #include "Cache.h" +#include "Config.h" #include #include #include @@ -7,9 +8,9 @@ #include #include -static char cache_dir[512] = {0}; -static int cache_ttl_search_val = 3600; -static int cache_ttl_infobox_val = 86400; +static char cache_dir[BUFFER_SIZE_MEDIUM] = {0}; +static int cache_ttl_search_val = DEFAULT_CACHE_TTL_SEARCH; +static int cache_ttl_infobox_val = DEFAULT_CACHE_TTL_INFOBOX; void set_cache_ttl_search(int ttl) { cache_ttl_search_val = ttl; } @@ -44,7 +45,8 @@ static time_t get_file_mtime(const char *filepath) { int cache_init(const char *dir) { if (!dir || strlen(dir) == 0) { - strcpy(cache_dir, "/tmp/omnisearch_cache"); + strncpy(cache_dir, DEFAULT_CACHE_DIR, sizeof(cache_dir) - 1); + cache_dir[sizeof(cache_dir) - 1] = '\0'; } else { strncpy(cache_dir, dir, sizeof(cache_dir) - 1); cache_dir[sizeof(cache_dir) - 1] = '\0'; @@ -53,19 +55,20 @@ int cache_init(const char *dir) { struct stat st; if (stat(cache_dir, &st) != 0) { if (mkdir(cache_dir, 0755) != 0) { - fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir); + fprintf(stderr, "[ERROR] Failed to create cache directory: %s\n", + cache_dir); return -1; } } else if (!S_ISDIR(st.st_mode)) { - fprintf(stderr, "Cache path 
exists but is not a directory: %s\n", + fprintf(stderr, "[ERROR] Cache path exists but is not a directory: %s\n", cache_dir); return -1; } - char subdirs[] = "0123456789abcdef"; - for (int i = 0; subdirs[i]; i++) { - char subdir_path[1024]; - snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, subdirs[i]); + for (int i = 0; HEX_CHARS[i]; i++) { + char subdir_path[BUFFER_SIZE_LARGE]; + snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, + HEX_CHARS[i]); if (stat(subdir_path, &st) != 0) { mkdir(subdir_path, 0755); } @@ -77,11 +80,11 @@ int cache_init(const char *dir) { void cache_shutdown(void) { cache_dir[0] = '\0'; } char *cache_compute_key(const char *query, int page, const char *engine_name) { - char key_buffer[1024]; + char key_buffer[BUFFER_SIZE_LARGE]; snprintf(key_buffer, sizeof(key_buffer), "%s_%d_%s", query ? query : "", page, engine_name ? engine_name : ""); - char *hash = malloc(33); + char *hash = malloc(MD5_HASH_LEN + 1); if (!hash) { return NULL; } @@ -95,7 +98,7 @@ int cache_get(const char *key, time_t max_age, char **out_data, return -1; } - char filepath[1024]; + char filepath[BUFFER_SIZE_LARGE]; snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0], key); @@ -149,7 +152,7 @@ int cache_set(const char *key, const char *data, size_t size) { return -1; } - char filepath[1024]; + char filepath[BUFFER_SIZE_LARGE]; snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0], key); @@ -176,11 +179,11 @@ void cache_cleanup(time_t max_age) { time_t now = time(NULL); time_t cutoff = now - max_age; - char subdirs[] = "0123456789abcdef"; - for (int d = 0; subdirs[d]; d++) { - char subdir_path[1024]; - snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, subdirs[d]); + for (int d = 0; HEX_CHARS[d]; d++) { + char subdir_path[BUFFER_SIZE_LARGE]; + snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, + HEX_CHARS[d]); DIR *dir = opendir(subdir_path); if (!dir) @@ -190,7 +193,7 @@ void 
cache_cleanup(time_t max_age) { while ((entry = readdir(dir)) != NULL) { size_t len = strlen(entry->d_name); if (len > 7 && strcmp(entry->d_name + len - 7, ".cache") == 0) { - char filepath[2048]; + char filepath[BUFFER_SIZE_XLARGE]; /* NOTE(review): the suffix test just above compares the last 7 characters of the name with the 6-character .cache suffix, so it appears unable to match any file - confirm */ snprintf(filepath, sizeof(filepath), "%s/%s", subdir_path, entry->d_name); diff --git a/src/Config.c b/src/Config.c index d038525..490d069 100644 --- a/src/Config.c +++ b/src/Config.c @@ -13,7 +13,6 @@ int load_config(const char *filename, Config *config) { char section[64] = ""; while (fgets(line, sizeof(line), file)) { - line[strcspn(line, "\r\n")] = 0; if (line[0] == '\0' || line[0] == '#' || line[0] == ';') { diff --git a/src/Config.h b/src/Config.h index 3571018..24dafe6 100644 --- a/src/Config.h +++ b/src/Config.h @@ -1,6 +1,28 @@ #ifndef CONFIG_H #define CONFIG_H +#define DEFAULT_HOST "0.0.0.0" /* initial Config.host set in main() before config.ini is loaded */ +#define DEFAULT_PORT 5000 /* initial Config.port */ +#define DEFAULT_CACHE_DIR "/tmp/omnisearch_cache" /* initial Config.cache_dir; also used by cache_init() when no directory is given */ +#define DEFAULT_CACHE_TTL_SEARCH 3600 /* initial search-result TTL; compared against time() values, so presumably seconds (1 hour) */ +#define DEFAULT_CACHE_TTL_INFOBOX 86400 /* initial infobox TTL; presumably seconds (24 hours) */ +#define DEFAULT_MAX_PROXY_RETRIES 3 /* initial Config.max_proxy_retries */ +/* Sizes in bytes for fixed-size char buffers (cache paths, cache keys, URLs, header lines). */ +#define BUFFER_SIZE_SMALL 256 +#define BUFFER_SIZE_MEDIUM 512 +#define BUFFER_SIZE_LARGE 1024 +#define BUFFER_SIZE_XLARGE 2048 +/* Starting capacity in bytes of the growable HTTP response buffers; they double from here when more room is needed. */ +#define INITIAL_BUFFER_SIZE 16384 +/* Length limit passed to shorten_summary() for the Wikipedia extract. */ +#define WIKI_SUMMARY_MAX_CHARS 300 +/* Cache keys: cache_compute_key() allocates MD5_HASH_LEN + 1 bytes per key, and cache_init() creates one subdirectory per HEX_CHARS character. */ +#define MD5_HASH_LEN 32 +#define HEX_CHARS "0123456789abcdef" +/* Search-route constants, previously defined locally in Routes/Search.c. */ +#define INFOBOX_FIELD_COUNT 4 +#define MAX_RESULTS_PER_ENGINE 10 + typedef struct { char host[256]; int port; diff --git a/src/Infobox/Wikipedia.c b/src/Infobox/Wikipedia.c index 4a46b73..6108bce 100644 --- a/src/Infobox/Wikipedia.c +++ b/src/Infobox/Wikipedia.c @@ -2,6 +2,7 @@ #include "../Cache/Cache.h" #include "../Scraping/Scraping.h" #include "../Utility/HttpClient.h" +#include "Config.h" #include #include #include @@ -91,7 +92,7 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) { } info->extract = strdup((const char *)content); - shorten_summary(&(info->extract), 300); + shorten_summary(&(info->extract), WIKI_SUMMARY_MAX_CHARS); xmlFree(content); } } diff --git
a/src/Main.c b/src/Main.c index c76764a..9cdd0ad 100644 --- a/src/Main.c +++ b/src/Main.c @@ -33,50 +33,47 @@ int main() { curl_global_init(CURL_GLOBAL_DEFAULT); - Config config = {.host = "0.0.0.0", - .port = 5000, - .proxy = "", - .proxy_list_file = "", - .max_proxy_retries = 3, - .randomize_username = 0, - .randomize_password = 0, - .cache_dir = "/tmp/omnisearch_cache", - .cache_ttl_search = 3600, - .cache_ttl_infobox = 86400}; + Config cfg = {.host = DEFAULT_HOST, + .port = DEFAULT_PORT, + .proxy = "", + .proxy_list_file = "", + .max_proxy_retries = DEFAULT_MAX_PROXY_RETRIES, + .randomize_username = 0, + .randomize_password = 0, + .cache_dir = DEFAULT_CACHE_DIR, + .cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH, + .cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX}; - if (load_config("config.ini", &config) != 0) { - fprintf(stderr, "Warning: Could not load config file, using defaults\n"); + if (load_config("config.ini", &cfg) != 0) { + fprintf(stderr, "[WARN] Could not load config file, using defaults\n"); } - if (cache_init(config.cache_dir) != 0) { - fprintf( - stderr, - "Warning: Failed to initialize cache, continuing without caching\n"); + if (cache_init(cfg.cache_dir) != 0) { + fprintf(stderr, + "[WARN] Failed to initialize cache, continuing without caching\n"); } else { - fprintf(stderr, "Cache initialized at %s\n", config.cache_dir); - cache_cleanup(config.cache_ttl_search); + fprintf(stderr, "[INFO] Cache initialized at %s\n", cfg.cache_dir); + cache_cleanup(cfg.cache_ttl_search); } - set_cache_ttl_search(config.cache_ttl_search); - set_cache_ttl_infobox(config.cache_ttl_infobox); + set_cache_ttl_search(cfg.cache_ttl_search); + set_cache_ttl_infobox(cfg.cache_ttl_infobox); - if (config.proxy_list_file[0] != '\0') { - if (load_proxy_list(config.proxy_list_file) < 0) { - fprintf( - stderr, - "Warning: Failed to load proxy list, continuing without proxies\n"); + if (cfg.proxy_list_file[0] != '\0') { + if (load_proxy_list(cfg.proxy_list_file) < 0) { + 
fprintf(stderr, + "[WARN] Failed to load proxy list, continuing without proxies\n"); } } - max_proxy_retries = config.max_proxy_retries; - set_proxy_config(config.proxy, config.randomize_username, - config.randomize_password); + max_proxy_retries = cfg.max_proxy_retries; + set_proxy_config(cfg.proxy, cfg.randomize_username, cfg.randomize_password); if (proxy_url[0] != '\0') { - fprintf(stderr, "Using proxy: %s\n", proxy_url); + fprintf(stderr, "[INFO] Using proxy: %s\n", proxy_url); } else if (proxy_count > 0) { - fprintf(stderr, "Using %d proxies from %s\n", proxy_count, - config.proxy_list_file); + fprintf(stderr, "[INFO] Using %d proxies from %s\n", proxy_count, + cfg.proxy_list_file); } set_handler("/", home_handler); @@ -85,12 +82,12 @@ int main() { set_handler("/images", images_handler); set_handler("/proxy", image_proxy_handler); - fprintf(stderr, "Starting Omnisearch on %s:%d\n", config.host, config.port); + fprintf(stderr, "[INFO] Starting Omnisearch on %s:%d\n", cfg.host, cfg.port); - int result = beaker_run(config.host, config.port); + int result = beaker_run(cfg.host, cfg.port); if (result != 0) { - fprintf(stderr, "Error: Beaker server failed to start.\n"); + fprintf(stderr, "[ERROR] Beaker server failed to start.\n"); curl_global_cleanup(); xmlCleanupParser(); return EXIT_FAILURE; diff --git a/src/Routes/Search.c b/src/Routes/Search.c index 6fa3157..16ff1c0 100644 --- a/src/Routes/Search.c +++ b/src/Routes/Search.c @@ -7,6 +7,7 @@ #include "../Scraping/Scraping.h" #include "../Utility/Display.h" #include "../Utility/Unescape.h" +#include "Config.h" #include #include #include @@ -14,9 +15,6 @@ #include #include -#define INFOBOX_FIELD_COUNT 4 -#define MAX_RESULTS_PER_ENGINE 10 - typedef struct { const char *query; InfoBox result; diff --git a/src/Scraping/Scraping.c b/src/Scraping/Scraping.c index 692377e..4c87890 100644 --- a/src/Scraping/Scraping.c +++ b/src/Scraping/Scraping.c @@ -3,6 +3,7 @@ #include "../Proxy/Proxy.h" #include 
"../Utility/Unescape.h" #include "../Utility/XmlHelper.h" +#include "Config.h" #include #include #include @@ -18,8 +19,8 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, MemoryBuffer *mem = (MemoryBuffer *)userp; if (mem->size + realsize + 1 > mem->capacity) { - - size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2; + size_t new_cap = + mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2; while (new_cap < mem->size + realsize + 1) new_cap *= 2; @@ -38,7 +39,7 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, return realsize; } -static const char *get_random_user_agent() { +static const char *get_random_user_agent(void) { static const char *agents[] = { "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, " "like Gecko) Chrome/120.0.0.0 Safari/537.36", @@ -324,6 +325,9 @@ const SearchEngine ENGINE_REGISTRY[] = { const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine); +#define CURL_TIMEOUT 15L +#define CURL_DNS_TIMEOUT 300L + static void configure_curl_handle(CURL *curl, const char *full_url, MemoryBuffer *chunk, struct curl_slist *headers) { @@ -335,9 +339,9 @@ static void configure_curl_handle(CURL *curl, const char *full_url, curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0); curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, ""); - curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, 300L); + curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, CURL_DNS_TIMEOUT); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); - curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, CURL_TIMEOUT); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L); curl_easy_setopt(curl, CURLOPT_COOKIEFILE, ""); @@ -348,19 +352,19 @@ static char *build_search_url(const char *base_url, const char *page_param, int page_multiplier, int page_base, const char *encoded_query, int page) { int page_value = (page < 1 ? 
1 : page - 1) * page_multiplier + page_base; - char *url = malloc(1024); + char *url = malloc(BUFFER_SIZE_LARGE); if (!url) { return NULL; } - snprintf(url, 1024, "%s%s&%s=%d", base_url, encoded_query, page_param, - page_value); + snprintf(url, BUFFER_SIZE_LARGE, "%s%s&%s=%d", base_url, encoded_query, + page_param, page_value); return url; } static struct curl_slist *build_request_headers(const char *host_header, const char *referer) { struct curl_slist *headers = NULL; - char host_buf[256], ref_buf[256]; + char host_buf[BUFFER_SIZE_MEDIUM], ref_buf[BUFFER_SIZE_MEDIUM]; snprintf(host_buf, sizeof(host_buf), "Host: %s", host_header); snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", referer); @@ -486,9 +490,9 @@ retry: continue; } - job->response.memory = (char *)malloc(16384); + job->response.memory = (char *)malloc(INITIAL_BUFFER_SIZE); job->response.size = 0; - job->response.capacity = 16384; + job->response.capacity = INITIAL_BUFFER_SIZE; struct curl_slist *headers = build_request_headers(job->engine->host_header, job->engine->referer); diff --git a/src/Utility/Display.c b/src/Utility/Display.c index d313383..1322391 100644 --- a/src/Utility/Display.c +++ b/src/Utility/Display.c @@ -1,4 +1,5 @@ #include "Display.h" +#include "Config.h" #include #include #include @@ -20,7 +21,7 @@ char *pretty_display_url(const char *input) { } size_t input_len = strlen(start); - char temp[512]; + char temp[BUFFER_SIZE_MEDIUM]; strncpy(temp, start, sizeof(temp) - 1); temp[sizeof(temp) - 1] = '\0'; diff --git a/src/Utility/HttpClient.c b/src/Utility/HttpClient.c index 150b228..bdd2f4d 100644 --- a/src/Utility/HttpClient.c +++ b/src/Utility/HttpClient.c @@ -1,5 +1,7 @@ #include "HttpClient.h" +#include "../Cache/Cache.h" #include "../Proxy/Proxy.h" +#include "Config.h" #include #include @@ -9,7 +11,8 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb, HttpResponse *mem = (HttpResponse *)userp; if (mem->size + realsize + 1 > mem->capacity) { - size_t new_cap = 
mem->capacity == 0 ? 16384 : mem->capacity * 2; + size_t new_cap = + mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2; while (new_cap < mem->size + realsize + 1) new_cap *= 2; @@ -35,11 +38,11 @@ HttpResponse http_get(const char *url, const char *user_agent) { return resp; } - resp.memory = malloc(16384); + resp.memory = malloc(INITIAL_BUFFER_SIZE); if (!resp.memory) { return resp; } - resp.capacity = 16384; + resp.capacity = INITIAL_BUFFER_SIZE; CURL *curl = curl_easy_init(); if (!curl) { @@ -79,3 +82,47 @@ void http_response_free(HttpResponse *resp) { resp->size = 0; resp->capacity = 0; } +/* Fetch url and run parser on the body, consulting the cache first. Returns an all-zero failure result when url or parser is NULL. The cache is read and written only when cache_key is non-NULL and cache_ttl > 0. success is 1 only when parser returned a non-NULL document, which is stored in parsed_result. On the network path memory/size carry the raw body obtained from http_get(); on a cache hit they stay NULL/0. NOTE(review): nothing here releases memory or parsed_result, so the caller presumably owns both - confirm. */ +CachedHttpResponse cached_http_get(const char *url, const char *user_agent, + const char *cache_key, time_t cache_ttl, + XmlParserFn parser) { + CachedHttpResponse result = { + .memory = NULL, .size = 0, .parsed_result = NULL, .success = 0}; +/* Both a URL and a parser callback are required. */ + if (!url || !parser) { + return result; + } +/* Cache lookup: a non-empty hit is parsed and returned without any network request, even when the parser rejects the cached bytes. */ + if (cache_key && cache_ttl > 0) { + char *cached_data = NULL; + size_t cached_size = 0; + if (cache_get(cache_key, cache_ttl, &cached_data, &cached_size) == 0 && + cached_data && cached_size > 0) { + xmlDocPtr doc = parser(cached_data, cached_size, url); + if (doc) { + result.parsed_result = doc; + result.success = 1; + } + free(cached_data); + return result; + } + free(cached_data); + } +/* Cache miss, empty entry, or caching disabled: fetch over the network. */ + HttpResponse resp = http_get(url, user_agent); + if (resp.memory && resp.size > 0) { + if (cache_key && cache_ttl > 0) { + cache_set(cache_key, resp.memory, resp.size); + } +/* The raw body is written to the cache before it is parsed, so a body the parser rejects is cached as well. */ + xmlDocPtr doc = parser(resp.memory, resp.size, url); + if (doc) { + result.parsed_result = doc; + result.success = 1; + } + } +/* The buffer from http_get() is returned inside the result rather than freed here. */ + result.memory = resp.memory; + result.size = resp.size; + return result; +} diff --git a/src/Utility/HttpClient.h b/src/Utility/HttpClient.h index 6eb002c..5eceb83 100644 --- a/src/Utility/HttpClient.h +++ b/src/Utility/HttpClient.h @@ -2,7 +2,9 @@ #define HTTPCLIENT_H #include +#include #include +#include typedef struct { char *memory; @@ -13,4 +15,18 @@ typedef struct { HttpResponse
http_get(const char *url, const char *user_agent); void http_response_free(HttpResponse *resp); +typedef xmlDocPtr (*XmlParserFn)(const char *data, size_t size, + const char *url); /* parser callback: given the body bytes, their length and the source URL, returns a document; cached_http_get() treats NULL as failure */ +/* Result of cached_http_get(). */ +typedef struct { + char *memory; /* raw body from the network fetch; left NULL on a cache hit */ + size_t size; /* length of memory in bytes; 0 on a cache hit */ + void *parsed_result; /* the xmlDocPtr returned by the parser, or NULL */ + int success; /* 1 when the parser produced a document, otherwise 0 */ +} CachedHttpResponse; +/* Cache-aware GET: looks up cache_key first (only when it is non-NULL and cache_ttl > 0), otherwise fetches url, then runs parser on the data. */ +CachedHttpResponse cached_http_get(const char *url, const char *user_agent, + const char *cache_key, time_t cache_ttl, + XmlParserFn parser); + #endif