refactor: general readability improvements

This commit is contained in:
frosty
2026-03-17 12:07:07 -04:00
parent 5a6ad26974
commit 8c6632502f
10 changed files with 161 additions and 73 deletions

View File

@@ -1,4 +1,5 @@
#include "Cache.h"
#include "Config.h"
#include <dirent.h>
#include <openssl/evp.h>
#include <stdio.h>
@@ -7,9 +8,9 @@
#include <sys/stat.h>
#include <time.h>
static char cache_dir[512] = {0};
static int cache_ttl_search_val = 3600;
static int cache_ttl_infobox_val = 86400;
static char cache_dir[BUFFER_SIZE_MEDIUM] = {0};
static int cache_ttl_search_val = DEFAULT_CACHE_TTL_SEARCH;
static int cache_ttl_infobox_val = DEFAULT_CACHE_TTL_INFOBOX;
/* Override the TTL (seconds) used for cached search results. */
void set_cache_ttl_search(int ttl) {
  cache_ttl_search_val = ttl;
}
@@ -44,7 +45,8 @@ static time_t get_file_mtime(const char *filepath) {
int cache_init(const char *dir) {
if (!dir || strlen(dir) == 0) {
strcpy(cache_dir, "/tmp/omnisearch_cache");
strncpy(cache_dir, DEFAULT_CACHE_DIR, sizeof(cache_dir) - 1);
cache_dir[sizeof(cache_dir) - 1] = '\0';
} else {
strncpy(cache_dir, dir, sizeof(cache_dir) - 1);
cache_dir[sizeof(cache_dir) - 1] = '\0';
@@ -53,19 +55,20 @@ int cache_init(const char *dir) {
struct stat st;
if (stat(cache_dir, &st) != 0) {
if (mkdir(cache_dir, 0755) != 0) {
fprintf(stderr, "Failed to create cache directory: %s\n", cache_dir);
fprintf(stderr, "[ERROR] Failed to create cache directory: %s\n",
cache_dir);
return -1;
}
} else if (!S_ISDIR(st.st_mode)) {
fprintf(stderr, "Cache path exists but is not a directory: %s\n",
fprintf(stderr, "[ERROR] Cache path exists but is not a directory: %s\n",
cache_dir);
return -1;
}
char subdirs[] = "0123456789abcdef";
for (int i = 0; subdirs[i]; i++) {
char subdir_path[1024];
snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, subdirs[i]);
for (int i = 0; HEX_CHARS[i]; i++) {
char subdir_path[BUFFER_SIZE_LARGE];
snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir,
HEX_CHARS[i]);
if (stat(subdir_path, &st) != 0) {
mkdir(subdir_path, 0755);
}
@@ -77,11 +80,11 @@ int cache_init(const char *dir) {
/* Reset the configured cache directory path to the empty string. */
void cache_shutdown(void) {
  cache_dir[0] = 0;
}
char *cache_compute_key(const char *query, int page, const char *engine_name) {
char key_buffer[1024];
char key_buffer[BUFFER_SIZE_LARGE];
snprintf(key_buffer, sizeof(key_buffer), "%s_%d_%s", query ? query : "", page,
engine_name ? engine_name : "");
char *hash = malloc(33);
char *hash = malloc(MD5_HASH_LEN + 1);
if (!hash) {
return NULL;
}
@@ -95,7 +98,7 @@ int cache_get(const char *key, time_t max_age, char **out_data,
return -1;
}
char filepath[1024];
char filepath[BUFFER_SIZE_LARGE];
snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0],
key);
@@ -149,7 +152,7 @@ int cache_set(const char *key, const char *data, size_t size) {
return -1;
}
char filepath[1024];
char filepath[BUFFER_SIZE_LARGE];
snprintf(filepath, sizeof(filepath), "%s/%c/%s.cache", cache_dir, key[0],
key);
@@ -176,11 +179,11 @@ void cache_cleanup(time_t max_age) {
time_t now = time(NULL);
time_t cutoff = now - max_age;
char subdirs[] = "0123456789abcdef";
for (int d = 0; subdirs[d]; d++) {
char subdir_path[1024];
snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir, subdirs[d]);
for (int d = 0; HEX_CHARS[d]; d++) {
char subdir_path[BUFFER_SIZE_LARGE];
snprintf(subdir_path, sizeof(subdir_path), "%s/%c", cache_dir,
HEX_CHARS[d]);
DIR *dir = opendir(subdir_path);
if (!dir)
@@ -190,7 +193,7 @@ void cache_cleanup(time_t max_age) {
while ((entry = readdir(dir)) != NULL) {
size_t len = strlen(entry->d_name);
if (len > 7 && strcmp(entry->d_name + len - 7, ".cache") == 0) {
char filepath[2048];
char filepath[BUFFER_SIZE_XLARGE];
snprintf(filepath, sizeof(filepath), "%s/%s", subdir_path,
entry->d_name);

View File

@@ -13,7 +13,6 @@ int load_config(const char *filename, Config *config) {
char section[64] = "";
while (fgets(line, sizeof(line), file)) {
line[strcspn(line, "\r\n")] = 0;
if (line[0] == '\0' || line[0] == '#' || line[0] == ';') {

View File

@@ -1,6 +1,28 @@
#ifndef CONFIG_H
#define CONFIG_H
/* Server defaults applied when config.ini is missing or incomplete. */
#define DEFAULT_HOST "0.0.0.0"
#define DEFAULT_PORT 5000
/* On-disk cache location and entry TTLs, in seconds. */
#define DEFAULT_CACHE_DIR "/tmp/omnisearch_cache"
#define DEFAULT_CACHE_TTL_SEARCH 3600   /* search results: 1 hour */
#define DEFAULT_CACHE_TTL_INFOBOX 86400 /* infoboxes: 24 hours */
#define DEFAULT_MAX_PROXY_RETRIES 3
/* Common stack-buffer sizes, in bytes. */
#define BUFFER_SIZE_SMALL 256
#define BUFFER_SIZE_MEDIUM 512
#define BUFFER_SIZE_LARGE 1024
#define BUFFER_SIZE_XLARGE 2048
/* Initial capacity for growable HTTP response buffers (doubled on growth). */
#define INITIAL_BUFFER_SIZE 16384
/* Character budget passed to shorten_summary() for Wikipedia extracts. */
#define WIKI_SUMMARY_MAX_CHARS 300
/* Length of an MD5 hex digest, excluding the NUL terminator. */
#define MD5_HASH_LEN 32
/* Hex alphabet; also enumerates the 16 cache subdirectory names. */
#define HEX_CHARS "0123456789abcdef"
#define INFOBOX_FIELD_COUNT 4
#define MAX_RESULTS_PER_ENGINE 10
typedef struct {
char host[256];
int port;

View File

@@ -2,6 +2,7 @@
#include "../Cache/Cache.h"
#include "../Scraping/Scraping.h"
#include "../Utility/HttpClient.h"
#include "Config.h"
#include <curl/curl.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
@@ -91,7 +92,7 @@ static void extract_wiki_info(xmlNode *node, InfoBox *info) {
}
info->extract = strdup((const char *)content);
shorten_summary(&(info->extract), 300);
shorten_summary(&(info->extract), WIKI_SUMMARY_MAX_CHARS);
xmlFree(content);
}
}

View File

@@ -33,50 +33,47 @@ int main() {
curl_global_init(CURL_GLOBAL_DEFAULT);
Config config = {.host = "0.0.0.0",
.port = 5000,
.proxy = "",
.proxy_list_file = "",
.max_proxy_retries = 3,
.randomize_username = 0,
.randomize_password = 0,
.cache_dir = "/tmp/omnisearch_cache",
.cache_ttl_search = 3600,
.cache_ttl_infobox = 86400};
Config cfg = {.host = DEFAULT_HOST,
.port = DEFAULT_PORT,
.proxy = "",
.proxy_list_file = "",
.max_proxy_retries = DEFAULT_MAX_PROXY_RETRIES,
.randomize_username = 0,
.randomize_password = 0,
.cache_dir = DEFAULT_CACHE_DIR,
.cache_ttl_search = DEFAULT_CACHE_TTL_SEARCH,
.cache_ttl_infobox = DEFAULT_CACHE_TTL_INFOBOX};
if (load_config("config.ini", &config) != 0) {
fprintf(stderr, "Warning: Could not load config file, using defaults\n");
if (load_config("config.ini", &cfg) != 0) {
fprintf(stderr, "[WARN] Could not load config file, using defaults\n");
}
if (cache_init(config.cache_dir) != 0) {
fprintf(
stderr,
"Warning: Failed to initialize cache, continuing without caching\n");
if (cache_init(cfg.cache_dir) != 0) {
fprintf(stderr,
"[WARN] Failed to initialize cache, continuing without caching\n");
} else {
fprintf(stderr, "Cache initialized at %s\n", config.cache_dir);
cache_cleanup(config.cache_ttl_search);
fprintf(stderr, "[INFO] Cache initialized at %s\n", cfg.cache_dir);
cache_cleanup(cfg.cache_ttl_search);
}
set_cache_ttl_search(config.cache_ttl_search);
set_cache_ttl_infobox(config.cache_ttl_infobox);
set_cache_ttl_search(cfg.cache_ttl_search);
set_cache_ttl_infobox(cfg.cache_ttl_infobox);
if (config.proxy_list_file[0] != '\0') {
if (load_proxy_list(config.proxy_list_file) < 0) {
fprintf(
stderr,
"Warning: Failed to load proxy list, continuing without proxies\n");
if (cfg.proxy_list_file[0] != '\0') {
if (load_proxy_list(cfg.proxy_list_file) < 0) {
fprintf(stderr,
"[WARN] Failed to load proxy list, continuing without proxies\n");
}
}
max_proxy_retries = config.max_proxy_retries;
set_proxy_config(config.proxy, config.randomize_username,
config.randomize_password);
max_proxy_retries = cfg.max_proxy_retries;
set_proxy_config(cfg.proxy, cfg.randomize_username, cfg.randomize_password);
if (proxy_url[0] != '\0') {
fprintf(stderr, "Using proxy: %s\n", proxy_url);
fprintf(stderr, "[INFO] Using proxy: %s\n", proxy_url);
} else if (proxy_count > 0) {
fprintf(stderr, "Using %d proxies from %s\n", proxy_count,
config.proxy_list_file);
fprintf(stderr, "[INFO] Using %d proxies from %s\n", proxy_count,
cfg.proxy_list_file);
}
set_handler("/", home_handler);
@@ -85,12 +82,12 @@ int main() {
set_handler("/images", images_handler);
set_handler("/proxy", image_proxy_handler);
fprintf(stderr, "Starting Omnisearch on %s:%d\n", config.host, config.port);
fprintf(stderr, "[INFO] Starting Omnisearch on %s:%d\n", cfg.host, cfg.port);
int result = beaker_run(config.host, config.port);
int result = beaker_run(cfg.host, cfg.port);
if (result != 0) {
fprintf(stderr, "Error: Beaker server failed to start.\n");
fprintf(stderr, "[ERROR] Beaker server failed to start.\n");
curl_global_cleanup();
xmlCleanupParser();
return EXIT_FAILURE;

View File

@@ -7,6 +7,7 @@
#include "../Scraping/Scraping.h"
#include "../Utility/Display.h"
#include "../Utility/Unescape.h"
#include "Config.h"
#include <ctype.h>
#include <pthread.h>
#include <stdio.h>
@@ -14,9 +15,6 @@
#include <string.h>
#include <time.h>
#define INFOBOX_FIELD_COUNT 4
#define MAX_RESULTS_PER_ENGINE 10
typedef struct {
const char *query;
InfoBox result;

View File

@@ -3,6 +3,7 @@
#include "../Proxy/Proxy.h"
#include "../Utility/Unescape.h"
#include "../Utility/XmlHelper.h"
#include "Config.h"
#include <curl/curl.h>
#include <libxml/HTMLparser.h>
#include <libxml/xpath.h>
@@ -18,8 +19,8 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
MemoryBuffer *mem = (MemoryBuffer *)userp;
if (mem->size + realsize + 1 > mem->capacity) {
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
size_t new_cap =
mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2;
while (new_cap < mem->size + realsize + 1)
new_cap *= 2;
@@ -38,7 +39,7 @@ static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
return realsize;
}
static const char *get_random_user_agent() {
static const char *get_random_user_agent(void) {
static const char *agents[] = {
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
"like Gecko) Chrome/120.0.0.0 Safari/537.36",
@@ -324,6 +325,9 @@ const SearchEngine ENGINE_REGISTRY[] = {
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
#define CURL_TIMEOUT 15L
#define CURL_DNS_TIMEOUT 300L
static void configure_curl_handle(CURL *curl, const char *full_url,
MemoryBuffer *chunk,
struct curl_slist *headers) {
@@ -335,9 +339,9 @@ static void configure_curl_handle(CURL *curl, const char *full_url,
curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, 300L);
curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, CURL_DNS_TIMEOUT);
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, 15L);
curl_easy_setopt(curl, CURLOPT_TIMEOUT, CURL_TIMEOUT);
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");
@@ -348,19 +352,19 @@ static char *build_search_url(const char *base_url, const char *page_param,
int page_multiplier, int page_base,
const char *encoded_query, int page) {
int page_value = (page < 1 ? 1 : page - 1) * page_multiplier + page_base;
char *url = malloc(1024);
char *url = malloc(BUFFER_SIZE_LARGE);
if (!url) {
return NULL;
}
snprintf(url, 1024, "%s%s&%s=%d", base_url, encoded_query, page_param,
page_value);
snprintf(url, BUFFER_SIZE_LARGE, "%s%s&%s=%d", base_url, encoded_query,
page_param, page_value);
return url;
}
static struct curl_slist *build_request_headers(const char *host_header,
const char *referer) {
struct curl_slist *headers = NULL;
char host_buf[256], ref_buf[256];
char host_buf[BUFFER_SIZE_MEDIUM], ref_buf[BUFFER_SIZE_MEDIUM];
snprintf(host_buf, sizeof(host_buf), "Host: %s", host_header);
snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", referer);
@@ -486,9 +490,9 @@ retry:
continue;
}
job->response.memory = (char *)malloc(16384);
job->response.memory = (char *)malloc(INITIAL_BUFFER_SIZE);
job->response.size = 0;
job->response.capacity = 16384;
job->response.capacity = INITIAL_BUFFER_SIZE;
struct curl_slist *headers =
build_request_headers(job->engine->host_header, job->engine->referer);

View File

@@ -1,4 +1,5 @@
#include "Display.h"
#include "Config.h"
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
@@ -20,7 +21,7 @@ char *pretty_display_url(const char *input) {
}
size_t input_len = strlen(start);
char temp[512];
char temp[BUFFER_SIZE_MEDIUM];
strncpy(temp, start, sizeof(temp) - 1);
temp[sizeof(temp) - 1] = '\0';

View File

@@ -1,5 +1,7 @@
#include "HttpClient.h"
#include "../Cache/Cache.h"
#include "../Proxy/Proxy.h"
#include "Config.h"
#include <stdlib.h>
#include <string.h>
@@ -9,7 +11,8 @@ static size_t write_callback(void *contents, size_t size, size_t nmemb,
HttpResponse *mem = (HttpResponse *)userp;
if (mem->size + realsize + 1 > mem->capacity) {
size_t new_cap = mem->capacity == 0 ? 16384 : mem->capacity * 2;
size_t new_cap =
mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2;
while (new_cap < mem->size + realsize + 1)
new_cap *= 2;
@@ -35,11 +38,11 @@ HttpResponse http_get(const char *url, const char *user_agent) {
return resp;
}
resp.memory = malloc(16384);
resp.memory = malloc(INITIAL_BUFFER_SIZE);
if (!resp.memory) {
return resp;
}
resp.capacity = 16384;
resp.capacity = INITIAL_BUFFER_SIZE;
CURL *curl = curl_easy_init();
if (!curl) {
@@ -79,3 +82,47 @@ void http_response_free(HttpResponse *resp) {
resp->size = 0;
resp->capacity = 0;
}
/*
 * Fetch `url` with an optional read-through cache.
 *
 * When `cache_key` is non-NULL and `cache_ttl` > 0, a fresh-enough cache
 * entry is parsed and returned without touching the network.  A cached
 * payload that fails to parse (corrupt or truncated entry) is discarded
 * and a live fetch is attempted instead of reporting failure.  Successful
 * live responses are written back to the cache before parsing.
 *
 * Returns a CachedHttpResponse whose `success` flag is 1 only when
 * `parsed_result` holds a document produced by `parser`.  `memory` is
 * NULL on a cache hit or failed fetch; otherwise it carries the raw body
 * (presumably owned by the caller — confirm against call sites).
 */
CachedHttpResponse cached_http_get(const char *url, const char *user_agent,
                                   const char *cache_key, time_t cache_ttl,
                                   XmlParserFn parser) {
  CachedHttpResponse result = {
      .memory = NULL, .size = 0, .parsed_result = NULL, .success = 0};

  if (!url || !parser) {
    return result;
  }

  /* Read-through: try the cache first. */
  if (cache_key && cache_ttl > 0) {
    char *cached_data = NULL;
    size_t cached_size = 0;

    if (cache_get(cache_key, cache_ttl, &cached_data, &cached_size) == 0 &&
        cached_data && cached_size > 0) {
      xmlDocPtr doc = parser(cached_data, cached_size, url);
      if (doc) {
        result.parsed_result = doc;
        result.success = 1;
        free(cached_data);
        return result;
      }
      /* Cached payload would not parse: fall through to a live fetch
       * instead of returning failure (fixes a corrupt entry blocking the
       * endpoint until TTL expiry). */
    }
    free(cached_data); /* free(NULL) is a no-op */
  }

  /* Cache miss or unusable entry: fetch over HTTP. */
  HttpResponse resp = http_get(url, user_agent);
  if (resp.memory && resp.size > 0) {
    if (cache_key && cache_ttl > 0) {
      cache_set(cache_key, resp.memory, resp.size);
    }
    xmlDocPtr doc = parser(resp.memory, resp.size, url);
    if (doc) {
      result.parsed_result = doc;
      result.success = 1;
    }
  }

  /* Hand the raw body (possibly NULL) back to the caller in all cases. */
  result.memory = resp.memory;
  result.size = resp.size;
  return result;
}

View File

@@ -2,7 +2,9 @@
#define HTTPCLIENT_H
#include <curl/curl.h>
#include <libxml/parser.h>
#include <stddef.h>
#include <time.h>
typedef struct {
char *memory;
@@ -13,4 +15,18 @@ typedef struct {
HttpResponse http_get(const char *url, const char *user_agent);
void http_response_free(HttpResponse *resp);

/* Parser callback: turns a raw response body into an xmlDocPtr, or NULL
 * when the payload cannot be parsed.  `url` is the originating request. */
typedef xmlDocPtr (*XmlParserFn)(const char *data, size_t size,
const char *url);

/* Result of cached_http_get(). */
typedef struct {
char *memory;        /* raw response body; NULL on cache hit or failed fetch */
size_t size;         /* byte count of memory (0 when memory is NULL) */
void *parsed_result; /* xmlDocPtr when success == 1; NULL otherwise */
int success;         /* 1 when parsed_result holds a parsed document */
} CachedHttpResponse;

/* Fetches `url`, consulting the cache when cache_key is non-NULL and
 * cache_ttl > 0, and parses the body with `parser`.
 * NOTE(review): ownership of memory/parsed_result appears to transfer to
 * the caller — confirm who frees them against call sites. */
CachedHttpResponse cached_http_get(const char *url, const char *user_agent,
                                   const char *cache_key, time_t cache_ttl,
                                   XmlParserFn parser);
#endif