Parsing appears to work. TODO: sort the list.

This commit is contained in:
2025-06-11 23:51:52 +02:00
parent 4d256d0b3c
commit c06268383f

110
main.c
View File

@@ -1,18 +1,12 @@
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Size, in chars, of the buffer allocated for a parsed person name. */
const u_int NAME_MAX_LENGTH = 32;
/* Size, in chars, of the buffer allocated for a parsed tag name. */
const u_int TAG_MAX_LENGTH = 32;
/* Number of bucket head pointers allocated for the hashmap array. */
const u_int HASH_BUCKETS = 4000037;
/*
 * Compare two NUL-terminated strings byte by byte.
 *
 * str1, str2: strings to compare; neither may be NULL.
 *
 * Returns 0 when the strings are equal, a negative value when str1 orders
 * before str2, and a positive value otherwise.
 */
int string_compare(const char *str1, const char *str2) {
    /* Advance while both strings agree; stopping on str1's terminator also
     * covers the case where str2 ends at the same position. */
    while (*str1 != '\0' && *str1 == *str2) {
        str1++;
        str2++;
    }
    /* Compare as unsigned char, as strcmp does: with a signed plain char,
     * bytes >= 0x80 would otherwise order before ASCII characters. */
    return (int)(unsigned char)*str1 - (int)(unsigned char)*str2;
}
/* Size of person.name and of the buffer parse_line allocates for a name
 * (including the terminating NUL). */
#define NAME_MAX_LENGTH 128
/* Size of the buffer parse_line allocates for a tag name (including the
 * terminating NUL). */
#define TAG_MAX_LENGTH 128
/* Number of bucket head pointers in the hashmap array. */
#define HASH_BUCKETS 4000037
/* Threshold main passes to make_list: nodes with a smaller count are freed
 * instead of being moved to the output list. */
#define MIN_COUNT 10000
void string_ncopy(char *dest, const char *src, size_t max_len) {
size_t i = 0;
@@ -25,7 +19,7 @@ void string_ncopy(char *dest, const char *src, size_t max_len) {
/*
 * Node of a singly linked list of people, used both for the hashmap bucket
 * chains and for the final list ordered by count.
 */
typedef struct person {
    struct person *next;        /* following node, or NULL at the end of the list */
    char name[NAME_MAX_LENGTH]; /* name as a NUL-terminated string */
    int count;                  /* incremented by insert() when the name is already present */
} person;
@@ -40,10 +34,10 @@ void insert(person **head, person *node) {
*head = node;
} else {
person *p = *head;
int p_exists = string_compare(p->name, node->name);
int p_exists = strcmp(p->name, node->name);
while (p->next != NULL && p_exists != 0) {
p = p->next;
p_exists = string_compare(p->name, node->name);
p_exists = strcmp(p->name, node->name);
}
if (p_exists == 0) {
p->count++;
@@ -61,14 +55,14 @@ void sorted_name_insert(person **head, char *name) {
} else {
person *p = *head;
person *p_prev = NULL;
int p_exists = string_compare(p->name, name);
int p_exists = strcmp(p->name, name);
if (p_exists > 0) {
node->next = *head;
*head = node;
return;
}
while (p->next != NULL && p_exists != 0) {
p_exists = string_compare(p->next->name, name);
p_exists = strcmp(p->next->name, name);
if (p_exists > 0) {
node->next = p->next;
p->next = node;
@@ -85,6 +79,28 @@ void sorted_name_insert(person **head, char *name) {
}
}
/*
 * Link `node` into the list at `*head`, keeping the list ordered by
 * ascending `count`.
 *
 * head: address of the list's head pointer (*head == NULL means an empty
 *       list); updated when `node` becomes the first element.
 * node: element to insert; its `next` field is overwritten.
 *
 * A node whose count equals that of existing elements is placed in front of
 * them.
 */
void sorted_count_insert(person **head, person *node) {
    /* `link` addresses the pointer that will end up referring to `node`:
     * the head pointer first, then each `next` field in turn. Walking the
     * links rather than the nodes removes the head/middle/tail special
     * cases and tests every element before stepping past it. */
    person **link = head;
    /* Direct comparison rather than subtraction, so extreme counts cannot
     * overflow. */
    while (*link != NULL && (*link)->count < node->count) {
        link = &(*link)->next;
    }
    node->next = *link;
    *link = node;
}
void display(person *head) {
person *p = head;
while (p != NULL) {
@@ -113,14 +129,14 @@ char *parse_line(char *line) {
if (*line_it == '<') {
line_it++;
char *tagname = malloc(sizeof(char) * TAG_MAX_LENGTH);
char *tag_it = tagname;
while (*line_it != ' ' && *line_it != '>' && *line_it != '\0' && *line_it != '\n') {
*tag_it = *line_it;
size_t i = 0;
while (i < TAG_MAX_LENGTH-1 && *line_it != ' ' && *line_it != '>' && *line_it != '\0' && *line_it != '\n') {
tagname[i] = *line_it;
line_it++;
tag_it++;
i++;
}
*tag_it = '\0';
if (string_compare(tagname, "author") == 0 || string_compare(tagname, "editor") == 0) {
tagname[i] = '\0';
if (strcmp(tagname, "author") == 0 || strcmp(tagname, "editor") == 0) {
free(tagname);
while (*line_it != '>') {
line_it++;
@@ -142,19 +158,46 @@ char *parse_line(char *line) {
}
}
char *name = malloc(sizeof(char) * NAME_MAX_LENGTH);
char *name_it = name;
i = 0;
line_it = last_space + 1;
while (*line_it != '<' && *line_it != ' ') {
*name_it = *line_it;
while (i < NAME_MAX_LENGTH - 1 && *line_it != '<' && *line_it != ' ') {
name[i] = *line_it;
line_it++;
name_it++;
i++;
}
*name_it = '\0';
name[i] = '\0';
return name;
}
free(tagname);
}
return nullptr;
return NULL;
}
/*
 * Empty every bucket of `hashmap`, moving each node whose count is at least
 * `min_count` into `*list` via sorted_count_insert and freeing the others.
 * The bucket array itself is freed before returning, so `hashmap` must not
 * be used afterwards.
 *
 * hashmap:   array of HASH_BUCKETS bucket head pointers.
 * list:      address of the destination list's head pointer.
 * min_count: smallest count a node needs in order to be kept.
 */
void make_list(person **hashmap, person **list, const int min_count) {
    for (size_t bucket = 0; bucket < HASH_BUCKETS; bucket++) {
        person *rest = NULL;
        for (person *cur = hashmap[bucket]; cur != NULL; cur = rest) {
            /* Remember the remainder of the chain before cur is unlinked
             * or released. */
            rest = cur->next;
            if (cur->count < min_count) {
                free(cur);
                continue;
            }
            cur->next = NULL;
            sorted_count_insert(list, cur);
        }
    }
    free(hashmap);
}
/*
 * Free every node of the linked list starting at `list`.
 * Passing NULL (an empty list) does nothing.
 */
void clean_list(person *list) {
    while (list != NULL) {
        person *doomed = list;
        /* Step forward first: the node cannot be read once it is freed. */
        list = list->next;
        free(doomed);
    }
}
void clean_memory(person **hashmap) {
@@ -170,7 +213,7 @@ void clean_memory(person **hashmap) {
}
int main(void) {
const auto hashmap = (person **) malloc(sizeof(person *) * HASH_BUCKETS);
person **hashmap = (person **) malloc(sizeof(person *) * HASH_BUCKETS);
for (int i = 0; i < HASH_BUCKETS; i++) {
hashmap[i] = NULL;
}
@@ -184,9 +227,10 @@ int main(void) {
}
}
fclose(fp);
for (int i = 0; i < HASH_BUCKETS; i++) {
display(hashmap[i]);
}
clean_memory(hashmap);
printf("Done parsing!\n");
person *list = NULL;
make_list(hashmap, &list, MIN_COUNT);
display(list);
clean_list(list);
return 0;
}