sorting fixed

This commit is contained in:
2025-06-12 11:31:39 +02:00
parent c06268383f
commit c57dbe080b

368
main.c
View File

@@ -9,228 +9,240 @@
#define MIN_COUNT 10000 #define MIN_COUNT 10000
void string_ncopy(char *dest, const char *src, size_t max_len) { void string_ncopy(char *dest, const char *src, size_t max_len) {
size_t i = 0; size_t i = 0;
while (i < max_len - 1 && src[i]) { while (i < max_len - 1 && src[i]) {
dest[i] = src[i]; dest[i] = src[i];
i++; i++;
} }
dest[i] = '\0'; dest[i] = '\0';
} }
typedef struct person { typedef struct person {
struct person *next; struct person *next;
char name[NAME_MAX_LENGTH]; char name[NAME_MAX_LENGTH];
int count; int count;
} person; } person;
void newPerson(person *p, const char *name) { void newPerson(person *p, const char *name) {
string_ncopy(p->name, name, NAME_MAX_LENGTH); string_ncopy(p->name, name, NAME_MAX_LENGTH);
p->count = 1; p->count = 1;
p->next = NULL; p->next = NULL;
} }
void insert(person **head, person *node) { void insert(person **head, person *node) {
if (head == NULL) { if (head == NULL) {
*head = node; *head = node;
} else { } else {
person *p = *head; person *p = *head;
int p_exists = strcmp(p->name, node->name); int p_exists = strcmp(p->name, node->name);
while (p->next != NULL && p_exists != 0) { while (p->next != NULL && p_exists != 0) {
p = p->next; p = p->next;
p_exists = strcmp(p->name, node->name); p_exists = strcmp(p->name, node->name);
}
if (p_exists == 0) {
p->count++;
} else {
p->next = node;
}
} }
if (p_exists == 0) {
p->count++;
} else {
p->next = node;
}
}
} }
void sorted_name_insert(person **head, char *name) { void sorted_name_insert(person **head, char *name) {
person *node = (person *) malloc(sizeof(person)); person *node = (person *)malloc(sizeof(person));
newPerson(node, name); newPerson(node, name);
if (*head == NULL) { if (*head == NULL) {
*head = node; *head = node;
} else { } else {
person *p = *head; person *p = *head;
person *p_prev = NULL; person *p_prev = NULL;
int p_exists = strcmp(p->name, name); int p_exists = strcmp(p->name, name);
if (p_exists > 0) { if (p_exists > 0) {
node->next = *head; node->next = *head;
*head = node; *head = node;
return; return;
}
while (p->next != NULL && p_exists != 0) {
p_exists = strcmp(p->next->name, name);
if (p_exists > 0) {
node->next = p->next;
p->next = node;
return;
}
p = p->next;
}
if (p_exists == 0) {
p->count++;
free(node);
} else {
p->next = node;
}
} }
while (p->next != NULL && p_exists != 0) {
p_exists = strcmp(p->next->name, name);
if (p_exists > 0) {
node->next = p->next;
p->next = node;
return;
}
p = p->next;
}
if (p_exists == 0) {
p->count++;
free(node);
} else {
p->next = node;
}
}
} }
void sorted_count_insert(person **head, person *node) { void sorted_count_insert(person **head, person *node) {
if (*head == NULL) { if (*head == NULL) {
*head = node; *head = node;
} else { } else {
person *p = *head; person *p = *head;
person *p_prev = NULL; person *p_prev = NULL;
int cmp = p->count - node->count; int cmp = p->count - node->count;
while (p != NULL && cmp < 0) { while (p->next != NULL && cmp > 0) {
p_prev = p; p_prev = p;
cmp = p->count - node->count; p = p->next;
p = p->next; cmp = p->count - node->count;
}
if (p_prev == NULL) {
node->next = *head;
*head = node;
} else {
node->next = p;
p_prev->next = node;
}
} }
if (p_prev == NULL) {
node->next = *head;
*head = node;
} else if (p->next != NULL && cmp < 0) {
node->next = p;
p_prev->next = node;
} else {
p->next = node;
node->next = NULL;
}
}
} }
void display(person *head) { void display(person *head) {
person *p = head; person *p = head;
while (p != NULL) { while (p != NULL) {
printf("%s %d\n", p->name, p->count); printf("%s %d\n", p->name, p->count);
p = p->next; p = p->next;
} }
} }
u_long hash(const unsigned char *str) { u_long hash(const unsigned char *str) {
u_long hash = 5381; u_long hash = 5381;
int c; int c;
while ((c = *str++)) { while ((c = *str++)) {
hash = ((hash << 5) + hash) + c; hash = ((hash << 5) + hash) + c;
} }
return hash; return hash;
} }
void hm_insert(person **hashmap, char *name) { void hm_insert(person **hashmap, char *name) {
u_long hash_value = hash(name); u_long hash_value = hash(name);
hash_value = hash_value % HASH_BUCKETS; hash_value = hash_value % HASH_BUCKETS;
sorted_name_insert(&hashmap[hash_value], name); sorted_name_insert(&hashmap[hash_value], name);
} }
char *parse_line(char *line) { void parse_line(char *line, char *name) {
char *line_it = line; char *line_it = line;
if (*line_it == '<') { if (*line_it == '<') {
line_it++; line_it++;
char *tagname = malloc(sizeof(char) * TAG_MAX_LENGTH); char *tagname = malloc(sizeof(char) * TAG_MAX_LENGTH);
size_t i = 0; size_t i = 0;
while (i < TAG_MAX_LENGTH-1 && *line_it != ' ' && *line_it != '>' && *line_it != '\0' && *line_it != '\n') { while (i < TAG_MAX_LENGTH - 1 && *line_it != ' ' && *line_it != '>' &&
tagname[i] = *line_it; *line_it != '\0' && *line_it != '\n') {
line_it++; tagname[i] = *line_it;
i++; line_it++;
} i++;
tagname[i] = '\0';
if (strcmp(tagname, "author") == 0 || strcmp(tagname, "editor") == 0) {
free(tagname);
while (*line_it != '>') {
line_it++;
}
line_it++;
char *last_space = line_it;
while (*line_it != '<') {
if (*line_it == ' ') {
last_space = line_it;
}
line_it++;
if (isdigit(*line_it)) {
line_it = line_it - 2;
while (*line_it != ' ' && *line_it != '<') {
line_it--;
}
last_space = line_it + 1;
break;
}
}
char *name = malloc(sizeof(char) * NAME_MAX_LENGTH);
i = 0;
line_it = last_space + 1;
while (i < NAME_MAX_LENGTH - 1 && *line_it != '<' && *line_it != ' ') {
name[i] = *line_it;
line_it++;
i++;
}
name[i] = '\0';
return name;
}
free(tagname);
} }
return NULL; tagname[i] = '\0';
if (strcmp(tagname, "author") == 0 || strcmp(tagname, "editor") == 0) {
while (*line_it != '>') {
line_it++;
}
line_it++;
char *last_space = line_it;
while (*line_it != '<') {
if (*line_it == ' ') {
last_space = line_it;
}
line_it++;
if (isdigit(*line_it)) {
line_it = line_it - 2;
while (*line_it != ' ' && *line_it != '<') {
line_it--;
}
last_space = line_it + 1;
break;
}
}
i = 0;
line_it = last_space + 1;
while (i < NAME_MAX_LENGTH - 1 && *line_it != '<' && *line_it != ' ') {
name[i] = *line_it;
line_it++;
i++;
}
name[i] = '\0';
}
free(tagname);
}
} }
void make_list(person **hashmap, person **list, const int min_count) { void make_list(person **hashmap, person **list, const int min_count) {
size_t i = 0; size_t i = 0;
for (i = 0; i < HASH_BUCKETS; i++) { for (i = 0; i < HASH_BUCKETS; i++) {
person *p = hashmap[i]; person *p = hashmap[i];
while (p != NULL) { while (p != NULL) {
person *p_next = p->next; person *p_next = p->next;
if (p->count >= min_count) { if (p->count >= min_count) {
p->next = NULL; p->next = NULL;
sorted_count_insert(list, p); sorted_count_insert(list, p);
} else { } else {
free(p); free(p);
} }
p = p_next; p = p_next;
}
} }
free(hashmap); }
free(hashmap);
} }
void clean_list(person *list) { void clean_list(person *list) {
person *p = list; person *p = list;
while (p != NULL) { while (p != NULL) {
person *next = p->next; person *next = p->next;
free(p); free(p);
p = next; p = next;
} }
} }
void clean_memory(person **hashmap) { void clean_memory(person **hashmap) {
for (int i = 0; i < HASH_BUCKETS; i++) { for (int i = 0; i < HASH_BUCKETS; i++) {
person *p = hashmap[i]; person *p = hashmap[i];
while (p != nullptr) { while (p != NULL) {
person *next = p->next; person *next = p->next;
free(p); free(p);
p = next; p = next;
}
} }
free(hashmap); }
free(hashmap);
} }
int main(void) { int main(void) {
person **hashmap = (person **) malloc(sizeof(person *) * HASH_BUCKETS); person **hashmap = (person **)malloc(sizeof(person *) * HASH_BUCKETS);
for (int i = 0; i < HASH_BUCKETS; i++) { for (int i = 0; i < HASH_BUCKETS; i++) {
hashmap[i] = NULL; hashmap[i] = NULL;
} }
FILE *fp = fopen("dblp.xml", "r"); FILE *fp = fopen("dblp.xml", "r");
char line[1024]; char line[1024];
while (fgets(line, 1024, fp) != NULL) { if (fp) {
char *name = parse_line(line); while (fgets(line, sizeof(line), fp) != NULL) {
if (name != NULL) { char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH);
hm_insert(hashmap, name); parse_line(line, name);
free(name); if (name != NULL) {
} hm_insert(hashmap, name);
}
free(name);
} }
fclose(fp); fclose(fp);
printf("Done parsing!\n"); } else {
person *list = NULL; while (fgets(line, sizeof(line), stdin) != NULL) {
make_list(hashmap, &list, MIN_COUNT); char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH);
display(list); parse_line(line, name);
clean_list(list); if (name != NULL) {
return 0; hm_insert(hashmap, name);
}
free(name);
}
}
printf("Done parsing!\n");
person *list = NULL;
make_list(hashmap, &list, MIN_COUNT);
display(list);
clean_list(list);
return 0;
} }