Files
c_prog/main.c
2025-06-11 22:00:07 +02:00

193 lines
4.8 KiB
C

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Size limits shared by the functions below.
 * NOTE(review): u_int is not an ISO C type; it is presumably supplied by the
 * system headers (<sys/types.h>) in the build configuration used here -- confirm.
 */

/* Bound passed to string_ncopy when storing a name; also the size of the
 * name buffer allocated in parse_line. Same value as person.name's size. */
const u_int NAME_MAX_LENGTH = 32;
/* Size in bytes of the scratch buffer parse_line allocates for an element name. */
const u_int TAG_MAX_LENGTH = 32;
/* Number of bucket heads in the hash table allocated in main. */
const u_int HASH_BUCKETS = 4000037;
/*
 * Compare two NUL-terminated strings character by character.
 *
 * Returns 0 when the strings are equal; otherwise the difference between the
 * first pair of characters that differ (or between a character and the
 * terminating NUL when one string is a prefix of the other). Characters are
 * compared as plain `char`.
 */
int string_compare(const char *str1, const char *str2) {
    for (; *str1 != '\0' && *str2 != '\0'; str1++, str2++) {
        if (*str1 != *str2) {
            break;
        }
    }
    return *str1 - *str2;
}
/*
 * Bounded string copy that always NUL-terminates.
 *
 * Copies at most max_len - 1 characters from src into dest and then writes a
 * terminating NUL, so dest must have room for max_len bytes.
 *
 * dest     destination buffer (must not be NULL)
 * src      NUL-terminated source string (must not be NULL)
 * max_len  capacity of dest in bytes; when 0, dest is left untouched
 */
void string_ncopy(char *dest, const char *src, size_t max_len) {
    /* A zero capacity leaves no room even for the terminator. Without this
     * guard, max_len - 1 would wrap around to SIZE_MAX and the copy would be
     * unbounded. */
    if (max_len == 0) {
        return;
    }

    size_t i = 0;
    while (i + 1 < max_len && src[i] != '\0') {
        dest[i] = src[i];
        i++;
    }
    dest[i] = '\0';
}
/*
 * One entry of a singly linked list of names, each with an occurrence count.
 */
typedef struct person {
    /* Next entry in the list, or NULL for the last entry. */
    struct person *next;
    /* NUL-terminated name; the array size equals NAME_MAX_LENGTH's value. */
    char name[32];
    /* Occurrence count: set to 1 by newPerson, incremented when the same
     * name is inserted again. */
    int count;
} person;
/*
 * Initialise an already allocated list entry.
 *
 * p     entry to fill in
 * name  NUL-terminated name; copied into p->name, truncated to
 *       NAME_MAX_LENGTH - 1 characters
 *
 * The entry starts unlinked (next == NULL) with a count of 1.
 */
void newPerson(person *p, const char *name) {
    p->next = NULL;
    p->count = 1;
    string_ncopy(p->name, name, NAME_MAX_LENGTH);
}
/*
 * Add node to the unsorted list *head, or count a repeat.
 *
 * If an entry with the same name already exists its count is incremented and
 * node is NOT linked into the list; the caller keeps ownership of node in
 * that case. Otherwise node is appended at the tail.
 *
 * head  address of the list head pointer; *head may be NULL (empty list)
 * node  initialised entry to insert
 *
 * A NULL head or node is ignored.
 */
void insert(person **head, person *node) {
    if (head == NULL || node == NULL) {
        return;
    }

    /* Empty list: the emptiness test must look at *head, not at head itself,
     * otherwise the first insertion dereferences a NULL entry. */
    if (*head == NULL) {
        *head = node;
        return;
    }

    person *p = *head;
    int diff = string_compare(p->name, node->name);
    while (p->next != NULL && diff != 0) {
        p = p->next;
        diff = string_compare(p->name, node->name);
    }

    if (diff == 0) {
        p->count++;
    } else {
        p->next = node;
    }
}
/*
 * Record one occurrence of name in the list *head, which is kept sorted in
 * ascending string_compare order.
 *
 * If the name is already present its count is incremented; otherwise a new
 * entry with count 1 is allocated and linked in at its sorted position.
 *
 * head  address of the list head pointer; *head may be NULL (empty list)
 * name  NUL-terminated name; only the part that fits in person.name is used
 *
 * A NULL head or name is ignored. If memory for a new entry cannot be
 * obtained, an error is printed and the program exits with EXIT_FAILURE.
 */
void sorted_name_insert(person **head, char *name) {
    if (head == NULL || name == NULL) {
        return;
    }

    /* Entries store a truncated copy of the name, so lookups must use the
     * same truncated key; comparing the untruncated name would never match
     * an existing entry for an over-long name. */
    char key[sizeof(((person *) NULL)->name)];
    string_ncopy(key, name, sizeof key);

    /* link always points at the pointer that would have to be rewritten to
     * insert in front of the entry currently being examined. */
    person **link = head;
    while (*link != NULL) {
        int diff = string_compare((*link)->name, key);
        if (diff == 0) {
            (*link)->count++;
            return;
        }
        if (diff > 0) {
            break; /* first entry that sorts after key: insert before it */
        }
        link = &(*link)->next;
    }

    /* Allocate only when a new entry is actually needed. */
    person *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "sorted_name_insert: out of memory\n");
        exit(EXIT_FAILURE);
    }
    newPerson(node, key);
    node->next = *link;
    *link = node;
}
/*
 * Print every entry of the list starting at head to standard output, one
 * "name count" pair per line. A NULL head prints nothing.
 */
void display(person *head) {
    for (person *entry = head; entry != NULL; entry = entry->next) {
        printf("%s %d\n", entry->name, entry->count);
    }
}
/*
 * Hash a NUL-terminated byte string: start from 5381 and, for every byte,
 * multiply the running value by 33 and add the byte. Arithmetic is unsigned
 * and wraps on overflow.
 *
 * The return type is spelled `unsigned long` (the type u_long conventionally
 * aliases) so the function does not depend on a non-ISO typedef.
 *
 * str  NUL-terminated string (must not be NULL)
 */
unsigned long hash(const unsigned char *str) {
    unsigned long h = 5381;
    for (; *str != '\0'; str++) {
        h = h * 33u + *str; /* same value as (h << 5) + h + byte */
    }
    return h;
}
/*
 * Record one occurrence of name in the hash table.
 *
 * The bucket is hash(name) modulo HASH_BUCKETS; the name is then inserted
 * into (or counted in) that bucket's sorted list.
 *
 * hashmap  array of HASH_BUCKETS list heads
 * name     NUL-terminated name
 *
 * A NULL hashmap or name is ignored.
 */
void hm_insert(person **hashmap, char *name) {
    if (hashmap == NULL || name == NULL) {
        return;
    }

    /* hash() takes unsigned bytes; char * does not convert to
     * const unsigned char * implicitly, so the cast is required. */
    unsigned long bucket = hash((const unsigned char *) name) % HASH_BUCKETS;
    sorted_name_insert(&hashmap[bucket], name);
}
/* Return nonzero when the len bytes starting at s spell exactly the
 * NUL-terminated string word. */
static int tag_equals(const char *s, size_t len, const char *word) {
    size_t i = 0;
    while (i < len && word[i] != '\0' && s[i] == word[i]) {
        i++;
    }
    return i == len && word[i] == '\0';
}

/*
 * Extract the last name from an <author> or <editor> line.
 *
 * The line must start with '<'. The element text (between the '>' that ends
 * the opening tag and the next '<') is split on spaces, and the result is the
 * last word that precedes the first word starting with a digit, or simply
 * the last word when there is none:
 *
 *   "<author>John Smith</author>"       -> "Smith"
 *   "<author>John Smith 0001</author>"  -> "Smith"
 *   "<author>Madonna</author>"          -> "Madonna"
 *
 * line  NUL-terminated input line; it is not modified
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(). The string is not truncated, so callers storing it in a fixed-size
 * buffer must bound the copy. Returns NULL when the line is not an
 * author/editor element, has no '>' closing the opening tag, contains no
 * usable word, or when memory cannot be allocated.
 */
char *parse_line(char *line) {
    if (line == NULL || *line != '<') {
        return NULL;
    }

    /* Element name: everything after '<' up to a space, '>' or end of line.
     * It is compared in place, so no scratch buffer can overflow. */
    const char *tag = line + 1;
    const char *p = tag;
    while (*p != ' ' && *p != '>' && *p != '\0' && *p != '\n') {
        p++;
    }
    size_t tag_len = (size_t) (p - tag);
    if (!tag_equals(tag, tag_len, "author") && !tag_equals(tag, tag_len, "editor")) {
        return NULL;
    }

    /* Skip any attributes; never run past the end of the line. */
    while (*p != '>' && *p != '\0' && *p != '\n') {
        p++;
    }
    if (*p != '>') {
        return NULL;
    }
    p++;

    /* Walk the words of the element text, remembering the most recent one. */
    const char *best = NULL;
    size_t best_len = 0;
    while (*p != '<' && *p != '\0' && *p != '\n') {
        if (*p == ' ') {
            p++;
            continue;
        }
        /* <ctype.h> functions need a value representable as unsigned char. */
        if (isdigit((unsigned char) *p)) {
            break; /* numeric suffix: the name is the word before it */
        }
        const char *word = p;
        while (*p != ' ' && *p != '<' && *p != '\0' && *p != '\n') {
            p++;
        }
        best = word;
        best_len = (size_t) (p - word);
    }
    if (best == NULL) {
        return NULL;
    }

    char *name = malloc(best_len + 1);
    if (name == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < best_len; i++) {
        name[i] = best[i];
    }
    name[best_len] = '\0';
    return name;
}
/*
 * Release the whole hash table: every entry of every bucket list, then the
 * bucket array itself.
 *
 * hashmap  array of HASH_BUCKETS list heads, itself obtained from malloc
 */
void clean_memory(person **hashmap) {
    for (unsigned int bucket = 0; bucket < HASH_BUCKETS; bucket++) {
        person *entry = hashmap[bucket];
        while (entry != NULL) {
            /* Read the link before the entry is released. */
            person *following = entry->next;
            free(entry);
            entry = following;
        }
    }
    free(hashmap);
}
int main(void) {
const auto hashmap = (person **) malloc(sizeof(person *) * HASH_BUCKETS);
for (int i = 0; i < HASH_BUCKETS; i++) {
hashmap[i] = NULL;
}
FILE *fp = fopen("dblp.xml", "r");
char line[1024];
while (fgets(line, 1024, fp) != NULL) {
char *name = parse_line(line);
if (name != NULL) {
hm_insert(hashmap, name);
free(name);
}
}
fclose(fp);
for (int i = 0; i < HASH_BUCKETS; i++) {
display(hashmap[i]);
}
clean_memory(hashmap);
return 0;
}