/*
 * Counts how often each surname appears in the <author>/<editor> elements of
 * a DBLP XML dump and prints one "surname count" line per distinct surname.
 *
 * Surnames are kept in a fixed-size hash table whose buckets are singly
 * linked lists sorted by name.
 */
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum {
    NAME_MAX_LENGTH = 32,   /* size of person.name, including the terminator */
    TAG_MAX_LENGTH = 32,    /* retained for compatibility; no longer needed by parse_line */
    HASH_BUCKETS = 4000037  /* number of buckets in the hash table */
};

/**
 * Compare two NUL-terminated strings byte by byte.
 *
 * Bytes are compared as unsigned char so the ordering does not depend on the
 * platform's signedness of plain char.
 *
 * @return 0 if equal, a negative value if str1 sorts before str2, a positive
 *         value otherwise (the difference of the first mismatching bytes).
 */
int string_compare(const char *str1, const char *str2)
{
    const unsigned char *a = (const unsigned char *)str1;
    const unsigned char *b = (const unsigned char *)str2;

    while (*a != '\0' && *a == *b) {
        a++;
        b++;
    }
    return (int)*a - (int)*b;
}

/**
 * Copy at most max_len - 1 bytes of src into dest and always NUL-terminate.
 *
 * @param max_len capacity of dest in bytes; when 0 nothing is written.
 */
void string_ncopy(char *dest, const char *src, size_t max_len)
{
    if (max_len == 0) {
        return; /* avoids the size_t wrap-around of max_len - 1 */
    }

    size_t i = 0;
    while (i < max_len - 1 && src[i] != '\0') {
        dest[i] = src[i];
        i++;
    }
    dest[i] = '\0';
}

/** One surname and the number of times it has been seen. */
typedef struct person {
    struct person *next;
    char name[NAME_MAX_LENGTH];
    int count;
} person;

/**
 * Initialise an already allocated node: name (truncated to fit), count 1,
 * no successor.
 */
void newPerson(person *p, const char *name)
{
    string_ncopy(p->name, name, NAME_MAX_LENGTH);
    p->count = 1;
    p->next = NULL;
}

/**
 * Insert node into an unsorted list, or bump the counter of an existing
 * entry with the same name.
 *
 * When an entry with the same name already exists, node is NOT linked into
 * the list and remains owned by the caller.
 */
void insert(person **head, person *node)
{
    if (head == NULL || node == NULL) {
        return;
    }
    if (*head == NULL) { /* empty list: node becomes the head */
        *head = node;
        return;
    }

    person *p = *head;
    int cmp = string_compare(p->name, node->name);
    while (p->next != NULL && cmp != 0) {
        p = p->next;
        cmp = string_compare(p->name, node->name);
    }

    if (cmp == 0) {
        p->count++;
    } else {
        p->next = node;
    }
}

/**
 * Record one occurrence of name in a list kept sorted in ascending order.
 *
 * If the name is already present its counter is incremented; otherwise a new
 * node is allocated and linked in at its sorted position. The process exits
 * if the allocation fails.
 */
void sorted_name_insert(person **head, const char *name)
{
    /* Compare with the same truncation that is applied when storing, so that
     * over-long names still match their existing entry. */
    char key[NAME_MAX_LENGTH];
    string_ncopy(key, name, sizeof key);

    /* link always points at the pointer that would have to reference the
     * new node (either *head or some node's next field). */
    person **link = head;
    while (*link != NULL) {
        int cmp = string_compare((*link)->name, key);
        if (cmp == 0) {
            (*link)->count++;
            return;
        }
        if (cmp > 0) {
            break; /* first entry that sorts after key: insert before it */
        }
        link = &(*link)->next;
    }

    person *node = malloc(sizeof *node);
    if (node == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    newPerson(node, key);
    node->next = *link;
    *link = node;
}

/** Print every entry of the list as "name count", one per line. */
void display(person *head)
{
    for (const person *p = head; p != NULL; p = p->next) {
        printf("%s %d\n", p->name, p->count);
    }
}

/** djb2 string hash (hash * 33 + c) over a NUL-terminated byte string. */
unsigned long hash(const unsigned char *str)
{
    unsigned long h = 5381;
    int c;

    while ((c = *str++) != 0) {
        h = ((h << 5) + h) + (unsigned long)c;
    }
    return h;
}

/** Record one occurrence of name in the bucket selected by its hash. */
void hm_insert(person **hashmap, const char *name)
{
    unsigned long bucket = hash((const unsigned char *)name) % HASH_BUCKETS;
    sorted_name_insert(&hashmap[bucket], name);
}

/* True when the tag_len bytes at tag spell exactly the string expected. */
static int tag_is(const char *tag, size_t tag_len, const char *expected)
{
    return tag_len == strlen(expected) && memcmp(tag, expected, tag_len) == 0;
}

/**
 * Extract the surname from a line of the form <author ...>Full Name</author>
 * (or <editor>).
 *
 * The surname is the last space-separated word of the element text that does
 * not start with a digit, so a trailing numeric suffix such as the "0001" in
 * "Wei Wang 0001" is ignored. The result is truncated to NAME_MAX_LENGTH - 1
 * bytes so it matches what a person node can store.
 *
 * @param line NUL-terminated input line; must start with '<' to be considered.
 * @return a malloc'ed string the caller must free, or NULL when the line is
 *         not an author/editor element, contains no usable name, or memory
 *         could not be allocated.
 */
char *parse_line(const char *line)
{
    if (line == NULL || line[0] != '<') {
        return NULL;
    }

    /* Tag name: everything after '<' up to a space, '>', newline or the end. */
    const char *tag = line + 1;
    size_t tag_len = strcspn(tag, " >\n");
    if (!tag_is(tag, tag_len, "author") && !tag_is(tag, tag_len, "editor")) {
        return NULL;
    }

    /* Element text starts after the '>' closing the opening tag (which may
     * carry attributes). */
    const char *text = strchr(tag + tag_len, '>');
    if (text == NULL) {
        return NULL; /* opening tag is not closed on this line */
    }
    text++;

    /* Text ends at the closing tag, or at the end of the line if the closing
     * tag is missing. */
    size_t text_len = strcspn(text, "<\r\n");

    /* Walk the words; remember the last one that is not a numeric suffix. */
    const char *surname = NULL;
    size_t surname_len = 0;
    size_t pos = 0;
    while (pos < text_len) {
        while (pos < text_len && text[pos] == ' ') {
            pos++;
        }
        size_t start = pos;
        while (pos < text_len && text[pos] != ' ') {
            pos++;
        }
        if (pos > start && !isdigit((unsigned char)text[start])) {
            surname = text + start;
            surname_len = pos - start;
        }
    }
    if (surname == NULL) {
        return NULL;
    }

    if (surname_len > (size_t)NAME_MAX_LENGTH - 1) {
        surname_len = (size_t)NAME_MAX_LENGTH - 1;
    }

    char *name = malloc(surname_len + 1);
    if (name == NULL) {
        return NULL;
    }
    memcpy(name, surname, surname_len);
    name[surname_len] = '\0';
    return name;
}

/** Free every node in every bucket, then the bucket array itself. */
void clean_memory(person **hashmap)
{
    if (hashmap == NULL) {
        return;
    }
    for (size_t i = 0; i < HASH_BUCKETS; i++) {
        person *p = hashmap[i];
        while (p != NULL) {
            person *next = p->next;
            free(p);
            p = next;
        }
    }
    free(hashmap);
}

/**
 * Read the DBLP dump (argv[1], or "dblp.xml" when no argument is given),
 * count surnames and print them bucket by bucket.
 */
int main(int argc, char **argv)
{
    const char *path = (argc > 1) ? argv[1] : "dblp.xml";

    person **hashmap = calloc(HASH_BUCKETS, sizeof *hashmap);
    if (hashmap == NULL) {
        perror("calloc");
        return EXIT_FAILURE;
    }

    FILE *fp = fopen(path, "r");
    if (fp == NULL) {
        perror(path);
        clean_memory(hashmap);
        return EXIT_FAILURE;
    }

    char line[1024];
    while (fgets(line, sizeof line, fp) != NULL) {
        char *name = parse_line(line);
        if (name != NULL) {
            hm_insert(hashmap, name);
            free(name);
        }
    }
    fclose(fp);

    for (size_t i = 0; i < HASH_BUCKETS; i++) {
        display(hashmap[i]);
    }

    clean_memory(hashmap);
    return 0;
}