/*
 * Counts how often each surname appears in <author>/<editor> elements of a
 * DBLP XML dump and prints every surname seen at least MIN_COUNT times,
 * most frequent first.
 *
 * Input is read from "dblp.xml" in the current directory, or from stdin when
 * that file cannot be opened.
 */
#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFFER_LENGTH 128
#define HASH_BUCKETS 4000037
#define MIN_COUNT 10000

/* Allocate `size` bytes or terminate the program: this tool has no way to
 * continue meaningfully once memory is exhausted. */
static void *xmalloc(size_t size) {
    void *ptr = malloc(size);
    if (ptr == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    return ptr;
}

/**
 * Copy at most max_len - 1 characters of src into dest and always
 * NUL-terminate dest.
 *
 * @param dest    destination buffer of at least max_len bytes
 * @param src     NUL-terminated source string
 * @param max_len capacity of dest; when 0 nothing is written
 */
void string_ncopy(char *dest, const char *src, size_t max_len) {
    if (max_len == 0) {
        return; /* max_len - 1 would wrap around for an unsigned zero */
    }
    size_t i = 0;
    while (i < max_len - 1 && src[i]) {
        dest[i] = src[i];
        i++;
    }
    dest[i] = '\0';
}

/* Singly linked list node: one distinct name and its occurrence count.
 * `name` is heap-allocated and owned by the node. */
typedef struct person {
    struct person *next;
    char *name;
    int count;
} person;

/* Release a node together with the name it owns. */
static void free_person(person *p) {
    free(p->name);
    free(p);
}

/**
 * Initialise an already allocated node with a private copy of `name`
 * (truncated to BUFFER_LENGTH - 1 characters), a count of 1 and no successor.
 *
 * @param p    node to initialise; any previous contents are overwritten
 * @param name NUL-terminated name to copy
 */
void newPerson(person *p, const char *name) {
    size_t len = strlen(name);
    if (len > BUFFER_LENGTH - 1) {
        len = BUFFER_LENGTH - 1;
    }
    p->name = xmalloc(len + 1);
    string_ncopy(p->name, name, len + 1);
    p->count = 1;
    p->next = NULL;
}

/**
 * Append `node` to the list unless a node with the same name already exists,
 * in which case only that node's count is incremented.
 *
 * When the name already exists, `node` is NOT linked into the list and stays
 * owned by the caller.
 *
 * @param head address of the list head pointer
 * @param node initialised node to insert
 */
void insert(person **head, person *node) {
    if (*head == NULL) {
        *head = node;
        return;
    }

    person *p = *head;
    int cmp = strcmp(p->name, node->name);
    while (p->next != NULL && cmp != 0) {
        p = p->next;
        cmp = strcmp(p->name, node->name);
    }

    if (cmp == 0) {
        p->count++;
    } else {
        p->next = node;
    }
}

/**
 * Record one occurrence of `name` in a list kept sorted by name (ascending,
 * strcmp order). An existing entry has its count incremented; otherwise a new
 * node is allocated and linked at its sorted position.
 *
 * @param head address of the list head pointer
 * @param name NUL-terminated name; copied, not retained
 */
void sorted_name_insert(person **head, char *name) {
    /* `link` always points at the pointer that would have to be rewritten to
     * insert in front of the node currently being examined. */
    person **link = head;
    while (*link != NULL) {
        int cmp = strcmp((*link)->name, name);
        if (cmp == 0) {
            (*link)->count++;
            return;
        }
        if (cmp > 0) {
            break; /* first entry that sorts after `name` */
        }
        link = &(*link)->next;
    }

    /* Allocate only once we know the name is new. */
    person *node = xmalloc(sizeof *node);
    newPerson(node, name);
    node->next = *link;
    *link = node;
}

/**
 * Link `node` into a list ordered by count, highest first. Nodes with equal
 * counts keep their insertion order.
 *
 * @param head address of the list head pointer
 * @param node node to insert; its `next` field is overwritten
 */
void sorted_count_insert(person **head, person *node) {
    person **link = head;
    while (*link != NULL && (*link)->count >= node->count) {
        link = &(*link)->next;
    }
    node->next = *link;
    *link = node;
}

/* Print every entry of the list as "<name> <count>", one per line. */
void display(person *head) {
    for (person *p = head; p != NULL; p = p->next) {
        printf("%s %d\n", p->name, p->count);
    }
}

/**
 * Hash a NUL-terminated byte string (multiply-by-33 scheme seeded with 5381).
 *
 * @param str string to hash
 * @return hash value; callers reduce it modulo the table size
 */
unsigned long hash(const unsigned char *str) {
    unsigned long value = 5381;
    int c;
    while ((c = *str++)) {
        value = ((value << 5) + value) + (unsigned long)c;
    }
    return value;
}

/**
 * Record one occurrence of `name` in the hash table.
 *
 * @param hashmap array of HASH_BUCKETS bucket heads
 * @param name    NUL-terminated name
 */
void hm_insert(person **hashmap, char *name) {
    unsigned long bucket = hash((unsigned char *)name) % HASH_BUCKETS;
    sorted_name_insert(&hashmap[bucket], name);
}

/* Return the start of the last whitespace-delimited word in [text, end). */
static const char *last_word_start(const char *text, const char *end) {
    const char *p = end;
    while (p > text && !isspace((unsigned char)p[-1])) {
        p--;
    }
    return p;
}

/**
 * Extract the surname from a line of the form
 * "<author ...>First Last</author>" or "<editor ...>...".
 *
 * The surname is the last word of the element text. When that word starts
 * with a digit (e.g. the "0001" in "Jane Doe 0001") and another word precedes
 * it, the preceding word is used instead.
 *
 * @param line   NUL-terminated input line
 * @param buffer output buffer of at least BUFFER_LENGTH bytes; receives the
 *               surname, or an empty string when the line holds no
 *               author/editor name
 */
void parse_line(char *line, char *buffer) {
    buffer[0] = '\0';
    if (line[0] != '<') {
        return;
    }

    /* Read the tag name that follows '<'. */
    const char *cur = line + 1;
    char tag[BUFFER_LENGTH];
    size_t tag_len = 0;
    while (tag_len < BUFFER_LENGTH - 1 && *cur != ' ' && *cur != '>' &&
           *cur != '\0' && *cur != '\n') {
        tag[tag_len++] = *cur++;
    }
    tag[tag_len] = '\0';

    if (strcmp(tag, "author") != 0 && strcmp(tag, "editor") != 0) {
        return;
    }

    /* Skip any attributes up to the end of the opening tag. */
    while (*cur != '>' && *cur != '\0') {
        cur++;
    }
    if (*cur == '\0') {
        return; /* opening tag never closed on this line */
    }
    cur++;

    /* Element text runs to the closing tag or, failing that, end of line. */
    const char *text = cur;
    const char *end = text;
    while (*end != '<' && *end != '\0' && *end != '\n') {
        end++;
    }
    while (end > text && isspace((unsigned char)end[-1])) {
        end--;
    }

    const char *word = last_word_start(text, end);
    if (word > text && isdigit((unsigned char)*word)) {
        /* Step back over the separator and use the word before the number,
         * provided there is one. */
        const char *prev_end = word;
        while (prev_end > text && isspace((unsigned char)prev_end[-1])) {
            prev_end--;
        }
        if (prev_end > text) {
            end = prev_end;
            word = last_word_start(text, end);
        }
    }

    size_t len = (size_t)(end - word);
    if (len > BUFFER_LENGTH - 1) {
        len = BUFFER_LENGTH - 1;
    }
    memcpy(buffer, word, len);
    buffer[len] = '\0';
}

/**
 * Drain the hash table into `list`: entries with count >= min_count are moved
 * into the count-ordered list, all others are freed. The hashmap array itself
 * is freed as well and must not be used afterwards.
 *
 * @param hashmap   heap-allocated array of HASH_BUCKETS bucket heads
 * @param list      address of the output list head
 * @param min_count minimum count an entry needs to be kept
 */
void make_list(person **hashmap, person **list, const int min_count) {
    for (size_t i = 0; i < HASH_BUCKETS; i++) {
        person *p = hashmap[i];
        while (p != NULL) {
            person *following = p->next;
            if (p->count >= min_count) {
                p->next = NULL;
                sorted_count_insert(list, p);
            } else {
                free_person(p);
            }
            p = following;
        }
        hashmap[i] = NULL;
    }
    free(hashmap);
}

/* Free every node of the list, including the names they own. */
void clean_list(person *list) {
    while (list != NULL) {
        person *following = list->next;
        free_person(list);
        list = following;
    }
}

/* Free every bucket list of the hash table and then the table itself. */
void clean_memory(person **hashmap) {
    for (size_t i = 0; i < HASH_BUCKETS; i++) {
        clean_list(hashmap[i]);
    }
    free(hashmap);
}

/*
 * Read one line of arbitrary length from `stream` into the growable buffer
 * *line (capacity *cap). Uses only ISO C so the program builds without POSIX
 * feature-test macros. The trailing newline, if any, is kept.
 *
 * Returns 1 when a line (possibly the final, unterminated one) was read and
 * 0 at end of input. The caller frees *line.
 */
static int read_line(char **line, size_t *cap, FILE *stream) {
    if (*line == NULL || *cap < 2) {
        free(*line);
        *cap = 256;
        *line = xmalloc(*cap);
    }

    size_t len = 0;
    for (;;) {
        size_t room = *cap - len;
        int chunk = room > (size_t)INT_MAX ? INT_MAX : (int)room;

        if (fgets(*line + len, chunk, stream) == NULL) {
            return len > 0; /* EOF or error: hand back what was gathered */
        }

        size_t got = strlen(*line + len);
        len += got;
        if (len > 0 && (*line)[len - 1] == '\n') {
            return 1;
        }
        if (got + 1 < (size_t)chunk) {
            return 1; /* short read without newline: last line of input */
        }

        if (*cap - len <= 1) {
            /* Buffer is full mid-line: double it and keep reading. */
            if (*cap > SIZE_MAX / 2) {
                fprintf(stderr, "input line too long\n");
                exit(EXIT_FAILURE);
            }
            size_t new_cap = *cap * 2;
            char *grown = realloc(*line, new_cap);
            if (grown == NULL) {
                fprintf(stderr, "out of memory\n");
                exit(EXIT_FAILURE);
            }
            *line = grown;
            *cap = new_cap;
        }
    }
}

int main(void) {
    person **hashmap = calloc(HASH_BUCKETS, sizeof *hashmap);
    if (hashmap == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    /* Fall back to stdin when dblp.xml is not available. */
    FILE *fp = fopen("dblp.xml", "r");
    FILE *in = fp != NULL ? fp : stdin;

    char buffer[BUFFER_LENGTH];
    char *line = NULL;
    size_t line_cap = 0;
    while (read_line(&line, &line_cap, in)) {
        parse_line(line, buffer);
        if (buffer[0] != '\0') { /* empty means: no name on this line */
            hm_insert(hashmap, buffer);
        }
    }
    free(line);
    if (fp != NULL) {
        fclose(fp);
    }

    printf("Done parsing!\n");

    person *list = NULL;
    make_list(hashmap, &list, MIN_COUNT); /* also frees hashmap */
    display(list);
    clean_list(list);
    return 0;
}