diff --git a/main.c b/main.c index baea114..473b8d4 100644 --- a/main.c +++ b/main.c @@ -9,228 +9,240 @@ #define MIN_COUNT 10000 void string_ncopy(char *dest, const char *src, size_t max_len) { - size_t i = 0; - while (i < max_len - 1 && src[i]) { - dest[i] = src[i]; - i++; - } - dest[i] = '\0'; + size_t i = 0; + while (i < max_len - 1 && src[i]) { + dest[i] = src[i]; + i++; + } + dest[i] = '\0'; } typedef struct person { - struct person *next; - char name[NAME_MAX_LENGTH]; - int count; + struct person *next; + char name[NAME_MAX_LENGTH]; + int count; } person; void newPerson(person *p, const char *name) { - string_ncopy(p->name, name, NAME_MAX_LENGTH); - p->count = 1; - p->next = NULL; + string_ncopy(p->name, name, NAME_MAX_LENGTH); + p->count = 1; + p->next = NULL; } void insert(person **head, person *node) { - if (head == NULL) { - *head = node; - } else { - person *p = *head; - int p_exists = strcmp(p->name, node->name); - while (p->next != NULL && p_exists != 0) { - p = p->next; - p_exists = strcmp(p->name, node->name); - } - if (p_exists == 0) { - p->count++; - } else { - p->next = node; - } + if (head == NULL) { + *head = node; + } else { + person *p = *head; + int p_exists = strcmp(p->name, node->name); + while (p->next != NULL && p_exists != 0) { + p = p->next; + p_exists = strcmp(p->name, node->name); } + if (p_exists == 0) { + p->count++; + } else { + p->next = node; + } + } } void sorted_name_insert(person **head, char *name) { - person *node = (person *) malloc(sizeof(person)); - newPerson(node, name); - if (*head == NULL) { - *head = node; - } else { - person *p = *head; - person *p_prev = NULL; - int p_exists = strcmp(p->name, name); - if (p_exists > 0) { - node->next = *head; - *head = node; - return; - } - while (p->next != NULL && p_exists != 0) { - p_exists = strcmp(p->next->name, name); - if (p_exists > 0) { - node->next = p->next; - p->next = node; - return; - } - p = p->next; - } - if (p_exists == 0) { - p->count++; - free(node); - } else { - p->next = node; - } + person *node = (person *)malloc(sizeof(person)); + newPerson(node, name); + if (*head == NULL) { + *head = node; + } else { + person *p = *head; + person *p_prev = NULL; + int p_exists = strcmp(p->name, name); + if (p_exists > 0) { + node->next = *head; + *head = node; + return; } + while (p->next != NULL && p_exists != 0) { + p_exists = strcmp(p->next->name, name); + if (p_exists > 0) { + node->next = p->next; + p->next = node; + return; + } + p = p->next; + } + if (p_exists == 0) { + p->count++; + free(node); + } else { + p->next = node; + } + } } void sorted_count_insert(person **head, person *node) { - if (*head == NULL) { - *head = node; - } else { - person *p = *head; - person *p_prev = NULL; - int cmp = p->count - node->count; - while (p != NULL && cmp < 0) { - p_prev = p; - cmp = p->count - node->count; - p = p->next; - } - if (p_prev == NULL) { - node->next = *head; - *head = node; - } else { - node->next = p; - p_prev->next = node; - } + if (*head == NULL) { + *head = node; + } else { + person *p = *head; + person *p_prev = NULL; + int cmp = p->count - node->count; + while (p->next != NULL && cmp > 0) { + p_prev = p; + p = p->next; + cmp = p->count - node->count; } + if (p_prev == NULL) { + node->next = *head; + *head = node; + } else if (p->next != NULL && cmp < 0) { + node->next = p; + p_prev->next = node; + } else { + p->next = node; + node->next = NULL; + } + } } void display(person *head) { - person *p = head; - while (p != NULL) { - printf("%s %d\n", p->name, p->count); - p = p->next; - } + person *p = head; + while (p != NULL) { + printf("%s %d\n", p->name, p->count); + p = p->next; + } } u_long hash(const unsigned char *str) { - u_long hash = 5381; - int c; - while ((c = *str++)) { - hash = ((hash << 5) + hash) + c; - } - return hash; + u_long hash = 5381; + int c; + while ((c = *str++)) { + hash = ((hash << 5) + hash) + c; + } + return hash; } void hm_insert(person **hashmap, char *name) { - u_long hash_value = hash(name); - hash_value = hash_value % HASH_BUCKETS; - sorted_name_insert(&hashmap[hash_value], name); + u_long hash_value = hash(name); + hash_value = hash_value % HASH_BUCKETS; + sorted_name_insert(&hashmap[hash_value], name); } -char *parse_line(char *line) { - char *line_it = line; - if (*line_it == '<') { - line_it++; - char *tagname = malloc(sizeof(char) * TAG_MAX_LENGTH); - size_t i = 0; - while (i < TAG_MAX_LENGTH-1 && *line_it != ' ' && *line_it != '>' && *line_it != '\0' && *line_it != '\n') { - tagname[i] = *line_it; - line_it++; - i++; - } - tagname[i] = '\0'; - if (strcmp(tagname, "author") == 0 || strcmp(tagname, "editor") == 0) { - free(tagname); - while (*line_it != '>') { - line_it++; - } - line_it++; - char *last_space = line_it; - while (*line_it != '<') { - if (*line_it == ' ') { - last_space = line_it; - } - line_it++; - if (isdigit(*line_it)) { - line_it = line_it - 2; - while (*line_it != ' ' && *line_it != '<') { - line_it--; - } - last_space = line_it + 1; - break; - } - } - char *name = malloc(sizeof(char) * NAME_MAX_LENGTH); - i = 0; - line_it = last_space + 1; - while (i < NAME_MAX_LENGTH - 1 && *line_it != '<' && *line_it != ' ') { - name[i] = *line_it; - line_it++; - i++; - } - name[i] = '\0'; - return name; - } - free(tagname); +void parse_line(char *line, char *name) { + char *line_it = line; + if (*line_it == '<') { + line_it++; + char *tagname = malloc(sizeof(char) * TAG_MAX_LENGTH); + size_t i = 0; + while (i < TAG_MAX_LENGTH - 1 && *line_it != ' ' && *line_it != '>' && + *line_it != '\0' && *line_it != '\n') { + tagname[i] = *line_it; + line_it++; + i++; } - return NULL; + tagname[i] = '\0'; + if (strcmp(tagname, "author") == 0 || strcmp(tagname, "editor") == 0) { + while (*line_it != '>') { + line_it++; + } + line_it++; + char *last_space = line_it; + while (*line_it != '<') { + if (*line_it == ' ') { + last_space = line_it; + } + line_it++; + if (isdigit(*line_it)) { + line_it = line_it - 2; + while (*line_it != ' ' && *line_it != '<') { + line_it--; + } + last_space = line_it + 1; + break; + } + } + i = 0; + line_it = last_space + 1; + while (i < NAME_MAX_LENGTH - 1 && *line_it != '<' && *line_it != ' ') { + name[i] = *line_it; + line_it++; + i++; + } + name[i] = '\0'; + } + free(tagname); + } } void make_list(person **hashmap, person **list, const int min_count) { - size_t i = 0; - for (i = 0; i < HASH_BUCKETS; i++) { - person *p = hashmap[i]; - while (p != NULL) { - person *p_next = p->next; - if (p->count >= min_count) { - p->next = NULL; - sorted_count_insert(list, p); - } else { - free(p); - } - p = p_next; - } + size_t i = 0; + for (i = 0; i < HASH_BUCKETS; i++) { + person *p = hashmap[i]; + while (p != NULL) { + person *p_next = p->next; + if (p->count >= min_count) { + p->next = NULL; + sorted_count_insert(list, p); + } else { + free(p); + } + p = p_next; } - free(hashmap); + } + free(hashmap); } void clean_list(person *list) { - person *p = list; - while (p != NULL) { - person *next = p->next; - free(p); - p = next; - } + person *p = list; + while (p != NULL) { + person *next = p->next; + free(p); + p = next; + } } void clean_memory(person **hashmap) { - for (int i = 0; i < HASH_BUCKETS; i++) { - person *p = hashmap[i]; - while (p != nullptr) { - person *next = p->next; - free(p); - p = next; - } + for (int i = 0; i < HASH_BUCKETS; i++) { + person *p = hashmap[i]; + while (p != NULL) { + person *next = p->next; + free(p); + p = next; } - free(hashmap); + } + free(hashmap); } int main(void) { - person **hashmap = (person **) malloc(sizeof(person *) * HASH_BUCKETS); - for (int i = 0; i < HASH_BUCKETS; i++) { - hashmap[i] = NULL; - } - FILE *fp = fopen("dblp.xml", "r"); - char line[1024]; - while (fgets(line, 1024, fp) != NULL) { - char *name = parse_line(line); - if (name != NULL) { - hm_insert(hashmap, name); - free(name); - } + person **hashmap = (person **)malloc(sizeof(person *) * HASH_BUCKETS); + for (int i = 0; i < HASH_BUCKETS; i++) { + hashmap[i] = NULL; + } + FILE *fp = fopen("dblp.xml", "r"); + char line[1024]; + if (fp) { + while (fgets(line, sizeof(line), fp) != NULL) { + char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH); + parse_line(line, name); + if (name != NULL) { + hm_insert(hashmap, name); + } + free(name); } fclose(fp); - printf("Done parsing!\n"); - person *list = NULL; - make_list(hashmap, &list, MIN_COUNT); - display(list); - clean_list(list); - return 0; + } else { + while (fgets(line, sizeof(line), stdin) != NULL) { + char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH); + parse_line(line, name); + if (name != NULL) { + hm_insert(hashmap, name); + } + free(name); + } + } + printf("Done parsing!\n"); + person *list = NULL; + make_list(hashmap, &list, MIN_COUNT); + display(list); + clean_list(list); + return 0; }