From 5654b998296e7566644fe8b7a320ef534d74530a Mon Sep 17 00:00:00 2001 From: Matthias Puchstein Date: Fri, 13 Jun 2025 01:26:36 +0200 Subject: [PATCH] update --- main.c | 79 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/main.c b/main.c index 47b6c57..8d4ab2b 100644 --- a/main.c +++ b/main.c @@ -3,8 +3,7 @@ #include #include -#define NAME_MAX_LENGTH 128 -#define TAG_MAX_LENGTH 128 +#define BUFFER_LENGTH 128 #define HASH_BUCKETS 4000037 #define MIN_COUNT 10000 @@ -19,12 +18,12 @@ void string_ncopy(char *dest, const char *src, size_t max_len) { typedef struct person { struct person *next; - char name[NAME_MAX_LENGTH]; + char *name; int count; } person; void newPerson(person *p, const char *name) { - string_ncopy(p->name, name, NAME_MAX_LENGTH); + string_ncopy(p->name, name, BUFFER_LENGTH); p->count = 1; p->next = NULL; } @@ -127,49 +126,49 @@ void hm_insert(person **hashmap, char *name) { sorted_name_insert(&hashmap[hash_value], name); } -void parse_line(char *line, char *name) { +void parse_line(char *line, char *buffer) { char *line_it = line; if (*line_it == '<') { line_it++; - char *tagname = malloc(sizeof(char) * TAG_MAX_LENGTH); size_t i = 0; - while (i < TAG_MAX_LENGTH - 1 && *line_it != ' ' && *line_it != '>' && + while (i < BUFFER_LENGTH - 1 && *line_it != ' ' && *line_it != '>' && *line_it != '\0' && *line_it != '\n') { - tagname[i] = *line_it; + buffer[i] = *line_it; line_it++; i++; } - tagname[i] = '\0'; - if (strcmp(tagname, "author") == 0 || strcmp(tagname, "editor") == 0) { + buffer[i] = '\0'; + if (strcmp(buffer, "author") == 0 || strcmp(buffer, "editor") == 0) { while (*line_it != '>') { line_it++; } line_it++; - char *last_space = line_it; + char *surname_end = line_it, *surname_start; while (*line_it != '<') { if (*line_it == ' ') { - last_space = line_it; + surname_start = surname_end; + surname_end = line_it; } line_it++; - if (isdigit(*line_it)) { - line_it = line_it - 2; - while (*line_it != ' ' && *line_it != '<') { - line_it--; - } - last_space = line_it + 1; - break; - } + printf("%s\n", line_it); + } + printf("line_it: %s\n", line_it); + printf("sn start: %s\n", surname_start); + printf("sn end: %s\n", surname_end); + if (!isdigit(surname_end + 1)) { + surname_start = surname_end + 1; + surname_end = line_it - 1; + } else { + surname_start++; + surname_end--; } i = 0; - line_it = last_space + 1; - while (i < NAME_MAX_LENGTH - 1 && *line_it != '<' && *line_it != ' ') { - name[i] = *line_it; - line_it++; - i++; + size_t name_length = surname_end - surname_start; + while (i < name_length) { + buffer[i] = surname_start[i]; } - name[i] = '\0'; + buffer[i] = '\0'; } - free(tagname); } } @@ -219,27 +218,27 @@ int main(void) { hashmap[i] = NULL; } FILE *fp = fopen("dblp.xml", "r"); - char line[1024]; + char *line = NULL; + size_t line_len = 0; + char *buffer = (char *)malloc(sizeof(char) * BUFFER_LENGTH); if (fp) { - while (fgets(line, sizeof(line), fp) != NULL) { - char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH); - parse_line(line, name); - if (name != NULL) { - hm_insert(hashmap, name); + while (!(getline(&line, &line_len, fp) < 0)) { + parse_line(line, buffer); + if (buffer != NULL) { + hm_insert(hashmap, buffer); } - free(name); } fclose(fp); } else { - while (fgets(line, sizeof(line), stdin) != NULL) { - char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH); - parse_line(line, name); - if (name != NULL) { - hm_insert(hashmap, name); + while (!(getline(&line, &line_len, stdin) < 0)) { + printf("%s", line); + parse_line(line, buffer); + if (buffer != NULL) { + hm_insert(hashmap, buffer); } - free(name); } } + free(buffer); printf("Done parsing!\n"); person *list = NULL; make_list(hashmap, &list, MIN_COUNT);