This commit is contained in:
2025-06-13 01:26:36 +02:00
parent 4051ccb6ff
commit 5654b99829

79
main.c
View File

@@ -3,8 +3,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#define NAME_MAX_LENGTH 128 #define BUFFER_LENGTH 128
#define TAG_MAX_LENGTH 128
#define HASH_BUCKETS 4000037 #define HASH_BUCKETS 4000037
#define MIN_COUNT 10000 #define MIN_COUNT 10000
@@ -19,12 +18,12 @@ void string_ncopy(char *dest, const char *src, size_t max_len) {
/*
 * One entry in a hash bucket's singly linked chain.
 *
 * next  - following entry in the same chain, or NULL at the tail.
 * name  - NUL-terminated name; heap-allocated by newPerson() and owned by
 *         this node.
 * count - number of occurrences recorded for this name.
 */
typedef struct person {
  struct person *next;
  char *name;
  int count;
} person;

/*
 * Initialise an already-allocated person node.
 *
 * p    - node to fill in; must not be NULL.
 * name - NUL-terminated name to store; it is copied, so the caller's buffer
 *        may be reused afterwards.
 *
 * The node starts with count == 1 and no successor. `name` is a plain
 * pointer, so storage for the copy is allocated here; writing through the
 * uninitialised pointer would be undefined behaviour.
 *
 * NOTE(review): whoever releases a person must now also free(p->name) --
 * confirm the cleanup code elsewhere in this file does so.
 *
 * Terminates the program if the allocation fails, since the function has no
 * way to report an error to its caller.
 */
void newPerson(person *p, const char *name) {
  size_t len = strlen(name);

  p->name = malloc(len + 1);
  if (p->name == NULL) {
    fputs("newPerson: out of memory\n", stderr);
    exit(EXIT_FAILURE);
  }
  memcpy(p->name, name, len + 1); /* len + 1 copies the terminator too */

  p->count = 1;
  p->next = NULL;
}
@@ -127,49 +126,49 @@ void hm_insert(person **hashmap, char *name) {
sorted_name_insert(&hashmap[hash_value], name); sorted_name_insert(&hashmap[hash_value], name);
} }
/* Step `end` back over trailing whitespace; never moves before `begin`. */
static const char *trim_trailing_space(const char *begin, const char *end) {
  while (end > begin && isspace((unsigned char)end[-1])) {
    end--;
  }
  return end;
}

/* Return the start of the last space-delimited word inside [begin, end). */
static const char *last_word_start(const char *begin, const char *end) {
  const char *p = end;
  while (p > begin && p[-1] != ' ') {
    p--;
  }
  return p;
}

/* Non-zero when [begin, end) is non-empty and holds only decimal digits. */
static int is_all_digits(const char *begin, const char *end) {
  if (begin == end) {
    return 0;
  }
  for (; begin < end; begin++) {
    if (!isdigit((unsigned char)*begin)) {
      return 0;
    }
  }
  return 1;
}

/*
 * Extract the surname from an `<author ...>` or `<editor ...>` line.
 *
 * line   - NUL-terminated input line; it is only read.
 * buffer - output buffer of at least BUFFER_LENGTH bytes.
 *
 * On an author/editor line, buffer receives the last space-separated word
 * of the element text. If that last word is purely numeric (e.g. the
 * "0001" in "Wei Wang 0001") it is treated as a suffix and the word before
 * it is used instead. The result is truncated to BUFFER_LENGTH - 1
 * characters and always NUL-terminated.
 *
 * On any other line, buffer is set to the empty string so the caller can
 * tell that no name was found; it never holds the tag name or a stale
 * result from a previous line.
 */
void parse_line(char *line, char *buffer) {
  if (buffer == NULL) {
    return;
  }
  buffer[0] = '\0';
  if (line == NULL || line[0] != '<') {
    return;
  }

  /* The tag name runs up to the first space, '>' or end of line. */
  const char *tag = line + 1;
  size_t tag_len = strcspn(tag, " >\n");
  if (tag_len != 6 ||
      (strncmp(tag, "author", 6) != 0 && strncmp(tag, "editor", 6) != 0)) {
    return;
  }

  /* Skip any attributes; a line without a closing '>' has no text. */
  const char *text = strchr(tag + tag_len, '>');
  if (text == NULL) {
    return;
  }
  text++;

  /* Element text ends at the closing tag, or at end of line if missing. */
  const char *end = trim_trailing_space(text, text + strcspn(text, "<"));
  const char *start = last_word_start(text, end);

  /* Drop a purely numeric last word, unless it is the only word. */
  if (start > text && is_all_digits(start, end)) {
    end = trim_trailing_space(text, start);
    start = last_word_start(text, end);
  }

  size_t len = (size_t)(end - start);
  if (len > BUFFER_LENGTH - 1) {
    len = BUFFER_LENGTH - 1;
  }
  memcpy(buffer, start, len);
  buffer[len] = '\0';
}
@@ -219,27 +218,27 @@ int main(void) {
hashmap[i] = NULL; hashmap[i] = NULL;
} }
FILE *fp = fopen("dblp.xml", "r"); FILE *fp = fopen("dblp.xml", "r");
char line[1024]; char *line = NULL;
size_t line_len = 0;
char *buffer = (char *)malloc(sizeof(char) * BUFFER_LENGTH);
if (fp) { if (fp) {
while (fgets(line, sizeof(line), fp) != NULL) { while (!(getline(&line, &line_len, fp) < 0)) {
char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH); parse_line(line, buffer);
parse_line(line, name); if (buffer != NULL) {
if (name != NULL) { hm_insert(hashmap, buffer);
hm_insert(hashmap, name);
} }
free(name);
} }
fclose(fp); fclose(fp);
} else { } else {
while (fgets(line, sizeof(line), stdin) != NULL) { while (!(getline(&line, &line_len, stdin) < 0)) {
char *name = (char *)malloc(sizeof(char) * NAME_MAX_LENGTH); printf("%s", line);
parse_line(line, name); parse_line(line, buffer);
if (name != NULL) { if (buffer != NULL) {
hm_insert(hashmap, name); hm_insert(hashmap, buffer);
} }
free(name);
} }
} }
free(buffer);
printf("Done parsing!\n"); printf("Done parsing!\n");
person *list = NULL; person *list = NULL;
make_list(hashmap, &list, MIN_COUNT); make_list(hashmap, &list, MIN_COUNT);