made some fixes

This commit is contained in:
2025-06-13 02:39:13 +02:00
parent 5654b99829
commit 5daa974dc6

98
main.c
View File

@@ -4,6 +4,7 @@
#include <string.h> #include <string.h>
#define BUFFER_LENGTH 128 #define BUFFER_LENGTH 128
#define LINE_LENGTH 1024
#define HASH_BUCKETS 4000037 #define HASH_BUCKETS 4000037
#define MIN_COUNT 10000 #define MIN_COUNT 10000
@@ -18,7 +19,7 @@ void string_ncopy(char *dest, const char *src, size_t max_len) {
typedef struct person { typedef struct person {
struct person *next; struct person *next;
char *name; char name[BUFFER_LENGTH];
int count; int count;
} person; } person;
@@ -28,24 +29,6 @@ void newPerson(person *p, const char *name) {
p->next = NULL; p->next = NULL;
} }
/*
 * Add `node` to the singly linked list rooted at `*head`, keyed by name.
 *
 * The list is walked from the front. If an entry whose name compares equal
 * (strcmp == 0) to node->name is found, that entry's count is incremented
 * and the list is left otherwise untouched; in that case `node` is neither
 * linked nor freed here, so it remains the caller's to dispose of.
 * If no entry matches, `node` is attached at the tail (or becomes the head
 * when the list is empty). node->next is not modified by this function.
 */
void insert(person **head, person *node) {
    /* `link` always points at the pointer that would have to be rewritten
     * to splice a node in at the current position. */
    person **link = head;

    while (*link != NULL) {
        person *current = *link;
        if (strcmp(current->name, node->name) == 0) {
            current->count++;
            return;
        }
        link = &current->next;
    }

    /* Reached the end without a match: append. */
    *link = node;
}
void sorted_name_insert(person **head, char *name) { void sorted_name_insert(person **head, char *name) {
person *node = (person *) malloc(sizeof(person)); person *node = (person *) malloc(sizeof(person));
newPerson(node, name); newPerson(node, name);
@@ -54,26 +37,24 @@ void sorted_name_insert(person **head, char *name) {
} else { } else {
person *p = *head; person *p = *head;
person *p_prev = NULL; person *p_prev = NULL;
int p_exists = strcmp(p->name, name); int cmp = strcmp(p->name, name);
if (p_exists > 0) { while (p->next != NULL && cmp < 0) {
node->next = *head; p_prev = p;
*head = node;
return;
}
while (p->next != NULL && p_exists != 0) {
p_exists = strcmp(p->next->name, name);
if (p_exists > 0) {
node->next = p->next;
p->next = node;
return;
}
p = p->next; p = p->next;
cmp = strcmp(p->name, name);
} }
if (p_exists == 0) { if (cmp == 0){
p->count++; p->count++;
free(node); free(node);
}else if (p_prev == NULL) {
node->next = *head;
*head = node;
} else if (p->next != NULL && cmp < 0) {
node->next = p;
p_prev->next = node;
} else { } else {
p->next = node; p->next = node;
node->next = NULL;
} }
} }
} }
@@ -111,6 +92,7 @@ void display(person *head) {
} }
} }
//djb2 hash http://www.cse.yorku.ca/~oz/hash.html
u_long hash(const unsigned char *str) { u_long hash(const unsigned char *str) {
u_long hash = 5381; u_long hash = 5381;
int c; int c;
@@ -139,35 +121,37 @@ void parse_line(char *line, char *buffer) {
} }
buffer[i] = '\0'; buffer[i] = '\0';
if (strcmp(buffer, "author") == 0 || strcmp(buffer, "editor") == 0) { if (strcmp(buffer, "author") == 0 || strcmp(buffer, "editor") == 0) {
memset(buffer, 0, BUFFER_LENGTH);
while (*line_it != '>') { while (*line_it != '>') {
line_it++; line_it++;
} }
line_it++; line_it++;
char *surname_end = line_it, *surname_start; char *surname_end = line_it, *surname_start = line_it;
while (*line_it != '<') { while (*line_it && *line_it != '<') {
if (*line_it == ' ') { if (*line_it == ' ') {
surname_start = surname_end; surname_start = surname_end;
surname_end = line_it; surname_end = line_it;
} }
line_it++; line_it++;
printf("%s\n", line_it);
} }
printf("line_it: %s\n", line_it); bool only_numbers = true;
printf("sn start: %s\n", surname_start); char *c = surname_end;
printf("sn end: %s\n", surname_end); while (only_numbers && c != line_it) {
if (!isdigit(surname_end + 1)) { if (!isdigit(*c)) {
surname_start = surname_end + 1; only_numbers = false;
surname_end = line_it - 1; }
} else { c++;
surname_start++; }
surname_end--;
if (!only_numbers) {
surname_start = surname_end;
surname_end = line_it;
} }
i = 0;
size_t name_length = surname_end - surname_start; size_t name_length = surname_end - surname_start;
while (i < name_length) { memcpy(buffer, surname_start, name_length);
buffer[i] = surname_start[i]; buffer[name_length] = '\0';
} } else {
buffer[i] = '\0'; memset(buffer, 0, BUFFER_LENGTH);
} }
} }
} }
@@ -218,26 +202,30 @@ int main(void) {
hashmap[i] = NULL; hashmap[i] = NULL;
} }
FILE *fp = fopen("dblp.xml", "r"); FILE *fp = fopen("dblp.xml", "r");
// char *line = malloc(sizeof(char) * LINE_LENGTH);
// size_t line_len = LINE_LENGTH;
char *line = NULL; char *line = NULL;
size_t line_len = 0; size_t line_len = 0;
char *buffer = (char *) malloc(sizeof(char) * BUFFER_LENGTH); char *buffer = (char *) malloc(sizeof(char) * BUFFER_LENGTH);
if (fp) { if (fp) {
while (!(getline(&line, &line_len, fp) < 0)) { while (getline(&line, &line_len, fp) >= 0) {
memset(buffer, 0, BUFFER_LENGTH);
parse_line(line, buffer); parse_line(line, buffer);
if (buffer != NULL) { if (*buffer != '\0') {
hm_insert(hashmap, buffer); hm_insert(hashmap, buffer);
} }
} }
fclose(fp); fclose(fp);
} else { } else {
while (!(getline(&line, &line_len, stdin) < 0)) { while (getline(&line, &line_len, stdin) >= 0) {
printf("%s", line); memset(buffer, 0, BUFFER_LENGTH);
parse_line(line, buffer); parse_line(line, buffer);
if (buffer != NULL) { if (*buffer != '\0') {
hm_insert(hashmap, buffer); hm_insert(hashmap, buffer);
} }
} }
} }
free(line);
free(buffer); free(buffer);
printf("Done parsing!\n"); printf("Done parsing!\n");
person *list = NULL; person *list = NULL;