diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3fa0867 --- /dev/null +++ b/.gitignore @@ -0,0 +1,137 @@ +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +CMakeLists.txt +dblp.xml +main + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### C template +# Prerequisites +*.d + +# Object files +*.o +*.ko +*.obj +*.elf + +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/Werkzeuge.iml b/.idea/Werkzeuge.iml new file mode 100644 index 0000000..f08604b --- /dev/null +++ b/.idea/Werkzeuge.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/editor.xml b/.idea/editor.xml new file mode 100644 index 0000000..25c6c37 --- /dev/null +++ b/.idea/editor.xml @@ -0,0 +1,344 @@ + + + + + \ No newline at end of file diff --git a/.idea/misc.xml 
/*
 * main.c - count how often each surname occurs in a DBLP XML dump.
 *
 * The program reads "dblp.xml" line by line. For every line that starts with
 * an <author ...> or <editor ...> element it extracts the surname (the last
 * word of the element text that is not a numeric suffix such as "0001") and
 * tallies it in a chained hash map. Each bucket is a singly linked list kept
 * in ascending name order. At the end every "<name> <count>" pair is printed
 * in bucket order and all memory is released.
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

enum {
    NAME_MAX_LENGTH = 32,   /* capacity of a stored name, terminator included */
    TAG_MAX_LENGTH = 32,    /* capacity of a parsed tag name, terminator included */
    LINE_MAX_LENGTH = 1024, /* capacity of the buffer handed to fgets */
    HASH_BUCKETS = 4000037  /* number of buckets in the hash map */
};

/**
 * Allocate `size` bytes or terminate the program.
 *
 * Out-of-memory is treated as fatal everywhere in this file, so callers never
 * have to check the result.
 */
static void *xmalloc(size_t size) {
    void *block = malloc(size);
    if (block == NULL) {
        fprintf(stderr, "out of memory (requested %zu bytes)\n", size);
        exit(EXIT_FAILURE);
    }
    return block;
}

/**
 * Lexicographically compare two NUL-terminated strings.
 *
 * Bytes are compared as unsigned char (the same rule strcmp uses), so the
 * ordering does not depend on whether plain char is signed on the platform.
 *
 * @return a negative value, zero, or a positive value when str1 sorts before,
 *         equal to, or after str2.
 */
int string_compare(const char *str1, const char *str2) {
    const unsigned char *a = (const unsigned char *) str1;
    const unsigned char *b = (const unsigned char *) str2;
    /* *a == *b together with *a != 0 implies *b != 0, one test suffices. */
    while (*a != '\0' && *a == *b) {
        a++;
        b++;
    }
    return (int) *a - (int) *b;
}

/**
 * Copy at most max_len - 1 characters of src into dest and NUL-terminate it.
 *
 * @param dest    destination buffer with room for at least max_len bytes
 * @param src     NUL-terminated source string
 * @param max_len capacity of dest; when 0 nothing is written at all
 */
void string_ncopy(char *dest, const char *src, size_t max_len) {
    if (max_len == 0) {
        return; /* max_len - 1 would wrap around to SIZE_MAX */
    }
    size_t i = 0;
    while (i + 1 < max_len && src[i] != '\0') {
        dest[i] = src[i];
        i++;
    }
    dest[i] = '\0';
}

/** One entry of a bucket list: a name and the number of times it was seen. */
typedef struct person {
    struct person *next;        /* next entry in the same bucket, or NULL */
    char name[NAME_MAX_LENGTH]; /* NUL-terminated, truncated to fit */
    int count;                  /* occurrences recorded so far */
} person;

/**
 * Initialise an already allocated person with a (possibly truncated) copy of
 * name, a count of 1 and no successor.
 */
void newPerson(person *p, const char *name) {
    string_ncopy(p->name, name, NAME_MAX_LENGTH);
    p->count = 1;
    p->next = NULL;
}

/**
 * Record `node` in the unsorted list `*head`.
 *
 * If an entry with the same name already exists its count is incremented and
 * `node` is NOT linked into the list; the caller keeps ownership of it in
 * that case. Otherwise `node` becomes the new tail (or the head of an empty
 * list).
 */
void insert(person **head, person *node) {
    if (head == NULL || node == NULL) {
        return;
    }
    if (*head == NULL) {
        *head = node;
        return;
    }
    person *p = *head;
    while (string_compare(p->name, node->name) != 0) {
        if (p->next == NULL) {
            p->next = node; /* reached the tail without a match: append */
            return;
        }
        p = p->next;
    }
    p->count++;
}

/**
 * Record one occurrence of `name` in the list `*head`, which is kept in
 * ascending string_compare order.
 *
 * An existing entry has its count incremented; otherwise a new entry is
 * allocated and linked in at its sorted position. Names are compared in the
 * truncated form in which they are stored, so an over-long name matches the
 * entry it previously created.
 */
void sorted_name_insert(person **head, char *name) {
    char key[NAME_MAX_LENGTH];
    string_ncopy(key, name, sizeof key);

    /* `link` always points at the pointer that would have to be rewritten to
     * insert in front of the entry currently being examined. */
    person **link = head;
    while (*link != NULL) {
        int order = string_compare((*link)->name, key);
        if (order == 0) {
            (*link)->count++;
            return;
        }
        if (order > 0) {
            break; /* first entry that sorts after key: insert before it */
        }
        link = &(*link)->next;
    }

    person *node = xmalloc(sizeof *node);
    newPerson(node, key);
    node->next = *link;
    *link = node;
}

/** Print every entry of the list as "<name> <count>", one per line. */
void display(person *head) {
    for (person *p = head; p != NULL; p = p->next) {
        printf("%s %d\n", p->name, p->count);
    }
}

/**
 * djb2 string hash: start at 5381 and fold in each byte as value * 33 + byte.
 */
unsigned long hash(const unsigned char *str) {
    unsigned long value = 5381;
    int c;
    while ((c = *str++) != 0) {
        value = ((value << 5) + value) + (unsigned long) c;
    }
    return value;
}

/**
 * Record one occurrence of `name` in the hash map.
 *
 * @param hashmap array of HASH_BUCKETS bucket heads
 * @param name    NUL-terminated name; it is truncated to the stored length
 *                before hashing so that bucket choice and stored key agree
 */
void hm_insert(person **hashmap, char *name) {
    char key[NAME_MAX_LENGTH];
    string_ncopy(key, name, sizeof key);

    unsigned long bucket = hash((const unsigned char *) key) % (unsigned long) HASH_BUCKETS;
    sorted_name_insert(&hashmap[bucket], key);
}

/** True when `c` terminates the text of an element on the current line. */
static int is_text_end(char c) {
    return c == '<' || c == '\0' || c == '\n' || c == '\r';
}

/**
 * Extract the surname from a line that starts with an <author> or <editor>
 * element.
 *
 * The surname is the last space-separated word of the element text that does
 * not begin with a digit; this skips trailing numeric suffixes, e.g.
 * "John Smith 0001" yields "Smith". Attributes inside the opening tag are
 * ignored.
 *
 * @param line NUL-terminated input line
 * @return a freshly allocated string of at most NAME_MAX_LENGTH - 1
 *         characters that the caller must free(), or NULL when the line does
 *         not start with such an element, the opening tag is not closed on
 *         this line, or the element contains no name word.
 */
char *parse_line(char *line) {
    if (line == NULL || line[0] != '<') {
        return NULL;
    }

    /* Read the tag name; characters beyond the buffer are dropped, which can
     * never turn an over-long tag into "author" or "editor". */
    const char *cursor = line + 1;
    char tag[TAG_MAX_LENGTH];
    size_t tag_len = 0;
    while (*cursor != ' ' && *cursor != '>' && *cursor != '\0' && *cursor != '\n') {
        if (tag_len + 1 < sizeof tag) {
            tag[tag_len++] = *cursor;
        }
        cursor++;
    }
    tag[tag_len] = '\0';

    if (string_compare(tag, "author") != 0 && string_compare(tag, "editor") != 0) {
        return NULL;
    }

    /* Skip any attributes up to the end of the opening tag. */
    while (*cursor != '>' && *cursor != '\0') {
        cursor++;
    }
    if (*cursor == '\0') {
        return NULL;
    }
    cursor++;

    /* Walk the element text word by word, remembering the last word that is
     * not a numeric suffix. */
    const char *surname = NULL;
    size_t surname_len = 0;
    while (!is_text_end(*cursor)) {
        if (*cursor == ' ') {
            cursor++;
            continue;
        }
        const char *word = cursor;
        while (*cursor != ' ' && !is_text_end(*cursor)) {
            cursor++;
        }
        /* <ctype.h> functions require a value representable as unsigned char. */
        if (!isdigit((unsigned char) *word)) {
            surname = word;
            surname_len = (size_t) (cursor - word);
        }
    }
    if (surname == NULL) {
        return NULL;
    }

    if (surname_len > NAME_MAX_LENGTH - 1) {
        surname_len = NAME_MAX_LENGTH - 1;
    }
    char *name = xmalloc(surname_len + 1);
    for (size_t i = 0; i < surname_len; i++) {
        name[i] = surname[i];
    }
    name[surname_len] = '\0';
    return name;
}

/**
 * Free every entry of every bucket and then the bucket array itself.
 *
 * @param hashmap array of HASH_BUCKETS bucket heads obtained from malloc;
 *                NULL is accepted and ignored
 */
void clean_memory(person **hashmap) {
    if (hashmap == NULL) {
        return;
    }
    for (int i = 0; i < HASH_BUCKETS; i++) {
        person *p = hashmap[i];
        while (p != NULL) {
            person *next = p->next;
            free(p);
            p = next;
        }
    }
    free(hashmap);
}

/**
 * Tally the surnames found in "dblp.xml" and print them with their counts.
 *
 * @return 0 on success, EXIT_FAILURE when the bucket array cannot be
 *         allocated or the input file cannot be opened.
 */
int main(void) {
    person **hashmap = malloc(sizeof *hashmap * HASH_BUCKETS);
    if (hashmap == NULL) {
        fprintf(stderr, "out of memory while allocating the hash map\n");
        return EXIT_FAILURE;
    }
    for (int i = 0; i < HASH_BUCKETS; i++) {
        hashmap[i] = NULL;
    }

    FILE *fp = fopen("dblp.xml", "r");
    if (fp == NULL) {
        perror("dblp.xml");
        free(hashmap);
        return EXIT_FAILURE;
    }

    /* NOTE(review): fgets splits lines longer than the buffer; a continuation
     * chunk is then handed to parse_line as if it were a line of its own. */
    char line[LINE_MAX_LENGTH];
    while (fgets(line, sizeof line, fp) != NULL) {
        char *name = parse_line(line);
        if (name != NULL) {
            hm_insert(hashmap, name);
            free(name);
        }
    }
    if (ferror(fp)) {
        fprintf(stderr, "warning: read error on dblp.xml, results may be incomplete\n");
    }
    fclose(fp);

    for (int i = 0; i < HASH_BUCKETS; i++) {
        display(hashmap[i]);
    }
    clean_memory(hashmap);
    return 0;
}