diff --git a/main.c b/main.c
index ece6c8c..116f544 100644
--- a/main.c
+++ b/main.c
@@ -6,7 +6,7 @@
 #define BUFFER_LENGTH 128
 #define LINE_LENGTH 1024
 #define HASH_BUCKETS 4000037
-#define MIN_COUNT 10000
+#define MIN_COUNT 0  /* handed to make_list() as min_count; NOTE(review): validate_extraction.sh keeps only counts >= 10000 - confirm 0 is intended */
 
 void string_ncopy(char *dest, const char *src, size_t max_len) {
     size_t i = 0;
@@ -30,35 +30,34 @@ void newPerson(person *p, const char *name) {
 }
 
 void sorted_name_insert(person **head, char *name) {
+    person *p = *head;  /* pass 1: scan the whole list for a node already holding name */
+    while (p != NULL) {
+        if (strcmp(p->name, name) == 0) {
+            p->count++;  /* found: count the repeat and return before anything is allocated */
+            return;
+        }
+        p = p->next;
+    }
+
     person *node = (person *) malloc(sizeof(person));
     newPerson(node, name);
-    if (*head == NULL) {
+
+    if (*head == NULL || strcmp((*head)->name, name) > 0) {  /* empty list, or name sorts before the current head: node becomes the head */
+        node->next = *head;
         *head = node;
-    } else {
-        person *p = *head;
-        person *p_prev = NULL;
-        int cmp = strcmp(p->name, name);
-        while (p->next != NULL && cmp < 0) {
-            p_prev = p;
-            p = p->next;
-            cmp = strcmp(p->name, name);
-        }
-        if (cmp == 0){
-            p->count++;
-            free(node);
-        }else if (p_prev == NULL) {
-            node->next = *head;
-            *head = node;
-        } else if (p->next != NULL && cmp < 0) {
-            node->next = p;
-            p_prev->next = node;
-        } else {
-            p->next = node;
-            node->next = NULL;
-        }
+        return;
     }
+
+    p = *head;  /* pass 2: advance while the following node still sorts before name */
+    while (p->next != NULL && strcmp(p->next->name, name) < 0) {
+        p = p->next;
+    }
+
+    node->next = p->next;  /* link node between p and its successor */
+    p->next = node;
 }
 
+
 void sorted_count_insert(person **head, person *node) {
     if (*head == NULL) {
         *head = node;
@@ -126,25 +125,21 @@ void parse_line(char *line, char *buffer) {
                 line_it++;
             }
             line_it++;
-            char *surname_end = line_it, *surname_start = line_it;
+            char *content_start = line_it, *last_space = NULL, *second_last_space = NULL;
             while (*line_it && *line_it != '<') {
                 if (*line_it == ' ') {
-                    surname_start = surname_end;
-                    surname_end = line_it;
+                    second_last_space = last_space;
+                    last_space = line_it;
                 }
                 line_it++;
             }
-            bool only_numbers = true;
-            char *c = surname_end;
-            while (only_numbers && c != line_it) {
-                if (!isdigit(*c)) {
-                    only_numbers = false;
-                }
-                c++;
-            }
+            char *surname_start, *surname_end;
 
-            if (!only_numbers) {
-                surname_start = surname_end;
+            if (last_space && isdigit(*(last_space+1))) {
+                surname_start = second_last_space ? second_last_space + 1 : content_start;
+                surname_end = last_space ? last_space : line_it;
+            } else {
+                surname_start = last_space ? last_space + 1 : content_start;
                 surname_end = line_it;
             }
             size_t name_length = surname_end - surname_start;
@@ -175,6 +170,30 @@ void make_list(person **hashmap, person **list, const int min_count) {
     free(hashmap);
 }
 
+/* Count the nodes of list whose name equals test->name and report the
+ * name when it occurs more than once. Neither list nor test is modified. */
+void verify_list(person *list, person *test) {
+    int count = 0;
+    for (person *p = list; p != NULL; p = p->next) {
+        if (strcmp(p->name, test->name) == 0) {
+            count++;
+        }
+    }
+    if (count > 1) {
+        /* stderr, so the report never mixes into redirected stdout results */
+        fprintf(stderr, "ERROR: %s\n", test->name);
+    }
+}
+
+/* Run verify_list once per node of list so that every name stored in more
+ * than one node gets reported. Each call rescans the whole list, which makes
+ * the total cost quadratic in the list length. */
+void check_list(person *list) {
+    for (person *p = list; p != NULL; p = p->next) {
+        verify_list(list, p);
+    }
+}
+
 void clean_list(person *list) {
     person *p = list;
     while (p != NULL) {
@@ -202,8 +221,6 @@ int main(void) {
         hashmap[i] = NULL;
     }
     FILE *fp = fopen("dblp.xml", "r");
-    // char *line = malloc(sizeof(char) * LINE_LENGTH);
-    // size_t line_len = LINE_LENGTH;
     char *line = NULL;
     size_t line_len = 0;
     char *buffer = (char *) malloc(sizeof(char) * BUFFER_LENGTH);
@@ -227,10 +244,10 @@ int main(void) {
     }
     free(line);
     free(buffer);
-    printf("Done parsing!\n");
     person *list = NULL;
     make_list(hashmap, &list, MIN_COUNT);
+    check_list(list);
     display(list);
     clean_list(list);
     return 0;
-}
\ No newline at end of file
+}
diff --git a/validate_extraction.sh b/validate_extraction.sh
new file mode 100755
index 0000000..c40da3b
--- /dev/null
+++ b/validate_extraction.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+echo "Running full validation with working pipeline..."
+
+# Generate complete bash results using the proven pipeline
+echo "Extracting all surnames (this may take a few minutes for 4.7GB file)..."
+{  # reference pipeline: last word of each author/editor element, after dropping a trailing 4-digit token and any entity references
+    grep -E "<(author|editor)" dblp.xml | \
+    sed -E 's/.*<(author|editor)[^>]*>//; s/<\/(author|editor)>.*//' | \
+    awk '{
+        if ($NF ~ /^[0-9]{4}$/) { NF-- }
+        if (NF > 0) { 
+            surname = $NF
+            gsub(/&[^;]*;/, "", surname)
+            if (length(surname) > 0) print surname 
+        }
+    }' | \
+    sort | uniq -c | \
+    awk '$1 >= 10000 {print $2, $1}' | \
+    sort -k2 -nr  # order by the count column, largest first
+} > bash_results.txt  # one line per surname counted at least 10000 times: surname, then count
+
+echo "Bash extraction complete. Results: $(wc -l < bash_results.txt) surnames"
+
+# Get your C program results
+./main > c_results.txt  # NOTE(review): main.c sets MIN_COUNT to 0 while the pipeline above keeps counts >= 10000; confirm both lists are meant to cover the same surnames
+
+echo "C extraction complete. Results: $(wc -l < c_results.txt) surnames"
+
+# Quick comparison of top entries
+echo "Top 5 comparison:"
+echo "=== C Program ==="
+head -5 c_results.txt  # first five lines as written by the C program
+echo "=== Bash Script ==="
+head -5 bash_results.txt  # first five lines of the reference list, i.e. the five largest counts
+
+# Check Wang specifically
+echo "Wang comparison:"
+echo "C: $(grep "^Wang " c_results.txt)"
+echo "Bash: $(grep "^Wang " bash_results.txt)"
+