42 lines
1.1 KiB
Bash
Executable File
42 lines
1.1 KiB
Bash
Executable File
#!/bin/bash
#
# Validation driver: builds surname counts from dblp.xml with a
# grep/sed/awk pipeline, runs the C program (./main), and prints both
# result sets so they can be compared by eye.
#
# Files used (all relative to the current directory):
#   dblp.xml          input read by the shell pipeline
#   ./main            C program whose stdout is captured
#   bash_results.txt  output of the shell pipeline
#   c_results.txt     output of ./main

# Strict mode: stop on the first failing command, on use of an unset
# variable, and when any stage of a pipeline fails. Without this a broken
# extraction step would still be reported as "complete".
set -euo pipefail

echo "Running full validation with working pipeline..."
|
|
|
|
# Generate complete bash results using the proven pipeline.
#
# Reads:  dblp.xml in the current directory.
# Writes: bash_results.txt with one "<surname> <count>" line for every
#         surname seen at least 10000 times, highest count first.
echo "Extracting all surnames (this may take a few minutes for 4.7GB file)..."

if [[ ! -r dblp.xml ]]; then
  echo "Error: dblp.xml not found or not readable in $(pwd)" >&2
  exit 1
fi

# The pipeline runs in a subshell so that pipefail and LC_ALL stay local.
#   pipefail  - otherwise only the final sort's status is visible and a
#               failing grep/sed/awk would silently give an empty result.
#   LC_ALL=C  - byte-wise comparison keeps sort and uniq in agreement and
#               avoids locale collation cost on a multi-gigabyte input.
(
  set -o pipefail
  export LC_ALL=C

  # 1. keep lines holding an <author ...> or <editor ...> element
  # 2. strip everything up to the opening tag and from the closing tag on
  # 3. pick the surname (last word of the name)
  # 4. count occurrences, keep the frequent ones, order by count descending
  grep -E '<(author|editor)' dblp.xml |
    sed -E 's/.*<(author|editor)[^>]*>//; s/<\/(author|editor)>.*//' |
    awk '{
      # Index of the field treated as the surname. A trailing token of
      # exactly four digits is skipped (presumably a homonym number such
      # as "0001" - TODO confirm). The digit class is spelled out because
      # {4} intervals are not honoured by every awk, and a separate index
      # is used because the effect of decrementing NF is not portable.
      last = NF
      if (last > 0 && $last ~ /^[0-9][0-9][0-9][0-9]$/) last--

      if (last > 0) {
        surname = $last
        # Drop entity references of the form &...; from the surname.
        gsub(/&[^;]*;/, "", surname)
        if (length(surname) > 0) print surname
      }
    }' |
    sort | uniq -c |
    awk '$1 >= 10000 { print $2, $1 }' |
    sort -k2,2 -nr
) > bash_results.txt || {
  echo "Error: surname extraction failed (or no author/editor lines were found)" >&2
  exit 1
}

echo "Bash extraction complete. Results: $(wc -l < bash_results.txt) surnames"
|
|
|
|
# Get your C program results.
#
# Runs ./main with no arguments and stores its stdout in c_results.txt.
# NOTE(review): ./main is given neither arguments nor stdin here, so it
# presumably locates its own input - confirm against the C source.
if [[ ! -x ./main ]]; then
  echo "Error: ./main not found or not executable (build the C program first)" >&2
  exit 1
fi

# Do not report success when the program under test failed: its output
# file would exist but could be empty or truncated.
./main > c_results.txt || {
  status=$?
  echo "Error: ./main exited with status ${status}; c_results.txt may be incomplete" >&2
  exit "$status"
}

echo "C extraction complete. Results: $(wc -l < c_results.txt) surnames"
|
|
|
|
# Quick comparison of top entries: print the first five lines of each
# result file under its own heading, C program first.
printf '%s\n' "Top 5 comparison:"
for side in "C Program:c_results.txt" "Bash Script:bash_results.txt"; do
  # Each entry is "<heading>:<file>"; split on the first colon.
  printf '=== %s ===\n' "${side%%:*}"
  head -n 5 "${side#*:}"
done
|
|
|
|
# Check Wang specifically: show the line starting with "Wang " from each
# result file. The lookups stay inside the printf arguments so that a
# missing match only yields an empty value.
printf '%s\n' "Wang comparison:"
printf 'C: %s\n' "$(grep '^Wang ' c_results.txt)"
printf 'Bash: %s\n' "$(grep '^Wang ' bash_results.txt)"
|
|
|