tarot/_scripts/extract.sh

21 lines
476 B
Bash
Raw Normal View History

2024-05-29 04:22:31 +05:30
#!/bin/bash
#
# Extract the main content block from a saved HTML page.
#
# Usage: extract.sh <source-html-file>
#
# Arguments:
#   $1 - path of the HTML file to read; it must contain "src", because the
#        output path is derived by replacing the first "src" with "dst".
# Outputs:
#   Writes the <div class="content-wrapper"> markup, cut off at the first
#   line containing the Russian "see all card interpretations" marker, to
#   the derived destination path.
# Exit status:
#   2 if no argument is given, 1 if the destination would be the input file
#   or a temporary file cannot be created.

# Trace every command. Enabled here instead of in the shebang so tracing is
# kept when the script is started as `bash extract.sh`.
set -x

old_file="$1"
if [[ -z "$old_file" ]]; then
  printf 'Usage: %s <source-html-file>\n' "${0##*/}" >&2
  exit 2
fi

# Only the first occurrence of "src" in the path is replaced.
new_file="${old_file/src/dst}"

# Without "src" in the path the substitution is a no-op, and the final
# redirection below would overwrite the input with the extracted fragment.
if [[ "$new_file" == "$old_file" ]]; then
  printf '%s: no "src" in path "%s"; refusing to overwrite the input\n' \
    "${0##*/}" "$old_file" >&2
  exit 1
fi

# NOTE(review): tmp1_file is not used by the steps below; it is kept because
# later parts of the script may still reference it -- confirm before removing.
tmp1_file="$(mktemp /tmp/taro.XXXXXX)" || exit 1
tmp2_file="$(mktemp /tmp/taro.XXXXXX)" || {
  rm -f -- "$tmp1_file"
  exit 1
}
# Remove the scratch files on every exit path, not only the normal one.
# `rm -f` keeps this harmless if the files were already deleted.
trap 'rm -f -- "$tmp1_file" "$tmp2_file"' EXIT

# Extract the content of the page.
# stderr is discarded deliberately (presumably to hide HTML parser noise).
# A failing xmllint is tolerated and simply yields an empty result.
xmllint --html --xpath '//div[@class="content-wrapper"]' "$old_file" \
  2>/dev/null > "$tmp2_file"

# Remove the unnecessary content: stop at the first line matching the
# marker, so that line and everything after it are dropped.
awk '/Смотрите все толкования карт/ { exit } { print }' \
  "$tmp2_file" > "$new_file"
# Remove the temporary files