# Removes extra spaces off a string function ltrim(s) { sub(/^[ \t\r\n]+/, "", s); return s; } function rtrim(s) { sub(/[ \t\r\n]+$/, "", s); return s; } function trim(s) { return rtrim(ltrim(s)); } # Escape slashes and amps in a string for building links function linkescape(s) { gsub(/\//, "\\/", s); gsub(/&/, "\\\\&", s); return s; } # Creates a section link using normal replacement rules function nametolink(s) { s = tolower(s); gsub(/[^a-z0-9 -]/, "", s); gsub(/ /, "-", s); return s; } BEGIN { sectionCount = 0; WAIT_FOR_START = 1; IN_PRE = 0; } # PARSE_HEADERS will be true only for the TOC list of HREFs # so we can build the list of "chapters". PARSE_HEADERS && /(.*)<.A/, m); sections[sectionCount] = m[2]; sectionsLink[sectionCount] = m[1]; sectionCount++; next; } # This marks the beginning of the TOC really. WAIT_FOR_START && /Nocash PSXSPX Playstation Specifications<.B>
/ { PARSE_HEADERS = 1; next; } # These markers are to split off sections and subsections. /^/ { if (PARSE_HEADERS) WAIT_FOR_START = 0; PARSE_HEADERS = 0; WAIT_SECTION = 1; next; } # Trash anything that's in the TOC. We'll build a new one later. WAIT_FOR_START { next; } # We need to do slighly different parsing if we're in or out a PRE section. // { IN_PRE = 1; } /<.TD><.TR><.TABLE>/ { IN_PRE = 0; } # Generic sed-like line replacements. { # The input file isn't valid html to begin with, # so we're not going to do a true html parser. # Replace the typical html escapes right off the bat. gsub(/ /, " "); if (IN_PRE) { gsub(/</, "<"); gsub(/>/, ">"); } else { # outside of
blocks, we want to escape these for the md format. gsub(/</, "\\<"); gsub(/>/, "\\>"); gsub(/*/, "\\*"); gsub(/_/, "\\_"); gsub(/~/, "\\~"); } gsub(/&/, "\\&"); # These are fairly straightforward replacements sub(//, "```"); sub(/<.TD><.TR><.TABLE>/, "```"); $0 = gensub(/(.*)<.B>
/, "#### \\1", "g"); sub(/
/, "
"); } # Skip the remaining cruft. /^/, m); link = m[1]; found = 0; # Checking if it was in our TOC to distingish between section or subsection. for (i = 0; i < sectionCount; i++) { if (sectionsLink[i] == link) { GOT_MAJOR_SECTION = 1; sectionFile = link; found = 1; subsectionsCount = 0; printf("SWITCHED TO MAJOR SECTION: %s\n", link); } } if (!found) { subsection = link; printf("SWITCHED TO MINOR SECTION: %s\n", link); } next; } # Creating the proper section header, and the second pass sed script. WAIT_HEADER { WAIT_HEADER = 0; if (GOT_MAJOR_SECTION) { print("s/[^<]*<.A>/[" linkescape(trim($0)) "](" link ".md)/g") > "psx-spx-pass2.sed"; $0 = "# " $0; } else { print("s/[^<]*<.A>/[" linkescape(trim($0)) "](" sectionFile ".md#" nametolink(trim($0)) ")/g") > "psx-spx-pass2.sed"; $0 = "## " $0; } GOT_MAJOR_SECTION = 0; } # If we get here, output our current line to the current section md file. { print > sectionFile ".md" } END { # Emit the TOC and finish the second pass sed script, as well as # the second pass runner script. for (i = 0; i < sectionCount; i++) { link = sectionsLink[i]; name = sections[i]; print("sed -f psx-spx-pass2.sed -i " link ".md") > "run-pass2.sh"; print("[" name "](" link ".md)
") > "index.md"; } print("s/^$//") > "psx-spx-pass2.sed"; }