import html
import os
import shutil
import sys
import unicodedata
import urllib
import urllib.parse  # convertlink() calls urllib.parse.unquote; import the submodule explicitly
from pathlib import Path

# The md2gemini import below relies on this relative directory being on sys.path.
sys.path.append("../md2gemini")
from md2gemini import md2gemini

# Directory whose entries are read as the Markdown posts to process.
mdsource = "/home/ploum/gandi_backup_202210119/blog_20221019"
# Root against which "wp-content/..." paths of the old site are resolved.
filesource = "/home/ploum/gandi_backup_202210119/vhosts/ploum.net/htdocs"
# Root of the output tree; referenced files are moved below "<filedest>/files/old/".
filedest = "/home/ploum/dev/gemlog"

# Substrings identifying the services whose links are replaced by a local
# placeholder page in convertlink().
_PROPRIETARY_SERVICES = (
    "medium.com",
    "facebook.com",
    "twitter.com",
    "patreon.com",
    "last.fm",
    "tipeee.com",
    "klout.com",
    "flattr.com",
    "app.net",
    "500px.com",
    "instagram.com",
    "changetip.com",
    "linkedin.com",
    "getpocket.com",
    "tip.me",
    "lastfm.fr",
    "plus.google.com",
)


def convertlink(inlink):
    """Rewrite a link found in an old post so that it fits the new layout.

    Absolute links to ploum.net are first reduced to site-relative paths.
    After that:

    * Links still starting with "http" that mention "ploum" or
      "plus.google.com" and contain one of ``_PROPRIETARY_SERVICES`` are
      replaced by "/no-proprietary-service.html"; other "http" links are
      returned as they are.
    * For the remaining (local) links, the "/post" prefix is dropped and the
      "/public/", "/images/" and "../uploads" locations are mapped below
      "/wp-content/". Links under "wp-content/" are then percent-decoded and
      rewritten to "../files/old/...". When the referenced file exists under
      ``filesource`` it is moved below ``filedest``, unless something is
      already present at the destination.

    Args:
        inlink: The link target as written in the source post.

    Returns:
        The rewritten link; a link matched by no rule is returned with only
        the ploum.net prefix (if any) removed.
    """
    # NOTE(review): the nesting of this function was reconstructed from a
    # whitespace-collapsed source - confirm that the proprietary-service
    # check is meant to apply to "http" links only.
    if inlink.startswith(("http://ploum.net", "https://ploum.net")):
        inlink = inlink.removeprefix("http://ploum.net").removeprefix("https://ploum.net")

    if inlink.startswith("http"):
        mentions_author = "ploum" in inlink or "plus.google.com" in inlink
        if mentions_author and any(service in inlink for service in _PROPRIETARY_SERVICES):
            inlink = "/no-proprietary-service.html"
        return inlink

    # Local link: normalise the legacy upload locations onto /wp-content/.
    if inlink.startswith("/post/"):
        inlink = inlink.removeprefix("/post")
    elif inlink.startswith("/public/"):
        inlink = "/wp-content/uploads/" + inlink.removeprefix("/public/")
    elif inlink.startswith("/images/"):
        inlink = "/wp-content/uploads/" + inlink.removeprefix("/images/")
    elif inlink.startswith("../uploads"):
        inlink = "/wp-content/" + inlink.removeprefix("../")

    if not inlink.startswith(("wp-content/", "/wp-content/")):
        return inlink

    # File names on disk are not percent-encoded, so decode before the lookup.
    inlink = urllib.parse.unquote(inlink.removeprefix("/"))
    source = Path(filesource + "/" + inlink)
    inlink = "../files/old/" + inlink.removeprefix("wp-content/uploads/")
    if source.exists():
        # inlink[2:] drops the leading ".." so the path hangs below filedest.
        dest = Path(filedest + inlink[2:])
        print("copying %s file to %s"%(source,dest))
        if not dest.exists():
            dest.parent.mkdir(parents=True, exist_ok=True)
            # The file is moved, not copied: a later run will no longer find
            # it under filesource.
            shutil.move(source, dest)
    return inlink


pngs = 0
jpgs = 0
noh = 0
other = 0 #print(mdsource) testfile = "de-la-mediterranee-a-latlantique-en-vtt.md" testlist = [testfile] #print(testlist) #for md_file in testlist: for md_file in os.listdir(mdsource): title = "" date = "" permalink = "" text = "" tag = [] lang = "fr" image = None with open(mdsource + "/" + md_file) as f: md = html.unescape(f.read()) md = unicodedata.normalize('NFC', md) md = md.replace("","\n") md = md.replace("
"," ") md = md.replace("
","\n") md = md.replace("
","") md = md.replace("
","") md = md.replace("
","") md = md.replace("
","") md = md.replace("
","\n") md = md.replace("
","") if "
"): link = link.split(" ")[1] if link.endswith(".jpg") or link.endswith(".png") or link.endswith(".gif"): imagename = link[:-4] elif link.endswith(".jpeg"): imagename = link[:-5] if imagename: imagename = urllib.parse.unquote(imagename) return imagename for l in lines: if l.startswith("=> ../files/old"): imagename = get_imagename(l) if previousimg and imagename and (previousimg in imagename or imagename in previousimg): #print("skip link %s" %l) pass else: previousimg = imagename if imagename: extension = l.split()[1].split(".")[-1] pathimg = Path(filedest+imagename[2:]+"."+extension) if not pathimg.exists(): print("not imported in %s yet : %s" %(path,pathimg)) newtext += l + "\n" ## detecting other double links elif l.startswith("=> ") and not "no-proprietary-service" in l: if previous and l.split()[1] == previous.split()[1]: #print("%s : skip link %s" %(date,l)) pass else: previous = l newtext += l + "\n" else: newtext += l + "\n" final = "# %s\n" %title if image: final += "=> files/old/%s\n"%image final += "\n" final += newtext #print(final) with open(path,mode="w") as f: f.write(final) f.close()