open-journal-matcher/csvdata.py

31 lines
929 B
Python
Raw Normal View History

2019-07-14 18:21:18 +00:00
""" extract issns from journallist.csv """
2019-03-17 22:47:24 +00:00
import csv
import json
import regex
2019-03-17 22:47:24 +00:00
# Shape of an ISSN: four digits, a hyphen, three digits, then a final
# digit or an upper-/lower-case X.  Compiled once instead of per row.
ISSN_PATTERN = regex.compile(r"^[0-9]{4}-[0-9]{3}[0-9xX]$")

# Column whose text must contain "english" for a row to be kept
# (presumably the journal's language field -- confirm against the CSV header).
LANGUAGE_COL = 30

# Columns that may hold an ISSN, in order of preference.  Only the first
# non-empty one is considered for a row; a malformed value there does NOT
# fall back to the next column.
ISSN_COLS = (4, 3)

# ISSNs collected from the CSV, in file order; dumped as a JSON array below.
output = []

with open("journallist-June2020.csv", newline="") as csvfile:
    for row in csv.reader(csvfile):
        # Rows too short to contain the language column are skipped quietly.
        # This replaces a bare ``except: pass`` that hid every other error too.
        if len(row) <= LANGUAGE_COL:
            continue
        if "english" not in row[LANGUAGE_COL].lower():
            continue

        # First non-empty ISSN column, or None when both are empty.
        candidate = next((row[col] for col in ISSN_COLS if row[col]), None)
        if candidate is None:
            print("no issn")
        elif ISSN_PATTERN.match(candidate):
            output.append(candidate)
        else:
            print(candidate, 'regex does not match')

with open("issnlist-multilingual.txt", "w") as issnfile:
    json.dump(output, issnfile)