2019-07-14 18:21:18 +00:00
|
|
|
""" extract issns from journallist.csv """
|
|
|
|
|
2019-03-17 22:47:24 +00:00
|
|
|
import csv
|
|
|
|
import json
|
2020-08-16 05:15:55 +00:00
|
|
|
import regex
|
2019-03-17 22:47:24 +00:00
|
|
|
|
|
|
|
# Zero-based CSV column positions used by this script.
# NOTE(review): presumably columns 3/4 hold the two ISSN variants and column 30
# holds the journal language(s) -- confirm against the CSV header row.
_ISSN_FALLBACK_COL = 3
_ISSN_PREFERRED_COL = 4
_LANGUAGE_COL = 30

# Four digits, a hyphen, three digits, then a digit or an upper/lower-case X.
# Compiled once here instead of being re-specified for every row.
_ISSN_PATTERN = regex.compile(r'^[0-9]{4}-[0-9]{3}[0-9xX]$')

# Collected ISSN strings, in file order; written out as a JSON list below.
output = []

with open("journallist-June2020.csv", newline="") as csvfile:
    for row in csv.reader(csvfile):
        # Rows with fewer than 31 cells (e.g. blank lines) cannot be filtered
        # on the language column. They were previously skipped implicitly by a
        # bare ``except: pass`` catching the IndexError; skip them explicitly
        # so that unrelated errors are no longer hidden.
        if len(row) <= _LANGUAGE_COL:
            continue

        # Only keep rows whose language cell mentions English (any case).
        if "english" not in row[_LANGUAGE_COL].lower():
            continue

        # Prefer column 4; use column 3 only when column 4 is empty. A
        # non-empty but malformed column 4 is reported and does NOT fall back
        # to column 3.
        candidate = row[_ISSN_PREFERRED_COL] or row[_ISSN_FALLBACK_COL]

        if not candidate:
            print("no issn")
        elif _ISSN_PATTERN.match(candidate):
            output.append(candidate)
        else:
            print(candidate, 'regex does not match')

# Persist the collected ISSNs as a single JSON array.
with open("issnlist-multilingual.txt", "w") as issnfile:
    issnfile.write(json.dumps(output))
|