Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
node_modules
npm-debug.log
admin1CodesASCII.txt
admin2Codes.txt
cities1000.txt
cities1000.zip
3 changes: 2 additions & 1 deletion .npmignore
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ packages
# Project
convert.js
download.js
admin1CodesASCII.txt
admin2Codes.txt
cities1000.txt
cities1000.zip
cities1000.zip
.prettierignore
.prettierrc
2 changes: 2 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
cities.json
admin1.json
admin2.json
54 changes: 48 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Here is the description of the original dataset:

> _all cities with a population > 1000 or seats of adm div (ca 150.000) [...]_

_Thus, this file is updated **monthly** and new cities are added as world population rises._

## Install

```
Expand Down Expand Up @@ -42,22 +44,62 @@ This Json version is an array of object of the following shape:
- name
- Latitude
- Longitude
- Admin code 1: the code of an administrative division (see [Administrative divisions](#administrative-divisions))
- Admin code 2: the code of an administrative subdivision (see [Administrative divisions](#administrative-divisions))

```
```jsonc
[
{
"country": "FR",
"name": "Lyon",
"lat": "45.75",
"lng": "4.583333"
},
...
"lat": "45.74846",
"lng": "4.84671",
"country": "FR",
"admin1": "84",
"admin2": "69"
}
// etc.
]
```

> These cities can pretty easily be matched with countries by code using the following dataset:
> https://github.com/annexare/Countries

## Administrative divisions

Names and depth of these divisions (and subdivisions) may vary greatly between countries, hence the use of these less expressive property names:

- `admin1`
- `admin2`
- _etc._

For example, `admin1` for a **US** city would stand for the **state**:

```jsonc
[
{
"name": "Boston",
"lat": "42.35843",
"lng": "-71.05977",
"country": "US",
"admin1": "MA",
"admin2": "025"
}
]
```

You may map the `admin1` code to the full **English** name through the [./admin1.json](./admin1.json) file,
where the `code` is the concatenation of the `country` code and the `admin1` code, separated by a dot, for example:

- `US.MA` → **Massachusetts**

For [./admin2.json](./admin2.json), concatenate the `country` code, the `admin1` code and the `admin2` code, separated by dots:

- `US.MA.025` → **Suffolk County**

## Resource

- https://download.geonames.org/export/dump/

## Licence

This work is licensed under a [Creative Commons Attribution 3.0 License](https://creativecommons.org/licenses/by/3.0/).
141 changes: 92 additions & 49 deletions convert.js
Original file line number Diff line number Diff line change
@@ -1,53 +1,96 @@
var fs = require('fs');
var jsonfile = require('jsonfile');
var readline = require('readline');
const fs = require('fs');
const jsonfile = require('jsonfile');
const readline = require('readline');

var file = './cities.json';
var cities = [],
i = 0,
city;
/**
 * Strips every double-quote character (") from a string.
 *
 * @param {string} value - Raw text to clean.
 * @returns {string} `value` without any `"` characters.
 */
const removeDoubleQuotes = (value) => {
  const withoutQuotes = value.replace(/"/g, '');
  return withoutQuotes;
};

readline
.createInterface({
input: fs.createReadStream('./cities1000.txt'),
output: process.stdout,
terminal: false,
})
.on('line', function (line) {
city = line.split('\t');
if (i !== 0) {
// geonameid : integer id of record in geonames database
// name : name of geographical point (utf8) varchar(200)
// asciiname : name of geographical point in plain ascii characters, varchar(200)
// alternatenames : alternatenames, comma separated, ascii names automatically transliterated, convenience attribute from alternatename table, varchar(10000)
// latitude : latitude in decimal degrees (wgs84)
// longitude : longitude in decimal degrees (wgs84)
// feature class : see http://www.geonames.org/export/codes.html, char(1)
// feature code : see http://www.geonames.org/export/codes.html, varchar(10)
// country code : ISO-3166 2-letter country code, 2 characters
// cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 200 characters
// admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
// admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
// admin3 code : code for third level administrative division, varchar(20)
// admin4 code : code for fourth level administrative division, varchar(20)
// population : bigint (8 byte int)
// elevation : in meters, integer
// dem : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
// timezone : the iana timezone id (see file timeZone.txt) varchar(40)
// modification date : date of last modification in yyyy-MM-dd format
cities.push({
country: city[8],
name: city[1].replace('"', '').replace('"', ''),
lat: city[4],
lng: city[5],
});
}
i++;
})
.on('close', function () {
jsonfile.writeFile(file, cities, { spaces: 2 }, function (err) {
if (err) {
console.error(err);
/**
 * Converts a tab-separated text file (`./<filename>.txt`) into a JSON file
 * containing an array of objects, one object per input line.
 *
 * Only the columns listed in `columnNameMapping` are kept; double quotes are
 * removed from every kept value.
 *
 * @param {string} filename - Base name of the source file, without `.txt`.
 * @param {Object<number, string>} [columnNameMapping={}] - Maps a zero-based
 *   column index to the property name the column value is stored under.
 * @param {string} [jsonFilePath] - Output path; defaults to `./<filename>.json`.
 * @returns {void} Reading and writing are asynchronous; a write error is
 *   logged with `console.error` rather than thrown.
 */
const txtToJson = (filename, columnNameMapping = {}, jsonFilePath) => {
  const txtFilePath = `./${filename}.txt`;
  const outputPath = jsonFilePath || `./${filename}.json`;
  const entries = [];

  // Object keys are strings: resolve them once into [columnIndex, propertyName]
  // pairs instead of re-scanning the mapping for every value of every line.
  const mappedColumns = Object.entries(columnNameMapping)
    .map(([index, propertyName]) => [Number.parseInt(index, 10), propertyName])
    .filter(([columnIndex]) => Number.isInteger(columnIndex) && columnIndex >= 0);

  let lineNumber = 0;

  readline
    .createInterface({
      input: fs.createReadStream(txtFilePath),
      output: process.stdout,
      terminal: false,
    })
    .on('line', (line) => {
      // NOTE(review): the first line is discarded as if it were a header row.
      // Confirm the source files really start with one; otherwise the first
      // record is silently lost.
      if (lineNumber !== 0) {
        const lineValues = line.split('\t');
        const entry = {};
        for (const [columnIndex, propertyName] of mappedColumns) {
          // Short lines simply omit the properties they have no column for.
          if (columnIndex < lineValues.length) {
            entry[propertyName] = removeDoubleQuotes(lineValues[columnIndex]);
          }
        }
        entries.push(entry);
      }
      lineNumber++;
    })
    .on('close', () => {
      // Report what is actually written, not the number of lines read
      // (which also counts the skipped first line).
      console.log(`Writing ${entries.length} entries to ${outputPath}`);
      jsonfile.writeFile(outputPath, entries, { spaces: 2 }, (err) => {
        if (err) {
          console.error(err);
        }
      });
    });
};

// geonameid : integer id of record in geonames database
// name : name of geographical point (utf8) varchar(200)
// asciiname : name of geographical point in plain ascii characters, varchar(200)
// alternatenames : alternatenames, comma separated, ascii names automatically transliterated, convenience attribute from alternatename table, varchar(10000)
// latitude : latitude in decimal degrees (wgs84)
// longitude : longitude in decimal degrees (wgs84)
// feature class : see http://www.geonames.org/export/codes.html, char(1)
// feature code : see http://www.geonames.org/export/codes.html, varchar(10)
// country code : ISO-3166 2-letter country code, 2 characters
// cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 200 characters
// admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
// admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
// admin3 code : code for third level administrative division, varchar(20)
// admin4 code : code for fourth level administrative division, varchar(20)
// population : bigint (8 byte int)
// elevation : in meters, integer
// dem : digital elevation model, srtm3 or gtopo30, average elevation of 3''x3'' (ca 90mx90m) or 30''x30'' (ca 900mx900m) area in meters, integer. srtm processed by cgiar/ciat.
// timezone : the iana timezone id (see file timeZone.txt) varchar(40)
// modification date : date of last modification in yyyy-MM-dd format
// Conversion table: [source base name, column index -> property name, output path].
// Entries are processed in the order listed.
const conversions = [
  [
    'cities1000',
    {
      1: 'name',
      4: 'lat',
      5: 'lng',
      8: 'country',
      10: 'admin1',
      11: 'admin2',
    },
    './cities.json',
  ],
  ['admin1CodesASCII', { 0: 'code', 1: 'name' }, './admin1.json'],
  ['admin2Codes', { 0: 'code', 1: 'name' }, './admin2.json'],
];

for (const [sourceName, columns, targetPath] of conversions) {
  txtToJson(sourceName, columns, targetPath);
}
65 changes: 40 additions & 25 deletions download.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,36 +2,51 @@ const http = require('https'); // or 'https' for https:// URLs
const fs = require('fs');
const yauzl = require('yauzl');

const baseURL = 'https://download.geonames.org/export/dump/';
/**
 * Downloads `<baseURL><filename>` and saves it under `filename` in the
 * current working directory.
 *
 * The local file is only created once the server has answered with HTTP 200,
 * so a failed request does not truncate a previously downloaded copy or store
 * an error page as data. Any failure is logged and marks the process as failed
 * through `process.exitCode`.
 *
 * @param {string} filename - Name of the remote file, also used as local path.
 * @returns {void} The download is asynchronous; completion is only logged.
 */
const downloadGeonameFile = (filename) => {
  const fileURL = `${baseURL}${filename}`;

  const reportFailure = (err) => {
    console.error(`Download failed: ${fileURL}`, err);
    process.exitCode = 1;
  };

  console.log(`Downloading ${fileURL}`);
  http
    .get(fileURL, (response) => {
      if (response.statusCode !== 200) {
        // Drain the body so the socket is released, then give up.
        response.resume();
        reportFailure(
          new Error(`Unexpected HTTP status ${response.statusCode}`)
        );
        return;
      }

      const writeStream = fs.createWriteStream(filename);
      writeStream.on('error', reportFailure);
      // The stream closes itself after 'finish'; no explicit close() needed.
      writeStream.on('finish', () => {
        console.log(`Download complete: ${filename}`);
      });

      response.on('error', (err) => {
        // pipe() does not end the destination when the source fails.
        writeStream.destroy();
        reportFailure(err);
      });
      response.pipe(writeStream);
    })
    .on('error', reportFailure);
};

downloadGeonameFile('admin1CodesASCII.txt');
downloadGeonameFile('admin2Codes.txt');

const txtFilename = 'cities1000.txt';
const zipFilename = 'cities1000.zip';
const zipFile = fs.createWriteStream(zipFilename);
const request = http.get(
`https://download.geonames.org/export/dump/${zipFilename}`,
(response) => {
response.pipe(zipFile);
http.get(`${baseURL}${zipFilename}`, (response) => {
response.pipe(zipFile);

zipFile.on('finish', () => {
zipFile.close();
console.log('Download Completed');
zipFile.on('finish', () => {
zipFile.close();
console.log('Download Completed');

yauzl.open(zipFilename, { lazyEntries: true }, (err, zipfile) => {
if (err) throw err;
zipfile.readEntry();
zipfile.on('entry', (entry) => {
if (entry.fileName === txtFilename) {
const txtFile = fs.createWriteStream(entry.fileName);
zipfile.openReadStream(entry, (err, readStream) => {
if (err) {
throw err;
}
readStream.on('end', function () {
zipfile.readEntry();
});
readStream.pipe(txtFile);
yauzl.open(zipFilename, { lazyEntries: true }, (err, zipfile) => {
if (err) throw err;
zipfile.readEntry();
zipfile.on('entry', (entry) => {
if (entry.fileName === txtFilename) {
const txtFile = fs.createWriteStream(entry.fileName);
zipfile.openReadStream(entry, (err, readStream) => {
if (err) {
throw err;
}
readStream.on('end', function () {
zipfile.readEntry();
});
}
});
readStream.pipe(txtFile);
});
}
});
});
}
);
});
});
9 changes: 7 additions & 2 deletions test.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
const fs = require('fs/promises');
const jsonfile = require('jsonfile');

const jsonFilename = 'cities.json';
(async () => {
/**
 * Checks that a JSON file can be parsed by two independent readers.
 * Rejects if either of them fails.
 *
 * @param {string} jsonFilename - Path of the JSON file to check.
 * @returns {Promise<void>}
 */
const assertParseable = async (jsonFilename) => {
  // First reader: the built-in JSON parser, fed the file's UTF-8 text.
  const rawText = await fs.readFile(jsonFilename, { encoding: 'utf8' });
  JSON.parse(rawText);

  // Second reader: the jsonfile library, which reads the file itself.
  jsonfile.readFileSync(jsonFilename);
};

// Entry point: check every generated JSON file, one after the other.
// Each check is awaited so a failure is reported here instead of surfacing
// as an unhandled promise rejection, and the process exits non-zero.
(async () => {
  const generatedFiles = ['cities.json', 'admin1.json', 'admin2.json'];
  for (const generatedFile of generatedFiles) {
    await assertParseable(generatedFile);
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});