Skip to content

Commit 29fe8eb

Browse files
committed
initial commit of 30061038
0 parents  commit 29fe8eb

22 files changed

+3403
-0
lines changed

.eslintrc

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"extends": "airbnb-base",
3+
"rules": {
4+
"no-use-before-define": 0,
5+
"strict": 0,
6+
"comma-dangle": [2, "never"],
7+
"func-names": 0,
8+
"no-underscore-dangle": 0,
9+
"prefer-rest-params": 0,
10+
"no-param-reassign": 0,
11+
"prefer-template": 0,
12+
"new-cap": 0,
13+
"global-require": 0,
14+
"consistent-return": 0,
15+
"max-len": [2, 200],
16+
"camelcase": 0,
17+
"import/no-dynamic-require": 0
18+
},
19+
"parserOptions": {
20+
"ecmaVersion": 6
21+
},
22+
"globals": {
23+
"describe": true,
24+
"it": true,
25+
"before": true,
26+
"beforeEach": true,
27+
"after": true,
28+
"afterEach": true
29+
}
30+
}

.gitignore

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
node_modules
2+
*.zip
3+
# Logs
4+
logs
5+
*.log
6+
npm-debug.log*
7+
8+
#kyle
9+
.DS_Store
10+
*.csv
11+
12+
# Runtime data
13+
pids
14+
*.pid
15+
*.seed
16+
*.pid.lock
17+
18+
# Directory for instrumented libs generated by jscoverage/JSCover
19+
lib-cov
20+
21+
# Coverage directory used by tools like istanbul
22+
coverage
23+
24+
# nyc test coverage
25+
.nyc_output
26+
27+
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
28+
.grunt
29+
30+
# node-waf configuration
31+
.lock-wscript
32+
33+
# Compiled binary addons (http://nodejs.org/api/addons.html)
34+
build/Release
35+
36+
# Dependency directories
37+
node_modules
38+
jspm_packages
39+
40+
# Optional npm cache directory
41+
.npm
42+
43+
# Optional eslint cache
44+
.eslintcache
45+
46+
# Optional REPL history
47+
.node_repl_history

README.md

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# VA ONLINE MEMORIAL - DATA IMPORT & SYNC
2+
3+
## Dependencies
4+
- [Nodejs](https://nodejs.org/en/)
5+
- [PostgreSQL](https://www.postgresql.org/)
6+
- [eslint](http://eslint.org/)
7+
8+
## Configuration
9+
- Edit configuration in `config/default.json` and
10+
- custom environment variables names in `config/custom-environment-variables.json`,
11+
12+
## Application constants
13+
14+
- Application constants can be configured in `./constants.js`
15+
16+
## Available tools
17+
18+
- Since the data we need to download and process is huge it's better (/ safer) to use 2 different tools instead of one single script so in case that something goes wrong during processing, we'll minimise the damage.
19+
20+
### Download datasets
21+
22+
- Run `npm run download-data` to download all available datasets.
23+
- The datasets will be stored in the configured directory.
24+
- Old data will be replaced.
25+
- This operation does not affect the database.
26+
27+
### Import data from downloaded files
28+
29+
- Run `npm run import-data` to import all data using the downloaded files from the previous step.
30+
31+
## Local Deployment
32+
33+
*Before starting the application, make sure that PostgreSQL is running and you have configured everything correctly in `config/default.json`*
34+
35+
- Install dependencies `npm i`
36+
- Run lint check `npm run lint`
37+
- Start app `npm start`. This will run all tools in the following sequence:
38+
39+
`npm run download-data` => `npm run import-data`
40+
41+
*The application will print progress information and the results in the terminal.*
42+
43+
## Verification
44+
45+
- To verify that the data is imported, you can use the [pgAdmin](https://www.pgadmin.org/) tool and browse the database.
46+
47+
## Notes:
48+
49+
- The total size of all datasets is > 1.5GB so it will take quite some time, depending on your internet connection, to finish the operation.
50+
- `max_old_space_size` has been set to *4096MB* to allow parse/process such huge data files without any issues. The app will clean the memory right after using the data to prevent memory/heap leaks.
51+
- The dataset for `FOREIGN ADDRESSES` doesn't have a header in the CSV file and it has a slightly different format (it has an extra column). The app handles all datasets without any issue.

common/logger.js

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
'use strict';

/*
 * Copyright (C) 2017 Topcoder Inc., All Rights Reserved.
 */

/**
 * This module contains the winston logger configuration.
 */

const winston = require('winston');
const config = require('config');
const chalk = require('chalk');

// Maps each upper-cased level label to the chalk color used when rendering it;
// levels not listed here (e.g. DEBUG) are printed uncolored.
const levelColors = {
  INFO: chalk.cyan,
  WARN: chalk.yellow,
  ERROR: chalk.red
};

const logger = new (winston.Logger)({
  transports: [
    new (winston.transports.Console)({
      level: config.logLevel,
      timestamp: () => new Date().toISOString(),
      // Custom winston 2.x formatter: "[timestamp][LEVEL] message \n\tmeta"
      formatter(options) {
        const message = options.message || '';

        // Only append meta when it is a non-empty object.
        const hasMeta = options.meta && Object.keys(options.meta).length;
        const meta = hasMeta ? '\n\t' + JSON.stringify(options.meta) : '';

        const label = options.level.toUpperCase();
        const colorize = levelColors[label];
        const level = colorize ? colorize(label) : label;

        return `[${options.timestamp()}][${level}] ${message} ${meta}`;
      }
    })
  ]
});

module.exports = logger;
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"dataset_url": "DATASET_URL",
3+
"logLevel": "LOG_LEVEL",
4+
"dbConfig": {
5+
"db_url": "DATABASE_URL"
6+
},
7+
"downloadPath": "DOWNLOAD_PATH"
8+
}

config/default.json

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"dataset_url": "https://www.data.va.gov/data.json",
3+
"logLevel": "info",
4+
"dbConfig": {
5+
"db_url": "postgres://user:pass@localhost:5432/va"
6+
},
7+
"downloadPath": "downloads"
8+
}

constants.js

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
'use strict';
2+
3+
/*
4+
* Copyright (c) 2017 Topcoder, Inc. All rights reserved.
5+
*/
6+
7+
/**
8+
* Application constants
9+
*/
10+
11+
// The accepted program codes
12+
const acceptedProgramCodes = [
13+
'029:001'
14+
];
15+
16+
// The accepted keywords
17+
const acceptedKeywords = [
18+
'burial data'
19+
];
20+
21+
// The accepted file format
22+
const acceptedFormat = 'csv';
23+
24+
// Entry names that should be ignored
25+
const ignoredNames = [
26+
// Source of cemeteries data
27+
'VA Cemeteries - Address, Location, Contact Information, Burial Space'
28+
];
29+
30+
// CSV headers
31+
const csvHeaders = [
32+
'd_first_name',
33+
'd_mid_name',
34+
'd_last_name',
35+
'd_suffix',
36+
'd_birth_date',
37+
'd_death_date',
38+
'section_id',
39+
'row_num',
40+
'site_num',
41+
'cem_name',
42+
'cem_addr_one',
43+
'cem_addr_two',
44+
'city',
45+
'state',
46+
'zip',
47+
'cem_url',
48+
'cem_phone',
49+
'relationship',
50+
'v_first_name',
51+
'v_mid_name',
52+
'v_last_name',
53+
'v_suffix',
54+
'branch',
55+
'rank',
56+
'war'
57+
];
58+
59+
module.exports = {
60+
acceptedProgramCodes,
61+
acceptedKeywords,
62+
acceptedFormat,
63+
ignoredNames,
64+
csvHeaders
65+
};

csvCounts.txt

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
0 csvCounts.txt
2+
21872 ngl_alabama.csv
3+
8880 ngl_alaska.csv
4+
96485 ngl_arizona.csv
5+
64296 ngl_arkansas.csv
6+
741842 ngl_california.csv
7+
141187 ngl_colorado.csv
8+
1 ngl_connecticut.csv
9+
21309 ngl_delaware.csv
10+
305774 ngl_florida.csv
11+
4313 ngl_foreign_addresses.csv
12+
57960 ngl_georgia.csv
13+
73123 ngl_hawaii.csv
14+
7362 ngl_idaho.csv
15+
135405 ngl_illinois.csv
16+
24972 ngl_indiana.csv
17+
9565 ngl_iowa.csv
18+
77240 ngl_kansas.csv
19+
60459 ngl_kentucky.csv
20+
35590 ngl_louisiana.csv
21+
30708 ngl_maine.csv
22+
137464 ngl_maryland.csv
23+
84708 ngl_massachusetts.csv
24+
71418 ngl_michigan.csv
25+
234249 ngl_minnesota.csv
26+
41613 ngl_mississippi.csv
27+
252711 ngl_missouri.csv
28+
3784 ngl_montana.csv
29+
12859 ngl_nebraska.csv
30+
49952 ngl_nevada.csv
31+
10217 ngl_new_hampshire.csv
32+
113334 ngl_new_jersey.csv
33+
65241 ngl_new_mexico.csv
34+
683640 ngl_new_york.csv
35+
40412 ngl_north_carolina.csv
36+
7678 ngl_north_dakota.csv
37+
90253 ngl_ohio.csv
38+
31285 ngl_oklahoma.csv
39+
202501 ngl_oregon.csv
40+
94842 ngl_pennsylvania.csv
41+
28365 ngl_rhode_island.csv
42+
45634 ngl_south_carolina.csv
43+
30243 ngl_south_dakota.csv
44+
207440 ngl_tennessee.csv
45+
384140 ngl_texas.csv
46+
90157 ngl_usa_territoties.csv
47+
6017 ngl_utah.csv
48+
28 ngl_vermont.csv
49+
122320 ngl_virginia.csv
50+
56423 ngl_washington.csv
51+
465 ngl_washingtondc.csv
52+
9229 ngl_west_virginia.csv
53+
67260 ngl_wisconsin.csv
54+
1813 ngl_wyoming_0.csv
55+
5196038 total

models/Branch.js

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
'use strict';
2+
3+
/*
4+
* Copyright (c) 2017 Topcoder, Inc. All rights reserved.
5+
*/
6+
7+
/*
8+
* Branch model definition
9+
*/
10+
module.exports = (sequelize, DataTypes) => sequelize.define('Branch', {
11+
value: {
12+
type: DataTypes.STRING,
13+
allowNull: null,
14+
primaryKey: true,
15+
unique: true
16+
}
17+
}, {
18+
timestamps: false
19+
});

models/Burial.js

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
'use strict';
2+
3+
/*
4+
* Copyright (c) 2017 Topcoder, Inc. All rights reserved.
5+
*/
6+
7+
/*
8+
* Burial model definition
9+
*/
10+
module.exports = (sequelize, DataTypes) => sequelize.define('Burial', {
11+
d_id: { type: DataTypes.STRING, allowNull: false, primaryKey: true },
12+
cem_id: { type: DataTypes.STRING, allowNull: false },
13+
section_id: DataTypes.STRING,
14+
row_num: DataTypes.STRING,
15+
site_num: DataTypes.STRING
16+
}, {
17+
timestamps: false
18+
});

models/Cemetery.js

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
'use strict';
2+
3+
/*
4+
* Copyright (c) 2017 Topcoder, Inc. All rights reserved.
5+
*/
6+
7+
/*
8+
* Cemetery model definition
9+
*/
10+
module.exports = (sequelize, DataTypes) => sequelize.define('Cemetery', {
11+
cem_id: { type: DataTypes.STRING, primaryKey: true, unique: true },
12+
cem_name: { type: DataTypes.STRING, allowNull: false },
13+
cem_addr_one: { type: DataTypes.STRING, allowNull: false },
14+
cem_addr_two: DataTypes.STRING,
15+
cem_url: DataTypes.STRING,
16+
cem_phone: DataTypes.STRING,
17+
city: DataTypes.STRING,
18+
state: DataTypes.STRING,
19+
zip: DataTypes.INTEGER
20+
}, {
21+
timestamps: false
22+
});

0 commit comments

Comments
 (0)