Skip to content

Commit 4cdcce9

Browse files
authored
Merge pull request #21 from mara/Handle-duplicate-ids
Merge release 4.0.0 changes for including the Ad-group-id
2 parents f771167 + 207d8cd commit 4cdcce9

File tree

5 files changed

+85
-65
lines changed

5 files changed

+85
-65
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
*.egg-info/
22
__pycache__
33
.idea
4+
.vscode
45
.venv/
6+
build
7+
dist

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# Changelog
22

3+
## 4.0.0 (2019-07-05)
4+
5+
- Compatible with specifications: an ad is uniquely identified by the combination of Ad ID + Ad Group ID, not by Ad ID alone.
6+
- Add option (`ignore_removed_campaigns` in `config.py`) to skip downloading data related to removed campaigns
7+
8+
**required changes**
9+
10+
- The file format changed to `v5`. Adapt ETL scripts that process the output data.
11+
- Ad ID is no longer unique in any file
12+
- Ad performance datasets now include the Ad Group ID
13+
314
## 3.0.0 (2019-04-13)
415

516
- Change MARA_XXX variables to functions to delay imports

google_ads_downloader/config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def redownload_window() -> str:
5151

5252
def output_file_version() -> str:
5353
"""A suffix that is added to output files, denoting a version of the data format"""
54-
return 'v4'
54+
return 'v5'
5555

5656

5757
def max_retries() -> int:
@@ -62,3 +62,8 @@ def max_retries() -> int:
6262
def retry_backoff_factor() -> int:
6363
"""How many seconds to wait between retries (is multiplied with retry count)"""
6464
return 5
65+
66+
67+
def ignore_removed_campaigns() -> bool:
68+
"""Whether to ignore campaigns with status 'REMOVED'"""
69+
return False

google_ads_downloader/downloader.py

Lines changed: 64 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import re
88
import shutil
99
import sys
10+
import io
1011
import tempfile
1112
import json
1213
import time
@@ -84,8 +85,8 @@ def _fetch_client_customers(self):
8485

8586
def download_data():
8687
"""Creates an AdWordsApiClient and downloads the data"""
87-
logger = logging.basicConfig(level=logging.INFO,
88-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
88+
logging.basicConfig(level=logging.INFO,
89+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
8990

9091
logging.info('Adwords API version: '+str(config.api_version()))
9192

@@ -100,22 +101,32 @@ def download_data_sets(api_client: AdWordsApiClient):
100101
api_client: AdWordsApiClient
101102
102103
"""
104+
105+
predicates = [{'field': 'Status',
106+
'operator': 'IN',
107+
'values': ['ENABLED',
108+
'PAUSED',
109+
'DISABLED']
110+
}, {
111+
'field': 'Impressions',
112+
'operator': 'GREATER_THAN',
113+
'values': [0]
114+
}]
115+
116+
if config.ignore_removed_campaigns():
117+
predicates.append({
118+
'field': 'CampaignStatus',
119+
'operator': 'NOT_EQUALS',
120+
'values': 'REMOVED'
121+
})
122+
103123
download_performance(api_client,
104124
PerformanceReportType.AD_PERFORMANCE_REPORT,
105-
fields=['Date', 'Id', 'Device', 'AdNetworkType2',
125+
fields=['Date', 'Id', 'AdGroupId', 'Device', 'AdNetworkType2',
106126
'ActiveViewImpressions', 'AveragePosition',
107127
'Clicks', 'Conversions', 'ConversionValue',
108128
'Cost', 'Impressions'],
109-
predicates=[{'field': 'Status',
110-
'operator': 'IN',
111-
'values': ['ENABLED',
112-
'PAUSED',
113-
'DISABLED']
114-
}, {
115-
'field': 'Impressions',
116-
'operator': 'GREATER_THAN',
117-
'values': [0]
118-
}]
129+
predicates=predicates
119130
)
120131

121132
download_account_structure(api_client)
@@ -132,7 +143,6 @@ def download_performance(api_client: AdWordsApiClient,
132143
performance_report_type: A PerformanceReportType object
133144
fields: A list of fields to be included in the report
134145
predicates: A list of filters for the report
135-
redownload_window: The number of days the performance is redownloaded
136146
"""
137147
client_customer_ids = api_client.client_customers.keys()
138148

@@ -194,7 +204,7 @@ def get_performance_for_single_day(api_client: AdWordsApiClient,
194204
fields=fields,
195205
predicates=predicates,
196206
)
197-
report_list.extend(_convert_report_to_list(report))
207+
report_list.extend(list(report))
198208
return report_list
199209

200210

@@ -224,7 +234,8 @@ def download_account_structure(api_client: AdWordsApiClient):
224234
ad_group_attributes = get_ad_group_attributes(api_client, client_customer_id)
225235
ad_data = get_ad_data(api_client, client_customer_id)
226236

227-
for ad_id, ad_data_dict in ad_data.items():
237+
for ad_data_dict in ad_data:
238+
ad_id = ad_data_dict['Ad ID']
228239
campaign_id = ad_data_dict['Campaign ID']
229240
ad_group_id = ad_data_dict['Ad group ID']
230241
currency_code = client_customer['Currency Code']
@@ -272,10 +283,7 @@ def get_campaign_attributes(api_client: AdWordsApiClient, client_customer_id: in
272283
'PAUSED',
273284
'REMOVED']
274285
})
275-
report_list = _convert_report_to_list(report)
276-
277-
return {row['Campaign ID']: parse_labels(row['Labels']) for row in
278-
report_list}
286+
return {row['Campaign ID']: parse_labels(row['Labels']) for row in report}
279287

280288

281289
def get_ad_group_attributes(api_client: AdWordsApiClient, client_customer_id: int) -> {}:
@@ -300,13 +308,11 @@ def get_ad_group_attributes(api_client: AdWordsApiClient, client_customer_id: in
300308
'PAUSED',
301309
'REMOVED']
302310
})
303-
report_list = _convert_report_to_list(report)
304311

305-
return {row['Ad group ID']: parse_labels(row['Labels']) for row in
306-
report_list}
312+
return {row['Ad group ID']: parse_labels(row['Labels']) for row in report}
307313

308314

309-
def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> {}:
315+
def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> [{}]:
310316
"""Downloads the ad data from the Google AdWords API for a given client_customer_id
311317
https://developers.google.com/adwords/api/docs/appendix/reports/ad-performance-report
312318
@@ -318,30 +324,42 @@ def get_ad_data(api_client: AdWordsApiClient, client_customer_id: int) -> {}:
318324
A list of dictionaries of the form [{key: value}], one entry per report row (an Ad ID may appear more than once)
319325
"""
320326
logging.info('get ad data for account {}'.format(client_customer_id))
321-
ad_data = {}
322327

323328
api_client.SetClientCustomerId(client_customer_id)
329+
330+
predicates = [
331+
{
332+
'field': 'Status',
333+
'operator': 'IN',
334+
'values': ['ENABLED',
335+
'PAUSED',
336+
'DISABLED']
337+
}
338+
]
339+
340+
if config.ignore_removed_campaigns():
341+
predicates.append({
342+
'field': 'CampaignStatus',
343+
'operator': 'NOT_EQUALS',
344+
'values': 'REMOVED'
345+
})
346+
324347
report = _download_adwords_report(api_client,
325348
report_type='AD_PERFORMANCE_REPORT',
326349
fields=['Id', 'AdGroupId', 'AdGroupName',
327350
'CampaignId', 'CampaignName',
328351
'Labels', 'Headline', 'AdType',
329352
'Status'],
330-
predicates={'field': 'Status',
331-
'operator': 'IN',
332-
'values': ['ENABLED',
333-
'PAUSED',
334-
'DISABLED']
335-
})
336-
report_list = _convert_report_to_list(report)
353+
predicates=predicates)
337354

338-
for row in report_list:
355+
ad_data = []
356+
for row in report:
339357
attributes = parse_labels(row['Labels'])
340358
if row['Ad type'] is not None:
341359
attributes = {**attributes, 'Ad type': row['Ad type']}
342360
if row['Ad state'] is not None:
343361
attributes = {**attributes, 'Ad state': row['Ad state']}
344-
ad_data[row['Ad ID']] = {**row, 'attributes': attributes}
362+
ad_data.append({**row, 'attributes': attributes})
345363

346364
return ad_data
347365

@@ -350,7 +368,7 @@ def _download_adwords_report(api_client: AdWordsApiClient,
350368
report_type: str,
351369
fields: [str],
352370
predicates: {},
353-
current_date: datetime = None) -> []:
371+
current_date: datetime = None) -> csv.DictReader:
354372
"""Downloads a Google Ads report from the Google Ads API
355373
356374
Args:
@@ -371,7 +389,7 @@ def _download_adwords_report(api_client: AdWordsApiClient,
371389
'reportName': '{}_#'.format(report_type),
372390
'dateRangeType': 'CUSTOM_DATE',
373391
'reportType': report_type,
374-
'downloadFormat': 'TSV',
392+
'downloadFormat': 'CSV',
375393
'selector': {
376394
'fields': fields,
377395
'predicates': predicates
@@ -393,11 +411,14 @@ def _download_adwords_report(api_client: AdWordsApiClient,
393411
while True:
394412
retry_count += 1
395413
try:
396-
report = report_downloader.DownloadReportAsString(report_filter,
397-
skip_report_header=False,
398-
skip_column_header=False,
399-
skip_report_summary=False)
400-
return report
414+
report = io.StringIO()
415+
report_downloader.DownloadReport(report_filter,
416+
output=report,
417+
skip_report_header=True,
418+
skip_column_header=False,
419+
skip_report_summary=True)
420+
report.seek(0)
421+
return csv.DictReader(report)
401422
except errors.AdWordsReportError as e:
402423
if retry_count < config.max_retries():
403424

@@ -435,7 +456,7 @@ def __init__(self, client_type=None, client_id=None, client_secret=None,
435456
self.auth_uri = auth_uri
436457
self.token_uri = token_uri
437458

438-
def Build(self):
459+
def build(self):
439460
"""Builds a client config dictionary used in the OAuth 2.0 flow."""
440461
if all((self.client_type, self.client_id, self.client_secret,
441462
self.auth_uri, self.token_uri)):
@@ -459,7 +480,7 @@ def refresh_oauth_token():
459480
client_config = ClientConfigBuilder(
460481
client_type=ClientConfigBuilder.CLIENT_TYPE_WEB, client_id=config.oauth2_client_id(),
461482
client_secret=config.oauth2_client_secret())
462-
flow = InstalledAppFlow.from_client_config(client_config.Build(),
483+
flow = InstalledAppFlow.from_client_config(client_config.build(),
463484
scopes=['https://www.googleapis.com/auth/adwords'])
464485
flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'
465486
authorize_url, _ = flow.authorization_url(prompt='consent')
@@ -493,26 +514,6 @@ def parse_labels(labels: str) -> {str: str}:
493514
return labels
494515

495516

496-
def _convert_report_to_list(report: str) -> [{}]:
497-
"""Converts a Google AdWords report to a list of dictionaries
498-
499-
Args:
500-
report: A Google AdWords report as a string
501-
502-
Returns:
503-
A list containing dictionaries with the data from the report
504-
505-
"""
506-
# Discard the first line as it only contains meta information.
507-
# The last two lines only display summaries
508-
rows = list(csv.reader(report.split('\n')[1:-2], dialect='excel-tab'))
509-
510-
# The second line holds the column names
511-
keys = rows[0]
512-
513-
return [dict(zip(keys, row)) for row in rows[1:]]
514-
515-
516517
def ensure_data_directory(relative_path: Path = None) -> Path:
517518
"""Checks if a directory in the data dir path exists. Creates it if necessary
518519

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='google-ads-performance-downloader',
5-
version='3.0.0',
5+
version='4.0.0',
66
description="Downloads data from the Google Adwords Api to local files",
77

88
install_requires=[

0 commit comments

Comments
 (0)