Skip to content

Commit 32c4975

Browse files
fix downloads
1 parent db00a93 commit 32c4975

File tree

3 files changed

+79
-8
lines changed

3 files changed

+79
-8
lines changed

MailGrabber.cs

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public MailGrabber(string host, string user, string password)
1717
_password = password;
1818
}
1919

20-
internal List<Tuple<string,string>> FindUrl(string sender, string regexPatternDownload, string regexPatternWorkspace)
20+
internal async Task<List<Tuple<string, string>>> FindUrl(string sender, string regexPatternDownload, string regexPatternWorkspace, string subjectFilter = "")
2121
{
2222
using var client = new ImapClient();
2323
client.ServerCertificateValidationCallback = (s, c, h, e) => true;
@@ -34,10 +34,16 @@ internal List<Tuple<string,string>> FindUrl(string sender, string regexPatternDo
3434

3535
if((message.From[0] as MailboxAddress)?.Address != sender)
3636
continue;
37-
37+
38+
if (!string.IsNullOrEmpty(subjectFilter) && !message.Subject.Contains(subjectFilter))
39+
{
40+
continue;
41+
}
42+
3843
string body = message.HtmlBody;
44+
3945

40-
// Regex to extract URLs
46+
// Regex to directly extract URLs
4147
var regexDl = new Regex(regexPatternDownload, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline );
4248
var regexWorkspace = new Regex(regexPatternWorkspace, RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Multiline );
4349

@@ -60,9 +66,74 @@ internal List<Tuple<string,string>> FindUrl(string sender, string regexPatternDo
6066
{
6167
urls.Add(new Tuple<string, string>(dlUrl, workspaceUrl));
6268
}
69+
70+
// Extract mailgun URLs
71+
// Regex pattern to find all links for the domain mg.mail.notion.so
72+
string pattern = @"<a[^>]*href=""(?<url>https:\/\/mg\.mail\.notion\.so[^""]+)""[^>]*>[^<]*<\/a>";
73+
74+
List<string> links = new List<string>();
75+
76+
// Extract all matching links
77+
MatchCollection matches = Regex.Matches(body, pattern);
78+
foreach (Match match in matches)
79+
{
80+
links.Add(match.Groups["url"].Value);
81+
}
82+
83+
// Perform GET requests and extract the Location header
84+
// added here
85+
HttpClientHandler httpClientHandler = new HttpClientHandler();
86+
httpClientHandler.AllowAutoRedirect = false;
87+
using HttpClient hc = new HttpClient(httpClientHandler);
88+
string dlUrl2 = String.Empty;
89+
string workspaceUrl2 = String.Empty;
90+
91+
foreach (var link in links)
92+
{
93+
try
94+
{
95+
// Redirects are NOT followed automatically (AllowAutoRedirect = false); clear default headers and read the Location header manually below
96+
hc.DefaultRequestHeaders.Clear();
97+
var response = await hc.GetAsync(link);
98+
99+
// Check if the response is a redirect
100+
if (response.StatusCode == System.Net.HttpStatusCode.MovedPermanently ||
101+
response.StatusCode == System.Net.HttpStatusCode.Found ||
102+
response.StatusCode == System.Net.HttpStatusCode.SeeOther)
103+
{
104+
// Extract the Location header
105+
if (response.Headers.Location != null)
106+
{
107+
string actualLink = response.Headers.Location.ToString();
108+
if (regexDl.Match(actualLink).Success)
109+
{
110+
dlUrl2 = actualLink;
111+
}
112+
if (regexWorkspace.Match(actualLink).Success)
113+
{
114+
workspaceUrl2 = actualLink;
115+
}
116+
117+
118+
}
119+
}
120+
else
121+
{
122+
Console.WriteLine("No redirect for URL: " + link);
123+
}
124+
}
125+
catch (Exception ex)
126+
{
127+
Console.WriteLine($"Error fetching {link}: {ex.Message}");
128+
}
129+
}
130+
if (!string.IsNullOrEmpty(dlUrl2))
131+
{
132+
urls.Add(new Tuple<string, string>(dlUrl2, workspaceUrl2));
133+
}
63134
}
64135

65-
client.Disconnect(true);
136+
await client.DisconnectAsync(true);
66137

67138
return urls;
68139

NotionWebsitePuppeteer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ void TryLoginWithLink()
4040
Thread.Sleep(2000);
4141
Console.WriteLine("Waiting for login code email...");
4242
List<Tuple<string, string>> loginUrls = mg.FindUrl("notify@mail.notion.so",
43-
"https://www\\.notion\\.so/loginwithemail.*?(?=\")", String.Empty);
43+
"https://www\\.notion\\.so/loginwithemail.*?(?=\")", String.Empty).Result;
4444

4545
if (loginUrls.Count == 0) continue;
4646

Program.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,8 @@ async Task<Cookie> FileCookie()
109109
// Look in Mail inbox for completed exports
110110
Console.WriteLine("Checking emails...");
111111
MailGrabber mail = new MailGrabber(mailHost, mailUser, mailPassword);
112-
List<Tuple<string, string>> downloadUrls = mail.FindUrl("export-noreply@mail.notion.so", @"https://file\.notion\.so/.+\.zip","https://www\\.notion\\.so/space/[a-z0-9]+");
113-
downloadUrls.AddRange(mail.FindUrl("notify@mail.notion.so", @"https://file\.notion\.so/.+\.zip","https://www\\.notion\\.so/space/[a-z0-9]+"));
112+
List<Tuple<string, string>> downloadUrls = mail.FindUrl("export-noreply@mail.notion.so", @"https://file\.notion\.so/.+\.zip","https://www\\.notion\\.so/space/[a-z0-9]+","export").Result;
113+
downloadUrls.AddRange(mail.FindUrl("notify@mail.notion.so", @"https://file\.notion\.so/.+\.zip","https://www\\.notion\\.so/space/[a-z0-9]+","export").Result);
114114

115115
// Download the exports using the session cookies
116116

@@ -121,7 +121,7 @@ async Task<Cookie> FileCookie()
121121
foreach (Tuple<string, string> urls in downloadUrls)
122122
{
123123
// map workspace
124-
string workspaceId = urls.Item2.Split('/').Last();
124+
string workspaceId = urls.Item2.Split('/').Last().Split('?')[0];
125125
string workspaceName = workspaces.FirstOrDefault(x => x.Id == workspaceId)?.Name ?? String.Empty;
126126

127127
if (string.IsNullOrEmpty(workspaceName))

0 commit comments

Comments
 (0)