UWP webRobot 20180704: Part1) MainPage.xaml.cs
Platform: UWP Univesal Windows 10 Platform App
Sprache: C#
Zeigt:
1) Lesen von Webseiten mit HttpClient
2) SQlite Datenbank Interaktion in UWP
3) Api-Zugriff : Lesen und Schreiben zu Web-Api
Haupt-Programm: MainPage.xaml.cs
C#
using System; using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices.WindowsRuntime; using Windows.UI.Xaml; using Windows.UI.Xaml.Controls;
//< using > using HtmlAgilityPack; //*HtmlDocument using System.Threading.Tasks; //*Async Task using Microsoft.Data.Sqlite; //*SQLite for UWP from Microsoft using System.Net.Http; //*HttpClient using System.Text; //*Encoding.UTF8 using System.Net.Http.Headers; //*AuthenticationHeaderValue using Windows.Data.Json; //*JsonArray
//</ using >
namespace robot_ComputerFutures { /// <summary> /// WebRobot reading website /// </summary> public sealed partial class MainPage : Page {
#region Page //--------------------< region: Page >--------------------- public MainPage() { this.InitializeComponent(); optStop.IsChecked = false; clsDB.db_Create_Tables();
SqliteConnection cn_connection = clsDB.Get_Sqlite_Connection(); string sConnection=cn_connection.DataSource; fx_Log("database=" + sConnection);
} //--------------------</ region: Page >--------------------- #endregion /Page
#region Buttons //--------------------< region: Buttons >--------------------- private void Btn_START_Click(object sender, RoutedEventArgs e) { optStop.IsChecked = false; fx_Run_Main(); }
private async void BtnRead_Lists_Click(object sender, RoutedEventArgs e) { //--------< Button_Start_Click() >-------- optStop.IsChecked = false; await fxMain_Scan_Lists(); //--------</ Button_Start_Click() >-------- }
private async void BtnRead_Details_Click(object sender, RoutedEventArgs e) { //--------< Button_Details_Click() >-------- optStop.IsChecked = false; await fxMain_Scan_Details(); //--------</ Button_Details_Click() >-------- }
private async void BtnApi_GetList_Click(object sender, RoutedEventArgs e) { optStop.IsChecked = false; await fxMain_Api_Get_List(); } private async void BtnApi_Delete_byWebsite_Click(object sender, RoutedEventArgs e) { optStop.IsChecked = false; await fxMain_Api_Deletes_byWebsite(); } private async void BtnApi_Add_Project_Click(object sender, RoutedEventArgs e) { optStop.IsChecked = false; await fxMain_Api_Add_Projects(); } //--------------------</ region: Buttons >--------------------- #endregion /Buttons
#region Main Methods //--------------------< region: Main-Methods >--------------------- private async void fx_Run_Main() { optStop.IsChecked = false; await fxMain_Scan_Lists(); await fxMain_Scan_Details(); await fxMain_Api_Get_List(); await fxMain_Api_Deletes_byWebsite(); await fxMain_Api_Add_Projects(); }
private async Task<bool> fxMain_Scan_Lists() { if (optStop.IsChecked == true) return false; if (optRead_List.IsChecked == true) { fx_Log("------< @Read_List >------"); bool bResult = await Scan_List(app_settings._url_Startlist); fx_Log("------</ @Read_List >------"); return bResult; } else { return false; } } private async Task<bool> fxMain_Scan_Details() { if (optStop.IsChecked == true) return false; if (optRead_Details.IsChecked == true) { fx_Log("------< @Read_Details >------"); bool bResult = await Scan_all_Details(); fx_Log("------</ @Read_Details >------"); return bResult; } else { return false; } } private async Task<bool> fxMain_Api_Get_List() { if (optStop.IsChecked == true) return false;
if (optApi_Get_List.IsChecked == true) { fx_Log("------< @Api:Get_List>------"); bool bResult = await Api_Get_List(); fx_Log("------</ @Api:Get_List>------"); return bResult; } else { return false; } }
private async Task<bool> fxMain_Api_Deletes_byWebsite() { if (optStop.IsChecked == true) return false;
if (optApi_Delete_byWebsite.IsChecked == true) { fx_Log("------< @Api:Deletes_byWebsite >------"); bool bResult = await Api_Deletes_byWebsite(); fx_Log("------</ @Api:Deletes_byWebsite >------"); return bResult; } else { return false; } }
private async Task<bool> fxMain_Api_Add_Projects() { if (optStop.IsChecked == true) return false;
if (optApi_Add_Project.IsChecked == true) { fx_Log("------< @Api:Add_Projects >------"); bool bResult = await Api_Add_Projects(); fx_Log("------</ @Api:Add_Projects >------"); return bResult; } else { return false; } } //--------------------</ region: Main-Methods >--------------------- #endregion /Main Methods
#region Methods Scan //--------------------< region: Methods Scan >--------------------- private async Task<bool> Scan_List(string sURL) { //--------< Scrape_List() >-------- Textbox_URL.Text = sURL; Uri baseUrl = new Uri(sURL);
if (optStop.IsChecked == true) return false;
string sPage = sURL; //baseUrl.Query; fx_Log("--< List: " + sPage + " >--");
//< download > HtmlDocument doc = await Web_Get_HtmlDocument(sURL); if (doc == null) return false; //</ download >
//< nodes > HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//li[@class=\"job-result-item\"]"); //*find subnode with // //</ nodes >
//------< @Loop: Detail Nodes >------ foreach (HtmlNode node in nodes) { //----< In Detail-Node >---- if (optStop.IsChecked == true) return false;
HtmlNode nodeTitle = node.SelectSingleNode(".//div[@class=\"job-title\"]"); //*find subnode with .// if (nodeTitle != null) { //--< get a_href >-- HtmlNode node_to_Detail = nodeTitle.SelectSingleNode("a"); string sTitle = node_to_Detail.InnerText; string sURL_Detail_relative = node_to_Detail.GetAttributeValue("href", ""); string sURL_Detail_absolute = new Uri(baseUrl, sURL_Detail_relative).AbsoluteUri;
//< add/update URL > await db_Update_Add_ListRecord(sURL_Detail_absolute, sTitle); //</ add/update URL >
//< print > fx_Log(sTitle); //</ print > //--</ get a_href >-- } //----</ In Detail-Node >---- } //------</ @Loop: Detail Nodes >------
fx_Log("--</ List: " + sPage + " >--"); fx_Log("");
//--< #Next >-- HtmlNode nodeGroup_Next = doc.DocumentNode.SelectSingleNode("//span[@class=\"next\"]"); if (nodeGroup_Next != null) { HtmlNode node_Next = nodeGroup_Next.SelectSingleNode("a"); string sURL_Next_relative = node_Next.GetAttributeValue("href", ""); string sURL_Next = new Uri(baseUrl, sURL_Next_relative).AbsoluteUri; Textbox_URL.Text = sURL_Next; //-< load Next >- await Scan_List(sURL_Next); //-</ load Next >- } else { Textbox_URL.Text = ""; } //--</ #Next >-- return true; //--------</ Scrape_List() >-------- }
private async Task<bool> Scan_all_Details() { //--------< Scrape_all_Details() >-------- fx_Log("----< @Read Details >----");
//----< @Loop: Alle Empty Records >---- while (1 == 1) { //----< Detail >---- if (optStop.IsChecked == true) return false;
//< find record > string sSQL = "SELECT [IDDetail] FROM tbl_Details WHERE [dtScan] IS NULL LIMIT 1"; SqliteDataReader dataReader = clsDB.Get_DataReader(sSQL); //</ find record > if (dataReader.HasRows) { dataReader.Read(); await Scan_Detail_byIDProjekt(Convert.ToInt32(dataReader["IDDetail"])); } else { fx_Log("last record"); break; } //----</ Detail >---- } //----</ @Loop: Alle Empty Records >----
fx_Log("----</ @Read Details >----"); return true; //--------</ Scrape_all_Details() >-------- }
private async Task<bool> Scan_Detail_byIDProjekt(int ID) { //--------< Scrape_Detail() >-------- if (optStop.IsChecked == true) return false;
//fx_Log("--< Read Detail >--"); fx_Log("Detail=" + ID);
string sURL = await clsDB.Get_Value_as_String("URL", "tbl_Details", "[IDDetail]=" + ID); HtmlDocument doc = await Web_Get_HtmlDocument(sURL); //< check > if (doc == null) { //--< deleted >-- fx_Log("doc is NULL ID" + ID); fx_Error_Log("HTML Website read error: " + sURL ); //< update > string sql_Error = "UPDATE tbl_Details SET [dtScan] = CURRENT_TIMESTAMP, [dtDeleted]=CURRENT_TIMESTAMP WHERE IDDetail = " + ID; //SYSDATETIME() bool bUpdate = await clsDB.Execute_SQL(sql_Error); //</ update > return false; //--</ deleted >-- } //</ check >
//----< In Detail-Node >---- //< Text > string sText = ""; HtmlNode nodeText = doc.DocumentNode.SelectSingleNode("//article[@class=\"clearfix mb30\"]"); if (nodeText != null) { sText = nodeText.InnerText; } //</ Text >
//< Special > string sOrt = html_GetText_OuterGroup_by_InnerFilter(doc, "strong", "Ort"); string sGehalt = html_GetText_OuterGroup_by_InnerFilter(doc, "strong", "Gehalt"); string sBereiche = html_GetText_OuterGroup_by_InnerFilter(doc, "strong", "Bereiche"); string sArt = html_GetText_OuterGroup_by_InnerFilter(doc, "strong", "Art"); //</ Special >
//< correct > sText = clsCheck.correct_String(sText); sOrt = clsCheck.correct_String(sOrt); sGehalt = clsCheck.correct_String(sGehalt); sBereiche = clsCheck.correct_String(sBereiche); sArt = clsCheck.correct_String(sArt); //</ correct >
fx_Log("Text=" + sText.Substring(0, 100) + "..");
//< update > string sql_Update = "UPDATE tbl_Details "; sql_Update += Environment.NewLine + " SET [Text]='" + sText + "', [dtScan] = CURRENT_TIMESTAMP"; //SYSDATETIME() sql_Update += Environment.NewLine + " ,[Ort]='" + sOrt + "',[Gehalt]='" + sGehalt + "',[Bereiche]='" + sBereiche + "',[Art]='" + sArt + "'"; sql_Update += " WHERE IDDetail = " + ID; bool bOK = await clsDB.Execute_SQL(sql_Update); //</ update >
//----</ In Detail-Node >----
//fx_Log("--</ Read Detail >--"); fx_Log("/Detail=" + ID); return true; //--------</ Scrape_Detail() >-------- } //--------------------< region: Methods Scan >--------------------- #endregion Methods Scan
#region Methods Api //--------------------< region: Methods_API >--------------------- private async Task<bool> Api_Get_List() { //--------< Api_Get_List() >-------- if (optStop.IsChecked == true) return false; fx_Log("--< API:Get_List >--");
//---------------< read_API_Data() >---------------
HttpClient client = await Create_HttpClient_with_UserToken();
//< read webApi > string sURL = app_settings._Api_Server + "/api/projects"; string sResponse_Projects = ""; try { sResponse_Projects = await client.GetStringAsync(sURL); } catch (Exception ex) { clsSys.show_Message(ex.Message); return false; } //</ read webApi >
//----< get Projects-List >---- //< get-List > JsonArray jsonArray = JsonArray.Parse(sResponse_Projects); //< get-List > //----< @Loop: Projects_on_Server >---- foreach (var jsonRow in jsonArray) { //----< Project_On_Server >---- JsonObject jsonObject = jsonRow.GetObject();
//< values > string IDProject_on_Server = jsonObject["idProject"].ToString(); //</ values >
//----< Check_to_local >---- //< find record > string sSQL = "SELECT * FROM tbl_Details WHERE [IDProject_On_Server] = " + IDProject_on_Server + " LIMIT 1"; SqliteDataReader dataReader = clsDB.Get_DataReader(sSQL); if (dataReader.HasRows == false) { //---< NoMatch: Delete on Server >--- fx_Log("delete on Server " + IDProject_on_Server); //</ nomatch >
//< read webApi > //*httpDelete /api/projects/5 string sURL_Api_Delete = app_settings._Api_Server + "/api/projects/" + IDProject_on_Server; //client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", sUserToken); HttpResponseMessage httpResponseMessage = null; try { httpResponseMessage = await client.DeleteAsync(sURL_Api_Delete); fx_Log("ok.Deleted " + IDProject_on_Server); } catch (Exception ex) { fx_Error_Log("Error Delete on Server: : " + ex.Message + "IDProject_on_Server=" + IDProject_on_Server); } //</ read webApi > //---</ NoMatch: Delete on Server >--- }
//----</ Check_to_local >----
//----</ json Row >---- } //----</ @Loop: Projects_on_Server >---- //----</ get Projects-List >---- //---------------</ read_API_Data() >---------------
fx_Log("--</ API:Get_List >--"); fx_Log("");
return true;
//--------</ Api_Get_List() >-------- }
private async Task<bool> Api_Deletes_byWebsite() { //--------< Api_Deletes_byWebsite() >-------- if (optStop.IsChecked == true) return false; fx_Log("--< API:Api_Deletes_byWebsite >--");
//---------------< read_API_Data() >---------------
HttpClient client = await Create_HttpClient_with_UserToken();
//----< Check_to_local >---- //< find record > string sSQL = "SELECT [IDProject_On_Server] FROM tbl_Details WHERE [dtDeleted] IS NOT NULL"; SqliteDataReader dataReader = clsDB.Get_DataReader(sSQL); if (dataReader.HasRows == true) { while(dataReader.Read()) { if (optStop.IsChecked == true) return false; //---< NoMatch: Delete on Server >--- string IDProject_on_Server = dataReader["IDProject_on_Server"].ToString(); fx_Log("delete byWebsite" + IDProject_on_Server); //</ nomatch >
//< read webApi > //*httpDelete /api/projects/5 string sURL_Api_Delete = app_settings._Api_Server + "/api/projects/" + IDProject_on_Server; HttpResponseMessage httpResponseMessage = null; try { httpResponseMessage = await client.DeleteAsync(sURL_Api_Delete); fx_Log("ok.Deleted " + IDProject_on_Server); } catch (Exception ex) { fx_Error_Log("Error Delete byWebsite: " + ex.Message + "IDProject_on_Server=" + IDProject_on_Server); } //</ read webApi > //---</ NoMatch: Delete on Server >--- } }
//----</ @Loop: Projects_on_Server >---- //----</ get Projects-List >---- //---------------</ read_API_Data() >---------------
fx_Log("--</ API:Api_Deletes_byWebsite >--"); fx_Log("");
return true;
//--------</ Api_Deletes_byWebsite() >-------- }
private async Task<bool> Api_Add_Projects() { //--------< Api_Add_Projects() >-------- if (optStop.IsChecked == true) return false; fx_Log("--< API:Api_Add_Projects >--");
//---------------< read_API_Data() >---------------
HttpClient client = await Create_HttpClient_with_UserToken();
//----< Check_to_local >---- //< find record > string sSQL = "SELECT * FROM tbl_Details WHERE [dtScan] IS NOT NULL AND [IDProject_on_Server] IS NULL"; SqliteDataReader dataReader = clsDB.Get_DataReader(sSQL); if (dataReader.HasRows == true) { while (dataReader.Read()) { //---< NoMatch: Delete on Server >--- string IDDetail = dataReader["IDDetail"].ToString();
string sTitle = dataReader["Title"].ToString(); string sHTML = dataReader["Text"].ToString(); string sURL = dataReader["URL"].ToString(); string sBereiche = dataReader["Bereiche"].ToString(); string sGehalt = dataReader["Gehalt"].ToString(); string sArt = dataReader["Art"].ToString(); string sOrt = dataReader["Ort"].ToString();
fx_Log("add Project IDDetail:" + IDDetail); //</ nomatch >
//< read webApi > //*httpPut /api/projects string sUrl_Api_Create = app_settings._Api_Server + "/api/projects"; HttpResponseMessage httpResponseMessage = null; try { //< create Upload_Content > JsonObject jsonObject = new JsonObject(); jsonObject["title"] = JsonValue.CreateStringValue(sTitle); jsonObject["html"] = JsonValue.CreateStringValue(sHTML); jsonObject["url"] = JsonValue.CreateStringValue(sURL); jsonObject["ort"] = JsonValue.CreateStringValue(sOrt); jsonObject["bereiche"] = JsonValue.CreateStringValue(sBereiche); jsonObject["gehalt"] = JsonValue.CreateStringValue(sGehalt); jsonObject["art"] = JsonValue.CreateStringValue(sArt);
StringContent string_to_Upload_Content = new StringContent(jsonObject.Stringify()); //</ create Upload_Content >
//< upload > //httpPost=Create httpResponseMessage = await client.PostAsync(sUrl_Api_Create, string_to_Upload_Content ); //</ upload >
if (httpResponseMessage.StatusCode==System.Net.HttpStatusCode.OK) {
} else { fx_Log("Upload Error=" + httpResponseMessage.ReasonPhrase); } //< result > string sResult = await httpResponseMessage.Content.ReadAsStringAsync() ; //</ result >
//*like: {"status":"OK","message":415} JsonObject jsonResult = JsonObject.Parse(sResult); string newIDProject_on_Server = jsonResult.GetNamedValue("newIDProject_on_Server").ToString();
//-< update local ID >- long IDProject_on_Server; bool isNumeric_IDProject = long.TryParse(newIDProject_on_Server,out IDProject_on_Server); if (isNumeric_IDProject ) { string sql_Update = "UPDATE tbl_Details SET [IDProject_on_Server] = " + newIDProject_on_Server + " WHERE IDDetail = " + IDDetail; bool bUpdate = await clsDB.Execute_SQL(sql_Update); //-</ update local ID >- fx_Log("ok.Created "); } else { fx_Log("Result not IDProject"); } } catch (Exception ex) { fx_Error_Log("Error Delete byWebsite: " + ex.Message + " IDDetail=" + IDDetail ); } //</ read webApi > //---</ NoMatch: Delete on Server >--- } }
//----</ @Loop: Projects_on_Server >---- //----</ get Projects-List >---- //---------------</ read_API_Data() >---------------
fx_Log("--</ API:Api_Add_Projects >--"); fx_Log("");
return true;
//--------</ Api_Add_Projects() >-------- } //--------------------</ region: Methods_Api >--------------------- #endregion /Methods_Api
#region Methods HTML //--------------------< region: Methods >--------------------- private async Task<HtmlDocument> Web_Get_HtmlDocument(string sURL) { //------------< fx_read_Page() >------------ //* get the HTML Document of a website-URL try { //-< init >- //< HttpClient >
HttpClientHandler handler = new HttpClientHandler(); handler.AllowAutoRedirect = true; HttpClient httpClient = new HttpClient(handler);
//httpClient string sHTML = ""; //Client Request as string try { sHTML = await httpClient.GetStringAsync(sURL); } catch (Exception ex) { //clsSys.show_Message(ex.Message); fx_Log("Error httpClient: " + ex.Message); return null; } //</ HttpClient > //-</ init >-
fx_Log("read HTML=" + sHTML.Substring(0, 10) + "..");
//< get HTMLdocument > //*create and load to local HtmlDocument HtmlDocument doc = new HtmlDocument(); doc.LoadHtml(sHTML); //</ get HTMLdocument >
//< output > return doc; //</ output > } catch (Exception) { return null; }
//------------</ fx_read_Page() >------------ }
private async Task<HttpClient> Create_HttpClient_with_UserToken() { //--------< Create_HttpClient_with_Token() >-------- HttpClient client = new HttpClient(); client.Timeout = new TimeSpan(0, 0, 300); //*30 Seconds
//-< get user token >- //*get the User-Password valid token string sURL_Login = app_settings._Api_Server + "/api/get_usertoken"; var byteArray = Encoding.UTF8.GetBytes(app_settings._Username + ":" + app_settings._Password); var header = new AuthenticationHeaderValue("Basic", Convert.ToBase64String(byteArray)); client.DefaultRequestHeaders.Authorization = header;
string sUserToken = ""; try { sUserToken = await client.GetStringAsync(sURL_Login); } catch (Exception ex) { clsSys.show_Message(ex.Message); return null; } //-</ get user token >-
client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue("Bearer", sUserToken); return client; //--------</ Create_HttpClient_with_Token() >-------- }
private string html_GetText_OuterGroup_by_InnerFilter(HtmlDocument doc, string par_Html_Element_Type, string sFilter_Inner) { string sReturn = "";
HtmlNode node_Inner = doc.DocumentNode.SelectSingleNode("//" + par_Html_Element_Type + "[text() = '" + sFilter_Inner + "']"); if (node_Inner != null) { HtmlNode parent = node_Inner.ParentNode; node_Inner.Remove(); sReturn = parent.InnerText; sReturn.Trim(); sReturn.TrimStart(); } return sReturn; } //--------------------</ region: Methods >--------------------- #endregion /Methods Html
#region Methods Data //--------------------< region: Methods Data >---------------------
private async Task<bool> db_Update_Add_ListRecord(string sURL, string sTitle) { //--------< db_Update_Add_Record() >-------- //*Update or add Record //< correct> sURL = sURL.Replace("'", "''"); sTitle = sTitle.Replace("'", "''"); //</ correct>
//< find record > string sSQL = "SELECT * FROM tbl_Details WHERE [URL] = '" + sURL + "' LIMIT 1"; SqliteDataReader dataReader = clsDB.Get_DataReader(sSQL); if (dataReader.HasRows == false) { //< add > fx_Log("insert"); string sql_Add = "INSERT INTO tbl_Details ([URL],[Title],[dtCreated],[dtListed]) VALUES('" + sURL + "','" + sTitle + "', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)"; //SYSDATETIME bool bInsert = await clsDB.Execute_SQL(sql_Add); //</ add >
return bInsert; } else { //< update > fx_Log("update"); string IDDetail = dataReader["IDDetail"].ToString(); string sql_Update = "UPDATE tbl_Details SET [dtListed] = CURRENT_TIMESTAMP WHERE IDDetail = " + IDDetail; //SYSDATETIME() bool bUpdate = await clsDB.Execute_SQL(sql_Update); //< update > return bUpdate; }
//--------</ db_Update_Add_Record() >-------- }
//--------------------</ region: Methods Data >--------------------- #endregion /Methods Data
#region Sys //--------------------< region: Sys >--------------------- public void fx_Log(string sLog) { //------------< fx_Log() >------------ //* log Text to Textbox string sText = Textbox_Log.Text; sText = DateTime.Now + " " + sLog + Environment.NewLine + sText; if (sText.Length > 50000) { sText = sText.Substring(50000); } Textbox_Log.Text = sText; Textbox_Log.UpdateLayout();
//< refresh >
//</ refresh > //------------</ fx_Log() >------------ }
public void fx_Error_Log(string sError) { //------------< fx_Error_Log() >------------ //* log Text to Textbox
//</ refresh > //------------</ fx_Error_Log() >------------ }
//--------------------</ region: Sys >--------------------- #endregion /Sys
}
} |